Merge branch 'master' into fix-potentially-bad-code

This commit is contained in:
Alexey Milovidov 2023-07-24 03:02:57 +03:00 committed by GitHub
commit 1bc83afa54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
119 changed files with 2486 additions and 1093 deletions

2
.gitignore vendored
View File

@ -69,6 +69,7 @@ cmake-build-*
*.pyc
__pycache__
*.pytest_cache
.mypy_cache
test.cpp
CPackConfig.cmake
@ -167,3 +168,4 @@ tests/integration/**/_gen
/rust/**/target
# It is autogenerated from *.in
/rust/**/.cargo/config.toml
/rust/**/vendor

View File

@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
rustup target add aarch64-apple-darwin && \
rustup target add powerpc64le-unknown-linux-gnu
# Create vendor cache for cargo.
#
# Note that the config.toml for the root is used, so you will not be able to
# install any other crates except those which have been vendored (since if
# there is a "replace-with" for some source, cargo will not look at any
# remote other than this one).
#
# Notes for the command itself:
# - --chown is required to preserve the rights
# - unstable-options for -C
# - chmod is required to fix the permissions, since builds are running from a different user
# - copy of the Cargo.lock is required for proper dependencies versions
# - cargo vendor --sync is required to work around bug [1].
#
# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23
COPY --chown=root:root /rust /rust/packages
RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \
cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \
cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \
rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \
sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \
cat $CARGO_HOME/config.toml && \
mv /rust/packages/vendor /rust/vendor && \
chmod -R o=r+X /rust/vendor && \
ls -R -l /rust/packages && \
rm -r /rust/packages
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
# A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \

1
docker/packager/binary/rust Symbolic link
View File

@ -0,0 +1 @@
../../../rust

View File

@ -631,3 +631,53 @@ Result:
│ 100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
## hasSubsequence
Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
**Syntax**
``` sql
hasSubsequence(haystack, needle)
```
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
**Returned values**
- 1, if needle is a subsequence of haystack.
- 0, otherwise.
Type: `UInt8`.
**Examples**
``` sql
SELECT hasSubsequence('garbage', 'arg') ;
```
Result:
``` text
┌─hasSubsequence('garbage', 'arg')─┐
│ 1 │
└──────────────────────────────────┘
```
## hasSubsequenceCaseInsensitive
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
## hasSubsequenceUTF8
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
## hasSubsequenceCaseInsensitiveUTF8
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.

View File

@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
│ 3 │
└────────────────────────────────────────────────────────────┘
```
## hasSubsequence(haystack, needle) {#hasSubsequence}
Возвращает 1, если needle является подпоследовательностью haystack, иначе 0.
**Синтаксис**
``` sql
hasSubsequence(haystack, needle)
```
**Аргументы**
- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal).
- `needle` — подпоследовательность, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal).
**Возвращаемые значения**
- 1, если needle является подпоследовательностью haystack.
- 0, если подпоследовательность не найдена.
Тип: `UInt8`.
**Примеры**
Запрос:
``` sql
SELECT hasSubsequence('garbage', 'arg') ;
```
Результат:
``` text
┌─hasSubsequence('garbage', 'arg')─┐
│ 1 │
└──────────────────────────────────┘
```
## hasSubsequenceCaseInsensitive
Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра.
## hasSubsequenceUTF8
Такая же, как и [hasSubsequence](#hasSubsequence), при допущении, что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8.
## hasSubsequenceCaseInsensitiveUTF8
Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра.

View File

@ -739,11 +739,12 @@ try
[&]() -> std::vector<ProtocolServerMetrics>
{
std::vector<ProtocolServerMetrics> metrics;
metrics.reserve(servers_to_start_before_tables.size());
std::lock_guard lock(servers_lock);
metrics.reserve(servers_to_start_before_tables.size() + servers.size());
for (const auto & server : servers_to_start_before_tables)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
std::lock_guard lock(servers_lock);
for (const auto & server : servers)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
return metrics;
@ -1304,7 +1305,7 @@ try
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
std::lock_guard lock(servers_lock);
updateServers(*config, server_pool, async_metrics, servers);
updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
}
global_context->updateStorageConfiguration(*config);
@ -1406,10 +1407,27 @@ try
}
for (auto & server : servers_to_start_before_tables)
{
server.start();
LOG_INFO(log, "Listening for {}", server.getDescription());
std::lock_guard lock(servers_lock);
/// We should start interserver communications before tables (and, more importantly, shut them down after tables).
/// Because the server can wait for long-running queries (for example in tcp_handler) after the interserver handler has already been shut down.
/// In this case we would have replicated tables which are unable to send any parts to other replicas, but can still
/// communicate with ZooKeeper, execute merges, etc.
createInterserverServers(
config(),
interserver_listen_hosts,
listen_try,
server_pool,
async_metrics,
servers_to_start_before_tables,
/* start_servers= */ false);
for (auto & server : servers_to_start_before_tables)
{
server.start();
LOG_INFO(log, "Listening for {}", server.getDescription());
}
}
/// Initialize access storages.
@ -1529,10 +1547,13 @@ try
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
size_t current_connections = 0;
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
std::lock_guard lock(servers_lock);
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
}
}
if (current_connections)
@ -1637,26 +1658,17 @@ try
global_context->initializeTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
if (server_settings.total_memory_profiler_step)
UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
if (total_memory_profiler_step)
{
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
total_memory_tracker.setProfilerStep(total_memory_profiler_step);
}
if (server_settings.total_memory_tracker_sample_probability > 0.0)
double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
if (total_memory_tracker_sample_probability > 0.0)
{
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
}
if (server_settings.total_memory_profiler_sample_min_allocation_size)
{
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
}
if (server_settings.total_memory_profiler_sample_max_allocation_size)
{
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
}
#endif
@ -1714,7 +1726,7 @@ try
{
std::lock_guard lock(servers_lock);
createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers);
createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers);
if (servers.empty())
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"No servers started (add valid listen_host and 'tcp_port' or 'http_port' "
@ -1972,7 +1984,6 @@ HTTPContextPtr Server::httpContext() const
void Server::createServers(
Poco::Util::AbstractConfiguration & config,
const Strings & listen_hosts,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
@ -2194,6 +2205,23 @@ void Server::createServers(
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
});
}
}
void Server::createInterserverServers(
Poco::Util::AbstractConfiguration & config,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers)
{
const Settings & settings = global_context->getSettingsRef();
Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)
@ -2242,14 +2270,14 @@ void Server::createServers(
#endif
});
}
}
void Server::updateServers(
Poco::Util::AbstractConfiguration & config,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers)
std::vector<ProtocolServerAdapter> & servers,
std::vector<ProtocolServerAdapter> & servers_to_start_before_tables)
{
Poco::Logger * log = &logger();
@ -2275,11 +2303,19 @@ void Server::updateServers(
Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config();
std::vector<ProtocolServerAdapter *> all_servers;
all_servers.reserve(servers.size() + servers_to_start_before_tables.size());
for (auto & server : servers)
all_servers.push_back(&server);
for (auto & server : servers_to_start_before_tables)
all_servers.push_back(&server);
for (auto * server : all_servers)
{
if (!server.isStopping())
if (!server->isStopping())
{
std::string port_name = server.getPortName();
std::string port_name = server->getPortName();
bool has_host = false;
bool is_http = false;
if (port_name.starts_with("protocols."))
@ -2317,27 +2353,29 @@ void Server::updateServers(
/// NOTE: better to compare using getPortName() over using
/// dynamic_cast<> since HTTPServer is also used for prometheus and
/// internal replication communications.
is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port";
is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port";
}
if (!has_host)
has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end();
has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end();
bool has_port = !config.getString(port_name, "").empty();
bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers");
if (force_restart)
LOG_TRACE(log, "<http_handlers> had been changed, will reload {}", server.getDescription());
LOG_TRACE(log, "<http_handlers> had been changed, will reload {}", server->getDescription());
if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart)
if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart)
{
server.stop();
LOG_INFO(log, "Stopped listening for {}", server.getDescription());
server->stop();
LOG_INFO(log, "Stopped listening for {}", server->getDescription());
}
}
}
createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true);
std::erase_if(servers, std::bind_front(check_server, ""));
std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, ""));
}
}

View File

@ -102,6 +102,14 @@ private:
void createServers(
Poco::Util::AbstractConfiguration & config,
const Strings & listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
bool start_servers = false);
void createInterserverServers(
Poco::Util::AbstractConfiguration & config,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
@ -113,7 +121,8 @@ private:
Poco::Util::AbstractConfiguration & config,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers);
std::vector<ProtocolServerAdapter> & servers,
std::vector<ProtocolServerAdapter> & servers_to_start_before_tables);
};
}

4
rust/.dockerignore Normal file
View File

@ -0,0 +1,4 @@
# Just in case ignore any cargo stuff (and just in case someone will run this
# docker build locally with build context using folder root):
target
vendor

4
rust/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
# This is for tar --exclude-vcs-ignores (and just in case someone will run
# docker build locally with build context created via tar):
target
vendor

92
rust/BLAKE3/Cargo.lock generated
View File

@ -1,92 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_blake3"
version = "0.1.0"
dependencies = [
"blake3",
"libc",
]
[[package]]
name = "arrayref"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "blake3"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
"digest",
]
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
]
[[package]]
name = "generic-array"
version = "0.14.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

View File

@ -55,6 +55,8 @@ function(clickhouse_import_crate)
endif()
endif()
# Note, here --offline is not used, since on CI vendor archive is used, and
# passing --offline here will be inconvenient for local development.
corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
endfunction()

View File

@ -2,6 +2,22 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_blake3"
version = "0.1.0"
dependencies = [
"blake3",
"libc",
]
[[package]]
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"serde_json",
]
[[package]]
name = "_ch_rust_skim_rust"
version = "0.1.0"
@ -12,6 +28,32 @@ dependencies = [
"term",
]
[[package]]
name = "addr2line"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
@ -36,6 +78,31 @@ dependencies = [
"libc",
]
[[package]]
name = "anyhow"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "arrayref"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
[[package]]
name = "arrayvec"
version = "0.7.4"
@ -48,6 +115,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "backtrace"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "beef"
version = "0.5.2"
@ -60,6 +142,29 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "blake3"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
"digest",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.13.0"
@ -93,6 +198,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"stacker",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
@ -103,6 +218,12 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "constant_time_eq"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
[[package]]
name = "core-foundation-sys"
version = "0.8.4"
@ -177,10 +298,41 @@ dependencies = [
]
[[package]]
name = "cxx"
version = "1.0.101"
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "cxx"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d"
dependencies = [
"cc",
"cxxbridge-flags",
@ -190,9 +342,9 @@ dependencies = [
[[package]]
name = "cxx-build"
version = "1.0.101"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397"
dependencies = [
"cc",
"codespan-reporting",
@ -200,24 +352,24 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn 2.0.26",
"syn 2.0.27",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.101"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0"
[[package]]
name = "cxxbridge-macro"
version = "1.0.101"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.26",
"syn 2.0.27",
]
[[package]]
@ -296,6 +448,17 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
@ -319,9 +482,27 @@ dependencies = [
[[package]]
name = "either"
version = "1.8.1"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fnv"
@ -338,6 +519,16 @@ dependencies = [
"thread_local",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.10"
@ -349,6 +540,33 @@ dependencies = [
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "gimli"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.3.2"
@ -384,6 +602,31 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "js-sys"
version = "0.3.64"
@ -444,6 +687,21 @@ dependencies = [
"autocfg",
]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nix"
version = "0.24.3"
@ -470,10 +728,20 @@ dependencies = [
]
[[package]]
name = "num-traits"
version = "0.2.15"
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
dependencies = [
"autocfg",
]
@ -488,6 +756,15 @@ dependencies = [
"libc",
]
[[package]]
name = "object"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.18.0"
@ -509,6 +786,41 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.31"
@ -589,12 +901,24 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustversion"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "scopeguard"
version = "1.2.0"
@ -608,10 +932,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
[[package]]
name = "serde"
version = "1.0.171"
name = "semver"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.27",
]
[[package]]
name = "serde_json"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "skim"
@ -638,12 +1009,74 @@ dependencies = [
"vte",
]
[[package]]
name = "sqlformat"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
dependencies = [
"log",
"serde",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "1.0.109"
@ -657,9 +1090,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.26"
version = "2.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
dependencies = [
"proc-macro2",
"quote",
@ -688,22 +1121,22 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.43"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.43"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.26",
"syn 2.0.27",
]
[[package]]
@ -766,6 +1199,12 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "typenum"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "unicode-ident"
version = "1.0.11"
@ -778,12 +1217,30 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vte"
version = "0.11.1"
@ -838,7 +1295,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.26",
"syn 2.0.27",
"wasm-bindgen-shared",
]
@ -860,7 +1317,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.26",
"syn 2.0.27",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -967,3 +1424,9 @@ name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

12
rust/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
# workspace is required to vendor crates for all packages.
[workspace]
members = [
"BLAKE3",
"skim",
"prql",
]
resolver = "2"
# FIXME: even though the profiles should be defined in the main cargo config we
# cannot do this yet, since we compile each package separately, so you should
# ignore the warning from cargo about this.

569
rust/prql/Cargo.lock generated
View File

@ -1,569 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"serde_json",
]
[[package]]
name = "addr2line"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "backtrace"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"stacker",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "equivalent"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "indexmap"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "object"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustversion"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
[[package]]
name = "ryu"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
]
[[package]]
name = "serde_json"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sqlformat"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
dependencies = [
"log",
"serde",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

View File

@ -187,6 +187,7 @@
M(CacheFileSegments, "Number of existing cache file segments") \
M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \
M(FilesystemCacheSize, "Filesystem cache size in bytes") \
M(FilesystemCacheSizeLimit, "Filesystem cache size limit in bytes") \
M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \
M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \
M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \

View File

@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
}
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size});
@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size)
}
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size});
@ -534,12 +534,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
;
}
bool MemoryTracker::isSizeOkForSampling(UInt64 size) const
{
/// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation
return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes);
}
bool canEnqueueBackgroundTask()
{
auto limit = background_memory_tracker.getSoftLimit();

View File

@ -67,12 +67,6 @@ private:
/// To randomly sample allocations and deallocations in trace_log.
double sample_probability = 0;
/// Randomly sample allocations only larger or equal to this size
UInt64 min_allocation_size_bytes = 0;
/// Randomly sample allocations only smaller or equal to this size
UInt64 max_allocation_size_bytes = 0;
/// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
/// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
std::atomic<MemoryTracker *> parent {};
@ -94,8 +88,6 @@ private:
void setOrRaiseProfilerLimit(Int64 value);
bool isSizeOkForSampling(UInt64 size) const;
/// allocImpl(...) and free(...) should not be used directly
friend struct CurrentMemoryTracker;
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
@ -173,16 +165,6 @@ public:
sample_probability = value;
}
void setSampleMinAllocationSize(UInt64 value)
{
min_allocation_size_bytes = value;
}
void setSampleMaxAllocationSize(UInt64 value)
{
max_allocation_size_bytes = value;
}
void setProfilerStep(Int64 value)
{
profiler_step = value;

View File

@ -15,6 +15,7 @@
#include <base/sort.h>
#include <base/getFQDNOrHostName.h>
#include "Common/ZooKeeper/IKeeper.h"
#include <Common/DNSResolver.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
@ -82,6 +83,9 @@ void ZooKeeper::init(ZooKeeperArgs args_)
if (secure)
host_string.erase(0, strlen("secure://"));
/// We want to resolve all hosts without DNS cache for keeper connection.
Coordination::DNSResolver::instance().removeHostFromCache(host_string);
const Poco::Net::SocketAddress host_socket_addr{host_string};
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, secure});

View File

@ -43,11 +43,12 @@ void LimitedReadPacket::readPayloadWithUnpacked(ReadBuffer & in)
IMySQLReadPacket::readPayloadWithUnpacked(limited);
}
uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read)
{
char c{};
uint64_t buf = 0;
buffer.readStrict(c);
bytes_read = 1;
auto cc = static_cast<uint8_t>(c);
switch (cc)
{
@ -56,12 +57,15 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
break;
case 0xfc:
buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
bytes_read += 2;
break;
case 0xfd:
buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
bytes_read += 3;
break;
case 0xfe:
buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
bytes_read += 8;
break;
default:
return cc;
@ -69,6 +73,12 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
return buf;
}
/// Convenience overload for callers that do not care how many bytes the
/// length-encoded integer occupied: forwards to the counting overload and
/// drops the byte count.
uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
{
    UInt16 discarded_byte_count{0};
    const uint64_t value = readLengthEncodedNumber(buffer, discarded_byte_count);
    return value;
}
void readLengthEncodedString(String & s, ReadBuffer & buffer)
{
uint64_t len = readLengthEncodedNumber(buffer);

View File

@ -34,6 +34,7 @@ public:
};
uint64_t readLengthEncodedNumber(ReadBuffer & buffer);
uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read);
void readLengthEncodedString(String & s, ReadBuffer & buffer);
}

View File

@ -0,0 +1,301 @@
#include "MySQLCharset.h"
#include "config.h"
#include <iostream>
#include <Common/Exception.h>
#if USE_ICU
#include <unicode/ucnv.h>
#define CHUNK_SIZE 1024
static const char * TARGET_CHARSET = "utf8";
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_EXCEPTION;
}
/// Lookup table from a numeric id to the name of the charset used for conversion.
/// Several ids map to the same charset name.
/// needConvert() reports true only for ids present here; getCharsetFromId() and
/// convertFromId() use the mapped name.
/// NOTE(review): the ids look like MySQL collation ids (callers pass a value named
/// `collation_id`) — confirm against the MySQL collation list.
const std::unordered_map<Int32, String> MySQLCharset::charsets
= {
{1, "big5"},
{2, "latin2"},
{3, "dec8"},
{4, "cp850"},
{5, "latin1"},
{6, "hp8"},
{7, "koi8r"},
{8, "latin1"},
{9, "latin2"},
{10, "swe7"},
{11, "ascii"},
{12, "ujis"},
{13, "sjis"},
{14, "cp1251"},
{15, "latin1"},
{16, "hebrew"},
{18, "tis620"},
{19, "euckr"},
{20, "latin7"},
{21, "latin2"},
{22, "koi8u"},
{23, "cp1251"},
{24, "gb2312"},
{25, "greek"},
{26, "cp1250"},
{27, "latin2"},
{28, "gbk"},
{29, "cp1257"},
{30, "latin5"},
{31, "latin1"},
{32, "armscii8"},
{34, "cp1250"},
{35, "ucs2"},
{36, "cp866"},
{37, "keybcs2"},
{38, "macce"},
{39, "macroman"},
{40, "cp852"},
{41, "latin7"},
{42, "latin7"},
{43, "macce"},
{44, "cp1250"},
{47, "latin1"},
{48, "latin1"},
{49, "latin1"},
{50, "cp1251"},
{51, "cp1251"},
{52, "cp1251"},
{53, "macroman"},
{54, "utf16"},
{55, "utf16"},
{56, "utf16le"},
{57, "cp1256"},
{58, "cp1257"},
{59, "cp1257"},
{60, "utf32"},
{61, "utf32"},
{62, "utf16le"},
{64, "armscii8"},
{65, "ascii"},
{66, "cp1250"},
{67, "cp1256"},
{68, "cp866"},
{69, "dec8"},
{70, "greek"},
{71, "hebrew"},
{72, "hp8"},
{73, "keybcs2"},
{74, "koi8r"},
{75, "koi8u"},
{77, "latin2"},
{78, "latin5"},
{79, "latin7"},
{80, "cp850"},
{81, "cp852"},
{82, "swe7"},
{84, "big5"},
{85, "euckr"},
{86, "gb2312"},
{87, "gbk"},
{88, "sjis"},
{89, "tis620"},
{90, "ucs2"},
{91, "ujis"},
{92, "geostd8"},
{93, "geostd8"},
{94, "latin1"},
{95, "cp932"},
{96, "cp932"},
{97, "eucjpms"},
{98, "eucjpms"},
{99, "cp1250"},
{101, "utf16"},
{102, "utf16"},
{103, "utf16"},
{104, "utf16"},
{105, "utf16"},
{106, "utf16"},
{107, "utf16"},
{108, "utf16"},
{109, "utf16"},
{110, "utf16"},
{111, "utf16"},
{112, "utf16"},
{113, "utf16"},
{114, "utf16"},
{115, "utf16"},
{116, "utf16"},
{117, "utf16"},
{118, "utf16"},
{119, "utf16"},
{120, "utf16"},
{121, "utf16"},
{122, "utf16"},
{123, "utf16"},
{124, "utf16"},
{128, "ucs2"},
{129, "ucs2"},
{130, "ucs2"},
{131, "ucs2"},
{132, "ucs2"},
{133, "ucs2"},
{134, "ucs2"},
{135, "ucs2"},
{136, "ucs2"},
{137, "ucs2"},
{138, "ucs2"},
{139, "ucs2"},
{140, "ucs2"},
{141, "ucs2"},
{142, "ucs2"},
{143, "ucs2"},
{144, "ucs2"},
{145, "ucs2"},
{146, "ucs2"},
{147, "ucs2"},
{148, "ucs2"},
{149, "ucs2"},
{150, "ucs2"},
{151, "ucs2"},
{159, "ucs2"},
{160, "utf32"},
{161, "utf32"},
{162, "utf32"},
{163, "utf32"},
{164, "utf32"},
{165, "utf32"},
{166, "utf32"},
{167, "utf32"},
{168, "utf32"},
{169, "utf32"},
{170, "utf32"},
{171, "utf32"},
{172, "utf32"},
{173, "utf32"},
{174, "utf32"},
{175, "utf32"},
{176, "utf32"},
{177, "utf32"},
{178, "utf32"},
{179, "utf32"},
{180, "utf32"},
{181, "utf32"},
{182, "utf32"},
{183, "utf32"},
{248, "gb18030"},
{249, "gb18030"},
{250, "gb18030"}
};
/// Releases every ICU converter stored in the cache and empties the cache.
/// The work is done under `mutex`. Without ICU (USE_ICU == 0) the body is empty.
MySQLCharset::~MySQLCharset()
{
#if USE_ICU
    std::lock_guard guard(mutex);
    for (auto & [charset_name, converter] : conv_cache)
        ucnv_close(converter);
    conv_cache.clear();
#endif
}
/// Returns true when `id` has an entry in the `charsets` table,
/// i.e. a source charset name is known for it.
bool MySQLCharset::needConvert(UInt32 id)
{
    const bool is_known = charsets.contains(id);
    return is_known;
}
/// Returns the charset name registered for `id` in the `charsets` table.
/// The lookup uses at(), so an id that is missing from the table raises
/// std::out_of_range; check needConvert(id) first.
String MySQLCharset::getCharsetFromId(UInt32 id)
{
    const String & charset_name = charsets.at(id);
    return charset_name;
}
/// Returns an ICU converter for `charset`, opening it on first use and keeping it in `conv_cache`.
///
/// A converter taken from the cache is reset before it is returned, so state left over from a
/// previous conversion does not carry over. The pointer stays owned by the cache (it is closed
/// in the destructor); callers must not close it.
///
/// This function does not lock `mutex` itself; convertFromId() calls it with the lock held.
///
/// Throws Exception(UNKNOWN_EXCEPTION) when ICU fails to open the converter.
/// Without ICU (USE_ICU == 0) it always returns nullptr.
UConverter * MySQLCharset::getCachedConverter(const String & charset [[maybe_unused]])
{
    UConverter * conv = nullptr;
#if USE_ICU
    /// Fast path: the converter has already been opened.
    if (auto cached = conv_cache.find(charset); cached != conv_cache.end())
    {
        conv = cached->second;
        /// Reset to the initial state.
        ucnv_reset(conv);
        return conv;
    }

    UErrorCode error = U_ZERO_ERROR;
    conv = ucnv_open(charset.c_str(), &error);

    /// ICU reports warnings as non-zero (negative) codes while still returning a usable converter.
    /// Only real failures must be rejected: comparing against U_ZERO_ERROR would throw on a
    /// warning and leak the converter that was just opened.
    if (U_FAILURE(error))
    {
        throw Exception(
            ErrorCodes::UNKNOWN_EXCEPTION, "MySQLCharset::getCachedConveter: ucnv_open failed, error={}", std::to_string(error));
    }

    /// The key is known to be absent here, so emplace always inserts.
    conv_cache.emplace(charset, conv);
#endif
    return conv;
}
/// Converts `from`, encoded in the charset registered for `id`, into TARGET_CHARSET and stores
/// the result in `to` (any previous content of `to` is discarded).
///
/// @param id    key into the `charsets` table.
/// @param to    receives the converted bytes.
/// @param from  source bytes.
/// @return 0 (U_ZERO_ERROR) on success, otherwise an ICU UErrorCode value.
///         U_ILLEGAL_ARGUMENT_ERROR is returned when no charset is registered for `id`.
///         ICU warnings are reported as success.
///
/// The whole conversion runs under `mutex`, because the cached converters are shared.
/// May throw if a converter cannot be opened (see getCachedConverter()).
/// Without ICU (USE_ICU == 0) the input is copied to `to` unchanged and 0 is returned.
Int32 MySQLCharset::convertFromId(UInt32 id [[maybe_unused]], String & to, const String & from)
{
#if USE_ICU
    std::lock_guard lock(mutex);
    to.clear();

    /// Look the id up directly instead of going through getCharsetFromId(): that one uses at()
    /// and would throw std::out_of_range for an unknown id instead of reporting an error code.
    const auto charset_it = charsets.find(id);
    if (charset_it == charsets.end() || charset_it->second.empty())
        return U_ILLEGAL_ARGUMENT_ERROR;
    const String & source_charset = charset_it->second;

    UChar pivot_buf[CHUNK_SIZE]; // stream mode must use this buf
    char target_buf[CHUNK_SIZE];

    const char * source = from.data();
    const char * const source_end = source + from.size();
    const char * const target_end = target_buf + CHUNK_SIZE;

    UConverter * out_conv = getCachedConverter(TARGET_CHARSET);
    UConverter * in_conv = getCachedConverter(source_charset);

    /// Pivot pointers are kept across iterations so that UTF-16 data still sitting in the
    /// pivot buffer is not lost when the target buffer overflows.
    UChar * pivot = pivot_buf;
    UChar * pivot2 = pivot_buf;

    UErrorCode error = U_ZERO_ERROR;
    do
    {
        /// Each round fills target_buf from the start; an overflow only means "call again".
        error = U_ZERO_ERROR;
        char * cur_target = target_buf;
        ucnv_convertEx(
            out_conv,
            in_conv,
            &cur_target,
            target_end,
            &source,
            source_end,
            pivot_buf,
            &pivot,
            &pivot2,
            pivot_buf + CHUNK_SIZE,
            false,
            true,
            &error);
        to.append(target_buf, cur_target - target_buf);
    } while (error == U_BUFFER_OVERFLOW_ERROR);

    /// ucnv_convertEx sets U_STRING_NOT_TERMINATED_WARNING when the output exactly fills the
    /// target buffer. That is a successful conversion, and callers test the result against 0,
    /// so warnings are folded into success.
    return U_FAILURE(error) ? error : U_ZERO_ERROR;
#else
    to = from;
    return 0;
#endif
}
}

View File

@ -0,0 +1,26 @@
#pragma once
#include <unordered_map>
#include <base/types.h>
#include <boost/noncopyable.hpp>
#include <mutex>
struct UConverter;
namespace DB
{
/// Charset conversion helper for strings received from MySQL.
/// Maps a numeric id to a charset name (static `charsets` table) and converts strings
/// using cached UConverter objects. Non-copyable: it owns the cached converters.
class MySQLCharset final : boost::noncopyable
{
public:
/// Closes all cached converters.
~MySQLCharset();
/// Returns the charset name registered for `id`.
String getCharsetFromId(UInt32 id);
/// Converts `from` (in the charset registered for `id`) into `to`; returns 0 on success.
Int32 convertFromId(UInt32 id, String & to, const String & from);
/// Returns true if `id` is present in the `charsets` table.
bool needConvert(UInt32 id);
private:
/// Guards `conv_cache` and the converters stored in it.
std::mutex mutex;
/// Charset name -> converter opened for it; entries are owned by this object.
std::unordered_map<String, UConverter *> conv_cache;
/// Returns the cached converter for `charset`, opening and caching it on first use.
UConverter * getCachedConverter(const String & charset);
/// Numeric id -> charset name.
static const std::unordered_map<Int32, String> charsets;
};
using MySQLCharsetPtr = std::shared_ptr<MySQLCharset>;
}

View File

@ -187,9 +187,9 @@ namespace MySQLReplication
size_t null_bitmap_size = (column_count + 7) / 8;
readBitmap(payload, null_bitmap, null_bitmap_size);
/// Ignore MySQL 8.0 optional metadata fields.
/// Parse MySQL 8.0 optional metadata fields.
/// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
payload.ignoreAll();
parseOptionalMetaField(payload);
}
/// Types that are not used in the binlog event:
@ -263,6 +263,118 @@ namespace MySQLReplication
}
}
/// Parses the optional metadata fields at the end of a table map event.
///
/// The payload is read as a sequence of fields: a one-byte type tag, a length-encoded
/// length, then that many bytes of data. Only DEFAULT_CHARSET and COLUMN_CHARSET are
/// decoded (into default_charset / default_charset_pairs and column_charset); every other
/// field is skipped. A field with zero length stops parsing and discards the rest of the
/// payload.
void TableMapEvent::parseOptionalMetaField(ReadBuffer & payload)
{
    for (char field_type = 0; payload.read(field_type);)
    {
        const UInt64 field_len = readLengthEncodedNumber(payload);
        if (field_len == 0)
        {
            payload.ignoreAll();
            return;
        }

        switch (field_type)
        {
            case DEFAULT_CHARSET:
            {
                /// Layout: the default charset, then (column index, charset) pairs
                /// until `field_len` bytes have been consumed.
                UInt32 consumed = 0;
                const auto read_counted = [&]() -> UInt32
                {
                    UInt16 step = 0;
                    const auto number = static_cast<UInt32>(readLengthEncodedNumber(payload, step));
                    consumed += step;
                    return number;
                };

                default_charset = read_counted();
                while (consumed < field_len)
                {
                    /// Two separate statements keep the read order well defined.
                    const UInt32 col_index = read_counted();
                    const UInt32 col_charset = read_counted();
                    default_charset_pairs.emplace(col_index, col_charset);
                }
                break;
            }
            case COLUMN_CHARSET:
            {
                /// Layout: one length-encoded id per entry, back to back.
                UInt32 consumed = 0;
                while (consumed < field_len)
                {
                    UInt16 step = 0;
                    const auto collation_id = static_cast<UInt32>(readLengthEncodedNumber(payload, step));
                    column_charset.emplace_back(collation_id);
                    consumed += step;
                }
                break;
            }
            /// It may be useful, parse later
            case SIGNEDNESS:
            case COLUMN_NAME:
            case SET_STR_VALUE:
            case GEOMETRY_TYPE:
            case SIMPLE_PRIMARY_KEY:
            case PRIMARY_KEY_WITH_PREFIX:
            case ENUM_AND_SET_DEFAULT_CHARSET:
            case COLUMN_VISIBILITY:
            default:
                /// Not interpreted: skip the field body.
                payload.ignore(field_len);
                break;
        }
    }
}
/// Returns the charset id that applies to the column at `column_index`.
///
/// The parsed metadata is indexed by the ordinal of the column among the string-like
/// columns (MYSQL_TYPE_STRING, MYSQL_TYPE_VAR_STRING, MYSQL_TYPE_VARCHAR, MYSQL_TYPE_BLOB),
/// not by the column index, so that ordinal is computed first.
///
/// Resolution order:
///  1. `column_charset`, when it is not empty: the entry at the ordinal, if it exists;
///  2. otherwise `default_charset_pairs`: the entry keyed by the ordinal, if it exists;
///  3. otherwise `default_charset`.
UInt32 TableMapEvent::getColumnCharsetId(UInt32 column_index)
{
    if (column_charset.empty() && default_charset_pairs.empty())
        return default_charset;

    /// Count string-like columns in [0, column_index]. The extra bound on column_type.size()
    /// prevents reading past the end for an out-of-range column_index.
    UInt32 string_columns = 0;
    for (UInt32 i = 0; i <= column_index && i < column_type.size(); ++i)
    {
        switch (column_type[i])
        {
            case MYSQL_TYPE_STRING:
            case MYSQL_TYPE_VAR_STRING:
            case MYSQL_TYPE_VARCHAR:
            case MYSQL_TYPE_BLOB:
                ++string_columns;
                break;
            default:
                break;
        }
    }

    /// No string-like column up to and including column_index: nothing to look up.
    if (string_columns == 0)
        return default_charset;

    const UInt32 str_index = string_columns - 1;

    if (!column_charset.empty())
    {
        if (str_index < column_charset.size())
            return column_charset[str_index];
    }
    else if (auto it = default_charset_pairs.find(str_index); it != default_charset_pairs.end())
    {
        /// Single lookup instead of contains() followed by operator[].
        return it->second;
    }

    return default_charset;
}
void TableMapEvent::dump(WriteBuffer & out) const
{
header.dump(out);
@ -319,6 +431,22 @@ namespace MySQLReplication
}
}
/// Returns `val` converted from the charset of column `i` when the table map's charset
/// helper knows that charset and the conversion succeeds; otherwise returns `val` unchanged.
static inline String convertCharsetIfNeeded(
    const std::shared_ptr<TableMapEvent> & table_map,
    UInt32 i,
    const String & val)
{
    const auto collation_id = table_map->getColumnCharsetId(i);

    /// Unknown charset id: pass the bytes through untouched.
    if (!table_map->charset_ptr->needConvert(collation_id))
        return val;

    String converted;
    const auto status = table_map->charset_ptr->convertFromId(collation_id, converted, val);

    /// A non-zero status means the conversion failed; fall back to the raw value.
    if (status != 0)
        return val;

    return converted;
}
/// Types that are not used in the binlog event:
/// MYSQL_TYPE_SET
/// MYSQL_TYPE_TINY_BLOB
@ -727,7 +855,7 @@ namespace MySQLReplication
String val;
val.resize(size);
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
row.push_back(Field{String{val}});
row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
break;
}
case MYSQL_TYPE_STRING:
@ -745,7 +873,7 @@ namespace MySQLReplication
String val;
val.resize(size);
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
row.push_back(Field{String{val}});
row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
break;
}
case MYSQL_TYPE_GEOMETRY:
@ -777,7 +905,10 @@ namespace MySQLReplication
String val;
val.resize(size);
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
row.push_back(Field{String{val}});
row.emplace_back(Field{
field_type == MYSQL_TYPE_BLOB
? convertCharsetIfNeeded(table_map, i, val)
: val});
break;
}
default:
@ -977,7 +1108,7 @@ namespace MySQLReplication
map_event_header.parse(event_payload);
if (doReplicate(map_event_header.schema, map_event_header.table))
{
event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header);
event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header, flavor_charset);
event->parseEvent(event_payload);
auto table_map = std::static_pointer_cast<TableMapEvent>(event);
table_maps[table_map->table_id] = table_map;

View File

@ -2,6 +2,7 @@
#include <Core/Field.h>
#include <Core/MySQL/PacketsReplication.h>
#include <Core/MySQL/MySQLGtid.h>
#include <Core/MySQL/MySQLCharset.h>
#include <base/types.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
@ -324,9 +325,24 @@ namespace MySQLReplication
UInt32 column_count;
std::vector<UInt8> column_type;
std::vector<UInt16> column_meta;
/// Character set of string columns
std::vector<UInt32> column_charset;
/// Character set of string columns,
/// optimized to minimize space when many
/// columns have the same charset
UInt32 default_charset = 255; /// utf8mb4_0900_ai_ci
std::unordered_map<UInt32, UInt32> default_charset_pairs;
/// Points to flavor_charset object
MySQLCharsetPtr charset_ptr;
Bitmap null_bitmap;
TableMapEvent(EventHeader && header_, const TableMapEventHeader & map_event_header) : EventBase(std::move(header_)), column_count(0)
TableMapEvent(
EventHeader && header_,
const TableMapEventHeader & map_event_header,
const MySQLCharsetPtr & charset_ptr_)
: EventBase(std::move(header_))
, column_count(0)
, charset_ptr(charset_ptr_)
{
table_id = map_event_header.table_id;
flags = map_event_header.flags;
@ -336,10 +352,52 @@ namespace MySQLReplication
table = map_event_header.table;
}
void dump(WriteBuffer & out) const override;
UInt32 getColumnCharsetId(UInt32 column_index);
/// Type tags of the optional metadata fields of a table map event.
/// See:
///   https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
///   https://github.com/mysql/mysql-server/blob/8.0/libbinlogevents/include/rows_event.h#L50
/// DEFAULT_CHARSET and COLUMN_CHARSET don't appear together, and
/// ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET don't appear together.
enum OptionalMetaType : char
{
    /// UNSIGNED flag of numeric columns.
    SIGNEDNESS = 1,
    /// Character set of string columns, in the form optimized for the case
    /// when many columns share the same charset.
    DEFAULT_CHARSET = 2,
    /// Character set of string columns, in the form optimized for the case
    /// when columns have many different charsets.
    COLUMN_CHARSET = 3,
    /// Column names.
    COLUMN_NAME = 4,
    /// String value of SET columns.
    SET_STR_VALUE = 5,
    /// String value of ENUM columns.
    ENUM_STR_VALUE = 6,
    /// Real type of geometry columns.
    GEOMETRY_TYPE = 7,
    /// Primary key without prefix.
    SIMPLE_PRIMARY_KEY = 8,
    /// Primary key with prefix.
    PRIMARY_KEY_WITH_PREFIX = 9,
    /// Character set of ENUM and SET columns, in the form optimized for the case
    /// when many columns share the same charset.
    ENUM_AND_SET_DEFAULT_CHARSET = 10,
    /// Character set of ENUM and SET columns, in the form optimized for the case
    /// when columns have many different charsets.
    ENUM_AND_SET_COLUMN_CHARSET = 11,
    /// Flag to indicate the column visibility attribute.
    COLUMN_VISIBILITY = 12
};
protected:
void parseImpl(ReadBuffer & payload) override;
void parseMeta(String meta);
void parseOptionalMetaField(ReadBuffer & payload);
};
enum RowsEventFlags
@ -486,6 +544,7 @@ namespace MySQLReplication
std::unordered_set<String> replicate_tables;
std::map<UInt64, std::shared_ptr<TableMapEvent> > table_maps;
size_t checksum_signature_length = 4;
MySQLCharsetPtr flavor_charset = std::make_shared<MySQLCharset>();
bool doReplicate(UInt64 table_id);
bool doReplicate(const String & db, const String & table_name);

View File

@ -81,12 +81,8 @@ namespace DB
M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
\
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0)
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0)
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)

View File

@ -427,9 +427,7 @@ class IColumn;
M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \
M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \
\
M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \

View File

@ -0,0 +1,351 @@
#include <Core/MySQL/MySQLCharset.h>
#include <gtest/gtest.h>
#include <cstdio>
namespace DB
{
/// One row of the expectation table used by the FullCharsetCheck test.
/// Rows are brace-initialized, so the order of the fields matters.
struct CheckResult
{
/// Collation id passed to MySQLCharset.
Int32 id;
/// Charset name compared with getCharsetFromId(id); the test only compares it
/// for ids that need conversion.
String name;
/// Expected result of needConvert(id).
bool need_convert;
};
/// Spot-checks needConvert() and getCharsetFromId() on a handful of collation ids.
TEST(CharsetTest, CharsetTest)
{
    MySQLCharset charset;

    const UInt32 big5_collation = 1;
    const UInt32 gbk_collation = 28;
    const UInt32 gb2312_collation = 24;
    const UInt32 utf8mb4_0900_ai_ci_collation = 255;

    /// The Chinese charsets are reported as needing conversion.
    EXPECT_TRUE(charset.needConvert(big5_collation));
    EXPECT_TRUE(charset.needConvert(gbk_collation));
    EXPECT_TRUE(charset.needConvert(gb2312_collation));

    /// utf8mb4 and ids that are not in the table (0, 1000) are not.
    EXPECT_FALSE(charset.needConvert(utf8mb4_0900_ai_ci_collation));
    EXPECT_FALSE(charset.needConvert(0));
    EXPECT_FALSE(charset.needConvert(1000));

    /// Collation id -> charset name.
    EXPECT_EQ(charset.getCharsetFromId(big5_collation), String("big5"));
    EXPECT_EQ(charset.getCharsetFromId(gbk_collation), String("gbk"));
    EXPECT_EQ(charset.getCharsetFromId(gb2312_collation), String("gb2312"));
}
/// Converts the same two-character text from gbk, gb2312 and big5 and checks
/// that every conversion succeeds (returns 0) and yields the same expected bytes.
TEST(CharsetTest, ConvTest)
{
    MySQLCharset charset;

    const UInt32 big5_collation = 1;
    const UInt32 gbk_collation = 28;
    const UInt32 gb2312_collation = 24;

    const String expected_text("\xe4\xbd\xa0\xe5\xa5\xbd"); // "你好" in UTF-8
    String encoded("\xc4\xe3\xba\xc3"); // gbk "你好"
    String decoded;

    Int32 status = charset.convertFromId(gbk_collation, decoded, encoded);
    EXPECT_EQ(status, 0);
    EXPECT_EQ(decoded, expected_text);

    /// The same input bytes are fed through the gb2312 collation as well.
    status = charset.convertFromId(gb2312_collation, decoded, encoded);
    EXPECT_EQ(status, 0);
    EXPECT_EQ(decoded, expected_text);

    encoded.assign("\xa7\x41\xa6\x6e"); // big5 "你好"
    status = charset.convertFromId(big5_collation, decoded, encoded);
    EXPECT_EQ(status, 0);
    EXPECT_EQ(decoded, expected_text);
}
/// Walks a table of collation ids and checks, for every row, that needConvert()
/// matches the expected flag and, for ids that need conversion, that
/// getCharsetFromId() returns the expected charset name.
/// Each row is {collation id, charset name, needs conversion}; the trailing
/// comment is the collation name.
TEST(CharsetTest, FullCharsetCheck)
{
    const CheckResult expected[] =
    {
        {1, "big5", true}, // "big5_chinese_ci"
        {2, "latin2", true}, // "latin2_czech_cs"
        {3, "dec8", true}, // "dec8_swedish_ci"
        {4, "cp850", true}, // "cp850_general_ci"
        {5, "latin1", true}, // "latin1_german1_ci"
        {6, "hp8", true}, // "hp8_english_ci"
        {7, "koi8r", true}, // "koi8r_general_ci"
        {8, "latin1", true}, // "latin1_swedish_ci"
        {9, "latin2", true}, // "latin2_general_ci"
        {10, "swe7", true}, // "swe7_swedish_ci"
        {11, "ascii", true}, // "ascii_general_ci"
        {12, "ujis", true}, // "ujis_japanese_ci"
        {13, "sjis", true}, // "sjis_japanese_ci"
        {14, "cp1251", true}, // "cp1251_bulgarian_ci"
        {15, "latin1", true}, // "latin1_danish_ci"
        {16, "hebrew", true}, // "hebrew_general_ci"
        {18, "tis620", true}, // "tis620_thai_ci"
        {19, "euckr", true}, // "euckr_korean_ci"
        {20, "latin7", true}, // "latin7_estonian_cs"
        {21, "latin2", true}, // "latin2_hungarian_ci"
        {22, "koi8u", true}, // "koi8u_general_ci"
        {23, "cp1251", true}, // "cp1251_ukrainian_ci"
        {24, "gb2312", true}, // "gb2312_chinese_ci"
        {25, "greek", true}, // "greek_general_ci"
        {26, "cp1250", true}, // "cp1250_general_ci"
        {27, "latin2", true}, // "latin2_croatian_ci"
        {28, "gbk", true}, // "gbk_chinese_ci"
        {29, "cp1257", true}, // "cp1257_lithuanian_ci"
        {30, "latin5", true}, // "latin5_turkish_ci"
        {31, "latin1", true}, // "latin1_german2_ci"
        {32, "armscii8", true}, // "armscii8_general_ci"
        {33, "utf8", false}, // "utf8_general_ci"
        {34, "cp1250", true}, // "cp1250_czech_cs"
        {35, "ucs2", true}, // "ucs2_general_ci"
        {36, "cp866", true}, // "cp866_general_ci"
        {37, "keybcs2", true}, // "keybcs2_general_ci"
        {38, "macce", true}, // "macce_general_ci"
        {39, "macroman", true}, // "macroman_general_ci"
        {40, "cp852", true}, // "cp852_general_ci"
        {41, "latin7", true}, // "latin7_general_ci"
        {42, "latin7", true}, // "latin7_general_cs"
        {43, "macce", true}, // "macce_bin"
        {44, "cp1250", true}, // "cp1250_croatian_ci"
        {45, "utf8mb4", false}, // "utf8mb4_general_ci"
        {46, "utf8mb4", false}, // "utf8mb4_bin"
        {47, "latin1", true}, // "latin1_bin"
        {48, "latin1", true}, // "latin1_general_ci"
        {49, "latin1", true}, // "latin1_general_cs"
        {50, "cp1251", true}, // "cp1251_bin"
        {51, "cp1251", true}, // "cp1251_general_ci"
        {52, "cp1251", true}, // "cp1251_general_cs"
        {53, "macroman", true}, // "macroman_bin"
        {54, "utf16", true}, // "utf16_general_ci"
        {55, "utf16", true}, // "utf16_bin"
        {56, "utf16le", true}, // "utf16le_general_ci"
        {57, "cp1256", true}, // "cp1256_general_ci"
        {58, "cp1257", true}, // "cp1257_bin"
        {59, "cp1257", true}, // "cp1257_general_ci"
        {60, "utf32", true}, // "utf32_general_ci"
        {61, "utf32", true}, // "utf32_bin"
        {62, "utf16le", true}, // "utf16le_bin"
        {64, "armscii8", true}, // "armscii8_bin"
        {65, "ascii", true}, // "ascii_bin"
        {66, "cp1250", true}, // "cp1250_bin"
        {67, "cp1256", true}, // "cp1256_bin"
        {68, "cp866", true}, // "cp866_bin"
        {69, "dec8", true}, // "dec8_bin"
        {70, "greek", true}, // "greek_bin"
        {71, "hebrew", true}, // "hebrew_bin"
        {72, "hp8", true}, // "hp8_bin"
        {73, "keybcs2", true}, // "keybcs2_bin"
        {74, "koi8r", true}, // "koi8r_bin"
        {75, "koi8u", true}, // "koi8u_bin"
        {77, "latin2", true}, // "latin2_bin"
        {78, "latin5", true}, // "latin5_bin"
        {79, "latin7", true}, // "latin7_bin"
        {80, "cp850", true}, // "cp850_bin"
        {81, "cp852", true}, // "cp852_bin"
        {82, "swe7", true}, // "swe7_bin"
        {83, "utf8", false}, // "utf8_bin"
        {84, "big5", true}, // "big5_bin"
        {85, "euckr", true}, // "euckr_bin"
        {86, "gb2312", true}, // "gb2312_bin"
        {87, "gbk", true}, // "gbk_bin"
        {88, "sjis", true}, // "sjis_bin"
        {89, "tis620", true}, // "tis620_bin"
        {90, "ucs2", true}, // "ucs2_bin"
        {91, "ujis", true}, // "ujis_bin"
        {92, "geostd8", true}, // "geostd8_general_ci"
        {93, "geostd8", true}, // "geostd8_bin"
        {94, "latin1", true}, // "latin1_spanish_ci"
        {95, "cp932", true}, // "cp932_japanese_ci"
        {96, "cp932", true}, // "cp932_bin"
        {97, "eucjpms", true}, // "eucjpms_japanese_ci"
        {98, "eucjpms", true}, // "eucjpms_bin"
        {99, "cp1250", true}, // "cp1250_polish_ci"
        {101, "utf16", true}, // "utf16_unicode_ci"
        {102, "utf16", true}, // "utf16_icelandic_ci"
        {103, "utf16", true}, // "utf16_latvian_ci"
        {104, "utf16", true}, // "utf16_romanian_ci"
        {105, "utf16", true}, // "utf16_slovenian_ci"
        {106, "utf16", true}, // "utf16_polish_ci"
        {107, "utf16", true}, // "utf16_estonian_ci"
        {108, "utf16", true}, // "utf16_spanish_ci"
        {109, "utf16", true}, // "utf16_swedish_ci"
        {110, "utf16", true}, // "utf16_turkish_ci"
        {111, "utf16", true}, // "utf16_czech_ci"
        {112, "utf16", true}, // "utf16_danish_ci"
        {113, "utf16", true}, // "utf16_lithuanian_ci"
        {114, "utf16", true}, // "utf16_slovak_ci"
        {115, "utf16", true}, // "utf16_spanish2_ci"
        {116, "utf16", true}, // "utf16_roman_ci"
        {117, "utf16", true}, // "utf16_persian_ci"
        {118, "utf16", true}, // "utf16_esperanto_ci"
        {119, "utf16", true}, // "utf16_hungarian_ci"
        {120, "utf16", true}, // "utf16_sinhala_ci"
        {121, "utf16", true}, // "utf16_german2_ci"
        {122, "utf16", true}, // "utf16_croatian_ci"
        {123, "utf16", true}, // "utf16_unicode_520_ci"
        {124, "utf16", true}, // "utf16_vietnamese_ci"
        {128, "ucs2", true}, // "ucs2_unicode_ci"
        {129, "ucs2", true}, // "ucs2_icelandic_ci"
        {130, "ucs2", true}, // "ucs2_latvian_ci"
        {131, "ucs2", true}, // "ucs2_romanian_ci"
        {132, "ucs2", true}, // "ucs2_slovenian_ci"
        {133, "ucs2", true}, // "ucs2_polish_ci"
        {134, "ucs2", true}, // "ucs2_estonian_ci"
        {135, "ucs2", true}, // "ucs2_spanish_ci"
        {136, "ucs2", true}, // "ucs2_swedish_ci"
        {137, "ucs2", true}, // "ucs2_turkish_ci"
        {138, "ucs2", true}, // "ucs2_czech_ci"
        {139, "ucs2", true}, // "ucs2_danish_ci"
        {140, "ucs2", true}, // "ucs2_lithuanian_ci"
        {141, "ucs2", true}, // "ucs2_slovak_ci"
        {142, "ucs2", true}, // "ucs2_spanish2_ci"
        {143, "ucs2", true}, // "ucs2_roman_ci"
        {144, "ucs2", true}, // "ucs2_persian_ci"
        {145, "ucs2", true}, // "ucs2_esperanto_ci"
        {146, "ucs2", true}, // "ucs2_hungarian_ci"
        {147, "ucs2", true}, // "ucs2_sinhala_ci"
        {148, "ucs2", true}, // "ucs2_german2_ci"
        {149, "ucs2", true}, // "ucs2_croatian_ci"
        {150, "ucs2", true}, // "ucs2_unicode_520_ci"
        {151, "ucs2", true}, // "ucs2_vietnamese_ci"
        {159, "ucs2", true}, // "ucs2_general_mysql500_ci"
        {160, "utf32", true}, // "utf32_unicode_ci"
        {161, "utf32", true}, // "utf32_icelandic_ci"
        {162, "utf32", true}, // "utf32_latvian_ci"
        {163, "utf32", true}, // "utf32_romanian_ci"
        {164, "utf32", true}, // "utf32_slovenian_ci"
        {165, "utf32", true}, // "utf32_polish_ci"
        {166, "utf32", true}, // "utf32_estonian_ci"
        {167, "utf32", true}, // "utf32_spanish_ci"
        {168, "utf32", true}, // "utf32_swedish_ci"
        {169, "utf32", true}, // "utf32_turkish_ci"
        {170, "utf32", true}, // "utf32_czech_ci"
        {171, "utf32", true}, // "utf32_danish_ci"
        {172, "utf32", true}, // "utf32_lithuanian_ci"
        {173, "utf32", true}, // "utf32_slovak_ci"
        {174, "utf32", true}, // "utf32_spanish2_ci"
        {175, "utf32", true}, // "utf32_roman_ci"
        {176, "utf32", true}, // "utf32_persian_ci"
        {177, "utf32", true}, // "utf32_esperanto_ci"
        {178, "utf32", true}, // "utf32_hungarian_ci"
        {179, "utf32", true}, // "utf32_sinhala_ci"
        {180, "utf32", true}, // "utf32_german2_ci"
        {181, "utf32", true}, // "utf32_croatian_ci"
        {182, "utf32", true}, // "utf32_unicode_520_ci"
        {183, "utf32", true}, // "utf32_vietnamese_ci"
        {192, "utf8", false}, // "utf8_unicode_ci"
        {193, "utf8", false}, // "utf8_icelandic_ci"
        {194, "utf8", false}, // "utf8_latvian_ci"
        {195, "utf8", false}, // "utf8_romanian_ci"
        {196, "utf8", false}, // "utf8_slovenian_ci"
        {197, "utf8", false}, // "utf8_polish_ci"
        {198, "utf8", false}, // "utf8_estonian_ci"
        {199, "utf8", false}, // "utf8_spanish_ci"
        {200, "utf8", false}, // "utf8_swedish_ci"
        {201, "utf8", false}, // "utf8_turkish_ci"
        {202, "utf8", false}, // "utf8_czech_ci"
        {203, "utf8", false}, // "utf8_danish_ci"
        {204, "utf8", false}, // "utf8_lithuanian_ci"
        {205, "utf8", false}, // "utf8_slovak_ci"
        {206, "utf8", false}, // "utf8_spanish2_ci"
        {207, "utf8", false}, // "utf8_roman_ci"
        {208, "utf8", false}, // "utf8_persian_ci"
        {209, "utf8", false}, // "utf8_esperanto_ci"
        {210, "utf8", false}, // "utf8_hungarian_ci"
        {211, "utf8", false}, // "utf8_sinhala_ci"
        {212, "utf8", false}, // "utf8_german2_ci"
        {213, "utf8", false}, // "utf8_croatian_ci"
        {214, "utf8", false}, // "utf8_unicode_520_ci"
        {215, "utf8", false}, // "utf8_vietnamese_ci"
        {223, "utf8", false}, // "utf8_general_mysql500_ci"
        {224, "utf8mb4", false}, // "utf8mb4_unicode_ci"
        {225, "utf8mb4", false}, // "utf8mb4_icelandic_ci"
        {226, "utf8mb4", false}, // "utf8mb4_latvian_ci"
        {227, "utf8mb4", false}, // "utf8mb4_romanian_ci"
        {228, "utf8mb4", false}, // "utf8mb4_slovenian_ci"
        {229, "utf8mb4", false}, // "utf8mb4_polish_ci"
        {230, "utf8mb4", false}, // "utf8mb4_estonian_ci"
        {231, "utf8mb4", false}, // "utf8mb4_spanish_ci"
        {232, "utf8mb4", false}, // "utf8mb4_swedish_ci"
        {233, "utf8mb4", false}, // "utf8mb4_turkish_ci"
        {234, "utf8mb4", false}, // "utf8mb4_czech_ci"
        {235, "utf8mb4", false}, // "utf8mb4_danish_ci"
        {236, "utf8mb4", false}, // "utf8mb4_lithuanian_ci"
        {237, "utf8mb4", false}, // "utf8mb4_slovak_ci"
        {238, "utf8mb4", false}, // "utf8mb4_spanish2_ci"
        {239, "utf8mb4", false}, // "utf8mb4_roman_ci"
        {240, "utf8mb4", false}, // "utf8mb4_persian_ci"
        {241, "utf8mb4", false}, // "utf8mb4_esperanto_ci"
        {242, "utf8mb4", false}, // "utf8mb4_hungarian_ci"
        {243, "utf8mb4", false}, // "utf8mb4_sinhala_ci"
        {244, "utf8mb4", false}, // "utf8mb4_german2_ci"
        {245, "utf8mb4", false}, // "utf8mb4_croatian_ci"
        {246, "utf8mb4", false}, // "utf8mb4_unicode_520_ci"
        {247, "utf8mb4", false}, // "utf8mb4_vietnamese_ci"
        {248, "gb18030", true}, // "gb18030_chinese_ci"
        {249, "gb18030", true}, // "gb18030_bin"
        {250, "gb18030", true}, // "gb18030_unicode_520_ci"
        {255, "utf8mb4", false}, // "utf8mb4_0900_ai_ci"
        {256, "utf8mb4", false}, // "utf8mb4_de_pb_0900_ai_ci"
        {257, "utf8mb4", false}, // "utf8mb4_is_0900_ai_ci"
        {258, "utf8mb4", false}, // "utf8mb4_lv_0900_ai_ci"
        {259, "utf8mb4", false}, // "utf8mb4_ro_0900_ai_ci"
        {260, "utf8mb4", false}, // "utf8mb4_sl_0900_ai_ci"
        {261, "utf8mb4", false}, // "utf8mb4_pl_0900_ai_ci"
        {262, "utf8mb4", false}, // "utf8mb4_et_0900_ai_ci"
        {263, "utf8mb4", false}, // "utf8mb4_es_0900_ai_ci"
        {264, "utf8mb4", false}, // "utf8mb4_sv_0900_ai_ci"
        {265, "utf8mb4", false}, // "utf8mb4_tr_0900_ai_ci"
        {266, "utf8mb4", false}, // "utf8mb4_cs_0900_ai_ci"
        {267, "utf8mb4", false}, // "utf8mb4_da_0900_ai_ci"
        {268, "utf8mb4", false}, // "utf8mb4_lt_0900_ai_ci"
        {269, "utf8mb4", false}, // "utf8mb4_sk_0900_ai_ci"
        {270, "utf8mb4", false}, // "utf8mb4_es_trad_0900_ai_ci"
        {271, "utf8mb4", false}, // "utf8mb4_la_0900_ai_ci"
        {272, "utf8mb4", false}, // "utf8mb4_fa_0900_ai_ci"
        {273, "utf8mb4", false}, // "utf8mb4_eo_0900_ai_ci"
        {274, "utf8mb4", false}, // "utf8mb4_hu_0900_ai_ci"
        {275, "utf8mb4", false}, // "utf8mb4_hr_0900_ai_ci"
        {276, "utf8mb4", false}, // "utf8mb4_si_0900_ai_ci"
        {277, "utf8mb4", false}, // "utf8mb4_vi_0900_ai_ci"
        {278, "utf8mb4", false}, // "utf8mb4_0900_as_cs"
        {279, "utf8mb4", false}, // "utf8mb4_de_pb_0900_as_cs"
        {280, "utf8mb4", false}, // "utf8mb4_is_0900_as_cs"
        {281, "utf8mb4", false}, // "utf8mb4_lv_0900_as_cs"
        {282, "utf8mb4", false}, // "utf8mb4_ro_0900_as_cs"
        {283, "utf8mb4", false}, // "utf8mb4_sl_0900_as_cs"
        {284, "utf8mb4", false}, // "utf8mb4_pl_0900_as_cs"
        {285, "utf8mb4", false}, // "utf8mb4_et_0900_as_cs"
        {286, "utf8mb4", false}, // "utf8mb4_es_0900_as_cs"
        {287, "utf8mb4", false}, // "utf8mb4_sv_0900_as_cs"
        {288, "utf8mb4", false}, // "utf8mb4_tr_0900_as_cs"
        {289, "utf8mb4", false}, // "utf8mb4_cs_0900_as_cs"
        {290, "utf8mb4", false}, // "utf8mb4_da_0900_as_cs"
        {291, "utf8mb4", false}, // "utf8mb4_lt_0900_as_cs"
        {292, "utf8mb4", false}, // "utf8mb4_sk_0900_as_cs"
        {293, "utf8mb4", false}, // "utf8mb4_es_trad_0900_as_cs"
        {294, "utf8mb4", false}, // "utf8mb4_la_0900_as_cs"
        {295, "utf8mb4", false}, // "utf8mb4_fa_0900_as_cs"
        {296, "utf8mb4", false}, // "utf8mb4_eo_0900_as_cs"
        {297, "utf8mb4", false}, // "utf8mb4_hu_0900_as_cs"
        {298, "utf8mb4", false}, // "utf8mb4_hr_0900_as_cs"
        {299, "utf8mb4", false}, // "utf8mb4_si_0900_as_cs"
        {300, "utf8mb4", false}, // "utf8mb4_vi_0900_as_cs"
        {303, "utf8mb4", false}, // "utf8mb4_ja_0900_as_cs"
        {304, "utf8mb4", false}, // "utf8mb4_ja_0900_as_cs_ks"
        {305, "utf8mb4", false}, // "utf8mb4_0900_as_ci"
        {306, "utf8mb4", false}, // "utf8mb4_ru_0900_ai_ci"
        {307, "utf8mb4", false}, // "utf8mb4_ru_0900_as_cs"
        {308, "utf8mb4", false}, // "utf8mb4_zh_0900_as_cs"
        {309, "utf8mb4", false} // "utf8mb4_0900_bin"
    };

    MySQLCharset charset;
    for (const auto & row : expected)
    {
        EXPECT_EQ(charset.needConvert(row.id), row.need_convert);

        /// The name is only compared for collations that need conversion.
        if (!charset.needConvert(row.id))
            continue;

        EXPECT_EQ(charset.getCharsetFromId(row.id), row.name);
    }
}
}

View File

@ -441,11 +441,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin
}
}
void DatabaseAtomic::loadStoredObjects(
ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
beforeLoadingMetadata(local_context, mode);
DatabaseOrdinary::loadStoredObjects(local_context, mode, skip_startup_tables);
DatabaseOrdinary::loadStoredObjects(local_context, mode);
}
void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode)

View File

@ -48,7 +48,7 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;

View File

@ -37,8 +37,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_,
}
void DatabaseLazy::loadStoredObjects(
ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/)
{
iterateMetadataFiles(local_context, [this, &local_context](const String & file_name)
{

View File

@ -26,7 +26,7 @@ public:
bool canContainDistributedTables() const override { return false; }
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/) override;
void createTable(
ContextPtr context,

View File

@ -89,8 +89,7 @@ DatabaseOrdinary::DatabaseOrdinary(
{
}
void DatabaseOrdinary::loadStoredObjects(
ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
/** Tables load faster if they are loaded in sorted (by name) order.
* Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
@ -159,12 +158,6 @@ void DatabaseOrdinary::loadStoredObjects(
}
pool.wait();
if (!skip_startup_tables)
{
/// After all tables was basically initialized, startup them.
startupTables(pool, mode);
}
}
void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup)

View File

@ -21,7 +21,7 @@ public:
String getEngineName() const override { return "Ordinary"; }
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
bool supportsLoadingInTopologicalOrder() const override { return true; }

View File

@ -495,11 +495,10 @@ void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, Lo
tryConnectToZooKeeperAndInitDatabase(mode);
}
void DatabaseReplicated::loadStoredObjects(
ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
beforeLoadingMetadata(local_context, mode);
DatabaseAtomic::loadStoredObjects(local_context, mode, skip_startup_tables);
DatabaseAtomic::loadStoredObjects(local_context, mode);
}
UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const

View File

@ -67,7 +67,7 @@ public:
void drop(ContextPtr /*context*/) override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;

View File

@ -134,8 +134,7 @@ public:
/// You can call only once, right after the object is created.
virtual void loadStoredObjects( /// NOLINT
ContextMutablePtr /*context*/,
LoadingStrictnessLevel /*mode*/,
bool /* skip_startup_tables */)
LoadingStrictnessLevel /*mode*/)
{
}

View File

@ -402,7 +402,7 @@ String DatabaseMySQL::getMetadataPath() const
return metadata_path;
}
void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/)
{
std::lock_guard lock{mutex};

View File

@ -76,7 +76,7 @@ public:
void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override;
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;

View File

@ -296,7 +296,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/)
}
void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/)
{
{
std::lock_guard lock{mutex};

View File

@ -44,7 +44,7 @@ public:
bool empty() const override;
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

View File

@ -49,7 +49,7 @@ void TablesLoader::loadTables()
if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder())
databases_to_load.push_back(database.first);
else
database.second->loadStoredObjects(global_context, strictness_mode, /* skip_startup_tables */ true);
database.second->loadStoredObjects(global_context, strictness_mode);
}
if (databases_to_load.empty())

View File

@ -33,46 +33,18 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
{
fs::path fs_path(path);
if (fs_path.has_extension())
fs_path = fs_path.parent_path();
initializeIfNeeded(fs_path);
if (object_storage.files.empty())
return false;
if (object_storage.files.contains(path))
return true;
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
auto it = std::lower_bound(
object_storage.files.begin(),
object_storage.files.end(),
path,
[](const auto & file, const std::string & path_) { return file.first < path_; }
);
if (it == object_storage.files.end())
return false;
if (startsWith(it->first, path)
|| (it != object_storage.files.begin() && startsWith(std::prev(it)->first, path)))
return true;
return false;
return object_storage.exists(path);
}
void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
{
initializeIfNeeded(path);
if (!exists(path))
#ifdef NDEBUG
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
#else
{
std::string all_files;
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file, _] : object_storage.files)
{
if (!all_files.empty())
@ -87,33 +59,40 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).type == WebObjectStorage::FileType::File;
}
bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).type == WebObjectStorage::FileType::Directory;
}
uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).size;
}
StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
{
assertExists(path);
auto fs_path = fs::path(object_storage.url) / path;
std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
remote_path = remote_path.substr(object_storage.url.size());
std::shared_lock shared_lock(object_storage.metadata_mutex);
return {StoredObject(remote_path, object_storage.files.at(path).size, path)};
}
std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
{
std::vector<std::string> result;
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file_path, _] : object_storage.files)
{
if (file_path.starts_with(path))
@ -122,22 +101,14 @@ std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(
return result;
}
void MetadataStorageFromStaticFilesWebServer::initializeIfNeeded(const std::string & path) const
{
if (object_storage.files.find(path) == object_storage.files.end())
{
object_storage.initialize(fs::path(object_storage.url) / path);
}
}
DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(const std::string & path) const
{
std::vector<fs::path> dir_file_paths;
initializeIfNeeded(path);
if (!exists(path))
return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file_path, _] : object_storage.files)
{
if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "")

View File

@ -13,13 +13,14 @@ class MetadataStorageFromStaticFilesWebServer final : public IMetadataStorage
{
private:
friend class MetadataStorageFromStaticFilesWebServerTransaction;
using FileType = WebObjectStorage::FileType;
const WebObjectStorage & object_storage;
std::string root_path;
void assertExists(const std::string & path) const;
void initializeIfNeeded(const std::string & path) const;
void initializeImpl(const String & uri_path, const std::unique_lock<std::shared_mutex> &) const;
public:
explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_);

View File

@ -28,10 +28,9 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int NETWORK_ERROR;
}
void WebObjectStorage::initialize(const String & uri_path) const
void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
{
std::vector<String> directories_to_load;
LOG_TRACE(log, "Loading metadata for directory: {}", uri_path);
@ -81,8 +80,9 @@ void WebObjectStorage::initialize(const String & uri_path) const
}
file_path = file_path.substr(url.size());
files.emplace(std::make_pair(file_path, file_data));
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size);
files.emplace(std::make_pair(file_path, file_data));
}
files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory })));
@ -103,7 +103,7 @@ void WebObjectStorage::initialize(const String & uri_path) const
}
for (const auto & directory_path : directories_to_load)
initialize(directory_path);
initialize(directory_path, lock);
}
@ -118,31 +118,51 @@ WebObjectStorage::WebObjectStorage(
bool WebObjectStorage::exists(const StoredObject & object) const
{
const auto & path = object.remote_path;
return exists(object.remote_path);
}
bool WebObjectStorage::exists(const std::string & path) const
{
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
if (files.find(path) != files.end())
std::shared_lock shared_lock(metadata_mutex);
if (files.find(path) == files.end())
{
shared_lock.unlock();
std::unique_lock unique_lock(metadata_mutex);
if (files.find(path) == files.end())
{
fs::path index_file_dir = fs::path(url) / path;
if (index_file_dir.has_extension())
index_file_dir = index_file_dir.parent_path();
initialize(index_file_dir, unique_lock);
}
/// Files are never deleted from `files` as disk is read only, so no worry that we unlock now.
unique_lock.unlock();
shared_lock.lock();
}
if (files.empty())
return false;
if (files.contains(path))
return true;
if (path.ends_with(MergeTreeData::FORMAT_VERSION_FILE_NAME) && files.find(fs::path(path).parent_path() / "") == files.end())
{
try
{
initialize(fs::path(url) / fs::path(path).parent_path());
return files.find(path) != files.end();
}
catch (...)
{
const auto message = getCurrentExceptionMessage(false);
bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext();
if (can_throw)
throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message);
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
auto it = std::lower_bound(
files.begin(), files.end(), path,
[](const auto & file, const std::string & path_) { return file.first < path_; }
);
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
return false;
}
}
if (it == files.end())
return false;
if (startsWith(it->first, path)
|| (it != files.begin() && startsWith(std::prev(it)->first, path)))
return true;
return false;
}

View File

@ -3,6 +3,7 @@
#include "config.h"
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <shared_mutex>
namespace Poco
{
@ -93,9 +94,8 @@ public:
bool isReadOnly() const override { return true; }
protected:
void initialize(const String & uri_path) const;
[[noreturn]] static void throwNotAllowed();
bool exists(const std::string & path) const;
enum class FileType
{
@ -111,12 +111,13 @@ protected:
using Files = std::map<String, FileData>; /// file path -> file data
mutable Files files;
String url;
mutable std::shared_mutex metadata_mutex;
private:
Poco::Logger * log;
void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;
const String url;
Poco::Logger * log;
size_t min_bytes_for_seek;
};

View File

@ -0,0 +1,158 @@
#pragma once
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/GatherUtils/Sinks.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
}
/** Common implementation of the hasSubsequence* functions.
  *
  * For every row the result is 1 if the needle (second argument) is a subsequence of the
  * haystack (first argument), i.e. all elements of the needle occur in the haystack in the
  * same order, not necessarily next to each other; otherwise the result is 0.
  *
  * Name supplies the function name (Name::name).
  * Impl supplies:
  *  - Impl::is_utf8 - compare decoded UTF-8 code points (true) or single bytes (false);
  *  - Impl::toLowerIfNeed(x) - normalization applied to both sides before they are compared.
  *
  * This template lives in a header, so it is declared directly in namespace DB: an anonymous
  * namespace or a `using namespace` directive here would leak into every translation unit
  * that includes the header.
  */
template <typename Name, typename Impl>
class HasSubsequenceImpl : public IFunction
{
public:
    static constexpr auto name = Name::name;

    static FunctionPtr create(ContextPtr) { return std::make_shared<HasSubsequenceImpl>(); }

    String getName() const override { return name; }

    bool isVariadic() const override { return false; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    size_t getNumberOfArguments() const override { return 2; }

    /// Constant columns are dispatched explicitly in executeImpl.
    bool useDefaultImplementationForConstants() const override { return false; }

    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }

    /// Every argument must be a String; the result type is UInt8.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        for (const auto & argument : arguments)
        {
            if (!isString(argument))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Illegal type {} of argument of function {}",
                    argument->getName(), getName());
        }

        return std::make_shared<DataTypeNumber<UInt8>>();
    }

    /// Picks the source wrappers matching the (full / constant) kind of each argument column
    /// and fills a UInt8 column with one result per row.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
    {
        using StringSource = GatherUtils::StringSource;
        using ConstStringSource = GatherUtils::ConstSource<StringSource>;

        const ColumnPtr & column_haystack = arguments[0].column;
        const ColumnPtr & column_needle = arguments[1].column;

        const ColumnConst * haystack_const_string = checkAndGetColumnConst<ColumnString>(column_haystack.get());
        const ColumnConst * needle_const_string = checkAndGetColumnConst<ColumnString>(column_needle.get());
        const ColumnString * haystack_string = checkAndGetColumn<ColumnString>(column_haystack.get());
        const ColumnString * needle_string = checkAndGetColumn<ColumnString>(column_needle.get());

        auto col_res = ColumnVector<UInt8>::create();
        auto & vec_res = col_res->getData();
        vec_res.resize(input_rows_count);

        if (haystack_string && needle_string)
            execute(StringSource{*haystack_string}, StringSource{*needle_string}, vec_res);
        else if (haystack_string && needle_const_string)
            execute(StringSource{*haystack_string}, ConstStringSource{*needle_const_string}, vec_res);
        else if (haystack_const_string && needle_string)
            execute(ConstStringSource{*haystack_const_string}, StringSource{*needle_string}, vec_res);
        else if (haystack_const_string && needle_const_string)
            execute(ConstStringSource{*haystack_const_string}, ConstStringSource{*needle_const_string}, vec_res);
        else
            throw Exception(
                ErrorCodes::ILLEGAL_COLUMN,
                "Illegal columns {} and {} of arguments of function {}",
                column_haystack->getName(),
                column_needle->getName(),
                getName());

        return col_res;
    }

private:
    /// Walks both sources in lockstep and writes the per-row answer into res_data.
    template <typename SourceHaystack, typename SourceNeedle>
    void execute(
        SourceHaystack && haystacks,
        SourceNeedle && needles,
        PaddedPODArray<UInt8> & res_data) const
    {
        while (!haystacks.isEnd())
        {
            auto haystack_slice = haystacks.getWhole();
            auto needle_slice = needles.getWhole();
            size_t row_num = haystacks.rowNum();

            if constexpr (!Impl::is_utf8)
                res_data[row_num] = hasSubsequence(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
            else
                res_data[row_num] = hasSubsequenceUTF8(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);

            haystacks.next();
            needles.next();
        }
    }

    /// Byte-wise check: a single pass over the haystack that advances the needle position on
    /// every match. An empty needle is a subsequence of any haystack.
    static UInt8 hasSubsequence(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
    {
        size_t j = 0;
        for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
            if (Impl::toLowerIfNeed(needle[j]) == Impl::toLowerIfNeed(haystack[i]))
                ++j;
        return j == needle_size;
    }

    /// Same single-pass check, but over decoded UTF-8 code points.
    /// An empty needle always matches. If the first code point of either string cannot be
    /// decoded the result is 0; a decoding failure later on stops the scan, and the result
    /// is 1 only if the whole needle has already been consumed.
    static UInt8 hasSubsequenceUTF8(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
    {
        const auto * haystack_pos = haystack;
        const auto * needle_pos = needle;
        const auto * haystack_end = haystack + haystack_size;
        const auto * needle_end = needle + needle_size;

        if (!needle_size)
            return 1;

        auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
        auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
        if (!haystack_code_point || !needle_code_point)
            return 0;

        while (haystack_code_point && needle_code_point)
        {
            if (Impl::toLowerIfNeed(*needle_code_point) == Impl::toLowerIfNeed(*haystack_code_point))
            {
                /// Matched: move to the next needle code point, or finish if there is none.
                needle_pos += UTF8::seqLength(*needle_pos);
                if (needle_pos >= needle_end)
                    break;
                needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
            }

            /// The haystack advances on every iteration, matched or not.
            haystack_pos += UTF8::seqLength(*haystack_pos);
            if (haystack_pos >= haystack_end)
                break;
            haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
        }

        return needle_pos == needle_end;
    }
};
}

View File

@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>
namespace DB
{
namespace
{
/// Comparison policy for hasSubsequence: single bytes, compared exactly as stored.
struct HasSubsequenceCaseSensitiveASCII
{
    /// Operate on bytes, not on decoded UTF-8 code points.
    static constexpr bool is_utf8 = false;

    /// Case-sensitive comparison: the value is handed back untouched.
    static int toLowerIfNeed(int byte)
    {
        return byte;
    }
};
/// Name tag for the case-sensitive, byte-wise variant.
struct NameHasSubsequence
{
    static constexpr const char * name = "hasSubsequence";
};
/// hasSubsequence: the shared implementation instantiated with the case-sensitive ASCII policy.
using FunctionHasSubsequence = HasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>;
}
/// Registers FunctionHasSubsequence in the function factory.
/// NOTE(review): FunctionFactory::CaseInsensitive presumably makes the name match regardless of case - confirm against FunctionFactory.
REGISTER_FUNCTION(hasSubsequence)
{
factory.registerFunction<FunctionHasSubsequence>({}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,29 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>
namespace DB
{
namespace
{
/// Comparison policy for hasSubsequenceCaseInsensitive: single bytes with ASCII-only case folding.
struct HasSubsequenceCaseInsensitiveASCII
{
    /// Operate on bytes, not on decoded UTF-8 code points.
    static constexpr bool is_utf8 = false;

    /// Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged.
    /// Written by hand instead of calling std::tolower, whose result depends on the current
    /// C locale: under a non-"C" locale it may also fold bytes >= 0x80, which would make
    /// unrelated bytes of multi-byte sequences compare equal.
    static int toLowerIfNeed(int c)
    {
        return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
    }
};
/// Name tag for the case-insensitive, byte-wise variant.
struct NameHasSubsequenceCaseInsensitive
{
    static constexpr const char * name = "hasSubsequenceCaseInsensitive";
};
/// hasSubsequenceCaseInsensitive: the shared implementation instantiated with the case-insensitive ASCII policy.
using FunctionHasSubsequenceCaseInsensitive = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>;
}
/// Registers FunctionHasSubsequenceCaseInsensitive in the function factory.
/// NOTE(review): FunctionFactory::CaseInsensitive presumably makes the name match regardless of case - confirm against FunctionFactory.
REGISTER_FUNCTION(hasSubsequenceCaseInsensitive)
{
factory.registerFunction<FunctionHasSubsequenceCaseInsensitive>({}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,31 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>
#include "Poco/Unicode.h"
namespace DB
{
namespace
{
/// Comparison policy for hasSubsequenceCaseInsensitiveUTF8: decoded code points, lower-cased before comparison.
struct HasSubsequenceCaseInsensitiveUTF8
{
    /// Operate on decoded UTF-8 code points rather than on single bytes.
    static constexpr bool is_utf8 = true;

    /// Lower-cases the code point via Poco::Unicode::toLower.
    static int toLowerIfNeed(int code_point)
    {
        return Poco::Unicode::toLower(code_point);
    }
};
/// Name tag for the case-insensitive UTF-8 variant.
struct NameHasSubsequenceCaseInsensitiveUTF8
{
    static constexpr const char * name = "hasSubsequenceCaseInsensitiveUTF8";
};
/// hasSubsequenceCaseInsensitiveUTF8: the shared implementation instantiated with the case-insensitive UTF-8 policy.
using FunctionHasSubsequenceCaseInsensitiveUTF8 = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>;
}
/// Registers FunctionHasSubsequenceCaseInsensitiveUTF8 in the function factory.
/// NOTE(review): FunctionFactory::CaseInsensitive presumably makes the name match regardless of case - confirm against FunctionFactory.
REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8)
{
factory.registerFunction<FunctionHasSubsequenceCaseInsensitiveUTF8>({}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>
namespace DB
{
namespace
{
/// Comparison policy for hasSubsequenceUTF8: decoded code points, compared exactly.
struct HasSubsequenceCaseSensitiveUTF8
{
    /// Operate on decoded UTF-8 code points rather than on single bytes.
    static constexpr bool is_utf8 = true;

    /// Case-sensitive comparison: the code point is handed back untouched.
    static int toLowerIfNeed(int code_point)
    {
        return code_point;
    }
};
/// Name tag for the case-sensitive UTF-8 variant.
struct NameHasSubsequenceUTF8
{
    static constexpr const char * name = "hasSubsequenceUTF8";
};
/// hasSubsequenceUTF8: the shared implementation instantiated with the case-sensitive UTF-8 policy.
using FunctionHasSubsequenceUTF8 = HasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>;
}
/// Registers FunctionHasSubsequenceUTF8 in the function factory.
/// NOTE(review): FunctionFactory::CaseInsensitive presumably makes the name match regardless of case - confirm against FunctionFactory.
REGISTER_FUNCTION(hasSubsequenceUTF8)
{
factory.registerFunction<FunctionHasSubsequenceUTF8>({}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -1,4 +1,3 @@
#include "FunctionsStringSearch.h"
#include "FunctionFactory.h"
#include "like.h"

View File

@ -2515,11 +2515,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr
/// find input node which refers to the output node
/// consider only aliases on the path
const auto * node = output_node;
while (node && node->type == ActionsDAG::ActionType::ALIAS)
while (node)
{
/// alias has only one child
chassert(node->children.size() == 1);
node = node->children.front();
if (node->type == ActionsDAG::ActionType::ALIAS)
{
node = node->children.front();
}
/// materialize() function can occur when dealing with views
/// TODO: not sure if it should be done here, looks too generic place
else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize")
{
chassert(node->children.size() == 1);
node = node->children.front();
}
else
break;
}
if (node && node->type == ActionsDAG::ActionType::INPUT)
index.emplace(output_node->result_name, node);

View File

@ -5,6 +5,11 @@
#include <Interpreters/Cache/FileCacheKey.h>
#include <Common/logger_useful.h>
namespace CurrentMetrics
{
extern const Metric FilesystemCacheSizeLimit;
}
namespace DB
{
@ -18,7 +23,10 @@ private:
using LRUQueueIterator = typename LRUQueue::iterator;
public:
LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_)
{
CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
}
size_t getSize(const CacheGuard::Lock &) const override { return current_size; }

View File

@ -548,15 +548,17 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo
void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions)
{
for (const ASTFunction * node : aggregates())
for (const ASTPtr & ast : aggregates())
{
const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
AggregateDescription aggregate;
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, actions);
if (node.arguments)
getRootActionsNoMakeSet(node.arguments, actions);
aggregate.column_name = node->getColumnName();
aggregate.column_name = node.getColumnName();
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
const ASTs & arguments = node.arguments ? node.arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
@ -568,7 +570,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in aggregate function '{}'",
name, node->formatForErrorMessage());
name, node.formatForErrorMessage());
}
types[i] = dag_node->result_type;
@ -576,8 +578,8 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
}
AggregateFunctionProperties properties;
aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array();
aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties);
aggregate.parameters = (node.parameters) ? getAggregateFunctionParametersArray(node.parameters, "", getContext()) : Array();
aggregate.function = AggregateFunctionFactory::instance().get(node.name, types, aggregate.parameters, properties);
descriptions.push_back(aggregate);
}
@ -744,12 +746,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
}
// Window functions
for (const ASTFunction * function_node : syntax->window_function_asts)
for (const ASTPtr & ast : syntax->window_function_asts)
{
assert(function_node->is_window_function);
const ASTFunction & function_node = typeid_cast<const ASTFunction &>(*ast);
assert(function_node.is_window_function);
WindowFunctionDescription window_function;
window_function.function_node = function_node;
window_function.function_node = &function_node;
window_function.column_name
= window_function.function_node->getColumnName();
window_function.function_parameters
@ -760,7 +763,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
// Hence we clone the node (not very sane either, I know).
// Hence, we clone the node (not very sane either, I know).
getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
const ASTs & arguments
@ -793,22 +796,22 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Find the window corresponding to this function. It may be either
// referenced by name and previously defined in WINDOW clause, or it
// may be defined inline.
if (!function_node->window_name.empty())
if (!function_node.window_name.empty())
{
auto it = window_descriptions.find(function_node->window_name);
auto it = window_descriptions.find(function_node.window_name);
if (it == std::end(window_descriptions))
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Window '{}' is not defined (referenced by '{}')",
function_node->window_name,
function_node->formatForErrorMessage());
function_node.window_name,
function_node.formatForErrorMessage());
}
it->second.window_functions.push_back(window_function);
}
else
{
const auto & definition = function_node->window_definition->as<
const auto & definition = function_node.window_definition->as<
const ASTWindowDefinition &>();
WindowDescription desc;
desc.window_name = definition.getDefaultWindowName();
@ -1323,10 +1326,13 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
GetAggregatesVisitor(data).visit(select_query->orderBy());
/// TODO: data.aggregates -> aggregates()
for (const ASTFunction * node : data.aggregates)
if (node->arguments)
for (auto & argument : node->arguments->children)
for (const ASTPtr & ast : data.aggregates)
{
const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
if (node.arguments)
for (auto & argument : node.arguments->children)
getRootActions(argument, only_types, step.actions());
}
}
void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(

View File

@ -168,7 +168,7 @@ protected:
const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
const ASTs & aggregates() const { return syntax->aggregates; }
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain);

View File

@ -26,8 +26,8 @@ public:
// Explicit empty initializers are needed to make designated initializers
// work on GCC 10.
std::unordered_set<String> uniq_names {};
std::vector<const ASTFunction *> aggregates {};
std::vector<const ASTFunction *> window_functions {};
ASTs aggregates;
ASTs window_functions;
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)
@ -61,7 +61,7 @@ public:
}
private:
static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
if (isAggregateFunction(node))
{
@ -74,7 +74,7 @@ private:
return;
data.uniq_names.insert(column_name);
data.aggregates.push_back(&node);
data.aggregates.push_back(ast);
}
else if (node.is_window_function)
{
@ -87,7 +87,7 @@ private:
return;
data.uniq_names.insert(column_name);
data.window_functions.push_back(&node);
data.window_functions.push_back(ast);
}
}

View File

@ -223,10 +223,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q
{
/// Set up memory profiling
thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events);
}

View File

@ -83,8 +83,6 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex
const Settings & settings = storage_context->getSettingsRef();
group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator);
group->memory_tracker.setParent(&background_memory_tracker);
if (settings.memory_tracker_fault_probability > 0.0)

View File

@ -731,7 +731,7 @@ void expandGroupByAll(ASTSelectQuery * select_query)
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
if (select_query.where())
@ -743,11 +743,12 @@ std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQu
GetAggregatesVisitor(data).visit(query);
/// There can not be other aggregate functions within the aggregate functions.
for (const ASTFunction * node : data.aggregates)
for (const ASTPtr & ast : data.aggregates)
{
if (node->arguments)
const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
if (node.arguments)
{
for (auto & arg : node->arguments->children)
for (auto & arg : node.arguments->children)
{
assertNoAggregates(arg, "inside another aggregate function");
// We also can't have window functions inside aggregate functions,
@ -759,7 +760,7 @@ std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQu
return data.aggregates;
}
std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query)
ASTs getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be window functions inside the WHERE, PREWHERE and HAVING
if (select_query.having())
@ -777,20 +778,16 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
/// Window functions cannot be inside aggregates or other window functions.
/// Aggregate functions can be inside window functions because they are
/// calculated earlier.
for (const ASTFunction * node : data.window_functions)
for (const ASTPtr & ast : data.window_functions)
{
if (node->arguments)
{
for (auto & arg : node->arguments->children)
{
assertNoWindows(arg, "inside another window function");
}
}
const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
if (node->window_definition)
{
assertNoWindows(node->window_definition, "inside window definition");
}
if (node.arguments)
for (auto & arg : node.arguments->children)
assertNoWindows(arg, "inside another window function");
if (node.window_definition)
assertNoWindows(node.window_definition, "inside window definition");
}
return data.window_functions;
@ -1357,8 +1354,8 @@ TreeRewriterResultPtr TreeRewriter::analyze(
GetAggregatesVisitor(data).visit(query);
/// There can not be other aggregate functions within the aggregate functions.
for (const ASTFunction * node : data.aggregates)
for (auto & arg : node->arguments->children)
for (const ASTPtr & node : data.aggregates)
for (auto & arg : typeid_cast<const ASTFunction &>(*node).arguments->children)
assertNoAggregates(arg, "inside another aggregate function");
result.aggregates = data.aggregates;
}

View File

@ -41,8 +41,8 @@ struct TreeRewriterResult
Aliases aliases;
std::vector<const ASTFunction *> aggregates;
std::vector<const ASTFunction *> window_function_asts;
ASTs aggregates;
ASTs window_function_asts;
ASTs expressions_with_window_function;
/// Which column is needed to be ARRAY-JOIN'ed to get the specified.

View File

@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get)
auto & file_segment2 = get(holder2, 2);
ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId());
ASSERT_EQ(file_segment2.state(), State::DOWNLOADING);
{
std::lock_guard lock(mutex);
@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get)
cv.notify_one();
file_segment2.wait(file_segment2.range().right);
file_segment2.complete();
ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED);
ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size());
});
{
@ -488,7 +488,7 @@ TEST_F(FileCacheTest, get)
}
download(file_segment);
ASSERT_TRUE(file_segment.state() == State::DOWNLOADED);
ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
other_1.join();

View File

@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
break;
}
/// Dangling query plan node. This might be generated by StorageMerge.
if (iter->node->step.get() == reading)
return false;
const auto metadata = reading->getStorageMetadata();
const auto & projections = metadata->projections;
@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
QueryDAG query;
{
auto & clild = iter->node->children[iter->next_child - 1];
if (!query.build(*clild))
auto & child = iter->node->children[iter->next_child - 1];
if (!query.build(*child))
return false;
if (query.dag)

View File

@ -33,7 +33,7 @@ struct Settings;
/** Data storing format settings. */ \
M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \
M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
\
/** Merge settings. */ \
M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \

View File

@ -8,6 +8,7 @@ import shutil
import subprocess
import time
import sys
from glob import glob
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
@ -31,6 +32,17 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
ImagesDict = Dict[str, dict]
# workaround for mypy issue [1]:
#
# "Argument 1 to "map" has incompatible type overloaded function" [1]
#
# [1]: https://github.com/python/mypy/issues/9864
#
# NOTE: simply lambda will do the trick as well, but pylint will not like it
def realpath(*args, **kwargs):
    """Plain wrapper that forwards every argument to os.path.realpath.

    Exists so that a simple, non-overloaded callable can be handed to map().
    """
    resolved = os.path.realpath(*args, **kwargs)
    return resolved
class DockerImage:
def __init__(
self,
@ -111,8 +123,23 @@ def get_changed_docker_images(
changed_images = []
for dockerfile_dir, image_description in images_dict.items():
source_dir = GITHUB_WORKSPACE.rstrip("/") + "/"
dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True)
# resolve symlinks
dockerfile_files = list(map(realpath, dockerfile_files))
# trim prefix to get relative path again, to match with files_changed
dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files))
logging.info(
"Docker %s (source_dir=%s) build context for PR %s @ %s: %s",
dockerfile_dir,
source_dir,
pr_info.number,
pr_info.sha,
str(dockerfile_files),
)
for f in files_changed:
if f.startswith(dockerfile_dir):
if f in dockerfile_files:
name = image_description["name"]
only_amd64 = image_description.get("only_amd64", False)
logging.info(
@ -245,6 +272,8 @@ def build_and_push_one_image(
cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}"
cmd = (
# tar is required to follow symlinks, since docker-build cannot do this
f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | "
"docker buildx build --builder default "
f"--label build-url={GITHUB_RUN_URL} "
f"{from_tag_arg}"
@ -254,7 +283,7 @@ def build_and_push_one_image(
f"{cache_from} "
f"--cache-to type=inline,mode=max "
f"{push_arg}"
f"--progress plain {image.full_path}"
f"--progress plain -"
)
logging.info("Docker command to run: %s", cmd)
with TeePopen(cmd, build_log) as proc:

View File

@ -126,12 +126,13 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
"--build-arg FROM_TAG=version "
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
"--tag name:version --cache-from type=registry,ref=name:version "
"--cache-from type=registry,ref=name:latest "
"--cache-to type=inline,mode=max --push --progress plain path",
"--cache-to type=inline,mode=max --push --progress plain -",
mock_popen.call_args.args,
)
self.assertTrue(result)
@ -143,12 +144,13 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
"--build-arg FROM_TAG=version2 "
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
"--cache-from type=registry,ref=name:latest "
"--cache-to type=inline,mode=max --progress plain path",
"--cache-to type=inline,mode=max --progress plain -",
mock_popen.call_args.args,
)
self.assertTrue(result)
@ -160,11 +162,12 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
"--cache-from type=registry,ref=name:latest "
"--cache-to type=inline,mode=max --progress plain path",
"--cache-to type=inline,mode=max --progress plain -",
mock_popen.call_args.args,
)
self.assertFalse(result)
@ -178,13 +181,14 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
"--cache-from type=registry,ref=name:latest "
"--cache-from type=registry,ref=name:cached-version "
"--cache-from type=registry,ref=name:another-cached "
"--cache-to type=inline,mode=max --progress plain path",
"--cache-to type=inline,mode=max --progress plain -",
mock_popen.call_args.args,
)
self.assertFalse(result)

View File

@ -190,7 +190,7 @@ def clear_ip_tables_and_restart_daemons():
try:
logging.info("Killing all alive docker containers")
subprocess.check_output(
"timeout -s 9 10m docker ps --quiet | xargs --no-run-if-empty docker kill",
"timeout --signal=KILL 10m docker ps --quiet | xargs --no-run-if-empty docker kill",
shell=True,
)
except subprocess.CalledProcessError as err:
@ -199,7 +199,7 @@ def clear_ip_tables_and_restart_daemons():
try:
logging.info("Removing all docker containers")
subprocess.check_output(
"timeout -s 9 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force",
"timeout --signal=KILL 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force",
shell=True,
)
except subprocess.CalledProcessError as err:
@ -321,7 +321,7 @@ class ClickhouseIntegrationTestsRunner:
cmd = (
"cd {repo_path}/tests/integration && "
"timeout -s 9 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format(
"timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format(
repo_path=repo_path,
runner_opts=self._get_runner_opts(),
image_cmd=image_cmd,
@ -433,9 +433,9 @@ class ClickhouseIntegrationTestsRunner:
out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log")
cmd = (
"cd {repo_path}/tests/integration && "
"timeout -s 9 1h ./runner {runner_opts} {image_cmd} -- --setup-plan "
"| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' "
"| grep -v 'SKIPPED' | sort -u > {out_file}".format(
"timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} -- --setup-plan "
"| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' "
"| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format(
repo_path=repo_path,
runner_opts=self._get_runner_opts(),
image_cmd=image_cmd,
@ -677,7 +677,7 @@ class ClickhouseIntegrationTestsRunner:
# -E -- (E)rror
# -p -- (p)assed
# -s -- (s)kipped
cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format(
cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format(
repo_path,
self._get_runner_opts(),
image_cmd,

View File

@ -12,6 +12,22 @@ from helpers.network import _NetworkManager
logging.raiseExceptions = False
@pytest.fixture(autouse=True, scope="session")
def tune_local_port_range():
    """Set net.ipv4.ip_local_port_range to 55000-65535 once per test session.

    Lots of services use non-privileged ports:
      - hdfs -- 50020/50070/...
      - minio
      - mysql
      - psql

    Instead of tuning all of these third-party services, simply prohibit
    using such ports for outgoing connections; this should fix possible
    "Address already in use" errors.

    NOTE: a range of 5K ports is not enough and sometimes leads to an
    EADDRNOTAVAIL error.
    """
    sysctl_command = "sysctl net.ipv4.ip_local_port_range='55000 65535'"
    run_and_check([sysctl_command], shell=True)
@pytest.fixture(autouse=True, scope="session")
def cleanup_environment():
try:

View File

@ -45,5 +45,6 @@
<merge_tree>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>

View File

@ -980,6 +980,89 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name
mysql_node.query("DROP DATABASE test_database_event")
def text_blob_with_charset_test(clickhouse_node, mysql_node, service_name):
    """Check MaterializedMySQL replication of text/blob columns with charsets.

    Three MySQL tables are created that mix gbk/big5 text columns with blob
    columns. The values read back through ClickHouse are verified twice: for
    rows inserted before the MaterializedMySQL database is created (full
    replication) and for rows inserted afterwards (incremental replication).
    Both databases are dropped at the end.
    """
    db = "text_blob_with_charset_test"

    # One entry per table:
    # (name, MySQL definition, inserted values following the key,
    #  expressions selected in ClickHouse, expected TSV output)
    tables = [
        (
            "test_table_1",
            "(a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET big5, d longtext, e varchar(256), f char(4)) ENGINE = InnoDB DEFAULT CHARSET=gbk",
            "'你好', '世界', '哈罗', '您Hi您', '您Hi您'",
            "b, c, d, e, f",
            "你好\t世界\t哈罗\t您Hi您\t您Hi您\n",
        ),
        (
            "test_table_2",
            "(a INT NOT NULL PRIMARY KEY, b blob, c longblob) ENGINE = InnoDB DEFAULT CHARSET=gbk",
            "'你好', 0xFAAA00000000000DDCC",
            "hex(b), hex(c)",
            "E4BDA0E5A5BD\t0FAAA00000000000DDCC\n",
        ),
        (
            "test_table_3",
            "(a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET gbk, d tinytext CHARSET big5, e varchar(256), f char(4)) ENGINE = InnoDB",
            "'你好', '世界', 'hello', '您Hi您', '您Hi您'",
            "b, c, d, e, f",
            "你好\t世界\thello\t您Hi您\t您Hi您\n",
        ),
    ]

    def insert_rows(key):
        # Insert one row with primary key `key` into every MySQL table.
        for name, _, values, _, _ in tables:
            mysql_node.query(f"INSERT INTO {db}.{name} VALUES ({key}, {values})")

    def verify_rows(key):
        # Read the row with primary key `key` back from every ClickHouse table.
        for name, _, _, columns, expected in tables:
            check_query(
                clickhouse_node,
                f"SELECT {columns} FROM {db}.{name} WHERE a = {key} FORMAT TSV",
                expected,
            )

    # Start from a clean state on both sides.
    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")

    mysql_node.query(f"CREATE DATABASE {db} DEFAULT CHARACTER SET 'utf8'")
    for name, definition, _, _, _ in tables:
        mysql_node.query(f"CREATE TABLE {db}.{name} {definition}")
    insert_rows(1)

    clickhouse_node.query(
        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
    )
    assert db in clickhouse_node.query("SHOW DATABASES")

    # from full replication
    check_query(
        clickhouse_node,
        f"SHOW TABLES FROM {db} FORMAT TSV",
        "test_table_1\ntest_table_2\ntest_table_3\n",
    )
    verify_rows(1)

    # from increment replication
    insert_rows(2)
    verify_rows(2)

    clickhouse_node.query(f"DROP DATABASE {db}")
    mysql_node.query(f"DROP DATABASE {db}")
def select_without_columns(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS db")
clickhouse_node.query("DROP DATABASE IF EXISTS db")

View File

@ -262,6 +262,12 @@ def test_materialized_database_ddl_with_empty_transaction_8_0(
)
def test_text_blob_charset(started_cluster, started_mysql_8_0, clickhouse_node):
    """Run the text/blob charset scenario against the "mysql80" service."""
    materialized_with_ddl.text_blob_with_charset_test(
        clickhouse_node=clickhouse_node,
        mysql_node=started_mysql_8_0,
        service_name="mysql80",
    )
def test_select_without_columns_5_7(
started_cluster, started_mysql_5_7, clickhouse_node
):

View File

@ -1 +0,0 @@
#!/usr/bin/env python3

View File

@ -1,7 +0,0 @@
<clickhouse>
<profiles>
<default>
<max_untracked_memory>1</max_untracked_memory>
</default>
</profiles>
</clickhouse>

View File

@ -1,5 +0,0 @@
<clickhouse>
<total_memory_tracker_sample_probability>1</total_memory_tracker_sample_probability>
<total_memory_profiler_sample_min_allocation_size>4096</total_memory_profiler_sample_min_allocation_size>
<total_memory_profiler_sample_max_allocation_size>8192</total_memory_profiler_sample_max_allocation_size>
</clickhouse>

View File

@ -1,37 +0,0 @@
from helpers.cluster import ClickHouseCluster
import pytest
# Cluster definition for this test module; a single instance is added below.
cluster = ClickHouseCluster(__file__)
# The "node" instance is configured with configs/memory_profiler.xml as a
# main config and configs/max_untracked_memory.xml as a user config.
node = cluster.add_instance(
"node",
main_configs=["configs/memory_profiler.xml"],
user_configs=["configs/max_untracked_memory.xml"],
)
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture that starts the cluster and yields it to tests.

    cluster.start() is inside the try block, so cluster.shutdown() runs even
    when startup itself raises, as well as after the tests have finished.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def test_trace_boundaries_work(started_cluster):
    """Check that MemorySample entries in system.trace_log respect the size bounds.

    After running a query and flushing logs, at least one distinct sample
    size must be present, and no sample may have an absolute size outside
    the 4096..8192 range.
    """
    node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null")
    node.query("SYSTEM FLUSH LOGS")

    has_memory_samples = node.query(
        "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'"
    )
    assert has_memory_samples == "1\n"

    out_of_bounds_samples = node.query(
        "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)"
    )
    assert out_of_bounds_samples == "0\n"

View File

@ -28,6 +28,7 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
</clickhouse>

View File

@ -152,6 +152,7 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<database_catalog_unused_dir_hide_timeout_sec>0</database_catalog_unused_dir_hide_timeout_sec>

View File

@ -0,0 +1,5 @@
<clickhouse>
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>

View File

@ -1,5 +0,0 @@
<clickhouse>
<profiles>
<default/>
</profiles>
</clickhouse>

View File

@ -1,18 +0,0 @@
<clickhouse>
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
</clickhouse>

View File

@ -67,6 +67,7 @@ def cluster():
"configs/config.d/storage_conf.xml",
"configs/config.d/instant_moves.xml",
"configs/config.d/part_log.xml",
"configs/config.d/merge_tree.xml",
],
with_minio=True,
)

View File

@ -1718,7 +1718,7 @@ def test_freeze(start_cluster):
) ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY toYYYYMM(d)
SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false
SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1
"""
)

View File

@ -38,7 +38,7 @@ def partition_table_simple(started_cluster):
q(
"CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) "
"ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) "
"SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
"SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )")
q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )")
@ -150,7 +150,7 @@ def partition_table_complex(started_cluster):
q("DROP TABLE IF EXISTS test.partition_complex")
q(
"CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) "
"ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
"ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)")
q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)")
@ -188,7 +188,7 @@ def test_partition_complex(partition_table_complex):
def cannot_attach_active_part_table(started_cluster):
q("DROP TABLE IF EXISTS test.attach_active")
q(
"CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
"CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16")
@ -217,7 +217,7 @@ def attach_check_all_parts_table(started_cluster):
q("DROP TABLE IF EXISTS test.attach_partition")
q(
"CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n "
"SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
"SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
)
q(
"INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
@ -299,7 +299,7 @@ def drop_detached_parts_table(started_cluster):
q("SYSTEM STOP MERGES")
q("DROP TABLE IF EXISTS test.drop_detached")
q(
"CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
"CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q(
"INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
@ -370,13 +370,13 @@ def test_drop_detached_parts(drop_detached_parts_table):
def test_system_detached_parts(drop_detached_parts_table):
q(
"create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false"
"create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q(
"create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
"create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q(
"create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
"create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
q(
"create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x"
@ -497,7 +497,7 @@ def test_system_detached_parts(drop_detached_parts_table):
def test_detached_part_dir_exists(started_cluster):
q(
"create table detached_part_dir_exists (n int) engine=MergeTree order by n "
"SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
"SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
)
q("insert into detached_part_dir_exists select 1") # will create all_1_1_0
q(
@ -549,7 +549,7 @@ def test_detached_part_dir_exists(started_cluster):
def test_make_clone_in_detached(started_cluster):
q(
"create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false"
"create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
)
path = path_to_data + "data/default/clone_in_detached/"

View File

@ -498,7 +498,7 @@ def test_polymorphic_parts_index(start_cluster):
"""
CREATE TABLE test_index.index_compact(a UInt32, s String)
ENGINE = MergeTree ORDER BY a
SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false"""
SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"""
)
node1.query(

View File

@ -35,6 +35,7 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<allow_remote_fs_zero_copy_replication>0</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<remote_servers>

View File

@ -29,6 +29,7 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<remote_servers>

View File

@ -70,6 +70,7 @@
<min_bytes_for_wide_part>1024</min_bytes_for_wide_part>
<old_parts_lifetime>1</old_parts_lifetime>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<remote_servers>

View File

@ -32,6 +32,7 @@
<merge_tree>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>

View File

@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
$CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90"
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes"
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes"
@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
# PREWHERE using empty column
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs"
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
$CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000"
$CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0"
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0"
@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs"
# Nullable PREWHERE
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
$CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
$CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"

View File

@ -1,7 +1,7 @@
-- Tags: no-random-settings
drop table if exists tab_00484;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192;
set preferred_block_size_bytes = 2000000;
@ -17,19 +17,19 @@ set preferred_max_column_in_block_size_bytes = 4194304;
select max(blockSize()), min(blockSize()), any(ignore(*)) from tab_00484;
drop table if exists tab_00484;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 47;
set preferred_max_column_in_block_size_bytes = 1152;
select blockSize(), * from tab_00484 where x = 1 or x > 36 format Null;
drop table if exists tab_00484;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 10;
set preferred_max_column_in_block_size_bytes = 128;
select s from tab_00484 where s == '' format Null;
drop table if exists tab_00484;
create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
insert into tab_00484 select today(), number, 'abc' from system.numbers limit 81920;
set preferred_block_size_bytes = 0;
select count(*) from tab_00484 prewhere s != 'abc' format Null;

View File

@ -12,7 +12,7 @@ CREATE TABLE check_system_tables
ORDER BY name1
PARTITION BY name2
SAMPLE BY name1
SETTINGS min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false;
SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
SELECT name, partition_key, sorting_key, primary_key, sampling_key, storage_policy, total_rows
FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase()

View File

@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic;
CREATE TABLE delta_codec_synthetic
(
id UInt64 Codec(Delta, ZSTD(3))
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
CREATE TABLE default_codec_synthetic
(
id UInt64 Codec(ZSTD(3))
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000;
INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000;
@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float;
CREATE TABLE delta_codec_float
(
id Float64 Codec(Delta, LZ4HC)
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
CREATE TABLE default_codec_float
(
id Float64 Codec(LZ4HC)
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0;
INSERT INTO default_codec_float SELECT * from delta_codec_float;
@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string;
CREATE TABLE delta_codec_string
(
id Float64 Codec(Delta, LZ4)
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
CREATE TABLE default_codec_string
(
id Float64 Codec(LZ4)
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000);
INSERT INTO default_codec_string SELECT * from delta_codec_string;

View File

@ -24,7 +24,7 @@ CREATE TABLE codecTest (
valueI8 Int8 CODEC(DoubleDelta),
valueDT DateTime CODEC(DoubleDelta),
valueD Date CODEC(DoubleDelta)
) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0;
) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
-- checking for overflow

View File

@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_00961;
CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32)
ENGINE = MergeTree PARTITION BY d ORDER BY (a, b)
SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi';
SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi', ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);

View File

@ -1,11 +1,10 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"
$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;"

View File

@ -1,11 +1,10 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
$CLICKHOUSE_CLIENT --query="""
SELECT * FROM (SELECT number % 5 AS a, count() AS b, c FROM numbers(10)
ARRAY JOIN [1,2] AS c GROUP BY a,c) AS table

View File

@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mt_compact"
$CLICKHOUSE_CLIENT -q "CREATE TABLE mt_compact(a Int, s String) ENGINE = MergeTree ORDER BY a
SETTINGS min_rows_for_wide_part = 1000,
index_granularity = 14;"
index_granularity = 14, ratio_of_defaults_for_sparse_serialization = 1;"
$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES mt_compact"

View File

@ -4,7 +4,7 @@ DROP TABLE IF EXISTS codecs;
CREATE TABLE codecs (id UInt32, val UInt32, s String)
ENGINE = MergeTree ORDER BY id
SETTINGS min_rows_for_wide_part = 10000;
SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
FROM system.parts
@ -21,7 +21,7 @@ DROP TABLE codecs;
CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE))
ENGINE = MergeTree ORDER BY id
SETTINGS min_rows_for_wide_part = 10000;
SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
FROM system.parts
@ -38,7 +38,7 @@ DROP TABLE codecs;
CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD))
ENGINE = MergeTree ORDER BY id
SETTINGS min_rows_for_wide_part = 10000;
SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
FROM system.parts

View File

@ -4,11 +4,10 @@
set -e
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()"

View File

@ -2,9 +2,8 @@
# Tags: race
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=debug
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g')
${CLICKHOUSE_CLIENT} --query="SELECT logTrace('logTrace Function Test');" 2>&1 | grep -q "logTrace Function Test" && echo "OK" || echo "FAIL"

View File

@ -1,11 +1,10 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"
$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64) ENGINE = Memory;"

Some files were not shown because too many files have changed in this diff Show More