This commit is contained in:
Andrey Zvonov 2024-09-19 09:18:31 +02:00 committed by GitHub
commit deecee518a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 155 additions and 62 deletions

View File

@ -2567,6 +2567,18 @@ The time zone is necessary for conversions between String and DateTime formats w
- [session_timezone](../settings/settings.md#session_timezone)
## prefer_system_tzdata {#prefer_system_tzdata}
If set to `true`, the system tzdata library will be preferred over the one embedded in ClickHouse.
The default value is `false`.
**Example**
``` xml
<prefer_system_tzdata>true</prefer_system_tzdata>
```
## tcp_port {#tcp_port}
Port for communicating with clients over the TCP protocol.

View File

@ -1597,6 +1597,18 @@ ClickHouse использует потоки из глобального пул
- [session_timezone](../settings/settings.md#session_timezone)
## prefer_system_tzdata {#prefer_system_tzdata}
Если данный параметр имеет значение `true`, то в первую очередь будет использована системная библиотека tzdata, а не встроенная в ClickHouse.
Значение по умолчанию: `false`.
**Пример**
``` xml
<prefer_system_tzdata>true</prefer_system_tzdata>
```
## tcp_port {#server_configuration_parameters-tcp_port}
Порт для взаимодействия с клиентами по протоколу TCP.

View File

@ -816,6 +816,11 @@
-->
<!-- <timezone>UTC</timezone> -->
<!-- Selects between ClickHouse built-in timezone data and system-wide tzdata (usually located at /usr/share/zoneinfo/).
By default, ClickHouse built-in tzdata is used. Set to true to look up the system tzdata first.
-->
<!-- <prefer_system_tzdata>true</prefer_system_tzdata>-->
<!-- You can specify umask here (see "man umask"). Server will apply it on startup.
Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
-->

View File

@ -13,6 +13,10 @@
#include <filesystem>
#include <fstream>
#include <cctz/zone_info_source.h>
/// Embedded timezones.
std::string_view getTimeZone(const char * name);
namespace
{
@ -205,3 +209,81 @@ std::string DateLUT::extractTimezoneFromContext(DB::ContextPtr query_context)
{
return query_context->getSettingsRef().session_timezone.value;
}
/// By default, prefer to load timezones from blobs linked into the binary.
/// The blobs are provided by the "tzdata" library.
/// This avoids a dependency on the system tzdata.
namespace cctz_extension
{
namespace
{
/// ZoneInfoSource that serves timezone data from an in-memory buffer.
/// The buffer is not owned: the object only keeps a cursor (`data`) and the
/// number of bytes still available after it (`size`).
class Source : public cctz::ZoneInfoSource
{
public:
    Source(const char * data_, size_t size_)
        : data(data_)
        , size(size_)
    {
    }

    /// Copies at most `bytes` bytes from the current position into `buf`,
    /// advances the cursor and returns the number of bytes actually copied.
    size_t Read(void * buf, size_t bytes) override
    {
        const size_t to_copy = std::min(bytes, size);
        memcpy(buf, data, to_copy);
        data += to_copy;
        size -= to_copy;
        return to_copy;
    }

    /// Advances the cursor by `offset` bytes and returns 0.
    /// If fewer than `offset` bytes remain, the cursor is left untouched,
    /// errno is set to EINVAL and -1 is returned.
    int Skip(size_t offset) override
    {
        if (offset > size)
        {
            errno = EINVAL;
            return -1;
        }

        data += offset;
        size -= offset;
        return 0;
    }

private:
    const char * data; /// Current read position.
    size_t size; /// Bytes remaining starting from `data`.
};
/// Default tzdata lookup: first search the timezones embedded into the binary,
/// and call `fallback` only when no embedded data exists for `name`.
std::unique_ptr<cctz::ZoneInfoSource>
custom_factory(const std::string & name, const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
{
    const std::string_view embedded = getTimeZone(name.data());
    if (embedded.empty())
        return fallback(name);

    return std::make_unique<Source>(embedded.data(), embedded.size());
}
}
cctz_extension::ZoneInfoSourceFactory zone_info_source_factory = custom_factory;
}
/// If `prefer_system_tzdata` is turned on in config, redefine tzdata lookup order:
/// First, try to use system tzdata, then use built-in.
void DateLUT::setPreferSystemTZData()
{
cctz_extension::zone_info_source_factory = [] (
const std::string & name,
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback
) -> std::unique_ptr<cctz::ZoneInfoSource>
{
auto system_tz_source = fallback(name);
if (system_tz_source)
return system_tz_source;
std::string_view tz_file = ::getTimeZone(name.data());
if (!tz_file.empty())
return std::make_unique<cctz_extension::Source>(tz_file.data(), tz_file.size());
/// If not found in system AND in built-in, let fallback() handle this.
return system_tz_source;
};
}

View File

@ -55,6 +55,9 @@ public:
protected:
DateLUT();
friend class BaseDaemon;
static void setPreferSystemTZData();
private:
static DateLUT & getInstance();

View File

@ -21,10 +21,6 @@ namespace ErrorCodes
}
}
/// Embedded timezones.
std::string_view getTimeZone(const char * name);
namespace
{
@ -236,60 +232,3 @@ unsigned int DateLUTImpl::toMillisecond(const DB::DateTime64 & datetime, Int64 s
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
return millisecond;
}
/// Prefer to load timezones from blobs linked into the binary.
/// The blobs are provided by the "tzdata" library.
/// This avoids a dependency on the system tzdata.
namespace cctz_extension
{
namespace
{
/// Reads timezone data out of a caller-provided memory region.
/// Keeps only a moving pointer and the count of bytes left; the memory itself is not owned.
class Source : public cctz::ZoneInfoSource
{
public:
    Source(const char * data_, size_t size_)
        : data(data_)
        , size(size_)
    {
    }

    /// Copies up to `bytes` bytes into `buf` and consumes them.
    /// Returns how many bytes were copied (less than `bytes` when the data runs out).
    size_t Read(void * buf, size_t bytes) override
    {
        const size_t count = std::min(bytes, size);
        memcpy(buf, data, count);
        data += count;
        size -= count;
        return count;
    }

    /// Consumes `offset` bytes without copying them and returns 0.
    /// When `offset` exceeds the remaining size, nothing is consumed,
    /// errno becomes EINVAL and the result is -1.
    int Skip(size_t offset) override
    {
        const bool out_of_range = offset > size;
        if (out_of_range)
        {
            errno = EINVAL;
            return -1;
        }

        data += offset;
        size -= offset;
        return 0;
    }

private:
    const char * data; /// Next unread byte.
    size_t size; /// Number of unread bytes.
};
/// Returns a source backed by the timezone data embedded into the binary when
/// such data exists for `name`; otherwise returns whatever `fallback` produces.
std::unique_ptr<cctz::ZoneInfoSource> custom_factory(
    const std::string & name,
    const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
{
    const std::string_view blob = getTimeZone(name.data());
    const bool has_embedded = !blob.empty();

    if (has_embedded)
        return std::make_unique<Source>(blob.data(), blob.size());

    return fallback(name);
}
}
ZoneInfoSourceFactory zone_info_source_factory = custom_factory;
}

View File

@ -280,7 +280,11 @@ void BaseDaemon::initialize(Application & self)
}
}
/// This must be done before any usage of DateLUT. In particular, before any logging.
/// `prefer_system_tzdata` and `timezone` parameters must be processed before any usage of DateLUT.
/// In particular, before any logging.
if (config().has("prefer_system_tzdata") && config().getBool("prefer_system_tzdata"))
DateLUT::setPreferSystemTZData();
if (config().has("timezone"))
{
const std::string config_timezone = config().getString("timezone");

View File

@ -0,0 +1,4 @@
<clickhouse>
<prefer_system_tzdata>true</prefer_system_tzdata>
<timezone>UTC</timezone>
</clickhouse>

View File

@ -0,0 +1,31 @@
import pytest
from helpers.cluster import ClickHouseCluster
# One-node cluster; the node is started with the extra server config
# config.d/prefer_system_tz.xml and is kept alive so the test can restart the server.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    "node",
    main_configs=["config.d/prefer_system_tz.xml"],
    stay_alive=True,
)
@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once for the module and yield it to the tests.

    The cluster is shut down afterwards even if start-up or a test fails.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def test_prefer_system_tzdata(start_cluster):
    """Check that the system tzdata wins over the built-in one.

    Africa/Tunis in the container's /usr/share/zoneinfo is replaced with a
    symlink to America/New_York. After a server restart, converting
    2024-05-01 12:12:12 UTC to Africa/Tunis must therefore give 08:12:12,
    i.e. the result can only come from the (tampered) system tzdata.
    """
    # Replace the system definition of Africa/Tunis with New York's.
    node.exec_in_container(
        [
            "bash",
            "-c",
            "rm /usr/share/zoneinfo/Africa/Tunis && ln -s /usr/share/zoneinfo/America/New_York /usr/share/zoneinfo/Africa/Tunis",
        ],
        privileged=True,
    )

    # Restart the server so it does not reuse previously loaded timezone data.
    node.restart_clickhouse()

    query = "SELECT toDateTime(toDateTime('2024-05-01 12:12:12', 'UTC'), 'Africa/Tunis')"
    result = node.exec_in_container(
        [
            "bash",
            "-c",
            f'echo "{query}" | curl -s {node.hostname}:8123/ --data-binary @-',
        ]
    )
    assert result == "2024-05-01 08:12:12\n"

View File

@ -2894,6 +2894,7 @@ tupleToNameValuePairs
turbostat
txt
typename
tzdata
ubuntu
uint
ulid