Merge branch 'master' into analyzer-fix-test_sql_user_defined_functions_on_cluster

This commit is contained in:
Dmitry Novik 2024-02-15 17:45:33 +01:00 committed by GitHub
commit 199ae321a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 332 additions and 34 deletions

View File

@ -327,6 +327,7 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64

View File

@ -228,6 +228,7 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64

View File

@ -4,6 +4,8 @@ sidebar_position: 170
sidebar_label: Strings
---
import VersionBadge from '@theme/badges/VersionBadge';
# Functions for Working with Strings
Functions for [searching](string-search-functions.md) in strings and for [replacing](string-replace-functions.md) in strings are described separately.
@ -783,6 +785,8 @@ SELECT startsWith('Spider-Man', 'Spi');
## startsWithUTF8
<VersionBadge minVersion='23.8' />
Returns whether string `str` starts with `prefix`. The difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` matches `str` and `prefix` by UTF-8 characters.

View File

@ -69,6 +69,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp

View File

@ -14,6 +14,7 @@
#include <Common/assertProcessUserMatchesDataOwner.h>
#include <Common/makeSocketAddress.h>
#include <Server/waitServersToFinish.h>
#include <Server/CloudPlacementInfo.h>
#include <base/getMemoryAmount.h>
#include <base/scope_guard.h>
#include <base/safeExit.h>
@ -352,6 +353,11 @@ try
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
{
PlacementInfo::PlacementInfo::instance().initialize(config());
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 100),
config().getUInt("max_thread_pool_free_size", 1000),

View File

@ -97,6 +97,7 @@
#include <Server/ProtocolServerAdapter.h>
#include <Server/KeeperReadinessHandler.h>
#include <Server/HTTP/HTTPServer.h>
#include <Server/CloudPlacementInfo.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Core/ServerSettings.h>
#include <filesystem>
@ -1960,6 +1961,11 @@ try
load_metadata_tasks);
}
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
{
PlacementInfo::PlacementInfo::instance().initialize(config());
}
/// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only.
load_metadata_tasks.clear();
load_metadata_tasks.shrink_to_fit();

View File

@ -7,6 +7,7 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Coordination/KeeperConstants.h>
#include <Common/logger_useful.h>
#include <Server/CloudPlacementInfo.h>
#include <Coordination/KeeperFeatureFlags.h>
#include <boost/algorithm/string.hpp>
@ -37,26 +38,11 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config,
{
dispatcher = dispatcher_;
if (config.hasProperty("keeper_server.availability_zone"))
const auto keeper_az = PlacementInfo::PlacementInfo::instance().getAvailabilityZone();
if (!keeper_az.empty())
{
auto keeper_az = config.getString("keeper_server.availability_zone.value", "");
const auto auto_detect_for_cloud = config.getBool("keeper_server.availability_zone.enable_auto_detection_on_cloud", false);
if (keeper_az.empty() && auto_detect_for_cloud)
{
try
{
keeper_az = DB::S3::getRunningAvailabilityZone();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (!keeper_az.empty())
{
system_nodes_with_data[keeper_availability_zone_path] = keeper_az;
LOG_INFO(getLogger("KeeperContext"), "Initialize the KeeperContext with availability zone: '{}'", keeper_az);
}
system_nodes_with_data[keeper_availability_zone_path] = keeper_az;
LOG_INFO(getLogger("KeeperContext"), "Initialize the KeeperContext with availability zone: '{}'", keeper_az);
}
updateKeeperMemorySoftLimit(config);

View File

@ -0,0 +1,83 @@
#include <Server/CloudPlacementInfo.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/S3/Credentials.h>
#include <Poco/String.h>
#include <fmt/core.h>
#include <filesystem>
namespace DB
{
namespace PlacementInfo
{
namespace
{
std::string getConfigPath(std::string_view path)
{
return fmt::format("{}.{}", PLACEMENT_CONFIG_PREFIX, path);
}
/// Reads the availability zone from the file named by the `availability_zone_from_file`
/// setting (DEFAULT_AZ_FILE_PATH when the setting is absent).
/// Returns the whitespace-trimmed file contents, or an empty string if the file does not exist.
String loadAvailabilityZoneFromFile(const Poco::Util::AbstractConfiguration & config)
{
    const auto zone_file_path = config.getString(getConfigPath("availability_zone_from_file"), DEFAULT_AZ_FILE_PATH);

    String zone;
    if (std::filesystem::exists(zone_file_path))
    {
        ReadBufferFromFile file_buf(zone_file_path);
        readStringUntilEOF(zone, file_buf);
        Poco::trimInPlace(zone);
    }
    return zone;
}
}
/// Returns the single process-wide PlacementInfo object, created on first call.
PlacementInfo & PlacementInfo::instance()
{
    static PlacementInfo singleton;
    return singleton;
}
/// Loads placement information from the placement section of `config`.
///
/// If `use_imds` is enabled, the availability zone is obtained from S3::getRunningAvailabilityZone().
/// Otherwise it is taken from the `availability_zone` setting and, when that is empty,
/// from the file configured via `availability_zone_from_file`.
///
/// A failure of the automatic detection is logged and leaves the availability zone empty
/// instead of propagating the exception to the caller.
void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config)
{
    use_imds = config.getBool(getConfigPath("use_imds"), false);

    if (use_imds)
    {
        try
        {
            availability_zone = S3::getRunningAvailabilityZone();
        }
        catch (...)
        {
            /// Detection is best-effort: log the reason and continue with an empty zone.
            tryLogCurrentException(__PRETTY_FUNCTION__);
            availability_zone.clear();
        }
    }
    else
    {
        availability_zone = config.getString(getConfigPath("availability_zone"), "");

        if (availability_zone.empty())
            availability_zone = loadAvailabilityZoneFromFile(config);
    }

    /// Warn regardless of which source was used, so an empty result is always visible in the log.
    if (availability_zone.empty())
        LOG_WARNING(log, "Availability zone info not found");

    LOG_DEBUG(log, "Loaded info: availability_zone: {}", availability_zone);
    initialized = true;
}
/// Returns the availability zone stored by initialize().
/// If initialize() has not been called yet, logs a warning and returns an empty string.
std::string PlacementInfo::getAvailabilityZone() const
{
    if (initialized)
        return availability_zone;

    LOG_WARNING(log, "Placement info has not been loaded");
    return "";
}
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <string>
#include <boost/core/noncopyable.hpp>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/Logger.h>
namespace DB
{
namespace PlacementInfo
{
/// Name of the configuration section that holds placement settings; setting keys are built as "<prefix>.<name>".
static constexpr auto PLACEMENT_CONFIG_PREFIX = "placement";
/// File the availability zone is read from when `availability_zone_from_file` is not set in the config.
static constexpr auto DEFAULT_AZ_FILE_PATH = "/run/instance-metadata/node-zone";
/// A singleton providing information on where in cloud server is running.
///
/// Call initialize() once during startup; until then getAvailabilityZone() returns an empty string.
class PlacementInfo : private boost::noncopyable
{
public:
    /// Returns the process-wide instance.
    static PlacementInfo & instance();

    /// Loads the availability zone according to the placement section of `config`.
    void initialize(const Poco::Util::AbstractConfiguration & config);

    /// Returns the loaded availability zone, or an empty string if it is unknown or not loaded yet.
    std::string getAvailabilityZone() const;

private:
    PlacementInfo() = default;

    LoggerPtr log = getLogger("CloudPlacementInfo");

    /// Explicitly initialized: getAvailabilityZone() reads `initialized` before initialize() may have run,
    /// so these flags must never hold an indeterminate value.
    bool initialized = false;
    bool use_imds = false;
    std::string availability_zone;
};
}
}

View File

@ -8,4 +8,3 @@ test_merge_table_over_distributed/test.py::test_global_in
test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed
test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster
test_select_access_rights/test_main.py::test_alias_columns
test_settings_profile/test.py::test_show_profiles

View File

@ -56,7 +56,6 @@ def main():
description,
RELEASE_READY_STATUS,
pr_info,
dump_to_file=True,
)

View File

@ -9,11 +9,13 @@
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
</logger>
<placement>
<use_imds>0</use_imds>
<availability_zone>az-zoo1</availability_zone>
</placement>
<keeper_server>
<tcp_port>2181</tcp_port>
<availability_zone>
<value>az-zoo1</value>
</availability_zone>
<server_id>1</server_id>
<coordination_settings>

View File

@ -9,13 +9,14 @@
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
</logger>
<placement>
<use_imds>0</use_imds>
<availability_zone>az-zoo2</availability_zone>
</placement>
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>2</server_id>
<availability_zone>
<value>az-zoo2</value>
<enable_auto_detection_on_cloud>1</enable_auto_detection_on_cloud>
</availability_zone>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>

View File

@ -0,0 +1,6 @@
<clickhouse>
<placement>
<use_imds>0</use_imds>
<availability_zone>ci-test-1b</availability_zone>
</placement>
</clickhouse>

View File

@ -0,0 +1,6 @@
<clickhouse>
<placement>
<use_imds>0</use_imds>
<availability_zone_from_file>/tmp/node-zone</availability_zone_from_file>
</placement>
</clickhouse>

View File

@ -0,0 +1,8 @@
<clickhouse>
<s3>
<use_environment_credentials>1</use_environment_credentials>
</s3>
<placement>
<use_imds>1</use_imds>
</placement>
</clickhouse>

View File

@ -0,0 +1,5 @@
<clickhouse>
<placement>
<use_imds>0</use_imds>
</placement>
</clickhouse>

View File

@ -0,0 +1,30 @@
import http.server
import sys
class RequestHandler(http.server.BaseHTTPRequestHandler):
    """Mock HTTP handler that serves a fixed availability zone at the metadata path."""

    def get_response(self):
        """Return a ``(body, status_code)`` pair for the requested path."""
        if self.path == "/":
            return "OK", 200

        if self.path == "/latest/meta-data/placement/availability-zone":
            return "ci-test-1a", 200

        # Resource not found. This must be a (body, code) pair like the other
        # branches, because do_HEAD unpacks the result into two values.
        return "", 404

    def do_HEAD(self):
        """Send the status line and headers; return ``(body, status_code)`` for do_GET."""
        response, code = self.get_response()
        self.send_response(code)
        self.send_header("Content-Type", "text/plain")
        self.send_header("Content-Length", str(len(response.encode())))
        self.end_headers()
        return response, code

    def do_GET(self):
        """Send the same headers as HEAD, followed by the response body."""
        response, _ = self.do_HEAD()
        self.wfile.write(response.encode())
# Listen on all interfaces; the port is taken from the first command-line argument.
listen_port = int(sys.argv[1])
httpd = http.server.HTTPServer(("0.0.0.0", listen_port), RequestHandler)
httpd.serve_forever()

View File

@ -0,0 +1,95 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.mock_servers import start_mock_servers
import os
import time
# Host name and port where the mock metadata server is started (see start_metadata_server).
METADATA_SERVER_HOSTNAME = "resolver"
METADATA_SERVER_PORT = 8080
cluster = ClickHouseCluster(__file__)
# Instance configured with configs/imds.xml; the metadata endpoint environment variable
# points at the mock metadata server instead of a real cloud endpoint.
node_imds = cluster.add_instance(
"node_imds",
with_minio=True,
main_configs=["configs/imds.xml"],
env_variables={
"AWS_EC2_METADATA_SERVICE_ENDPOINT": f"http://{METADATA_SERVER_HOSTNAME}:{METADATA_SERVER_PORT}",
},
stay_alive=True,
)
# Instance configured with configs/config_value.xml.
node_config_value = cluster.add_instance(
"node_config_value",
main_configs=["configs/config_value.xml"],
)
# Instance configured with configs/file_value.xml; the test restarts it after writing the zone file.
node_file_value = cluster.add_instance(
"node_file_value",
main_configs=["configs/file_value.xml"],
stay_alive=True,
)
# Instance configured with configs/missing_value.xml.
node_missing_value = cluster.add_instance(
"node_missing_value",
main_configs=["configs/missing_value.xml"],
)
def start_metadata_server():
    """Start simple_server.py from the metadata_servers directory via start_mock_servers."""
    servers_dir = os.path.join(os.path.dirname(__file__), "metadata_servers")
    mock_server = (
        "simple_server.py",
        METADATA_SERVER_HOSTNAME,
        METADATA_SERVER_PORT,
    )
    start_mock_servers(cluster, servers_dir, [mock_server])
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
    """Module-wide fixture: bring up the cluster and the mock metadata server, then tear down."""
    try:
        # The cluster is started before the mock metadata server is launched.
        cluster.start()
        start_metadata_server()

        yield
    finally:
        # Always shut the cluster down, even if start-up or a test failed.
        cluster.shutdown()
def test_placement_info_from_imds():
    """After a restart, node_imds must log the zone served by the mock metadata server."""
    expected_log_line = "CloudPlacementInfo: Loaded info: availability_zone: ci-test-1a"

    node_imds.stop_clickhouse(kill=True)
    node_imds.start_clickhouse()

    node_imds.query("SYSTEM FLUSH LOGS")
    assert node_imds.contains_in_log(expected_log_line)
def test_placement_info_from_config():
    """node_config_value must log the zone given directly in its config."""
    expected_log_line = "CloudPlacementInfo: Loaded info: availability_zone: ci-test-1b"

    node_config_value.query("SYSTEM FLUSH LOGS")
    assert node_config_value.contains_in_log(expected_log_line)
def test_placement_info_from_file():
    """After the zone file is written and the server restarted, the zone from the file must be logged."""
    write_zone_command = ["bash", "-c", "echo ci-test-1c > /tmp/node-zone"]
    expected_log_line = "CloudPlacementInfo: Loaded info: availability_zone: ci-test-1c"

    node_file_value.exec_in_container(write_zone_command)

    node_file_value.stop_clickhouse(kill=True)
    node_file_value.start_clickhouse()

    node_file_value.query("SYSTEM FLUSH LOGS")
    assert node_file_value.contains_in_log(expected_log_line)
def test_placement_info_missing_data():
    """node_missing_value must log the "not found" warning."""
    expected_log_line = "CloudPlacementInfo: Availability zone info not found"

    node_missing_value.query("SYSTEM FLUSH LOGS")
    assert node_missing_value.contains_in_log(expected_log_line)

View File

@ -454,22 +454,41 @@ def test_show_profiles():
assert instance.query("SHOW PROFILES") == "default\nreadonly\nxyz\n"
assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n"
query_possible_response = [
"CREATE SETTINGS PROFILE default\n",
"CREATE SETTINGS PROFILE default SETTINGS allow_experimental_analyzer = true\n",
]
assert (
instance.query("SHOW CREATE SETTINGS PROFILE default")
== "CREATE SETTINGS PROFILE default\n"
in query_possible_response
)
assert (
instance.query("SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default\n"
query_possible_response = [
"CREATE SETTINGS PROFILE default\n"
"CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE xyz\n"
)
"CREATE SETTINGS PROFILE xyz\n",
"CREATE SETTINGS PROFILE default SETTINGS allow_experimental_analyzer = true\n"
"CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE xyz\n",
]
assert instance.query("SHOW CREATE PROFILES") in query_possible_response
expected_access = (
"CREATE SETTINGS PROFILE default\n"
"CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE xyz\n"
)
assert expected_access in instance.query("SHOW ACCESS")
expected_access_analyzer = (
"CREATE SETTINGS PROFILE default SETTINGS allow_experimental_analyzer = true\n"
"CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"
"CREATE SETTINGS PROFILE xyz\n"
)
query_response = instance.query("SHOW ACCESS")
assert (
expected_access in query_response or expected_access_analyzer in query_response
)
def test_set_profile():

View File

@ -991,6 +991,7 @@ VIEWs
Vadim
Valgrind
Vectorized
VersionBadge
VersionInteger
VersionedCollapsingMergeTree
VideoContainer