ClickHouse/programs/local/LocalServer.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1042 lines
39 KiB
C++
Raw Normal View History

#include "LocalServer.h"
#include <sys/resource.h>
#include <Common/logger_useful.h>
#include <Common/formatReadable.h>
#include <base/getMemoryAmount.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
#include <Poco/NullChannel.h>
2021-10-30 12:28:50 +00:00
#include <Poco/SimpleFileChannel.h>
2023-12-30 03:35:00 +00:00
#include <Databases/registerDatabases.h>
#include <Databases/DatabaseFilesystem.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabasesOverlay.h>
#include <Storages/System/attachSystemTables.h>
#include <Storages/System/attachInformationSchemaTables.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/registerInterpreters.h>
2021-10-02 07:13:14 +00:00
#include <base/getFQDNOrHostName.h>
2022-04-27 15:05:45 +00:00
#include <Common/scope_guard_safe.h>
2021-08-12 15:16:55 +00:00
#include <Interpreters/Session.h>
2022-03-10 04:35:01 +00:00
#include <Access/AccessControl.h>
#include <Common/PoolId.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
2019-01-28 11:18:00 +00:00
#include <Common/ThreadStatus.h>
#include <Common/TLDListsHolder.h>
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
2023-04-13 16:53:16 +00:00
#include <Common/ThreadPool.h>
2022-04-27 15:05:45 +00:00
#include <Loggers/Loggers.h>
#include <Loggers/OwnFormattingChannel.h>
#include <Loggers/OwnPatternFormatter.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFileDescriptor.h>
2021-09-04 18:19:01 +00:00
#include <IO/UseSSL.h>
#include <IO/SharedThreadPools.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTInsertQuery.h>
2022-04-27 15:05:45 +00:00
#include <Common/ErrorHandlers.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
2017-04-21 17:47:27 +00:00
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/registerStorages.h>
#include <Dictionaries/registerDictionaries.h>
2019-11-27 09:39:44 +00:00
#include <Disks/registerDisks.h>
2020-10-29 03:39:43 +00:00
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
#include <boost/algorithm/string/replace.hpp>
#include <boost/program_options/options_description.hpp>
2021-10-02 07:13:14 +00:00
#include <base/argsToConfig.h>
#include <filesystem>
#include "config.h"
2021-12-06 18:27:06 +00:00
#if defined(FUZZING_MODE)
#include <Functions/getFuzzerData.h>
#endif
2023-01-30 19:00:48 +00:00
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
2021-05-16 22:06:09 +00:00
namespace fs = std::filesystem;
2021-07-11 23:17:14 +00:00
namespace DB
{
/// Error codes thrown from this file. They are only declared here (`extern`);
/// the definitions are not part of this translation unit's visible code.
namespace ErrorCodes
{
2020-06-24 22:07:01 +00:00
extern const int BAD_ARGUMENTS;
extern const int CANNOT_LOAD_CONFIG;
extern const int FILE_ALREADY_EXISTS;
}
/// Installs the setting overrides used by clickhouse-local into `context`:
/// `allow_introspection_functions` is switched on and `storage_file_read_method`
/// is set to mmap. All other settings of the context are kept as they are.
void applySettingsOverridesForLocal(ContextMutablePtr context)
{
    /// Take a copy of the current settings, patch it and install it back as a whole.
    Settings overridden = context->getSettings();

    overridden.storage_file_read_method = LocalFSReadMethod::mmap;
    overridden.allow_introspection_functions = true;

    context->setSettings(overridden);
}
2021-10-04 15:23:08 +00:00
void LocalServer::processError(const String &) const
2021-08-21 10:55:54 +00:00
{
2021-09-05 09:27:27 +00:00
if (ignore_error)
2021-08-21 15:29:28 +00:00
return;
2021-09-11 11:34:22 +00:00
if (is_interactive)
2021-08-21 10:55:54 +00:00
{
2021-10-04 15:23:08 +00:00
String message;
2021-09-11 11:34:22 +00:00
if (server_exception)
{
2021-10-04 15:23:08 +00:00
message = getExceptionMessage(*server_exception, print_stack_trace, true);
2021-09-11 11:34:22 +00:00
}
2021-10-04 15:23:08 +00:00
else if (client_exception)
2021-09-11 11:34:22 +00:00
{
2021-10-04 15:23:08 +00:00
message = client_exception->message();
2021-09-11 11:34:22 +00:00
}
2021-10-04 15:23:08 +00:00
fmt::print(stderr, "Received exception:\n{}\n", message);
fmt::print(stderr, "\n");
2021-08-21 10:55:54 +00:00
}
2021-09-11 11:34:22 +00:00
else
2021-08-21 10:55:54 +00:00
{
2021-09-11 11:34:22 +00:00
if (server_exception)
server_exception->rethrow();
if (client_exception)
client_exception->rethrow();
2021-08-21 10:55:54 +00:00
}
}
/// Application initialization hook: runs the base Poco initialization, merges the optional
/// config file into the application configuration and then sizes the thread pools from it.
void LocalServer::initialize(Poco::Util::Application & self)
{
    Poco::Util::Application::initialize(self);

    /// A config file is loaded when `config-file` is set or ./config.xml exists.
    const bool has_config_file = config().has("config-file") || fs::exists("config.xml");
    if (has_config_file)
    {
        const auto config_file = config().getString("config-file", "config.xml");

        ConfigProcessor processor(config_file, false, true);
        processor.setConfigPath(fs::path(config_file).parent_path());

        auto loaded = processor.loadConfig();
        config().add(loaded.configuration.duplicate(), PRIO_DEFAULT, false);
    }

    /// Global thread pool.
    const auto global_pool_size = config().getUInt("max_thread_pool_size", 10000);
    const auto global_pool_free_size = config().getUInt("max_thread_pool_free_size", 1000);
    const auto global_pool_queue_size = config().getUInt("thread_pool_queue_size", 10000);
    GlobalThreadPool::initialize(global_pool_size, global_pool_free_size, global_pool_queue_size);

#if USE_AZURE_BLOB_STORAGE
    /// See the explanation near the same line in Server.cpp
    GlobalThreadPool::instance().addOnDestroyCallback([]
    {
        Azure::Storage::_internal::XmlGlobalDeinitialize();
    });
#endif

    /// IO thread pool.
    const auto io_pool_size = config().getUInt("max_io_thread_pool_size", 100);
    const auto io_pool_free_size = config().getUInt("max_io_thread_pool_free_size", 0);
    const auto io_pool_queue_size = config().getUInt("io_thread_pool_queue_size", 10000);
    getIOThreadPool().initialize(io_pool_size, io_pool_free_size, io_pool_queue_size);

    /// Pool for loading active parts. No free threads are kept: we don't need any threads
    /// once all the parts are loaded. The queue size equals the pool size.
    const size_t active_loading_pool_size = config().getUInt("max_active_parts_loading_thread_pool_size", 64);
    getActivePartsLoadingThreadPool().initialize(active_loading_pool_size, 0, active_loading_pool_size);

    /// Pool for loading outdated parts, configured the same way. Its "turbo" thread limit
    /// is set to the size of the active parts loading pool.
    const size_t outdated_loading_pool_size = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
    getOutdatedPartsLoadingThreadPool().initialize(outdated_loading_pool_size, 0, outdated_loading_pool_size);
    getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_loading_pool_size);

    /// Pool for removing parts. No free threads are kept: we don't need any threads
    /// once all the parts are deleted.
    const size_t cleaning_pool_size = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
    getPartsCleaningThreadPool().initialize(cleaning_pool_size, 0, cleaning_pool_size);
}
/// Returns the database registered in the catalog under `database_name`. When there is none,
/// an in-memory database with that name is created, attached to the catalog and returned.
static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const String & database_name)
{
    if (DatabasePtr existing = DatabaseCatalog::instance().tryGetDatabase(database_name))
        return existing;

    /// TODO: add attachTableDelayed into DatabaseMemory to speedup loading
    DatabasePtr created = std::make_shared<DatabaseMemory>(database_name, context);
    DatabaseCatalog::instance().attachDatabase(database_name, created);
    return created;
}
/// Builds the database used by clickhouse-local under the name `name_`: a DatabasesOverlay
/// with two underlying databases registered in this order - a DatabaseFilesystem
/// (constructed with an empty path argument) and a DatabaseMemory.
static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_)
{
    auto overlay = std::make_shared<DatabasesOverlay>(name_, context_);

    auto filesystem_layer = std::make_shared<DatabaseFilesystem>(name_, "", context_);
    overlay->registerNextDatabase(std::move(filesystem_layer));

    auto memory_layer = std::make_shared<DatabaseMemory>(name_, context_);
    overlay->registerNextDatabase(std::move(memory_layer));

    return overlay;
}
/// If path is specified and not empty, will try to setup server environment and load existing metadata
void LocalServer::tryInitPath()
{
std::string path;
if (config().has("path"))
{
// User-supplied path.
path = config().getString("path");
Poco::trimInPlace(path);
if (path.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
2020-08-08 01:21:04 +00:00
"Cannot work with empty storage path that is explicitly specified"
2020-06-24 22:07:01 +00:00
" by the --path option. Please check the program options and"
" correct the --path.");
}
}
else
{
2020-10-26 07:59:15 +00:00
// The path is not provided explicitly - use a unique path in the system temporary directory
// (or in the current dir if temporary don't exist)
2024-01-23 17:04:50 +00:00
LoggerRawPtr log = &logger();
std::filesystem::path parent_folder;
std::filesystem::path default_path;
try
{
// try to guess a tmp folder name, and check if it's a directory (throw exception otherwise)
parent_folder = std::filesystem::temp_directory_path();
}
catch (const fs::filesystem_error & e)
{
// The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
parent_folder = std::filesystem::current_path();
std::filesystem::is_directory(parent_folder); // that will throw an exception if it's not a directory
LOG_DEBUG(log, "Will create working directory inside current directory: {}", parent_folder.string());
}
2020-10-26 07:59:15 +00:00
/// we can have another clickhouse-local running simultaneously, even with the same PID (for ex. - several dockers mounting the same folder)
/// or it can be some leftovers from other clickhouse-local runs
/// as we can't accurately distinguish those situations we don't touch any existent folders
/// we just try to pick some free name for our working folder
default_path = parent_folder / fmt::format("clickhouse-local-{}-{}-{}", getpid(), time(nullptr), randomSeed());
if (exists(default_path))
2024-02-13 23:21:13 +00:00
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to create working directory: {} already exists.", default_path.string());
create_directory(default_path);
temporary_directory_to_delete = default_path;
path = default_path.string();
LOG_DEBUG(log, "Working directory created: {}", path);
}
if (path.back() != '/')
path += '/';
2022-03-29 10:02:21 +00:00
fs::create_directories(fs::path(path) / "user_defined/");
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");
fs::create_directories(fs::path(path) / "metadata_dropped/");
2020-10-22 07:37:03 +00:00
global_context->setPath(path);
2022-12-06 10:04:15 +00:00
global_context->setTemporaryStoragePath(path + "tmp/", 0);
2020-10-22 07:37:03 +00:00
global_context->setFlagsPath(path + "flags");
2020-10-22 07:37:03 +00:00
global_context->setUserFilesPath(""); // user's files are everywhere
std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
global_context->setUserScriptsPath(user_scripts_path);
fs::create_directories(user_scripts_path);
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
}
2021-07-11 23:17:14 +00:00
/// Releases what the application holds, in this order: the connection, the suggestions object,
/// the global context (shut down, then released), `status`, and finally the temporary working
/// directory if one was recorded in `temporary_directory_to_delete`.
/// Does not propagate exceptions: anything thrown is passed to tryLogCurrentException.
void LocalServer::cleanup()
2020-03-18 00:57:00 +00:00
{
2021-10-16 11:28:57 +00:00
try
2020-03-18 00:57:00 +00:00
{
2021-10-16 11:28:57 +00:00
connection.reset();
2021-09-04 18:19:01 +00:00
/// Suggestions are loaded async in a separate thread and it can use global context.
/// We should reset it before resetting global_context.
if (suggest)
suggest.reset();
2024-01-17 13:54:18 +00:00
2021-10-16 11:28:57 +00:00
/// Shut the global context down before dropping our reference to it.
if (global_context)
{
global_context->shutdown();
global_context.reset();
}
2021-09-04 18:19:01 +00:00
2022-09-05 21:10:03 +00:00
/// thread status should be destructed before shared context because it relies on process list.
/// NOTE(review): the statement below resets `status` (emplaced in processConfig() with the path of the
/// "status" file), while `thread_status` is a separate member emplaced in main() - confirm which one is meant.
2021-10-16 11:28:57 +00:00
status.reset();
2021-09-04 18:19:01 +00:00
2021-10-16 11:28:57 +00:00
// Delete the temporary directory if needed.
if (temporary_directory_to_delete)
{
/// Copy the path and clear the member before removing, so the member is already empty
/// when remove_all runs (presumably so that a repeated cleanup() does not retry the removal).
const auto dir = *temporary_directory_to_delete;
temporary_directory_to_delete.reset();
LOG_DEBUG(&logger(), "Removing temporary directory: {}", dir.string());
remove_all(dir);
}
}
catch (...)
2021-07-11 23:17:14 +00:00
{
2021-10-16 11:28:57 +00:00
/// Cleanup is best-effort: log and continue.
tryLogCurrentException(__PRETTY_FUNCTION__);
2020-03-18 00:57:00 +00:00
}
}
2021-07-11 23:17:14 +00:00
2022-02-10 13:57:32 +00:00
/// Returns true when standard input refers to a regular file (e.g. `< data.tsv`).
/// Returns false for anything else (pipe, terminal, ...) and also when fstat() fails.
static bool checkIfStdinIsRegularFile()
{
    struct stat stdin_stat;

    if (fstat(STDIN_FILENO, &stdin_stat) != 0)
        return false;

    return S_ISREG(stdin_stat.st_mode);
}
2021-07-11 23:17:14 +00:00
std::string LocalServer::getInitialCreateTableQuery()
{
2023-09-04 14:48:38 +00:00
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
2022-01-25 07:55:09 +00:00
return {};
2021-07-11 23:17:14 +00:00
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure", "auto");
2021-07-11 23:17:14 +00:00
String table_file;
2024-01-23 09:36:01 +00:00
std::optional<String> format_from_file_name;
2021-07-11 23:17:14 +00:00
if (!config().has("table-file") || config().getString("table-file") == "-")
{
/// Use Unix tools stdin naming convention
table_file = "stdin";
2024-01-23 09:36:01 +00:00
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDIN_FILENO);
}
2021-07-11 23:17:14 +00:00
else
{
/// Use regular file
auto file_name = config().getString("table-file");
table_file = quoteString(file_name);
2024-01-23 09:36:01 +00:00
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name);
2021-07-11 23:17:14 +00:00
}
2022-02-06 04:17:03 +00:00
auto data_format = backQuoteIfNeed(
2024-01-23 09:36:01 +00:00
config().getString("table-data-format", config().getString("format", format_from_file_name ? *format_from_file_name : "TSV")));
2022-02-06 04:17:03 +00:00
if (table_structure == "auto")
table_structure = "";
else
table_structure = "(" + table_structure + ")";
return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
2021-07-11 23:17:14 +00:00
table_name, table_structure, data_format, table_file);
}
/// Parses the NUL-terminated XML text `xml_data` into a Poco XML configuration object.
static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data)
{
    std::stringstream xml_stream{std::string{xml_data}}; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    Poco::XML::InputSource xml_source{xml_stream};

    ConfigurationPtr parsed(new Poco::Util::XMLConfiguration{&xml_source});
    return parsed;
}
void LocalServer::setupUsers()
{
static const char * minimal_default_user_xml =
"<clickhouse>"
2021-07-11 23:17:14 +00:00
" <profiles>"
" <default></default>"
" </profiles>"
" <users>"
" <default>"
" <password></password>"
" <networks>"
" <ip>::/0</ip>"
" </networks>"
" <profile>default</profile>"
" <quota>default</quota>"
" </default>"
" </users>"
" <quotas>"
" <default></default>"
" </quotas>"
"</clickhouse>";
2021-07-11 23:17:14 +00:00
ConfigurationPtr users_config;
2022-03-10 04:35:01 +00:00
auto & access_control = global_context->getAccessControl();
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
2022-07-22 02:46:36 +00:00
if (config().has("config-file") || fs::exists("config.xml"))
{
2022-08-01 14:03:36 +00:00
String config_path = config().getString("config-file", "");
2022-07-20 04:38:36 +00:00
bool has_user_directories = config().has("user_directories");
2022-08-01 14:03:36 +00:00
const auto config_dir = fs::path{config_path}.remove_filename().string();
String users_config_path = config().getString("users_config", "");
if (users_config_path.empty() && has_user_directories)
{
users_config_path = config().getString("user_directories.users_xml.path");
2022-08-01 14:05:50 +00:00
if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
users_config_path = fs::path(config_dir) / users_config_path;
2022-08-01 14:03:36 +00:00
}
if (users_config_path.empty())
2022-07-22 02:46:36 +00:00
users_config = getConfigurationFromXMLString(minimal_default_user_xml);
else
2022-07-20 04:38:36 +00:00
{
2022-07-22 02:46:36 +00:00
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
users_config = loaded_config.configuration;
2022-07-20 04:38:36 +00:00
}
2021-07-11 23:17:14 +00:00
}
else
users_config = getConfigurationFromXMLString(minimal_default_user_xml);
if (users_config)
global_context->setUsersConfig(users_config);
else
throw Exception(ErrorCodes::CANNOT_LOAD_CONFIG, "Can't load config for users");
2021-07-11 23:17:14 +00:00
}
2021-08-23 08:50:12 +00:00
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config(), "localhost");
2022-03-05 06:22:56 +00:00
connection = LocalConnection::createConnection(
connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name);
2021-08-23 08:50:12 +00:00
}
2021-09-04 18:19:01 +00:00
/// Entry point of the application (arguments are unused). The whole body is a
/// function-try-block: exceptions escaping it are handled by the two handlers at the bottom.
///
/// Returns Application::EXIT_OK on success. On DB::Exception returns its code, or -1 when the
/// code is 0. On any other exception returns getCurrentExceptionCode().
int LocalServer::main(const std::vector<std::string> & /*args*/)
2021-07-31 12:34:29 +00:00
try
{
2021-09-04 18:19:01 +00:00
UseSSL use_ssl;
2022-09-05 21:10:03 +00:00
thread_status.emplace();
2022-07-29 23:50:45 +00:00
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
2021-10-02 08:10:34 +00:00
setupSignalHandler();
2021-07-31 12:34:29 +00:00
2021-12-06 18:27:06 +00:00
/// Fixed notation with 3 digits after the decimal point for floating point output on both streams.
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Try to increase limit on number of open files.
{
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim))
throw Poco::Exception("Cannot getrlimit");
/// Only when the soft limit is below the hard limit: set the soft limit to `max_open_files`
/// (the hard limit by default). A failure is reported on stderr and is not fatal.
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
}
}
2021-12-06 18:27:06 +00:00
/// In fuzzing mode everything from here up to connect() is guarded by `first_time`: the block
/// opened below is closed inside the `#ifdef FUZZING_MODE` section that follows connect().
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)
{
2021-09-04 18:19:01 +00:00
2023-09-04 14:48:38 +00:00
/// A query is mandatory in fuzzing mode; otherwise print a hint and exit with code 1.
if (queries_files.empty() && queries.empty())
2021-12-06 18:27:06 +00:00
{
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." << "\033[0m" << std::endl;
exit(1);
}
is_interactive = false;
#else
2021-10-29 12:04:08 +00:00
/// Interactive when stdin is a terminal and either the `interactive` option is given or there is
/// nothing to run non-interactively (no queries, no queries files, no table structure, no table file).
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
2023-09-04 14:48:38 +00:00
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
2021-12-06 18:27:06 +00:00
#endif
2021-10-01 13:47:39 +00:00
if (!is_interactive)
2021-09-29 19:17:26 +00:00
{
/// We will terminate process on error
static KillingErrorHandler error_handler;
Poco::ErrorHandler::set(&error_handler);
}
2021-07-31 12:34:29 +00:00
registerInterpreters();
2021-07-31 12:34:29 +00:00
/// Don't initialize DateLUT
2017-04-21 17:47:27 +00:00
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
2023-12-30 03:35:00 +00:00
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
2020-10-29 03:39:43 +00:00
registerFormats();
2017-04-21 17:47:27 +00:00
2021-07-31 12:34:29 +00:00
/// processConfig() creates `global_context`, which the calls below use.
processConfig();
adjustSettings();
2023-11-23 17:13:12 +00:00
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
2022-11-12 02:55:26 +00:00
2021-08-19 11:07:47 +00:00
applyCmdSettings(global_context);
2021-08-17 19:59:51 +00:00
/// try to load user defined executable functions, throw on error and die
try
{
global_context->loadOrReloadUserDefinedExecutableFunctions(config());
}
catch (...)
{
/// Log with context, then let the outer handlers turn the exception into an exit code.
tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions.");
throw;
}
2021-07-31 12:34:29 +00:00
if (is_interactive)
{
2021-09-04 18:19:01 +00:00
clearTerminal();
showClientVersion();
std::cerr << std::endl;
2021-11-14 07:19:59 +00:00
}
2021-08-23 08:50:12 +00:00
connect();
2021-09-04 18:19:01 +00:00
#ifdef FUZZING_MODE
/// Closes the `if (first_time)` block opened above.
first_time = false;
2021-12-06 18:27:06 +00:00
}
#endif
/// Create the implicit input table, if the options ask for one.
String initial_query = getInitialCreateTableQuery();
if (!initial_query.empty())
processQueryText(initial_query);
2021-10-29 12:04:08 +00:00
/// With `delayed_interactive` the non-interactive part runs first and the interactive session follows.
if (is_interactive && !delayed_interactive)
2021-07-31 12:34:29 +00:00
{
runInteractive();
2021-07-31 12:34:29 +00:00
}
else
{
runNonInteractive();
2021-10-29 12:04:08 +00:00
if (delayed_interactive)
runInteractive();
2021-07-31 12:34:29 +00:00
}
/// cleanup() is skipped on the success path in fuzzing mode.
#ifndef FUZZING_MODE
2021-07-31 12:34:29 +00:00
cleanup();
#endif
2021-07-31 12:34:29 +00:00
return Application::EXIT_OK;
}
2021-10-16 11:28:57 +00:00
catch (const DB::Exception & e)
2021-07-31 12:34:29 +00:00
{
2021-10-16 11:28:57 +00:00
cleanup();
2021-07-23 20:54:49 +00:00
2021-10-25 08:00:55 +00:00
/// The stack trace is printed only when the `stacktrace` option is set.
bool need_print_stack_trace = config().getBool("stacktrace", false);
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
2021-10-16 11:28:57 +00:00
/// Never report success for a failure: a zero code is mapped to -1.
return e.code() ? e.code() : -1;
}
catch (...)
{
cleanup();
2021-07-30 07:46:22 +00:00
2021-10-16 11:28:57 +00:00
std::cerr << getCurrentExceptionMessage(false) << std::endl;
return getCurrentExceptionCode();
2021-07-23 20:54:49 +00:00
}
2022-04-05 13:38:44 +00:00
void LocalServer::updateLoggerLevel(const String & logs_level)
2022-04-05 12:46:18 +00:00
{
config().setString("logger.level", logs_level);
updateLevels(config(), logger());
}
2021-07-23 20:54:49 +00:00
void LocalServer::processConfig()
{
2023-09-04 14:48:38 +00:00
if (!queries.empty() && config().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
2023-09-04 14:48:38 +00:00
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (!is_interactive || delayed_interactive)
2021-07-23 20:54:49 +00:00
{
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
2021-07-23 20:54:49 +00:00
ignore_error = config().getBool("ignore-error", false);
}
2021-10-16 11:28:57 +00:00
print_stack_trace = config().getBool("stacktrace", false);
const std::string clickhouse_dialect{"clickhouse"};
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
2021-07-23 20:54:49 +00:00
2021-10-30 12:28:50 +00:00
auto logging = (config().has("logger.console")
|| config().has("logger.level")
|| config().has("log-level")
2022-03-29 11:33:17 +00:00
|| config().has("send_logs_level")
2021-10-30 12:28:50 +00:00
|| config().has("logger.log"));
2022-04-05 12:46:18 +00:00
auto level = config().getString("log-level", "trace");
2021-10-30 12:28:50 +00:00
2022-03-29 11:33:17 +00:00
if (config().has("server_logs_file"))
2021-10-30 12:28:50 +00:00
{
2022-04-05 12:46:18 +00:00
auto poco_logs_level = Poco::Logger::parseLevel(level);
Poco::Logger::root().setLevel(poco_logs_level);
Poco::AutoPtr<OwnPatternFormatter> pf = new OwnPatternFormatter;
Poco::AutoPtr<OwnFormattingChannel> log = new OwnFormattingChannel(pf, new Poco::SimpleFileChannel(server_logs_file));
Poco::Logger::root().setChannel(log);
2021-10-30 12:28:50 +00:00
}
2023-11-19 15:09:52 +00:00
else
2021-10-30 12:28:50 +00:00
{
config().setString("logger", "logger");
2023-11-19 15:09:52 +00:00
auto log_level_default = logging ? level : "fatal";
2022-04-05 12:46:18 +00:00
config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
2021-10-30 12:28:50 +00:00
buildLoggers(config(), logger(), "clickhouse-local");
}
2021-07-23 20:54:49 +00:00
shared_context = Context::createShared();
global_context = Context::createGlobal(shared_context.get());
global_context->makeGlobalContext();
global_context->setApplicationType(Context::ApplicationType::LOCAL);
tryInitPath();
2024-01-23 17:04:50 +00:00
LoggerRawPtr log = &logger();
2021-07-23 20:54:49 +00:00
/// Maybe useless
if (config().has("macros"))
2020-10-22 07:37:03 +00:00
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
2021-08-20 08:38:50 +00:00
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
2021-08-18 14:39:04 +00:00
insert_format = "Values";
2021-08-18 14:39:04 +00:00
/// Setting value from cmd arg overrides one from config
if (global_context->getSettingsRef().max_insert_block_size.changed)
{
2021-08-18 14:39:04 +00:00
insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
}
2021-08-18 14:39:04 +00:00
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
2021-08-18 14:39:04 +00:00
/// Sets external authenticators config (LDAP, Kerberos).
2020-10-22 07:37:03 +00:00
global_context->setExternalAuthenticatorsConfig(config());
setupUsers();
/// Limit on total number of concurrently executing queries.
2020-03-28 03:02:26 +00:00
/// There is no need for concurrent queries, override max_concurrent_queries.
2020-10-22 07:37:03 +00:00
global_context->getProcessList().setMaxSize(0);
const size_t physical_server_memory = getMemoryAmount();
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setMMappedFileCache(mmap_cache_size);
/// Initialize a dummy query cache.
global_context->setQueryCache(0, 0, 0, 0);
2023-06-29 18:55:09 +00:00
2022-10-19 10:27:00 +00:00
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
2022-10-19 10:27:00 +00:00
#endif
/// NOTE: it is important to apply any overrides before
/// setDefaultProfiles() calls since it will copy current context (i.e.
/// there is separate context for Buffer tables).
applySettingsOverridesForLocal(global_context);
applyCmdOptions(global_context);
/// Load global settings from default_profile and system_profile.
2020-10-22 07:37:03 +00:00
global_context->setDefaultProfiles(config());
2021-08-26 11:01:15 +00:00
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
std::string default_database = config().getString("default_database", "default");
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
2020-10-22 07:37:03 +00:00
global_context->setCurrentDatabase(default_database);
2021-01-16 15:08:21 +00:00
if (config().has("path"))
{
2021-01-16 15:08:21 +00:00
String path = global_context->getPath();
/// Lock path directory before read
2021-07-23 20:54:49 +00:00
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
2020-08-13 19:41:06 +00:00
LOG_DEBUG(log, "Loading metadata from {}", path);
2023-05-12 19:49:47 +00:00
auto startup_system_tasks = loadMetadataSystem(global_context);
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
if (!config().has("only-system-tables"))
{
2023-02-17 16:57:49 +00:00
DatabaseCatalog::instance().createBackgroundTasks();
2023-05-01 11:56:00 +00:00
waitLoad(loadMetadata(global_context));
DatabaseCatalog::instance().startupBackgroundTasks();
}
2021-07-23 20:54:49 +00:00
/// For ClickHouse local if path is not set the loader will be disabled.
global_context->getUserDefinedSQLObjectsStorage().loadObjects();
2020-05-23 22:24:01 +00:00
LOG_DEBUG(log, "Loaded metadata.");
}
else if (!config().has("no-system-tables"))
{
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}
2021-07-23 20:54:49 +00:00
server_display_name = config().getString("display_name", getFQDNOrHostName());
2021-07-28 12:56:11 +00:00
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
std::map<String, String> prompt_substitutions{{"display_name", server_display_name}};
for (const auto & [key, value] : prompt_substitutions)
boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
global_context->setQueryParameters(query_parameters);
}
2021-12-06 18:27:06 +00:00
/// Returns the leading part of the help text: the usage line followed by a short
/// description of clickhouse-local and of the two ways to define the initial table.
[[ maybe_unused ]] static std::string getHelpHeader()
{
    std::string header;

    /// Usage synopsis.
    header += "usage: clickhouse-local [initial table definition] [--query <query>]\n";

    /// What the tool is for.
    header += "clickhouse-local allows to execute SQL queries on your data files via single command line call.";
    header += " To do so, initially you need to define your data source and its format.";
    header += " After you can execute your SQL queries in usual manner.\n";

    /// How to define the initial table.
    header += "There are two ways to define initial table keeping your data.";
    header += " Either just in first query like this:\n";
    header += " CREATE TABLE <table> (<structure>) ENGINE = File(<input-format>, <file>);\n";
    header += "Either through corresponding command line parameters --table --structure --input-format and --file.";

    return header;
}
2021-07-11 23:17:14 +00:00
2021-12-06 18:27:06 +00:00
/// Returns the trailing part of the help text: a worked shell example that pipes
/// `ps aux` output into clickhouse-local and aggregates memory usage per user.
[[ maybe_unused ]] static std::string getHelpFooter()
{
    /// The example is kept as separate fragments so that each source line stays short;
    /// they are concatenated in order without any separator.
    static constexpr const char * fragments[] =
    {
        "Example printing memory used by each Unix user:\n",
        "ps aux | tail -n +2 | awk '{ printf(\"%s\\t%s\\n\", $1, $4) }' | ",
        "clickhouse-local -S \"user String, mem Float64\" -q",
        " \"SELECT user, round(sum(mem), 2) as mem_total FROM table GROUP BY user ORDER",
        " BY mem_total DESC FORMAT PrettyCompact\"",
    };

    std::string footer;
    for (const char * fragment : fragments)
        footer += fragment;
    return footer;
}
2021-07-11 23:17:14 +00:00
2021-12-06 18:27:06 +00:00
void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description)
{
2021-12-06 18:27:06 +00:00
#if defined(FUZZING_MODE)
std::cout <<
2021-12-06 21:34:52 +00:00
"usage: clickhouse <clickhouse-local arguments> -- <libfuzzer arguments>\n"
"Note: It is important not to use only one letter keys with single dash for \n"
"for clickhouse-local arguments. It may work incorrectly.\n"
2021-12-06 18:27:06 +00:00
"ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n"
"You have to provide a query which contains getFuzzerData function.\n"
"This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n"
"Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n";
#else
2021-07-11 11:36:27 +00:00
std::cout << getHelpHeader() << "\n";
std::cout << options_description.main_description.value() << "\n";
std::cout << getHelpFooter() << "\n";
std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n";
std::cout << "\nSee also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/\n";
2021-12-06 18:27:06 +00:00
#endif
2021-07-11 11:36:27 +00:00
}
/// Registers the command line options specific to clickhouse-local in the main
/// options group of `options_description`.
void LocalServer::addOptions(OptionsDescription & options_description)
{
    options_description.main_description->add_options()
        /// Description of the initial table.
        ("table,N", po::value<std::string>(), "name of the initial table")

        /// If structure argument is omitted then initial query is not generated
        ("structure,S", po::value<std::string>(), "structure of the initial table (list of column and type names)")
        ("file,F", po::value<std::string>(), "path to file with data of the initial table (stdin if not specified)")

        /// Data formats.
        ("input-format", po::value<std::string>(), "input format of the initial table data")
        ("output-format", po::value<std::string>(), "default output format")

        /// Logging.
        ("logger.console", po::value<bool>()->implicit_value(true), "Log to console")
        ("logger.log", po::value<std::string>(), "Log file name")
        ("logger.level", po::value<std::string>(), "Log level")

        /// Storage and system tables.
        ("no-system-tables", "do not attach system tables (better startup time)")
        ("path", po::value<std::string>(), "Storage path")
        ("only-system-tables", "attach only system tables from specified path")
        ("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
        ;
}
2021-07-11 23:17:14 +00:00
/// Applies to `context` the settings changes collected in `cmd_settings`.
void LocalServer::applyCmdSettings(ContextMutablePtr context)
{
    const auto & changed_settings = cmd_settings.changes();
    context->applySettingsChanges(changed_settings);
}
2021-07-11 23:17:14 +00:00
/// Applies command line driven options to `context`: first the default output format,
/// then the settings changes (see applyCmdSettings).
void LocalServer::applyCmdOptions(ContextMutablePtr context)
{
    /// The default format is resolved in this order of precedence:
    ///   1. config key "output-format";
    ///   2. config key "format";
    ///   3. "PrettyCompact" in interactive mode, "TSV" otherwise.
    const char * mode_default_format = is_interactive ? "PrettyCompact" : "TSV";
    const auto format_fallback = config().getString("format", mode_default_format);
    context->setDefaultFormat(config().getString("output-format", format_fallback));

    applyCmdSettings(context);
}
2021-07-11 23:17:14 +00:00
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
2021-07-11 11:36:27 +00:00
{
if (options.count("table"))
config().setString("table-name", options["table"].as<std::string>());
if (options.count("file"))
config().setString("table-file", options["file"].as<std::string>());
if (options.count("structure"))
config().setString("table-structure", options["structure"].as<std::string>());
2021-08-20 21:19:06 +00:00
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
2021-08-20 21:19:06 +00:00
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
2020-06-28 23:40:43 +00:00
if (options.count("logger.console"))
config().setBool("logger.console", options["logger.console"].as<bool>());
if (options.count("logger.log"))
config().setString("logger.log", options["logger.log"].as<std::string>());
if (options.count("logger.level"))
config().setString("logger.level", options["logger.level"].as<std::string>());
2022-03-29 11:33:17 +00:00
if (options.count("send_logs_level"))
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
}
2022-03-01 09:22:12 +00:00
/// Pre-processes the raw command line before option parsing.
///
/// - `--param_<name>=<value>` and `--param_<name> <value>` are stored in `query_parameters`
///   and are not forwarded to `common_arguments`.
/// - `--multiquery <SQL>` (where the next argument does not start with '-') is passed,
///   together with the SQL text, to addMultiquery().
/// - Every other argument is appended to `common_arguments` unchanged.
///
/// Throws Exception(BAD_ARGUMENTS) if a parameter has an empty name or no value.
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
{
    static constexpr std::string_view param_prefix = "--param_";

    /// argv[0] is skipped.
    for (int arg_num = 1; arg_num < argc; ++arg_num)
    {
        std::string_view arg = argv[arg_num];

        /// Parameter arg after underline.
        if (arg.starts_with(param_prefix))
        {
            auto param_continuation = arg.substr(param_prefix.size());
            auto equal_pos = param_continuation.find_first_of('=');

            if (equal_pos == std::string_view::npos)
            {
                /// param_name value
                /// Reject an empty name here as well, consistently with the `name=value` form below.
                if (param_continuation.empty())
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty");

                ++arg_num;
                if (arg_num >= argc)
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value");
                arg = argv[arg_num];
                query_parameters.emplace(String(param_continuation), String(arg));
            }
            else
            {
                if (equal_pos == 0)
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty");

                /// param_name=value
                query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
            }
        }
        else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
        {
            /// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
            ++arg_num;
            arg = argv[arg_num];
            addMultiquery(arg, common_arguments);
        }
        else
            common_arguments.emplace_back(arg);
    }
}
2018-05-11 14:35:32 +00:00
}
2019-12-15 06:34:43 +00:00
/// Suppress the "unused function" and "missing declarations" warnings for the rest of
/// this translation unit.
/// NOTE(review): presumably needed because the entry points below are defined here without
/// a prior declaration in an included header — confirm before removing.
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
2017-12-02 02:47:12 +00:00
int mainEntryClickHouseLocal(int argc, char ** argv)
{
try
{
2021-12-25 19:00:28 +00:00
DB::LocalServer app;
2017-12-02 02:47:12 +00:00
app.init(argc, argv);
return app.run();
}
catch (const DB::Exception & e)
{
std::cerr << DB::getExceptionMessage(e, false) << std::endl;
auto code = DB::getCurrentExceptionCode();
return code ? code : 1;
}
catch (const boost::program_options::error & e)
{
std::cerr << "Bad arguments: " << e.what() << std::endl;
return DB::ErrorCodes::BAD_ARGUMENTS;
}
2017-12-02 02:47:12 +00:00
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
2017-12-02 02:47:12 +00:00
auto code = DB::getCurrentExceptionCode();
return code ? code : 1;
}
}
2021-12-06 18:27:06 +00:00
#if defined(FUZZING_MODE)
// linked from programs/main.cpp
bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv);

/// The clickhouse-local application instance shared by the fuzzer entry points:
/// it is constructed and initialized in LLVMFuzzerInitialize and run in LLVMFuzzerTestOneInput.
std::optional<DB::LocalServer> fuzz_app;
/// Fuzzer initialization hook: splits the command line between clickhouse-local and
/// libFuzzer, then constructs and initializes the global `fuzz_app`.
///
/// `*pargc` / `*pargv` are rewritten in place so that on return they contain only the
/// program name and the arguments handed over to libFuzzer.
/// Exits the process with code 1 if the binary was not invoked as clickhouse-local.
/// Always returns 0.
extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
{
    /// Working copy of the arguments, including the terminating nullptr (hence the + 1).
    std::vector<char *> argv(*pargv, *pargv + (*pargc + 1));
    if (!isClickhouseApp("local", argv))
    {
        std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." << "\033[0m" << std::endl;
        exit(1);
    }

    /// As a user you can add flags to clickhouse binary in fuzzing mode as follows
    /// clickhouse local <set of clickhouse-local specific flag> -- <set of libfuzzer flags>

    /// `p` is the next free slot in the original argument array (slot 0 keeps the program name).
    char **p = &(*pargv)[1];

    /// Skip everything up to and including the "--" separator.
    auto it = argv.begin() + 1;
    for (; *it; ++it)
        if (strcmp(*it, "--") == 0)
        {
            ++it;
            break;
        }

    /// After the separator: arguments that do not start with "--" are moved into the original
    /// array and removed from the working copy; arguments starting with "--" stay in the copy.
    while (*it)
        if (strncmp(*it, "--", 2) != 0)
        {
            *(p++) = *it;
            it = argv.erase(it);
        }
        else
            ++it;

    /// Publish the new argument count and re-terminate the original array.
    *pargc = static_cast<int>(p - &(*pargv)[0]);
    *p = nullptr;

    /// Initialize clickhouse-local app
    fuzz_app.emplace();
    /// The trailing nullptr in the working copy is not counted as an argument.
    fuzz_app->init(static_cast<int>(argv.size() - 1), argv.data());

    return 0;
}
2021-12-06 18:27:06 +00:00
/// Fuzzer per-input hook: publishes the `size` bytes at `data` through
/// FunctionGetFuzzerData::update() and runs the application prepared by LLVMFuzzerInitialize.
/// Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    try
    {
        auto input = String(reinterpret_cast<const char *>(data), size);
        DB::FunctionGetFuzzerData::update(input);
        fuzz_app->run();
    }
    catch (...)
    {
        /// Exceptions are swallowed here, so a throwing input is not reported to the fuzzer
        /// as a failure and the next input can be tried.
    }

    return 0;
}
#endif