Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 15:12:02 +00:00)

Merge pull request #16192 from filimonov/clickhouse-local-segfault

Add setTemporaryStorage to clickhouse-local to make OPTIMIZE work

Commit: a1a7bc0217
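
Before this change, OPTIMIZE under clickhouse-local crashed with a segfault: the global context had no temporary volume configured, and MergeTree merges need one. A minimal reproduction, assuming a clickhouse-local binary on PATH (it mirrors the regression test 01527 added below):

    WORKDIR="$(mktemp -d)"
    clickhouse-local --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKDIR}"
    rm -rf "${WORKDIR}"

With this PR the OPTIMIZE completes; before it, the last statement crashed the process.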
programs/local/LocalServer.cpp

@@ -57,8 +57,8 @@ LocalServer::LocalServer() = default;

 LocalServer::~LocalServer()
 {
-    if (context)
-        context->shutdown(); /// required for properly exception handling
+    if (global_context)
+        global_context->shutdown(); /// required for properly exception handling
 }

@@ -95,9 +95,9 @@ void LocalServer::initialize(Poco::Util::Application & self)
     }
 }

-void LocalServer::applyCmdSettings()
+void LocalServer::applyCmdSettings(Context & context)
 {
-    context->applySettingsChanges(cmd_settings.changes());
+    context.applySettingsChanges(cmd_settings.changes());
 }

 /// If path is specified and not empty, will try to setup server environment and load existing metadata
@@ -151,8 +151,12 @@ void LocalServer::tryInitPath()
     if (path.back() != '/')
         path += '/';

-    context->setPath(path);
-    context->setUserFilesPath(""); // user's files are everywhere
+    global_context->setPath(path);
+
+    global_context->setTemporaryStorage(path + "tmp");
+    global_context->setFlagsPath(path + "flags");
+
+    global_context->setUserFilesPath(""); // user's files are everywhere
 }

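This hunk is the core of the fix: tryInitPath now also points the global context at a temporary storage under <path>/tmp, where merges triggered by OPTIMIZE write their intermediate data, and at a flags path. A quick, hedged way to inspect the resulting layout after running the reproduction above (exact entries may vary by version):

    ls -1 "${WORKDIR}"
    # typically contains:
    #   data/       table parts, e.g. data/d/t/...
    #   metadata/   database and table DDL
    #   status      lock file written by StatusFile
    #   tmp/        temporary volume for merges (new with this PR)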
@@ -186,9 +190,9 @@ try
     }

     shared_context = Context::createShared();
-    context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
-    context->makeGlobalContext();
-    context->setApplicationType(Context::ApplicationType::LOCAL);
+    global_context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
+    global_context->makeGlobalContext();
+    global_context->setApplicationType(Context::ApplicationType::LOCAL);
     tryInitPath();

     std::optional<StatusFile> status;
@@ -210,32 +214,32 @@ try

     /// Maybe useless
     if (config().has("macros"))
-        context->setMacros(std::make_unique<Macros>(config(), "macros", log));
+        global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));

     /// Skip networking

     /// Sets external authenticators config (LDAP).
-    context->setExternalAuthenticatorsConfig(config());
+    global_context->setExternalAuthenticatorsConfig(config());

     setupUsers();

     /// Limit on total number of concurrently executing queries.
     /// There is no need for concurrent queries, override max_concurrent_queries.
-    context->getProcessList().setMaxSize(0);
+    global_context->getProcessList().setMaxSize(0);

     /// Size of cache for uncompressed blocks. Zero means disabled.
     size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
     if (uncompressed_cache_size)
-        context->setUncompressedCache(uncompressed_cache_size);
+        global_context->setUncompressedCache(uncompressed_cache_size);

     /// Size of cache for marks (index of MergeTree family of tables). It is necessary.
     /// Specify default value for mark_cache_size explicitly!
     size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
     if (mark_cache_size)
-        context->setMarkCache(mark_cache_size);
+        global_context->setMarkCache(mark_cache_size);

     /// Load global settings from default_profile and system_profile.
-    context->setDefaultProfiles(config());
+    global_context->setDefaultProfiles(config());

     /** Init dummy default DB
      * NOTE: We force using isolated default database to avoid conflicts with default database from server environment
@@ -243,34 +247,34 @@ try
      * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
      */
     std::string default_database = config().getString("default_database", "_local");
-    DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *context));
-    context->setCurrentDatabase(default_database);
-    applyCmdOptions();
+    DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *global_context));
+    global_context->setCurrentDatabase(default_database);
+    applyCmdOptions(*global_context);

-    String path = context->getPath();
+    String path = global_context->getPath();
     if (!path.empty())
     {
         /// Lock path directory before read
-        status.emplace(context->getPath() + "status", StatusFile::write_full_info);
+        status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);

         LOG_DEBUG(log, "Loading metadata from {}", path);
         Poco::File(path + "data/").createDirectories();
         Poco::File(path + "metadata/").createDirectories();
-        loadMetadataSystem(*context);
-        attachSystemTables(*context);
-        loadMetadata(*context);
+        loadMetadataSystem(*global_context);
+        attachSystemTables(*global_context);
+        loadMetadata(*global_context);
         DatabaseCatalog::instance().loadDatabases();
         LOG_DEBUG(log, "Loaded metadata.");
     }
     else
     {
-        attachSystemTables(*context);
+        attachSystemTables(*global_context);
     }

     processQueries();

-    context->shutdown();
-    context.reset();
+    global_context->shutdown();
+    global_context.reset();

     status.reset();
     cleanup();
@@ -323,7 +327,7 @@ void LocalServer::processQueries()
     String initial_create_query = getInitialCreateTableQuery();
     String queries_str = initial_create_query + config().getRawString("query");

-    const auto & settings = context->getSettingsRef();
+    const auto & settings = global_context->getSettingsRef();

     std::vector<String> queries;
     auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth);
@@ -331,15 +335,19 @@ void LocalServer::processQueries()
     if (!parse_res.second)
         throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);

-    context->makeSessionContext();
-    context->makeQueryContext();
+    /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads)
+    /// so we can't reuse it safely as a query context and need a copy here
+    auto context = Context(*global_context);
+
+    context.makeSessionContext();
+    context.makeQueryContext();

-    context->setUser("default", "", Poco::Net::SocketAddress{});
-    context->setCurrentQueryId("");
-    applyCmdSettings();
+    context.setUser("default", "", Poco::Net::SocketAddress{});
+    context.setCurrentQueryId("");
+    applyCmdSettings(context);

     /// Use the same query_id (and thread group) for all queries
-    CurrentThread::QueryScope query_scope_holder(*context);
+    CurrentThread::QueryScope query_scope_holder(context);

     bool echo_queries = config().hasOption("echo") || config().hasOption("verbose");
     std::exception_ptr exception;
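Instead of mutating the global context, each run now copies it into a local query context and applies command-line settings to the copy via applyCmdSettings(context). Practically, settings passed as ordinary flags affect the queries, while options after "--" land in the server-style config through argsToConfig. A hedged usage sketch (the setting name is just an example; any ClickHouse setting exposed as a program option works the same way):

    # --max_threads becomes a cmd_setting applied to the query context;
    # --path after "--" goes to the config and selects the data folder
    clickhouse-local --max_threads=1 --query "SELECT count() FROM d.t" -- --path="${WORKDIR}"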
@@ -358,7 +366,7 @@ void LocalServer::processQueries()

         try
         {
-            executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {});
+            executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {});
         }
         catch (...)
         {
@@ -423,7 +431,7 @@ void LocalServer::setupUsers()
     }

     if (users_config)
-        context->setUsersConfig(users_config);
+        global_context->setUsersConfig(users_config);
     else
         throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG);
 }
@@ -577,10 +585,10 @@ void LocalServer::init(int argc, char ** argv)
     argsToConfig(arguments, config(), 100);
 }

-void LocalServer::applyCmdOptions()
+void LocalServer::applyCmdOptions(Context & context)
 {
-    context->setDefaultFormat(config().getString("output-format", config().getString("format", "TSV")));
-    applyCmdSettings();
+    context.setDefaultFormat(config().getString("output-format", config().getString("format", "TSV")));
+    applyCmdSettings(context);
 }

 }
programs/local/LocalServer.h

@@ -36,15 +36,15 @@ private:
     std::string getInitialCreateTableQuery();

     void tryInitPath();
-    void applyCmdOptions();
-    void applyCmdSettings();
+    void applyCmdOptions(Context & context);
+    void applyCmdSettings(Context & context);
     void processQueries();
     void setupUsers();
     void cleanup();

 protected:
     SharedContextHolder shared_context;
-    std::unique_ptr<Context> context;
+    std::unique_ptr<Context> global_context;

     /// Settings specified via command line args
     Settings cmd_settings;
tests/queries/0_stateless/01527_clickhouse_local_optimize.sh (new executable file, 13 lines)

@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. "$CURDIR"/../shell_config.sh
+
+WORKING_FOLDER_01527="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize"
+rm -rf "${WORKING_FOLDER_01527}"
+mkdir -p "${WORKING_FOLDER_01527}"
+
+# OPTIMIZE was crashing due to lack of temporary volume in local
+${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}"
+
+rm -rf "${WORKING_FOLDER_01527}"
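To run just this test from a checkout, the usual route is the clickhouse-test runner (a sketch, assuming a built clickhouse binary and a configured test environment):

    cd tests
    ./clickhouse-test 01527_clickhouse_local_optimize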
tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.reference (new file, 19 lines)

@@ -0,0 +1,19 @@
+Option 1. Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path
+1	2020-01-01	String
+2	2020-02-02	Another string
+3	2020-03-03	One more string
+4	2020-01-02	String for first partition
+Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe)
+11	2020-01-01	String
+12	2020-02-02	Another string
+13	2020-03-03	One more string
+14	2020-01-02	String for first partition
+Option 3. Prepare parts from from table with Engine=File defined via command line, read from stdin (pipe)
+21	2020-01-01	String
+22	2020-02-02	Another string
+23	2020-03-03	One more string
+24	2020-01-02	String for first partition
+Possibility to run optimize on prepared parts before sending parts to server
+202001	1
+202002	1
+202003	1
tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh (new executable file, 83 lines)

@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. "$CURDIR"/../shell_config.sh
+
+WORKING_FOLDER_01528="${CLICKHOUSE_TMP}/01528_clickhouse_local_prepare_parts"
+rm -rf "${WORKING_FOLDER_01528}"
+
+mkdir -p "${WORKING_FOLDER_01528}/metadata/local"
+
+## Checks scenario of preparing parts offline by clickhouse-local
+
+## that is the metadata for the table we want to fill
+## schema should match the schema of the table from server
+## (the easiest way is just to copy it from the server)
+cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/test.sql"
+ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d);
+EOF
+
+#################
+
+echo "Option 1. Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path"
+
+## Source file:
+cat <<EOF > "${WORKING_FOLDER_01528}/data.csv"
+1,2020-01-01,"String"
+2,2020-02-02,"Another string"
+3,2020-03-03,"One more string"
+4,2020-01-02,"String for first partition"
+EOF
+
+## metadata written into file
+cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/data_csv.sql"
+ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01528}/data.csv');
+EOF
+
+## feed the table
+${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}"
+
+## check the parts were created
+${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
+
+#################
+
+echo "Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe)"
+
+cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/stdin.sql"
+ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin);
+EOF
+
+cat <<EOF | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin;" -- --path="${WORKING_FOLDER_01528}"
+11,2020-01-01,"String"
+12,2020-02-02,"Another string"
+13,2020-03-03,"One more string"
+14,2020-01-02,"String for first partition"
+EOF
+
+${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id BETWEEN 10 AND 19 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
+
+#################
+
+echo "Option 3. Prepare parts from from table with Engine=File defined via command line, read from stdin (pipe)"
+
+cat <<EOF | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM table;" -S "id UInt64, d Date, s String" --input-format=CSV -- --path="${WORKING_FOLDER_01528}"
+21,2020-01-01,"String"
+22,2020-02-02,"Another string"
+23,2020-03-03,"One more string"
+24,2020-01-02,"String for first partition"
+EOF
+
+${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id BETWEEN 20 AND 29 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
+
+#################
+
+echo "Possibility to run optimize on prepared parts before sending parts to server"
+
+${CLICKHOUSE_LOCAL} --query "OPTIMIZE TABLE local.test FINAL;" -- --path="${WORKING_FOLDER_01528}"
+
+# ensure we have one part per partition
+${CLICKHOUSE_LOCAL} --query "SELECT toYYYYMM(d) m, uniqExact(_part) FROM local.test GROUP BY m ORDER BY m" -- --path="${WORKING_FOLDER_01528}"
+
+# cleanup
+rm -rf "${WORKING_FOLDER_01528}"
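The parts prepared by this script end up under ${WORKING_FOLDER_01528}/data/local/test/. A hedged sketch of the "sending parts to server" step the last echo alludes to (server path, database, and table name are assumptions; the standard mechanism is the table's detached folder plus ALTER ... ATTACH):

    # copy the prepared part directories into the server table's detached dir
    cp -r "${WORKING_FOLDER_01528}/data/local/test/"2020* \
          /var/lib/clickhouse/data/default/test/detached/
    # then attach them, e.g. one partition at a time
    clickhouse-client --query "ALTER TABLE default.test ATTACH PARTITION 202001"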