merged with master

This commit is contained in:
Nikolai Kochetov 2018-04-20 17:15:03 +03:00
commit a6375d6155
33 changed files with 539 additions and 282 deletions

View File

@ -15,7 +15,7 @@
## Improvements:
* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run in the front of replication queue.
* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
* The `lengthUTF8` function runs faster (zhang2014).
@ -43,7 +43,7 @@
## Build changes:
* The build supports `ninja` instead of `make` and uses it by default for building releases.
* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use only `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility.
* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility.
## Backward-incompatible changes:

View File

@ -44,7 +44,7 @@
## Изменения сборки:
* Поддержка `ninja` вместо `make` при сборке. `ninja` используется по-умолчанию при сборке релизов.
* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте только `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий.
* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий.
## Обратно несовместимые изменения:

View File

@ -1,6 +1,7 @@
#pragma once
#include "ZooKeeper.h"
#include "KeeperException.h"
#include <functional>
#include <memory>
#include <common/logger_useful.h>
@ -68,7 +69,7 @@ private:
std::thread thread;
std::atomic<bool> shutdown_called {false};
zkutil::EventPtr event = std::make_shared<Poco::Event>();
EventPtr event = std::make_shared<Poco::Event>();
CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection};
@ -115,6 +116,13 @@ private:
success = true;
}
catch (const KeeperException & e)
{
DB::tryLogCurrentException("LeaderElection");
if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
break;
}
catch (...)
{
DB::tryLogCurrentException("LeaderElection");

View File

@ -432,6 +432,35 @@ void ZooKeeper::read(T & x)
}
void addRootPath(String & path, const String & root_path)
{
if (path.empty())
throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS);
if (path[0] != '/')
throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS);
if (root_path.empty())
return;
if (path.size() == 1) /// "/"
path = root_path;
else
path = root_path + path;
}
void removeRootPath(String & path, const String & root_path)
{
if (root_path.empty())
return;
if (path.size() <= root_path.size())
throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY);
path = path.substr(root_path.size());
}
static constexpr int32_t protocol_version = 0;
static constexpr ZooKeeper::XID watch_xid = -1;
@ -735,6 +764,7 @@ void ZooKeeper::sendThread()
if (expired)
break;
info.request->addRootPath(root_path);
info.request->write(*out);
if (info.request->xid == close_xid)
@ -844,35 +874,6 @@ ZooKeeper::ResponsePtr ZooKeeper::MultiRequest::makeResponse() const { return st
ZooKeeper::ResponsePtr ZooKeeper::CloseRequest::makeResponse() const { return std::make_shared<CloseResponse>(); }
void addRootPath(String & path, const String & root_path)
{
if (path.empty())
throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS);
if (path[0] != '/')
throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS);
if (root_path.empty())
return;
if (path.size() == 1) /// "/"
path = root_path;
else
path = root_path + path;
}
void removeRootPath(String & path, const String & root_path)
{
if (root_path.empty())
return;
if (path.size() <= root_path.size())
throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY);
path = path.substr(root_path.size());
}
void ZooKeeper::CreateRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); }
void ZooKeeper::RemoveRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); }
void ZooKeeper::ExistsRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); }
@ -1108,7 +1109,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (info.watch)
{
@ -1335,8 +1335,6 @@ void ZooKeeper::pushRequest(RequestInfo && info)
{
try
{
info.request->addRootPath(root_path);
info.time = clock::now();
if (!info.request->xid)

View File

@ -596,7 +596,7 @@ private:
std::mutex operations_mutex;
using WatchCallbacks = std::vector<WatchCallback>;
using Watches = std::map<String /* path */, WatchCallbacks>;
using Watches = std::map<String /* path, relative of root_path */, WatchCallbacks>;
Watches watches;
std::mutex watches_mutex;

View File

@ -61,7 +61,7 @@ TEST(zkutil, multi_nice_exception_msg)
String msg = getCurrentExceptionMessage(false);
bool msg_has_reqired_patterns = msg.find("/clickhouse_test/zkutil_multi/a") != std::string::npos && msg.find("#2") != std::string::npos;
bool msg_has_reqired_patterns = msg.find("#2") != std::string::npos;
EXPECT_TRUE(msg_has_reqired_patterns) << msg;
}
}
@ -129,40 +129,54 @@ TEST(zkutil, multi_async)
}
}
/// Run this test under sudo
TEST(zkutil, multi_async_libzookeeper_segfault)
TEST(zkutil, watch_get_children_with_chroot)
{
auto zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181", "", 1000);
zkutil::Requests ops;
try
{
const String zk_server = "localhost:2181";
const String prefix = "/clickhouse_test/zkutil/watch_get_children_with_chroot";
ops.emplace_back(zkutil::makeCheckRequest("/clickhouse_test/zkutil_multi", 0));
/// Create chroot node firstly
auto zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_server);
zookeeper->createAncestors(prefix + "/");
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix);
/// Uncomment to test
//auto cmd = ShellCommand::execute("sudo service zookeeper restart");
//cmd->wait();
String queue_path = "/queue";
zookeeper->tryRemoveRecursive(queue_path);
zookeeper->createAncestors(queue_path + "/");
auto future = zookeeper->asyncMulti(ops);
auto res = future.get();
EXPECT_TRUE(zkutil::isHardwareError(res.error));
zkutil::EventPtr event = std::make_shared<Poco::Event>();
zookeeper->getChildren(queue_path, nullptr, event);
{
auto zookeeper2 = std::make_unique<zkutil::ZooKeeper>(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix);
zookeeper2->create(queue_path + "/children-", "", zkutil::CreateMode::PersistentSequential);
}
event->wait();
}
catch (...)
{
std::cerr << getCurrentExceptionMessage(true);
throw;
}
}
TEST(zkutil, multi_create_sequential)
{
try
{
const String zk_server = "localhost:2181";
const String prefix = "/clickhouse_test/zkutil";
/// Create chroot node firstly
auto zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181");
zookeeper->createAncestors("/clickhouse_test/");
auto zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_server);
zookeeper->createAncestors(prefix + "/");
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test");
zookeeper = std::make_unique<zkutil::ZooKeeper>("localhost:2181", "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test");
zkutil::Requests ops;
String base_path = "/zkutil/multi_create_sequential";
String base_path = "/multi_create_sequential";
zookeeper->tryRemoveRecursive(base_path);
zookeeper->createAncestors(base_path + "/");
zkutil::Requests ops;
String sequential_node_prefix = base_path + "/queue-";
ops.emplace_back(zkutil::makeCreateRequest(sequential_node_prefix, "", zkutil::CreateMode::EphemeralSequential));
auto results = zookeeper->multi(ops);
@ -180,3 +194,4 @@ TEST(zkutil, multi_create_sequential)
}

View File

@ -16,6 +16,28 @@ namespace ErrorCodes
}
CreatingSetsBlockInputStream::CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits)
: subqueries_for_sets(subqueries_for_sets_),
network_transfer_limits(network_transfer_limits)
{
for (auto & elem : subqueries_for_sets)
{
if (elem.second.source)
{
children.push_back(elem.second.source);
if (elem.second.set)
elem.second.set->setHeader(elem.second.source->getHeader());
}
}
children.push_back(input);
}
Block CreatingSetsBlockInputStream::readImpl()
{
Block res;

View File

@ -20,16 +20,7 @@ public:
CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits)
: subqueries_for_sets(subqueries_for_sets_),
network_transfer_limits(network_transfer_limits)
{
for (auto & elem : subqueries_for_sets)
if (elem.second.source)
children.push_back(elem.second.source);
children.push_back(input);
}
const SizeLimits & network_transfer_limits);
String getName() const override { return "CreatingSets"; }

View File

@ -813,7 +813,7 @@ public:
/** The `indexHint` function takes any number of any arguments and always returns one.
*
* This function has a special meaning (see ExpressionAnalyzer, PKCondition)
* This function has a special meaning (see ExpressionAnalyzer, KeyCondition)
* - the expressions inside it are not evaluated;
* - but when analyzing the index (selecting ranges for reading), this function is treated the same way,
* as if instead of using it the expression itself would be.

View File

@ -1475,6 +1475,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table
SetPtr set = std::make_shared<Set>(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode));
set->setHeader(res.in->getHeader());
while (Block block = res.in->read())
{
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
@ -2021,7 +2022,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
}
/// A special function `indexHint`. Everything that is inside it is not calculated
/// (and is used only for index analysis, see PKCondition).
/// (and is used only for index analysis, see KeyCondition).
if (node->name == "indexHint")
{
actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
@ -2068,7 +2069,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
const SetPtr & set = prepared_sets[child.get()];
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
/// so that sets with the same record do not fuse together (they can have different types).
/// so that sets with the same literal representation do not fuse together (they can have different types).
if (!set->empty())
column.name = getUniqueName(actions_stack.getSampleBlock(), "__set");
else
@ -2950,7 +2951,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast,
}
/// A special function `indexHint`. Everything that is inside it is not calculated
/// (and is used only for index analysis, see PKCondition).
/// (and is used only for index analysis, see KeyCondition).
if (node->name == "indexHint")
return;
}

View File

@ -50,7 +50,14 @@ namespace ErrorCodes
static void throwIfReadOnly(Context & context)
{
if (context.getSettingsRef().readonly)
throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY);
{
const auto & client_info = context.getClientInfo();
if (client_info.interface == ClientInfo::Interface::HTTP && client_info.http_method == ClientInfo::HTTPMethod::GET)
throw Exception("Cannot execute query in readonly mode. "
"For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries.", ErrorCodes::READONLY);
else
throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY);
}
}

View File

@ -236,7 +236,6 @@ BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage)
{
ExpressionActionsChain chain;
AnalysisResult res;
/// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
@ -255,6 +254,8 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
std::shared_ptr<bool> remove_prewhere_filter;
{
ExpressionActionsChain chain;
if (query_analyzer->appendPrewhere(chain, false, remove_prewhere_filter))
{
res.prewhere_info = std::make_shared<PrewhereInfo>(
@ -356,8 +357,6 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns;
query_analyzer->makeSetsForIndex();
/// PREWHERE optimization
if (storage)
{
@ -379,6 +378,8 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
optimize_prewhere(*merge_tree);
else if (const StorageReplicatedMergeTree * merge_tree = dynamic_cast<const StorageReplicatedMergeTree *>(storage.get()))
optimize_prewhere(*merge_tree);
query_analyzer->makeSetsForIndex();
}
AnalysisResult expressions = analyzeExpressions(from_stage);

View File

@ -21,7 +21,7 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/NullableUtils.h>
#include <Storages/MergeTree/PKCondition.h>
#include <Storages/MergeTree/KeyCondition.h>
namespace DB
@ -62,7 +62,6 @@ void NO_INLINE Set::insertFromBlockImplCase(
{
typename Method::State state;
state.init(key_columns);
size_t keys_size = key_columns.size();
/// For all rows
for (size_t i = 0; i < rows; ++i)
@ -83,19 +82,17 @@ void NO_INLINE Set::insertFromBlockImplCase(
}
bool Set::insertFromBlock(const Block & block, bool fill_set_elements)
void Set::setHeader(const Block & block)
{
std::unique_lock lock(rwlock);
size_t keys_size = block.columns();
if (!empty())
return;
keys_size = block.columns();
ColumnRawPtrs key_columns;
key_columns.reserve(keys_size);
if (empty())
{
data_types.clear();
data_types.reserve(keys_size);
}
data_types.reserve(keys_size);
/// The constant columns to the right of IN are not supported directly. For this, they first materialize.
Columns materialized_columns;
@ -104,9 +101,42 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements)
for (size_t i = 0; i < keys_size; ++i)
{
key_columns.emplace_back(block.safeGetByPosition(i).column.get());
data_types.emplace_back(block.safeGetByPosition(i).type);
if (empty())
data_types.emplace_back(block.safeGetByPosition(i).type);
if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns.back() = materialized_columns.back().get();
}
}
/// We will insert to the Set only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
/// Choose data structure to use for the set.
data.init(data.chooseMethod(key_columns, key_sizes));
}
bool Set::insertFromBlock(const Block & block, bool fill_set_elements)
{
std::unique_lock lock(rwlock);
if (empty())
throw Exception("Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR);
ColumnRawPtrs key_columns;
key_columns.reserve(keys_size);
/// The constant columns to the right of IN are not supported directly. For this, they first materialize.
Columns materialized_columns;
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
{
key_columns.emplace_back(block.safeGetByPosition(i).column.get());
if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst())
{
@ -122,10 +152,6 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements)
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
/// Choose data structure to use for the set.
if (empty())
data.init(data.chooseMethod(key_columns, key_sizes));
switch (data.type)
{
case SetVariants::Type::EMPTY:
@ -153,6 +179,7 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements)
return limits.check(getTotalRowCount(), getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
}
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
if (ASTLiteral * lit = typeid_cast<ASTLiteral *>(node.get()))
@ -173,16 +200,19 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co
{
/// Will form a block with values from the set.
size_t size = types.size();
MutableColumns columns(types.size());
for (size_t i = 0; i < size; ++i)
columns[i] = types[i]->createColumn();
Block header;
size_t num_columns = types.size();
for (size_t i = 0; i < num_columns; ++i)
header.insert(ColumnWithTypeAndName(types[i]->createColumn(), types[i], "_" + toString(i)));
setHeader(header);
MutableColumns columns = header.cloneEmptyColumns();
Row tuple_values;
ASTExpressionList & list = typeid_cast<ASTExpressionList &>(*node);
for (auto & elem : list.children)
{
if (types.size() == 1)
if (num_columns == 1)
{
Field value = extractValueFromNode(elem, *types[0], context);
@ -195,8 +225,9 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co
throw Exception("Incorrect element of set. Must be tuple.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
size_t tuple_size = func->arguments->children.size();
if (tuple_size != types.size())
throw Exception("Incorrect size of tuple in set.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
if (tuple_size != num_columns)
throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) + " instead of " + toString(num_columns),
ErrorCodes::INCORRECT_ELEMENT_OF_SET);
if (tuple_values.empty())
tuple_values.resize(tuple_size);
@ -221,10 +252,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co
throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
}
Block block;
for (size_t i = 0, size = types.size(); i < size; ++i)
block.insert(ColumnWithTypeAndName(std::move(columns[i]), types[i], "_" + toString(i)));
Block block = header.cloneWithColumns(std::move(columns));
insertFromBlock(block, fill_set_elements);
}
@ -321,7 +349,6 @@ void NO_INLINE Set::executeImplCase(
{
typename Method::State state;
state.init(key_columns);
size_t keys_size = key_columns.size();
/// NOTE Optimization is not used for consecutive identical values.
@ -362,21 +389,21 @@ void Set::executeOrdinary(
}
MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector<PKTuplePositionMapping> && index_mapping_)
MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
: ordered_set(),
indexes_mapping(std::move(index_mapping_))
{
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
[](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r)
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
{
return std::forward_as_tuple(l.pk_index, l.tuple_index) < std::forward_as_tuple(r.pk_index, r.tuple_index);
return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index);
});
indexes_mapping.erase(std::unique(
indexes_mapping.begin(), indexes_mapping.end(),
[](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r)
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
{
return l.pk_index == r.pk_index;
return l.key_index == r.key_index;
}), indexes_mapping.end());
for (size_t i = 0; i < set_elements.size(); ++i)
@ -408,10 +435,10 @@ BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector<Range> & key_rang
for (size_t i = 0; i < indexes_mapping.size(); ++i)
{
std::optional<Range> new_range = PKCondition::applyMonotonicFunctionsChainToRange(
key_ranges[indexes_mapping[i].pk_index],
std::optional<Range> new_range = KeyCondition::applyMonotonicFunctionsChainToRange(
key_ranges[indexes_mapping[i].key_index],
indexes_mapping[i].functions,
data_types[indexes_mapping[i].pk_index]);
data_types[indexes_mapping[i].key_index]);
if (!new_range)
return {true, true};

View File

@ -38,6 +38,9 @@ public:
bool empty() const { return data.empty(); }
/** Set can be created either from AST or from a stream of data (subquery result).
*/
/** Create a Set from expression (specified literally in the query).
* 'types' - types of what are on the left hand side of IN.
* 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6).
@ -45,8 +48,12 @@ public:
*/
void createFromAST(const DataTypes & types, ASTPtr node, const Context & context, bool fill_set_elements);
/** Returns false, if some limit was exceeded and no need to insert more data.
/** Create a Set from stream.
* Call setHeader, then call insertFromBlock for each block.
*/
void setHeader(const Block & header);
/// Returns false, if some limit was exceeded and no need to insert more data.
bool insertFromBlock(const Block & block, bool fill_set_elements);
/** For columns of 'block', check belonging of corresponding rows to the set.
@ -62,6 +69,7 @@ public:
SetElements & getSetElements() { return *set_elements.get(); }
private:
size_t keys_size;
Sizes key_sizes;
SetVariants data;
@ -159,21 +167,24 @@ public:
* position of pk index and data type of this pk column
* and functions chain applied to this column.
*/
struct PKTuplePositionMapping
struct KeyTuplePositionMapping
{
size_t tuple_index;
size_t pk_index;
size_t key_index;
std::vector<FunctionBasePtr> functions;
};
MergeTreeSetIndex(const SetElements & set_elements, std::vector<PKTuplePositionMapping> && indexes_mapping_);
MergeTreeSetIndex(const SetElements & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
size_t size() const { return ordered_set.size(); }
BoolMask mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
private:
using OrderedTuples = std::vector<std::vector<FieldWithInfinity>>;
OrderedTuples ordered_set;
std::vector<PKTuplePositionMapping> indexes_mapping;
std::vector<KeyTuplePositionMapping> indexes_mapping;
};
}

View File

@ -6,12 +6,12 @@
namespace DB
{
struct ASTShowProcesslisIDAndQueryNames
struct ASTShowProcesslistIDAndQueryNames
{
static constexpr auto ID = "ShowProcesslistQuery";
static constexpr auto Query = "SHOW PROCESSLIST";
};
using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslisIDAndQueryNames>;
using ASTShowProcesslistQuery = ASTQueryWithOutputImpl<ASTShowProcesslistIDAndQueryNames>;
}

View File

@ -1,4 +1,4 @@
#include <Storages/MergeTree/PKCondition.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/MergeTree/BoolMask.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ExpressionAnalyzer.h>
@ -105,7 +105,7 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr
/// A dictionary containing actions to the corresponding functions to turn them into `RPNElement`
const PKCondition::AtomMap PKCondition::atom_map
const KeyCondition::AtomMap KeyCondition::atom_map
{
{
"notEquals",
@ -249,7 +249,7 @@ bool FieldWithInfinity::operator==(const FieldWithInfinity & other) const
/** Calculate expressions, that depend only on constants.
* For index to work when something like "WHERE Date = toDate(now())" is written.
*/
Block PKCondition::getBlockWithConstants(
Block KeyCondition::getBlockWithConstants(
const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns)
{
Block result
@ -265,19 +265,19 @@ Block PKCondition::getBlockWithConstants(
}
PKCondition::PKCondition(
KeyCondition::KeyCondition(
const SelectQueryInfo & query_info,
const Context & context,
const NamesAndTypesList & all_columns,
const SortDescription & sort_descr_,
const ExpressionActionsPtr & pk_expr_)
: sort_descr(sort_descr_), pk_expr(pk_expr_), prepared_sets(query_info.sets)
const ExpressionActionsPtr & key_expr_)
: sort_descr(sort_descr_), key_expr(key_expr_), prepared_sets(query_info.sets)
{
for (size_t i = 0; i < sort_descr.size(); ++i)
{
std::string name = sort_descr[i].column_name;
if (!pk_columns.count(name))
pk_columns[name] = i;
if (!key_columns.count(name))
key_columns[name] = i;
}
/** Evaluation of expressions that depend only on constants.
@ -307,11 +307,11 @@ PKCondition::PKCondition(
}
}
bool PKCondition::addCondition(const String & column, const Range & range)
bool KeyCondition::addCondition(const String & column, const Range & range)
{
if (!pk_columns.count(column))
if (!key_columns.count(column))
return false;
rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, pk_columns[column], range);
rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range);
rpn.emplace_back(RPNElement::FUNCTION_AND);
return true;
}
@ -368,7 +368,7 @@ static void applyFunction(
}
void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants)
void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants)
{
RPNElement element;
@ -401,23 +401,23 @@ void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Bloc
}
bool PKCondition::canConstantBeWrappedByMonotonicFunctions(
bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type)
{
String expr_name = node->getColumnName();
const auto & sample_block = pk_expr->getSampleBlock();
const auto & sample_block = key_expr->getSampleBlock();
if (!sample_block.has(expr_name))
return false;
bool found_transformation = false;
for (const ExpressionAction & a : pk_expr->getActions())
for (const ExpressionAction & a : key_expr->getActions())
{
/** The primary key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
/** The key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
* condition also holds, so the index may be used to select only parts satisfying this condition.
*
@ -446,12 +446,12 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions(
out_type.swap(new_type);
expr_name = a.result_name;
// Transformation results in a primary key expression, accept
auto it = pk_columns.find(expr_name);
if (pk_columns.end() != it)
// Transformation results in a key expression, accept
auto it = key_columns.find(expr_name);
if (key_columns.end() != it)
{
out_primary_key_column_num = it->second;
out_primary_key_column_type = sample_block.getByName(it->first).type;
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
found_transformation = true;
break;
}
@ -461,47 +461,41 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions(
return found_transformation;
}
void PKCondition::getPKTuplePositionMapping(
void KeyCondition::getKeyTuplePositionMapping(
const ASTPtr & node,
const Context & context,
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> & indexes_mapping,
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const size_t tuple_index,
size_t & out_primary_key_column_num)
size_t & out_key_column_num)
{
MergeTreeSetIndex::PKTuplePositionMapping index_mapping;
MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
index_mapping.tuple_index = tuple_index;
DataTypePtr data_type;
if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
node, context, index_mapping.pk_index,
if (isKeyPossiblyWrappedByMonotonicFunctions(
node, context, index_mapping.key_index,
data_type, index_mapping.functions))
{
indexes_mapping.push_back(index_mapping);
if (out_primary_key_column_num < index_mapping.pk_index)
if (out_key_column_num < index_mapping.key_index)
{
out_primary_key_column_num = index_mapping.pk_index;
out_key_column_num = index_mapping.key_index;
}
}
}
/// Try to prepare PKTuplePositionMapping for tuples from IN expression.
bool PKCondition::isTupleIndexable(
/// Try to prepare KeyTuplePositionMapping for tuples from IN expression.
bool KeyCondition::isTupleIndexable(
const ASTPtr & node,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_primary_key_column_num)
size_t & out_key_column_num)
{
out_primary_key_column_num = 0;
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> indexes_mapping;
out_key_column_num = 0;
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
size_t num_key_columns = prepared_set->getDataTypes().size();
if (num_key_columns == 0)
{
/// Empty set. It is "indexable" in a sense, that it implies that condition is always false (or true for NOT IN).
out.set_index = std::make_shared<MergeTreeSetIndex>(prepared_set->getSetElements(), std::move(indexes_mapping));
return true;
}
const ASTFunction * node_tuple = typeid_cast<const ASTFunction *>(node.get());
if (node_tuple && node_tuple->name == "tuple")
@ -517,13 +511,13 @@ bool PKCondition::isTupleIndexable(
size_t current_tuple_index = 0;
for (const auto & arg : node_tuple->arguments->children)
{
getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_primary_key_column_num);
getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num);
++current_tuple_index;
}
}
else
{
getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num);
getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num);
}
if (indexes_mapping.empty())
@ -536,54 +530,54 @@ bool PKCondition::isTupleIndexable(
}
bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_res_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
RPNElement::MonotonicFunctionsChain & out_functions_chain)
{
std::vector<const ASTFunction *> chain_not_tested_for_monotonicity;
DataTypePtr primary_key_column_type;
DataTypePtr key_column_type;
if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_primary_key_column_num, primary_key_column_type, chain_not_tested_for_monotonicity))
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
return false;
for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
{
auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context);
ColumnsWithTypeAndName arguments{{ nullptr, primary_key_column_type, "" }};
ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }};
auto func = func_builder->build(arguments);
if (!func || !func->hasInformationAboutMonotonicity())
return false;
primary_key_column_type = func->getReturnType();
key_column_type = func->getReturnType();
out_functions_chain.push_back(func);
}
out_primary_key_res_column_type = primary_key_column_type;
out_key_res_column_type = key_column_type;
return true;
}
bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<const ASTFunction *> & out_functions_chain)
{
/** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`.
/** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`.
* Therefore, use the full name of the expression for search.
*/
const auto & sample_block = pk_expr->getSampleBlock();
const auto & sample_block = key_expr->getSampleBlock();
String name = node->getColumnName();
auto it = pk_columns.find(name);
if (pk_columns.end() != it)
auto it = key_columns.find(name);
if (key_columns.end() != it)
{
out_primary_key_column_num = it->second;
out_primary_key_column_type = sample_block.getByName(it->first).type;
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
return true;
}
@ -595,8 +589,7 @@ bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
out_functions_chain.push_back(func);
if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_primary_key_column_num, out_primary_key_column_type,
out_functions_chain))
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain))
return false;
return true;
@ -618,7 +611,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
}
catch (...)
{
throw Exception("Primary key expression contains comparison between inconvertible types: " +
throw Exception("Key expression contains comparison between inconvertible types: " +
desired_type->getName() + " and " + src_type->getName() +
" inside " + queryToString(node),
ErrorCodes::BAD_TYPE_OF_FIELD);
@ -626,9 +619,9 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
}
bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
{
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key,
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions,
* or constant expression - number.
*/
@ -641,9 +634,9 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc
if (args.size() != 2)
return false;
DataTypePtr key_expr_type; /// Type of expression containing primary key column
size_t key_arg_pos; /// Position of argument with primary key column (non-const argument)
size_t key_column_num; /// Number of a primary key column (inside sort_descr array)
DataTypePtr key_expr_type; /// Type of expression containing key column
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
size_t key_column_num; /// Number of a key column (inside sort_descr array)
RPNElement::MonotonicFunctionsChain chain;
bool is_set_const = false;
bool is_constant_transformed = false;
@ -655,7 +648,7 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc
is_set_const = true;
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 0;
}
@ -666,7 +659,7 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc
is_constant_transformed = true;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 1;
}
@ -742,7 +735,7 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc
return false;
}
bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
{
/// Functions AND, OR, NOT.
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
@ -770,7 +763,7 @@ bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
return true;
}
String PKCondition::toString() const
String KeyCondition::toString() const
{
String res;
for (size_t i = 0; i < rpn.size(); ++i)
@ -783,16 +776,16 @@ String PKCondition::toString() const
}
/** Index is the value of primary key every `index_granularity` rows.
/** Index is the value of key every `index_granularity` rows.
* This value is called a "mark". That is, the index consists of marks.
*
* The primary key is the tuple.
* The data is sorted by primary key in the sense of lexicographic order over tuples.
* The key is the tuple.
* The data is sorted by key in the sense of lexicographic order over tuples.
*
* A pair of marks specifies a segment with respect to the order over the tuples.
* Denote it like this: [ x1 y1 z1 .. x2 y2 z2 ],
* where x1 y1 z1 - tuple - value of primary key in left border of segment;
* x2 y2 z2 - tuple - value of primary key in right boundary of segment.
* where x1 y1 z1 - tuple - value of key in left border of segment;
* x2 y2 z2 - tuple - value of key in right boundary of segment.
* In this section there are data between these marks.
*
* Or, the last mark specifies the range open on the right: [ a b c .. + inf )
@ -902,10 +895,10 @@ static bool forAnyParallelogram(
}
bool PKCondition::mayBeTrueInRange(
bool KeyCondition::mayBeTrueInRange(
size_t used_key_size,
const Field * left_pk,
const Field * right_pk,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const
{
@ -913,19 +906,19 @@ bool PKCondition::mayBeTrueInRange(
/* std::cerr << "Checking for: [";
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_pk[i]);
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]);
std::cerr << " ... ";
if (right_bounded)
{
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_pk[i]);
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]);
std::cerr << "]\n";
}
else
std::cerr << "+inf)\n";*/
return forAnyParallelogram(used_key_size, left_pk, right_pk, true, right_bounded, key_ranges, 0,
return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0,
[&] (const std::vector<Range> & key_ranges)
{
auto res = mayBeTrueInRangeImpl(key_ranges, data_types);
@ -939,7 +932,7 @@ bool PKCondition::mayBeTrueInRange(
});
}
std::optional<Range> PKCondition::applyMonotonicFunctionsChainToRange(
std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
Range key_range,
RPNElement::MonotonicFunctionsChain & functions,
DataTypePtr current_type
@ -976,7 +969,7 @@ std::optional<Range> PKCondition::applyMonotonicFunctionsChainToRange(
return key_range;
}
bool PKCondition::mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, const DataTypes & data_types) const
bool KeyCondition::mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, const DataTypes & data_types) const
{
std::vector<BoolMask> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
@ -1060,30 +1053,30 @@ bool PKCondition::mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, co
rpn_stack.emplace_back(true, false);
}
else
throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in PKCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR);
return rpn_stack[0].can_be_true;
}
bool PKCondition::mayBeTrueInRange(
size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const
bool KeyCondition::mayBeTrueInRange(
size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true);
return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true);
}
bool PKCondition::mayBeTrueAfter(
size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const
bool KeyCondition::mayBeTrueAfter(
size_t used_key_size, const Field * left_key, const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_pk, nullptr, data_types, false);
return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false);
}
String PKCondition::RPNElement::toString() const
String KeyCondition::RPNElement::toString() const
{
auto print_wrapped_column = [this](std::ostringstream & ss)
{
@ -1112,7 +1105,8 @@ String PKCondition::RPNElement::toString() const
{
ss << "(";
print_wrapped_column(ss);
ss << (function == FUNCTION_IN_SET ? " in set" : " notIn set");
ss << (function == FUNCTION_IN_SET ? " in " : " notIn ");
ss << set_index->size() << "-element set";
ss << ")";
return ss.str();
}
@ -1135,7 +1129,7 @@ String PKCondition::RPNElement::toString() const
}
bool PKCondition::alwaysUnknownOrTrue() const
bool KeyCondition::alwaysUnknownOrTrue() const
{
std::vector<UInt8> rpn_stack;
@ -1172,14 +1166,14 @@ bool PKCondition::alwaysUnknownOrTrue() const
rpn_stack.back() = arg1 | arg2;
}
else
throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
return rpn_stack[0];
}
size_t PKCondition::getMaxKeyColumn() const
size_t KeyCondition::getMaxKeyColumn() const
{
size_t res = 0;
for (const auto & element : rpn)

View File

@ -224,34 +224,34 @@ private:
* Constructs a reverse polish notation from these conditions
* and can calculate (interpret) its satisfiability over key ranges.
*/
class PKCondition
class KeyCondition
{
public:
/// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table.
PKCondition(
KeyCondition(
const SelectQueryInfo & query_info,
const Context & context,
const NamesAndTypesList & all_columns,
const SortDescription & sort_descr,
const ExpressionActionsPtr & pk_expr);
const ExpressionActionsPtr & key_expr);
/// Whether the condition is feasible in the key range.
/// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the primary key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const;
/// left_key and right_key must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const;
/// Is the condition valid in a semi-infinite (not limited to the right) key range.
/// left_pk must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const;
/// left_key must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const;
/// Checks that the index can not be used.
bool alwaysUnknownOrTrue() const;
/// Get the maximum number of the primary key element used in the condition.
/// Get the maximum number of the key element used in the condition.
size_t getMaxKeyColumn() const;
/// Impose an additional condition: the value in the column column must be in the `range` range.
/// Returns whether there is such a column in the primary key.
/// Returns whether there is such a column in the key.
bool addCondition(const String & column, const Range & range);
String toString() const;
@ -296,7 +296,7 @@ public:
MergeTreeSetIndexPtr set_index;
/** A chain of possibly monotone functions.
* If the primary key column is wrapped in functions that can be monotonous in some value ranges
* If the key column is wrapped in functions that can be monotonous in some value ranges
* (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
*/
using MonotonicFunctionsChain = std::vector<FunctionBasePtr>;
@ -320,8 +320,8 @@ private:
bool mayBeTrueInRange(
size_t used_key_size,
const Field * left_pk,
const Field * right_pk,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const;
@ -331,51 +331,51 @@ private:
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool operatorFromAST(const ASTFunction * func, RPNElement & out);
/** Is node the primary key column
* or expression in which column of primary key is wrapped by chain of functions,
/** Is node the key column
* or expression in which column of key is wrapped by chain of functions,
* that can be monotomic on certain ranges?
* If these conditions are true, then returns number of column in primary key, type of resulting expression
* If these conditions are true, then returns number of column in key, type of resulting expression
* and fills chain of possibly-monotonic functions.
*/
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
bool isKeyPossiblyWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_res_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
RPNElement::MonotonicFunctionsChain & out_functions_chain);
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<const ASTFunction *> & out_functions_chain);
bool canConstantBeWrappedByMonotonicFunctions(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type);
void getPKTuplePositionMapping(
void getKeyTuplePositionMapping(
const ASTPtr & node,
const Context & context,
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> & indexes_mapping,
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const size_t tuple_index,
size_t & out_primary_key_column_num);
size_t & out_key_column_num);
bool isTupleIndexable(
const ASTPtr & node,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_primary_key_column_num);
size_t & out_key_column_num);
RPN rpn;
SortDescription sort_descr;
ColumnIndices pk_columns;
ExpressionActionsPtr pk_expr;
ColumnIndices key_columns;
ExpressionActionsPtr key_expr;
PreparedSets prepared_sets;
};

View File

@ -2175,7 +2175,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit()
return total_covered_parts;
}
bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const
bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const
{
String column_name = node->getColumnName();
@ -2183,33 +2183,35 @@ bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(c
if (column_name == column.column_name)
return true;
if (partition_expr_ast && partition_expr_ast->children.at(0)->getColumnName() == column_name)
return true;
for (const auto & column : minmax_idx_sort_descr)
if (column_name == column.column_name)
return true;
if (const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get()))
if (func->arguments->children.size() == 1)
return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front());
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front());
return false;
}
bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const
{
/// Make sure that the left side of the IN operator contain part of the primary key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple must be part of the primary key (probably wrapped by a chain of some acceptable functions).
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
const ASTFunction * left_in_operand_tuple = typeid_cast<const ASTFunction *>(left_in_operand.get());
if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
{
for (const auto & item : left_in_operand_tuple->arguments->children)
if (isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(item))
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item))
return true;
/// The tuple itself may be part of the primary key, so check that as a last resort.
return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand);
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand);
}
else
{
return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand);
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand);
}
}

View File

@ -502,7 +502,7 @@ public:
Names minmax_idx_columns;
DataTypes minmax_idx_column_types;
Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column.
SortDescription minmax_idx_sort_descr; /// For use with PKCondition.
SortDescription minmax_idx_sort_descr; /// For use with KeyCondition.
/// Limiting parallel sends per one table, used in DataPartsExchange
std::atomic_uint current_table_sends {0};
@ -652,7 +652,7 @@ private:
std::lock_guard<std::mutex> & data_parts_lock) const;
/// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument.
bool isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const;
bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const;
};
}

View File

@ -6,7 +6,7 @@
#include <Storages/MergeTree/MergeTreeBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreeThreadBlockInputStream.h>
#include <Storages/MergeTree/PKCondition.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSampleRatio.h>
@ -85,7 +85,7 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts
size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
const MergeTreeData::DataPartsVector & parts, const PKCondition & key_condition, const Settings & settings) const
const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const
{
size_t full_marks_count = 0;
@ -196,7 +196,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
const Settings & settings = context.getSettingsRef();
SortDescription sort_descr = data.getPrimarySortDescription();
PKCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr,
KeyCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr,
data.getPrimaryExpression());
if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
@ -210,7 +210,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED);
}
std::optional<PKCondition> minmax_idx_condition;
std::optional<KeyCondition> minmax_idx_condition;
if (data.minmax_idx_expr)
{
minmax_idx_condition.emplace(
@ -823,7 +823,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition(
/// Calculates a set of mark ranges, that could possibly contain keys, required by condition.
/// In other words, it removes subranges from whole range, that definitely could not contain required keys.
MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
const MergeTreeData::DataPart::Index & index, const PKCondition & key_condition, const Settings & settings) const
const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const
{
size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity;
@ -846,7 +846,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
*/
std::vector<MarkRange> ranges_stack{ {0, marks_count} };
/// NOTE Creating temporary Field objects to pass to PKCondition.
/// NOTE Creating temporary Field objects to pass to KeyCondition.
Row index_left(used_key_size);
Row index_right(used_key_size);

View File

@ -9,7 +9,7 @@
namespace DB
{
class PKCondition;
class KeyCondition;
/** Executes SELECT queries on data from the merge tree.
@ -57,7 +57,7 @@ private:
/// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index.
size_t getApproximateTotalRowsToRead(
const MergeTreeData::DataPartsVector & parts,
const PKCondition & key_condition,
const KeyCondition & key_condition,
const Settings & settings) const;
/// Create the expression "Sign == 1".
@ -68,7 +68,7 @@ private:
MarkRanges markRangesFromPKRange(
const MergeTreeData::DataPart::Index & index,
const PKCondition & key_condition,
const KeyCondition & key_condition,
const Settings & settings) const;
};

View File

@ -1,6 +1,6 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/PKCondition.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
@ -43,7 +43,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
[] (const SortColumnDescription & col) { return col.column_name; })},
table_columns{ext::map<std::unordered_set>(data.getColumns().getAllPhysical(),
[] (const NameAndTypePair & col) { return col.name; })},
block_with_constants{PKCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())},
block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())},
prepared_sets(query_info.sets),
log{log}
{
@ -321,7 +321,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const
{
if (const auto func = typeid_cast<const ASTFunction *>(ast))
{
if (!PKCondition::atom_map.count(func->name))
if (!KeyCondition::atom_map.count(func->name))
return false;
const auto & args = func->arguments->children;

View File

@ -188,12 +188,21 @@ void ReplicatedMergeTreeAlterThread::run()
wakeup_event->wait();
}
catch (const zkutil::KeeperException & e)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
break;
force_recheck_parts = true;
wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS);
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
force_recheck_parts = true;
wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS);
}
}

View File

@ -36,6 +36,13 @@ void ReplicatedMergeTreeCleanupThread::run()
{
iterate();
}
catch (const zkutil::KeeperException & e)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
break;
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);

View File

@ -381,6 +381,15 @@ void ReplicatedMergeTreePartCheckThread::run()
}
}
}
catch (const zkutil::KeeperException & e)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
break;
wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS);
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);

View File

@ -1596,6 +1596,15 @@ void StorageReplicatedMergeTree::queueUpdatingThread()
update_in_progress = false;
queue_updating_event->wait();
}
catch (const zkutil::KeeperException & e)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
break;
else
queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS);
}
catch (...)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);

View File

@ -107,6 +107,10 @@ StorageSet::StorageSet(
: StorageSetOrJoinBase{path_, name_, columns_},
set(std::make_shared<Set>(SizeLimits()))
{
Block header = getSampleBlock();
header = header.sortColumns();
set->setHeader(header);
restore();
}

View File

@ -239,6 +239,8 @@ BlockInputStreams StorageSystemTables::read(
res_columns[j++]->insert(table.first);
res_columns[j++]->insert(table.second->getName());
res_columns[j++]->insert(UInt64(1));
res_columns[j++]->insertDefault();
res_columns[j++]->insertDefault();
if (has_metadata_modification_time)
res_columns[j++]->insertDefault();

View File

@ -2,4 +2,5 @@ A
B
A 1 TinyLog CREATE TABLE test_show_tables.A ( A UInt8) ENGINE = TinyLog
B 1 TinyLog CREATE TABLE test_show_tables.B ( A UInt8) ENGINE = TinyLog
test_temporary_table
0

View File

@ -9,6 +9,9 @@ SHOW TABLES from test_show_tables;
SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = 'test_show_tables' ORDER BY name FORMAT TSVRaw;
CREATE TEMPORARY TABLE test_temporary_table (id UInt64);
SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary_table';
DROP DATABASE test_show_tables;

View File

@ -0,0 +1,59 @@
111
112
113
114
-
121
122
123
124
-
131
132
133
134
---
211
212
213
214
-
221
222
223
224
-
231
232
233
234
---
311
312
313
314
-
321
322
323
324
-
331
332
333
334
---
411
412
413
414
-
421
422
423
424
-
431
432
433
434

View File

@ -0,0 +1,75 @@
drop table if exists test.test54378;
create table test.test54378 (part_date Date, pk_date Date, date Date) Engine=MergeTree(part_date, pk_date, 8192);
insert into test.test54378 values ('2018-04-19', '2018-04-19', '2018-04-19');
select 111 from test.test54378 where part_date = '2018-04-19';
select 112 from test.test54378 where part_date in ('2018-04-19');
select 113 from test.test54378 where pk_date in ('2018-04-19');
select 114 from test.test54378 where date in ('2018-04-19');
SELECT '-';
select 121 from test.test54378 where part_date = toDate('2018-04-19');
select 122 from test.test54378 where part_date in (toDate('2018-04-19'));
select 123 from test.test54378 where pk_date in (toDate('2018-04-19'));
select 124 from test.test54378 where date in (toDate('2018-04-19'));
SELECT '-';
select 131 from test.test54378 where part_date = (SELECT toDate('2018-04-19'));
select 132 from test.test54378 where part_date in (SELECT toDate('2018-04-19'));
select 133 from test.test54378 where pk_date in (SELECT toDate('2018-04-19'));
select 134 from test.test54378 where date in (SELECT toDate('2018-04-19'));
SELECT '---';
select 211 from test.test54378 prewhere part_date = '2018-04-19';
select 212 from test.test54378 prewhere part_date in ('2018-04-19');
select 213 from test.test54378 prewhere pk_date in ('2018-04-19');
select 214 from test.test54378 prewhere date in ('2018-04-19');
SELECT '-';
select 221 from test.test54378 prewhere part_date = toDate('2018-04-19');
select 222 from test.test54378 prewhere part_date in (toDate('2018-04-19'));
select 223 from test.test54378 prewhere pk_date in (toDate('2018-04-19'));
select 224 from test.test54378 prewhere date in (toDate('2018-04-19'));
SELECT '-';
select 231 from test.test54378 prewhere part_date = (SELECT toDate('2018-04-19'));
select 232 from test.test54378 prewhere part_date in (SELECT toDate('2018-04-19'));
select 233 from test.test54378 prewhere pk_date in (SELECT toDate('2018-04-19'));
select 234 from test.test54378 prewhere date in (SELECT toDate('2018-04-19'));
SELECT '---';
SET optimize_move_to_prewhere = 0;
select 311 from test.test54378 where part_date = '2018-04-19';
select 312 from test.test54378 where part_date in ('2018-04-19');
select 313 from test.test54378 where pk_date in ('2018-04-19');
select 314 from test.test54378 where date in ('2018-04-19');
SELECT '-';
select 321 from test.test54378 where part_date = toDate('2018-04-19');
select 322 from test.test54378 where part_date in (toDate('2018-04-19'));
select 323 from test.test54378 where pk_date in (toDate('2018-04-19'));
select 324 from test.test54378 where date in (toDate('2018-04-19'));
SELECT '-';
select 331 from test.test54378 where part_date = (SELECT toDate('2018-04-19'));
select 332 from test.test54378 where part_date in (SELECT toDate('2018-04-19'));
select 333 from test.test54378 where pk_date in (SELECT toDate('2018-04-19'));
select 334 from test.test54378 where date in (SELECT toDate('2018-04-19'));
SELECT '---';
SET optimize_move_to_prewhere = 1;
select 411 from test.test54378 where part_date = '2018-04-19';
select 412 from test.test54378 where part_date in ('2018-04-19');
select 413 from test.test54378 where pk_date in ('2018-04-19');
select 414 from test.test54378 where date in ('2018-04-19');
SELECT '-';
select 421 from test.test54378 where part_date = toDate('2018-04-19');
select 422 from test.test54378 where part_date in (toDate('2018-04-19'));
select 423 from test.test54378 where pk_date in (toDate('2018-04-19'));
select 424 from test.test54378 where date in (toDate('2018-04-19'));
SELECT '-';
select 431 from test.test54378 where part_date = (SELECT toDate('2018-04-19'));
select 432 from test.test54378 where part_date in (SELECT toDate('2018-04-19'));
select 433 from test.test54378 where pk_date in (SELECT toDate('2018-04-19'));
select 434 from test.test54378 where date in (SELECT toDate('2018-04-19'));
drop table test.test54378;

View File

@ -706,17 +706,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config)
Poco::AutoPtr<SplitterChannel> split = new SplitterChannel;
auto log_level = config.getString("logger.level", "trace");
if (config.hasProperty("logger.log"))
const auto log_path = config.getString("logger.log", "");
if (!log_path.empty())
{
createDirectory(config.getString("logger.log"));
std::cerr << "Logging " << log_level << " to " << config.getString("logger.log") << std::endl;
createDirectory(log_path);
std::cerr << "Logging " << log_level << " to " << log_path << std::endl;
// Set up two channel chains.
Poco::AutoPtr<OwnPatternFormatter> pf = new OwnPatternFormatter(this);
pf->setProperty("times", "local");
Poco::AutoPtr<FormattingChannel> log = new FormattingChannel(pf);
log_file = new FileChannel;
log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.log")).absolute().toString());
log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(log_path).absolute().toString());
log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M"));
log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number");
log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true"));
@ -728,17 +729,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config)
log_file->open();
}
if (config.hasProperty("logger.errorlog"))
const auto errorlog_path = config.getString("logger.errorlog", "");
if (!errorlog_path.empty())
{
createDirectory(config.getString("logger.errorlog"));
std::cerr << "Logging errors to " << config.getString("logger.errorlog") << std::endl;
createDirectory(errorlog_path);
std::cerr << "Logging errors to " << errorlog_path << std::endl;
Poco::AutoPtr<Poco::LevelFilterChannel> level = new Poco::LevelFilterChannel;
level->setLevel(Message::PRIO_NOTICE);
Poco::AutoPtr<OwnPatternFormatter> pf = new OwnPatternFormatter(this);
pf->setProperty("times", "local");
Poco::AutoPtr<FormattingChannel> errorlog = new FormattingChannel(pf);
error_log_file = new FileChannel;
error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.errorlog")).absolute().toString());
error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(errorlog_path).absolute().toString());
error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M"));
error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number");
error_log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true"));
@ -965,9 +967,9 @@ void BaseDaemon::initialize(Application & self)
}
/// Change path for logging.
if (config().hasProperty("logger.log"))
if (!log_path.empty())
{
std::string path = createDirectory(config().getString("logger.log"));
std::string path = createDirectory(log_path);
if (is_daemon
&& chdir(path.c_str()) != 0)
throw Poco::Exception("Cannot change directory to " + path);