mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Fix missing type check in StorageEmbeddedRocksDB
This commit is contained in:
parent
12be9d51a9
commit
82ab793731
@ -319,6 +319,7 @@ function run_tests
|
|||||||
|
|
||||||
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
||||||
01504_rocksdb
|
01504_rocksdb
|
||||||
|
01686_rocksdb
|
||||||
|
|
||||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||||
01460_DistributedFilesToInsert
|
01460_DistributedFilesToInsert
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include <Interpreters/Set.h>
|
#include <Interpreters/Set.h>
|
||||||
#include <Interpreters/PreparedSets.h>
|
#include <Interpreters/PreparedSets.h>
|
||||||
#include <Interpreters/TreeRewriter.h>
|
#include <Interpreters/TreeRewriter.h>
|
||||||
|
#include <Interpreters/convertFieldToType.h>
|
||||||
|
|
||||||
#include <Poco/File.h>
|
#include <Poco/File.h>
|
||||||
#include <Poco/Path.h>
|
#include <Poco/Path.h>
|
||||||
@ -44,9 +45,12 @@ namespace ErrorCodes
|
|||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using FieldVectorPtr = std::shared_ptr<FieldVector>;
|
||||||
|
|
||||||
|
|
||||||
// returns keys may be filter by condition
|
// returns keys may be filter by condition
|
||||||
static bool traverseASTFilter(const String & primary_key, const DataTypePtr & primary_key_type, const ASTPtr & elem, const PreparedSets & sets, FieldVector & res)
|
static bool traverseASTFilter(
|
||||||
|
const String & primary_key, const DataTypePtr & primary_key_type, const ASTPtr & elem, const PreparedSets & sets, FieldVectorPtr & res)
|
||||||
{
|
{
|
||||||
const auto * function = elem->as<ASTFunction>();
|
const auto * function = elem->as<ASTFunction>();
|
||||||
if (!function)
|
if (!function)
|
||||||
@ -63,13 +67,9 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
|
|||||||
else if (function->name == "or")
|
else if (function->name == "or")
|
||||||
{
|
{
|
||||||
// make sure every child has the key filter condition
|
// make sure every child has the key filter condition
|
||||||
FieldVector child_res;
|
|
||||||
for (const auto & child : function->arguments->children)
|
for (const auto & child : function->arguments->children)
|
||||||
{
|
if (!traverseASTFilter(primary_key, primary_key_type, child, sets, res))
|
||||||
if (!traverseASTFilter(primary_key, primary_key_type, child, sets, child_res))
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
res.insert(res.end(), child_res.begin(), child_res.end());
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (function->name == "equals" || function->name == "in")
|
else if (function->name == "equals" || function->name == "in")
|
||||||
@ -108,9 +108,7 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
|
|||||||
prepared_set->checkColumnsNumber(1);
|
prepared_set->checkColumnsNumber(1);
|
||||||
const auto & set_column = *prepared_set->getSetElements()[0];
|
const auto & set_column = *prepared_set->getSetElements()[0];
|
||||||
for (size_t row = 0; row < set_column.size(); ++row)
|
for (size_t row = 0; row < set_column.size(); ++row)
|
||||||
{
|
res->push_back(set_column[row]);
|
||||||
res.push_back(set_column[row]);
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -125,10 +123,12 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
|
|||||||
if (ident->name() != primary_key)
|
if (ident->name() != primary_key)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
//function->name == "equals"
|
/// function->name == "equals"
|
||||||
if (const auto * literal = value->as<ASTLiteral>())
|
if (const auto * literal = value->as<ASTLiteral>())
|
||||||
{
|
{
|
||||||
res.push_back(literal->value);
|
auto converted_field = convertFieldToType(literal->value, *primary_key_type);
|
||||||
|
if (!converted_field.isNull())
|
||||||
|
res->push_back(converted_field);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,14 +140,14 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
|
|||||||
/** Retrieve from the query a condition of the form `key = 'key'`, `key in ('xxx_'), from conjunctions in the WHERE clause.
|
/** Retrieve from the query a condition of the form `key = 'key'`, `key in ('xxx_'), from conjunctions in the WHERE clause.
|
||||||
* TODO support key like search
|
* TODO support key like search
|
||||||
*/
|
*/
|
||||||
static std::pair<FieldVector, bool> getFilterKeys(const String & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info)
|
static std::pair<FieldVectorPtr, bool> getFilterKeys(
|
||||||
|
const String & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info)
|
||||||
{
|
{
|
||||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
||||||
if (!select.where())
|
if (!select.where())
|
||||||
{
|
return {{}, true};
|
||||||
return std::make_pair(FieldVector{}, true);
|
|
||||||
}
|
FieldVectorPtr res = std::make_shared<FieldVector>();
|
||||||
FieldVector res;
|
|
||||||
auto matched_keys = traverseASTFilter(primary_key, primary_key_type, select.where(), query_info.sets, res);
|
auto matched_keys = traverseASTFilter(primary_key, primary_key_type, select.where(), query_info.sets, res);
|
||||||
return std::make_pair(res, !matched_keys);
|
return std::make_pair(res, !matched_keys);
|
||||||
}
|
}
|
||||||
@ -159,23 +159,19 @@ public:
|
|||||||
EmbeddedRocksDBSource(
|
EmbeddedRocksDBSource(
|
||||||
const StorageEmbeddedRocksDB & storage_,
|
const StorageEmbeddedRocksDB & storage_,
|
||||||
const StorageMetadataPtr & metadata_snapshot_,
|
const StorageMetadataPtr & metadata_snapshot_,
|
||||||
const FieldVector & keys_,
|
FieldVectorPtr keys_,
|
||||||
const size_t start_,
|
FieldVector::const_iterator begin_,
|
||||||
const size_t end_,
|
FieldVector::const_iterator end_,
|
||||||
const size_t max_block_size_)
|
const size_t max_block_size_)
|
||||||
: SourceWithProgress(metadata_snapshot_->getSampleBlock())
|
: SourceWithProgress(metadata_snapshot_->getSampleBlock())
|
||||||
, storage(storage_)
|
, storage(storage_)
|
||||||
, metadata_snapshot(metadata_snapshot_)
|
, metadata_snapshot(metadata_snapshot_)
|
||||||
, start(start_)
|
, keys(std::move(keys_))
|
||||||
|
, begin(begin_)
|
||||||
, end(end_)
|
, end(end_)
|
||||||
|
, it(begin)
|
||||||
, max_block_size(max_block_size_)
|
, max_block_size(max_block_size_)
|
||||||
{
|
{
|
||||||
// slice the keys
|
|
||||||
if (end > start)
|
|
||||||
{
|
|
||||||
keys.resize(end - start);
|
|
||||||
std::copy(keys_.begin() + start, keys_.begin() + end, keys.begin());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String getName() const override
|
String getName() const override
|
||||||
@ -185,27 +181,34 @@ public:
|
|||||||
|
|
||||||
Chunk generate() override
|
Chunk generate() override
|
||||||
{
|
{
|
||||||
if (processed_keys >= keys.size() || (start == end))
|
if (it >= end)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
std::vector<rocksdb::Slice> slices_keys;
|
size_t num_keys = end - begin;
|
||||||
slices_keys.reserve(keys.size());
|
|
||||||
std::vector<String> values;
|
std::vector<std::string> serialized_keys(num_keys);
|
||||||
std::vector<WriteBufferFromOwnString> wbs(keys.size());
|
std::vector<rocksdb::Slice> slices_keys(num_keys);
|
||||||
|
|
||||||
const auto & sample_block = metadata_snapshot->getSampleBlock();
|
const auto & sample_block = metadata_snapshot->getSampleBlock();
|
||||||
const auto & key_column = sample_block.getByName(storage.primary_key);
|
const auto & key_column = sample_block.getByName(storage.primary_key);
|
||||||
auto columns = sample_block.cloneEmptyColumns();
|
auto columns = sample_block.cloneEmptyColumns();
|
||||||
size_t primary_key_pos = sample_block.getPositionByName(storage.primary_key);
|
size_t primary_key_pos = sample_block.getPositionByName(storage.primary_key);
|
||||||
|
|
||||||
for (size_t i = processed_keys; i < std::min(keys.size(), processed_keys + max_block_size); ++i)
|
size_t rows_processed = 0;
|
||||||
|
while (it < end && rows_processed < max_block_size)
|
||||||
{
|
{
|
||||||
key_column.type->serializeBinary(keys[i], wbs[i]);
|
WriteBufferFromString wb(serialized_keys[rows_processed]);
|
||||||
auto str_ref = wbs[i].stringRef();
|
key_column.type->serializeBinary(*it, wb);
|
||||||
slices_keys.emplace_back(str_ref.data, str_ref.size);
|
wb.finalize();
|
||||||
|
slices_keys[rows_processed] = std::move(serialized_keys[rows_processed]);
|
||||||
|
|
||||||
|
++it;
|
||||||
|
++rows_processed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<String> values;
|
||||||
auto statuses = storage.rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values);
|
auto statuses = storage.rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values);
|
||||||
|
|
||||||
for (size_t i = 0; i < statuses.size(); ++i)
|
for (size_t i = 0; i < statuses.size(); ++i)
|
||||||
{
|
{
|
||||||
if (statuses[i].ok())
|
if (statuses[i].ok())
|
||||||
@ -221,7 +224,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
processed_keys += max_block_size;
|
|
||||||
|
|
||||||
UInt64 num_rows = columns.at(0)->size();
|
UInt64 num_rows = columns.at(0)->size();
|
||||||
return Chunk(std::move(columns), num_rows);
|
return Chunk(std::move(columns), num_rows);
|
||||||
@ -231,12 +233,11 @@ private:
|
|||||||
const StorageEmbeddedRocksDB & storage;
|
const StorageEmbeddedRocksDB & storage;
|
||||||
|
|
||||||
const StorageMetadataPtr metadata_snapshot;
|
const StorageMetadataPtr metadata_snapshot;
|
||||||
const size_t start;
|
FieldVectorPtr keys;
|
||||||
const size_t end;
|
FieldVector::const_iterator begin;
|
||||||
|
FieldVector::const_iterator end;
|
||||||
|
FieldVector::const_iterator it;
|
||||||
const size_t max_block_size;
|
const size_t max_block_size;
|
||||||
FieldVector keys;
|
|
||||||
|
|
||||||
size_t processed_keys = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -289,7 +290,8 @@ Pipe StorageEmbeddedRocksDB::read(
|
|||||||
unsigned num_streams)
|
unsigned num_streams)
|
||||||
{
|
{
|
||||||
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
|
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
|
||||||
FieldVector keys;
|
|
||||||
|
FieldVectorPtr keys;
|
||||||
bool all_scan = false;
|
bool all_scan = false;
|
||||||
|
|
||||||
auto primary_key_data_type = metadata_snapshot->getSampleBlock().getByName(primary_key).type;
|
auto primary_key_data_type = metadata_snapshot->getSampleBlock().getByName(primary_key).type;
|
||||||
@ -302,37 +304,34 @@ Pipe StorageEmbeddedRocksDB::read(
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (keys.empty())
|
if (keys->empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
std::sort(keys.begin(), keys.end());
|
std::sort(keys->begin(), keys->end());
|
||||||
auto unique_iter = std::unique(keys.begin(), keys.end());
|
keys->erase(std::unique(keys->begin(), keys->end()), keys->end());
|
||||||
if (unique_iter != keys.end())
|
|
||||||
keys.erase(unique_iter, keys.end());
|
|
||||||
|
|
||||||
Pipes pipes;
|
Pipes pipes;
|
||||||
size_t start = 0;
|
|
||||||
size_t end;
|
|
||||||
|
|
||||||
const size_t num_threads = std::min(size_t(num_streams), keys.size());
|
size_t num_keys = keys->size();
|
||||||
const size_t batch_per_size = ceil(keys.size() * 1.0 / num_threads);
|
size_t num_threads = std::min(size_t(num_streams), keys->size());
|
||||||
|
|
||||||
for (size_t t = 0; t < num_threads; ++t)
|
assert(num_keys <= std::numeric_limits<uint32_t>::max());
|
||||||
|
assert(num_threads <= std::numeric_limits<uint32_t>::max());
|
||||||
|
|
||||||
|
for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx)
|
||||||
{
|
{
|
||||||
if (start >= keys.size())
|
size_t begin = num_keys * thread_idx / num_threads;
|
||||||
start = end = 0;
|
size_t end = num_keys * (thread_idx + 1) / num_threads;
|
||||||
else
|
|
||||||
end = start + batch_per_size > keys.size() ? keys.size() : start + batch_per_size;
|
|
||||||
|
|
||||||
pipes.emplace_back(
|
pipes.emplace_back(std::make_shared<EmbeddedRocksDBSource>(
|
||||||
std::make_shared<EmbeddedRocksDBSource>(*this, metadata_snapshot, keys, start, end, max_block_size));
|
*this, metadata_snapshot, keys, keys->begin() + begin, keys->begin() + end, max_block_size));
|
||||||
start += batch_per_size;
|
|
||||||
}
|
}
|
||||||
return Pipe::unitePipes(std::move(pipes));
|
return Pipe::unitePipes(std::move(pipes));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockOutputStreamPtr StorageEmbeddedRocksDB::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
|
BlockOutputStreamPtr StorageEmbeddedRocksDB::write(
|
||||||
|
const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
|
||||||
{
|
{
|
||||||
return std::make_shared<EmbeddedRocksDBBlockOutputStream>(*this, metadata_snapshot);
|
return std::make_shared<EmbeddedRocksDBBlockOutputStream>(*this, metadata_snapshot);
|
||||||
}
|
}
|
||||||
|
15
tests/queries/0_stateless/01686_rocksdb.reference
Normal file
15
tests/queries/0_stateless/01686_rocksdb.reference
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
123 Hello, world (123)
|
||||||
|
--
|
||||||
|
--
|
||||||
|
123 Hello, world (123)
|
||||||
|
4567 Hello, world (4567)
|
||||||
|
--
|
||||||
|
--
|
||||||
|
0 Hello, world (0)
|
||||||
|
--
|
||||||
|
123 Hello, world (123)
|
||||||
|
456 Hello, world (456)
|
||||||
|
--
|
||||||
|
99 Hello, world (99)
|
||||||
|
999 Hello, world (999)
|
||||||
|
9999 Hello, world (9999)
|
27
tests/queries/0_stateless/01686_rocksdb.sql
Normal file
27
tests/queries/0_stateless/01686_rocksdb.sql
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
DROP TABLE IF EXISTS test;
|
||||||
|
|
||||||
|
CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key);
|
||||||
|
|
||||||
|
INSERT INTO test SELECT number, format('Hello, world ({})', toString(number)) FROM numbers(10000);
|
||||||
|
|
||||||
|
SELECT * FROM test WHERE key = 123;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key = -123;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key = 123 OR key = 4567 ORDER BY key;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key = NULL;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key = NULL OR key = 0;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key IN (123, 456, -123) ORDER BY key;
|
||||||
|
SELECT '--';
|
||||||
|
SELECT * FROM test WHERE key = 'Hello'; -- { serverError 53 }
|
||||||
|
|
||||||
|
DETACH TABLE test NO DELAY;
|
||||||
|
ATTACH TABLE test;
|
||||||
|
|
||||||
|
SELECT * FROM test WHERE key IN (99, 999, 9999, -123) ORDER BY key;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS test;
|
||||||
|
|
@ -200,3 +200,4 @@
|
|||||||
01676_clickhouse_client_autocomplete
|
01676_clickhouse_client_autocomplete
|
||||||
01671_aggregate_function_group_bitmap_data
|
01671_aggregate_function_group_bitmap_data
|
||||||
01674_executable_dictionary_implicit_key
|
01674_executable_dictionary_implicit_key
|
||||||
|
01686_rocksdb
|
||||||
|
Loading…
Reference in New Issue
Block a user