ClickHouse/src/Storages/StorageMemory.cpp

246 lines
6.8 KiB
C++
Raw Normal View History

#include <Common/Exception.h>
2011-10-31 17:55:06 +00:00
#include <DataStreams/IBlockInputStream.h>
2015-01-18 08:25:56 +00:00
#include <Storages/StorageMemory.h>
#include <Storages/StorageFactory.h>
2011-10-31 17:55:06 +00:00
2018-06-05 19:46:49 +00:00
#include <IO/WriteHelpers.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Processors/Pipe.h>
2018-06-05 19:46:49 +00:00
2011-10-31 17:55:06 +00:00
namespace DB
{
/// Error codes referenced in this file. Only declared here (`extern`);
/// the value is used when rejecting engine arguments in registerStorageMemory().
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
class MemorySource : public SourceWithProgress
2011-10-31 17:55:06 +00:00
{
2015-01-18 08:25:56 +00:00
public:
2020-08-06 14:16:52 +00:00
/// We use range [first, last] which includes right border.
2020-08-07 13:02:28 +00:00
/// Blocks are stored in std::list which may be appended in another thread.
/// We don't use synchronisation here, because elements in range [first, last] won't be modified.
MemorySource(
Names column_names_,
2020-08-06 14:16:52 +00:00
BlocksList::iterator first_,
2020-08-25 17:54:44 +00:00
size_t num_blocks_,
const StorageMemory & storage,
const StorageMetadataPtr & metadata_snapshot)
2020-06-19 17:17:13 +00:00
: SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
, column_names(std::move(column_names_))
2020-08-25 17:54:44 +00:00
, current_it(first_)
, num_blocks(num_blocks_)
{
}
2011-10-31 17:55:06 +00:00
2020-09-04 08:36:47 +00:00
/// If called, will initialize the number of blocks at first read.
/// It allows to read data which was inserted into memory table AFTER Storage::read was called.
/// This hack is needed for global subqueries.
void delayInitialization(BlocksList * data_, std::mutex * mutex_)
{
data = data_;
mutex = mutex_;
}
String getName() const override { return "Memory"; }
2011-10-31 17:55:06 +00:00
2015-01-18 08:25:56 +00:00
protected:
Chunk generate() override
{
2020-09-04 08:36:47 +00:00
if (data)
{
std::lock_guard guard(*mutex);
current_it = data->begin();
num_blocks = data->size();
is_finished = num_blocks == 0;
data = nullptr;
mutex = nullptr;
}
2020-08-06 13:22:17 +00:00
if (is_finished)
{
return {};
}
else
{
2020-08-25 17:54:44 +00:00
const Block & src = *current_it;
Columns columns;
columns.reserve(column_names.size());
/// Add only required columns to `res`.
for (const auto & name : column_names)
columns.emplace_back(src.getByName(name).column);
2020-08-25 19:46:47 +00:00
++current_block_idx;
2020-08-25 17:54:44 +00:00
if (current_block_idx == num_blocks)
2020-08-06 13:22:17 +00:00
is_finished = true;
2020-08-25 23:12:16 +00:00
else
++current_it;
2020-08-25 19:46:47 +00:00
return Chunk(std::move(columns), src.rows());
}
}
2015-01-18 08:25:56 +00:00
private:
Names column_names;
2020-08-25 17:54:44 +00:00
BlocksList::iterator current_it;
size_t current_block_idx = 0;
2020-09-04 08:36:47 +00:00
size_t num_blocks;
2020-08-06 13:22:17 +00:00
bool is_finished = false;
2020-09-04 08:36:47 +00:00
BlocksList * data = nullptr;
std::mutex * mutex = nullptr;
2015-01-18 08:25:56 +00:00
};
2011-10-31 17:55:06 +00:00
2015-01-18 08:25:56 +00:00
class MemoryBlockOutputStream : public IBlockOutputStream
2011-10-31 17:55:06 +00:00
{
2015-01-18 08:25:56 +00:00
public:
explicit MemoryBlockOutputStream(
StorageMemory & storage_,
const StorageMetadataPtr & metadata_snapshot_)
: storage(storage_)
, metadata_snapshot(metadata_snapshot_)
{}
Block getHeader() const override { return metadata_snapshot->getSampleBlock(); }
void write(const Block & block) override
{
2020-06-17 14:32:25 +00:00
metadata_snapshot->check(block, true);
2019-01-02 06:44:36 +00:00
std::lock_guard lock(storage.mutex);
storage.data.push_back(block);
}
2015-01-18 08:25:56 +00:00
private:
StorageMemory & storage;
StorageMetadataPtr metadata_snapshot;
2015-01-18 08:25:56 +00:00
};
2011-10-31 17:55:06 +00:00
2019-12-04 16:06:55 +00:00
/// Creates the storage and installs in-memory metadata built from the given columns and constraints.
StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_)
    : IStorage(table_id_)
{
    StorageInMemoryMetadata metadata;

    metadata.setColumns(std::move(columns_description_));
    metadata.setConstraints(std::move(constraints_));

    setInMemoryMetadata(metadata);
}
2020-08-03 13:54:14 +00:00
/// Builds a pipe reading the requested columns from the stored blocks.
/// The blocks are divided into at most `num_streams` contiguous ranges, one MemorySource per range.
Pipe StorageMemory::read(
    const Names & column_names,
    const StorageMetadataPtr & metadata_snapshot,
    const SelectQueryInfo & /*query_info*/,
    const Context & /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t /*max_block_size*/,
    unsigned num_streams)
{
    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());

    std::lock_guard guard(mutex);

    if (delay_read_for_global_subqueries)
    {
        /// Note: a single source is used for a global subquery.
        /// The main reason is that the table is empty at this point,
        /// and the number of blocks that are going to be inserted into it is not known yet.
        ///
        /// This may look suboptimal, but the data from such a table is used to fill
        /// a set for IN or a hash table for JOIN, and that cannot be done concurrently.
        /// Since nothing else is done with the data, multiple sources should not give any profit.
        auto delayed_source = std::make_shared<MemorySource>(column_names, data.begin(), data.size(), *this, metadata_snapshot);
        delayed_source->delayInitialization(&data, &mutex);
        return Pipe(std::move(delayed_source));
    }

    /// There is no point in having more streams than blocks.
    size_t total_blocks = data.size();
    if (num_streams > total_blocks)
        num_streams = total_blocks;

    Pipes pipes;

    BlocksList::iterator range_begin = data.begin();
    size_t range_begin_idx = 0;

    for (size_t stream = 0; stream < num_streams; ++stream)
    {
        /// Stream number `stream` gets blocks with indices [range_begin_idx, range_end_idx).
        size_t range_end_idx = (stream + 1) * total_blocks / num_streams;
        size_t blocks_in_range = range_end_idx - range_begin_idx;

        assert(blocks_in_range > 0);

        pipes.emplace_back(std::make_shared<MemorySource>(column_names, range_begin, blocks_in_range, *this, metadata_snapshot));

        /// Move the iterator to the first block of the next range.
        for (; range_begin_idx < range_end_idx; ++range_begin_idx)
            ++range_begin;
    }

    return Pipe::unitePipes(std::move(pipes));
}
/// Returns an output stream that appends written blocks to this table.
/// The query and the context are not used.
BlockOutputStreamPtr StorageMemory::write(
    const ASTPtr & /*query*/,
    const StorageMetadataPtr & metadata_snapshot,
    const Context & /*context*/)
{
    auto output = std::make_shared<MemoryBlockOutputStream>(*this, metadata_snapshot);
    return output;
}
2011-11-05 23:31:19 +00:00
2020-01-22 11:30:11 +00:00
void StorageMemory::drop()
2011-11-05 23:31:19 +00:00
{
2019-01-02 06:44:36 +00:00
std::lock_guard lock(mutex);
data.clear();
2011-11-05 23:31:19 +00:00
}
2020-06-18 10:29:13 +00:00
void StorageMemory::truncate(
2020-06-18 16:10:47 +00:00
const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
2018-04-21 00:35:20 +00:00
{
2019-01-02 06:44:36 +00:00
std::lock_guard lock(mutex);
2018-04-21 00:35:20 +00:00
data.clear();
}
std::optional<UInt64> StorageMemory::totalRows() const
{
UInt64 rows = 0;
std::lock_guard lock(mutex);
2020-04-22 06:22:14 +00:00
for (const auto & buffer : data)
rows += buffer.rows();
return rows;
}
std::optional<UInt64> StorageMemory::totalBytes() const
{
UInt64 bytes = 0;
std::lock_guard lock(mutex);
2020-04-22 06:22:14 +00:00
for (const auto & buffer : data)
bytes += buffer.allocatedBytes();
return bytes;
}
/// Registers the "Memory" table engine in the factory.
/// The engine accepts no arguments; passing any raises NUMBER_OF_ARGUMENTS_DOESNT_MATCH.
void registerStorageMemory(StorageFactory & factory)
{
    auto create_memory_storage = [](const StorageFactory::Arguments & args)
    {
        if (!args.engine_args.empty())
            throw Exception(
                "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        return StorageMemory::create(args.table_id, args.columns, args.constraints);
    };

    factory.registerStorage("Memory", create_memory_storage);
}
2011-10-31 17:55:06 +00:00
}