ClickHouse/dbms/src/Storages/StorageLog.cpp

187 lines
5.7 KiB
C++
Raw Normal View History

2010-03-18 19:32:14 +00:00
#include <map>
2011-11-05 23:31:19 +00:00
#include <DB/Common/escapeForFileName.h>
2010-03-18 19:32:14 +00:00
#include <DB/Core/Exception.h>
#include <DB/Core/ErrorCodes.h>
2012-01-10 22:11:51 +00:00
#include <DB/IO/ReadHelpers.h>
2012-01-09 19:20:48 +00:00
#include <DB/IO/WriteHelpers.h>
2010-03-18 19:32:14 +00:00
#include <DB/Storages/StorageLog.h>
2012-01-09 19:20:48 +00:00
#define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin"
#define DBMS_STORAGE_LOG_MARKS_FILE_EXTENSION ".mrk"
2010-03-18 19:32:14 +00:00
namespace DB
{
using Poco::SharedPtr;
2012-01-10 22:11:51 +00:00
/** Creates one reading Stream per requested column.
  * Each Stream is constructed from the path of the column's data file and
  * the offset stored in mark number mark_number_ of that column.
  * rows_limit_ is kept as the row budget of this input stream; rows_read starts at zero.
  */
LogBlockInputStream::LogBlockInputStream(size_t block_size_, const Names & column_names_, StorageLog & storage_, size_t mark_number_, size_t rows_limit_)
    : block_size(block_size_),
    column_names(column_names_),
    storage(storage_),
    mark_number(mark_number_),
    rows_limit(rows_limit_),
    rows_read(0)
{
    for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it)
    {
        const std::string & column_name = *it;
        const std::string & data_path = storage.files[column_name].data_file.path();

        streams.insert(std::make_pair(
            column_name,
            new Stream(data_path, storage.files[column_name].marks[mark_number].offset)));
    }
}
2011-09-04 21:23:19 +00:00
/** Reads the next block.
  * For every requested column, deserializes at most
  * min(block_size, rows_limit - rows_read) rows from that column's stream.
  * Columns that yielded no rows are not inserted, so the returned Block is
  * empty when nothing could be read.
  *
  * The row budget is computed once per block and rows_read is advanced once
  * per block. Updating rows_read inside the per-column loop would shrink the
  * budget between columns of the same block and count every row once per column.
  */
Block LogBlockInputStream::readImpl()
{
    Block res;

    /// The whole row budget of this stream has been consumed.
    if (rows_read == rows_limit)
        return res;

    /// Same limit for every column of the block, so all columns get equal length.
    const size_t max_rows_to_read = std::min(block_size, rows_limit - rows_read);

    /// Number of rows in this block: size of the first column that returned data.
    size_t rows_in_block = 0;

    for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it)
    {
        ColumnWithNameAndType column;
        column.name = *it;
        column.type = storage.getDataTypeByName(*it);
        column.column = column.type->createColumn();
        column.type->deserializeBinary(*column.column, streams[column.name]->compressed, max_rows_to_read);

        if (column.column->size())
        {
            res.insert(column);

            if (!rows_in_block)
                rows_in_block = column.column->size();
        }
    }

    rows_read += rows_in_block;

    return res;
}
/** Creates one writing Stream per column of the table.
  * Each Stream is constructed from the paths of the column's data file and marks file.
  */
LogBlockOutputStream::LogBlockOutputStream(StorageLog & storage_)
    : storage(storage_)
{
    NamesAndTypesList::const_iterator column_it = storage.columns->begin();
    const NamesAndTypesList::const_iterator columns_end = storage.columns->end();

    while (column_it != columns_end)
    {
        const std::string & column_name = column_it->first;

        const std::string & data_path = storage.files[column_name].data_file.path();
        const std::string & marks_path = storage.files[column_name].marks_file.path();

        streams.insert(std::make_pair(column_name, new Stream(data_path, marks_path)));

        ++column_it;
    }
}
void LogBlockOutputStream::write(const Block & block)
{
2011-08-15 02:24:44 +00:00
storage.check(block);
2010-03-18 19:32:14 +00:00
for (size_t i = 0; i < block.columns(); ++i)
{
const ColumnWithNameAndType & column = block.getByPosition(i);
2012-01-10 22:11:51 +00:00
Mark mark;
mark.rows = (storage.files[column.name].marks.empty() ? 0 : storage.files[column.name].marks.back().rows) + column.column->size();
mark.offset = streams[column.name]->plain.count();
writeIntBinary(mark.rows, streams[column.name]->marks);
writeIntBinary(mark.offset, streams[column.name]->marks);
storage.files[column.name].marks.push_back(mark);
2010-03-18 20:52:28 +00:00
column.type->serializeBinary(*column.column, streams[column.name]->compressed);
2012-01-09 19:20:48 +00:00
streams[column.name]->compressed.next();
2010-03-18 19:32:14 +00:00
}
}
2012-01-09 19:20:48 +00:00
/** Sets up the storage for the table `name_` located under `path_`.
  *  - Throws EMPTY_LIST_OF_COLUMNS_PASSED if the column list is empty.
  *  - Creates the table directory (path + escaped table name + '/').
  *  - For every column registers its data file (.bin) and marks file (.mrk);
  *    throws DUPLICATE_COLUMN if a column name occurs twice.
  *  - If a marks file exists, loads all marks from it. Throws
  *    SIZES_OF_MARKS_FILES_ARE_INCONSISTENT if its size is not a multiple of
  *    sizeof(Mark) or differs from the size of a previously seen marks file.
  */
StorageLog::StorageLog(const std::string & path_, const std::string & name_, NamesAndTypesListPtr columns_)
    : path(path_), name(name_), columns(columns_)
{
    if (columns->empty())
        throw Exception("Empty list of columns passed to StorageLog constructor", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);

    /// Make sure the table directory exists.
    const std::string table_dir = path + escapeForFileName(name) + '/';
    Poco::File(table_dir).createDirectories();

    /// Size of the first marks file encountered; -1 while none has been seen.
    ssize_t size_of_marks_file = -1;

    for (NamesAndTypesList::const_iterator it = columns->begin(); it != columns->end(); ++it)
    {
        const std::string & column_name = it->first;

        ColumnData empty_column_data;
        std::pair<Files_t::iterator, bool> inserted = files.insert(std::make_pair(column_name, empty_column_data));
        if (!inserted.second)
            throw Exception("Duplicate column with name " + it->first + " in constructor of StorageLog.",
                ErrorCodes::DUPLICATE_COLUMN);

        ColumnData & column_data = inserted.first->second;

        const std::string column_file_prefix = table_dir + escapeForFileName(column_name);
        column_data.data_file = Poco::File(column_file_prefix + DBMS_STORAGE_LOG_DATA_FILE_EXTENSION);
        column_data.marks_file = Poco::File(column_file_prefix + DBMS_STORAGE_LOG_MARKS_FILE_EXTENSION);

        /// Read the marks, if there are any.
        if (!column_data.marks_file.exists())
            continue;

        const ssize_t size_of_current_marks_file = column_data.marks_file.getSize();

        if (size_of_current_marks_file % sizeof(Mark) != 0)
            throw Exception("Sizes of marks files are inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT);

        if (-1 == size_of_marks_file)
            size_of_marks_file = size_of_current_marks_file;
        else if (size_of_marks_file != size_of_current_marks_file)
            throw Exception("Sizes of marks files are inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT);

        column_data.marks.reserve(column_data.marks_file.getSize() / sizeof(Mark));

        ReadBufferFromFile marks_rb(column_data.marks_file.path());
        while (!marks_rb.eof())
        {
            Mark mark;
            readIntBinary(mark.rows, marks_rb);
            readIntBinary(mark.offset, marks_rb);
            column_data.marks.push_back(mark);
        }
    }
}
2012-01-09 19:20:48 +00:00
/** Creates input streams for reading the requested columns in parallel.
  *
  * The marks of the first file in `files` are split into at most max_threads
  * contiguous ranges (never more ranges than there are marks). Each range
  * becomes one LogBlockInputStream that starts at the first mark of the range
  * and is limited to the number of rows covered by the range. Mark::rows is
  * cumulative, so that number is the difference between the row count at the
  * last mark of the range and the row count just before the range.
  *
  * Returns an empty list when there are no marks or max_threads is zero.
  * column_names is validated with check() first.
  */
BlockInputStreams StorageLog::read(
    const Names & column_names,
    ASTPtr query,
    size_t max_block_size,
    unsigned max_threads)
{
    check(column_names);

    /// Only read here, so a reference is enough; no need to copy the whole vector.
    const Marks & marks = files.begin()->second.marks;
    const size_t marks_size = marks.size();

    if (max_threads > marks_size)
        max_threads = marks_size;

    BlockInputStreams res;

    for (size_t thread = 0; thread < max_threads; ++thread)
    {
        /// Range of marks [first_mark, end_mark) assigned to this stream.
        /// max_threads <= marks_size guarantees end_mark >= 1 and, for thread > 0, first_mark >= 1.
        const size_t first_mark = thread * marks_size / max_threads;
        const size_t end_mark = (thread + 1) * marks_size / max_threads;

        const size_t rows_before_range = first_mark == 0 ? 0 : marks[first_mark - 1].rows;
        const size_t rows_in_range = marks[end_mark - 1].rows - rows_before_range;

        res.push_back(new LogBlockInputStream(
            max_block_size,
            column_names,
            *this,
            first_mark,
            rows_in_range));
    }

    return res;
}
2011-08-28 02:22:23 +00:00
/** Returns a new output stream that appends blocks to this table.
  * The query argument is not used.
  */
BlockOutputStreamPtr StorageLog::write(
    ASTPtr query)
{
    BlockOutputStreamPtr output_stream(new LogBlockOutputStream(*this));
    return output_stream;
}
2011-11-05 23:31:19 +00:00
/** Removes the data file and the marks file of every column.
  *
  * The constructor only creates the table directory and itself treats the
  * marks file as possibly absent, so the files may legitimately not exist.
  * Each removal is therefore guarded by exists().
  * NOTE(review): Poco::File::remove is expected to throw for a missing path - confirm against Poco docs.
  */
void StorageLog::drop()
{
    for (Files_t::iterator it = files.begin(); it != files.end(); ++it)
    {
        if (it->second.data_file.exists())
            it->second.data_file.remove();

        if (it->second.marks_file.exists())
            it->second.marks_file.remove();
    }
}
2010-03-18 19:32:14 +00:00
}