2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeData.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <Yandex/time2str.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <Poco/Ext/ScopedTry.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <DB/Interpreters/ExpressionAnalyzer.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeReader.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeBlockInputStream.h>
|
|
|
|
|
#include <DB/Storages/MergeTree/MergedBlockOutputStream.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Parsers/ASTIdentifier.h>
|
|
|
|
|
#include <DB/Parsers/ASTNameTypePair.h>
|
|
|
|
|
#include <DB/DataStreams/ExpressionBlockInputStream.h>
|
2014-03-27 17:30:04 +00:00
|
|
|
|
#include <DB/DataStreams/copyData.h>
|
2014-06-10 14:24:33 +00:00
|
|
|
|
#include <algorithm>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/** Constructor: stores table configuration, clones the primary key AST,
  * creates the data directory if needed, builds the sort description and
  * primary key expression/sample block, then loads existing data parts from disk.
  * Note: primary_expr_ast_ is cloned, so the caller's AST is not shared.
  */
MergeTreeData::MergeTreeData(
	const String & full_path_, NamesAndTypesListPtr columns_,
	const Context & context_,
	ASTPtr & primary_expr_ast_,
	const String & date_column_name_, const ASTPtr & sampling_expression_,
	size_t index_granularity_,
	Mode mode_,
	const String & sign_column_,
	const MergeTreeSettings & settings_,
	const String & log_name_)
	: context(context_),
	date_column_name(date_column_name_), sampling_expression(sampling_expression_),
	index_granularity(index_granularity_),
	mode(mode_), sign_column(sign_column_),
	settings(settings_), primary_expr_ast(primary_expr_ast_->clone()),
	full_path(full_path_), columns(columns_), log_name(log_name_),
	log(&Logger::get(log_name + " (Data)"))
{
	/// Create the directory if it does not exist.
	Poco::File(full_path).createDirectories();

	/// Initialize the sort description: one ascending (direction 1) entry
	/// per child of the primary key expression.
	sort_descr.reserve(primary_expr_ast->children.size());
	for (const ASTPtr & ast : primary_expr_ast->children)
	{
		String name = ast->getColumnName();
		sort_descr.push_back(SortColumnDescription(name, 1));
	}

	/// Full (non-projected) actions for evaluating the primary key expression.
	primary_expr = ExpressionAnalyzer(primary_expr_ast, context, *columns).getActions(false);

	/// Projected actions are used only to obtain the sample block (column names/types of the key).
	ExpressionActionsPtr projected_expr = ExpressionAnalyzer(primary_expr_ast, context, *columns).getActions(true);
	primary_key_sample = projected_expr->getSampleBlock();

	loadDataParts();
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
/// Largest block number ("right" bound) among all active parts; 0 when there are none.
/// NOTE(review): reads data_parts without taking data_parts_mutex — presumably the
/// caller guarantees exclusion; confirm against call sites.
UInt64 MergeTreeData::getMaxDataPartIndex()
{
	UInt64 max_part_id = 0;
	for (const auto & part : data_parts)
		max_part_id = std::max(max_part_id, part->right);
	return max_part_id;
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
std::string MergeTreeData::getModePrefix() const
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 12:48:07 +00:00
|
|
|
|
switch (mode)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 12:48:07 +00:00
|
|
|
|
case Ordinary: return "";
|
|
|
|
|
case Collapsing: return "Collapsing";
|
|
|
|
|
case Summing: return "Summing";
|
2014-05-28 14:54:42 +00:00
|
|
|
|
case Aggregating: return "Aggregating";
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
default:
|
|
|
|
|
throw Exception("Unknown mode of operation for MergeTreeData: " + toString(mode), ErrorCodes::LOGICAL_ERROR);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::loadDataParts()
|
|
|
|
|
{
|
|
|
|
|
LOG_DEBUG(log, "Loading data parts");
|
|
|
|
|
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
|
|
|
|
|
data_parts.clear();
|
|
|
|
|
|
2014-04-09 15:52:47 +00:00
|
|
|
|
Strings all_file_names;
|
2014-03-09 17:36:01 +00:00
|
|
|
|
Poco::DirectoryIterator end;
|
|
|
|
|
for (Poco::DirectoryIterator it(full_path); it != end; ++it)
|
2014-04-09 15:52:47 +00:00
|
|
|
|
all_file_names.push_back(it.name());
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-04-09 15:52:47 +00:00
|
|
|
|
Strings part_file_names;
|
|
|
|
|
for (const String & file_name : all_file_names)
|
|
|
|
|
{
|
2014-03-09 17:36:01 +00:00
|
|
|
|
/// Удаляем временные директории старше суток.
|
|
|
|
|
if (0 == file_name.compare(0, strlen("tmp_"), "tmp_"))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (0 == file_name.compare(0, strlen("old_"), "old_"))
|
2014-04-09 15:52:47 +00:00
|
|
|
|
{
|
|
|
|
|
String new_file_name = file_name.substr(strlen("old_"));
|
|
|
|
|
LOG_WARNING(log, "Renaming " << file_name << " to " << new_file_name << " for compatibility reasons");
|
|
|
|
|
Poco::File(full_path + file_name).renameTo(full_path + new_file_name);
|
|
|
|
|
part_file_names.push_back(new_file_name);
|
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
else
|
2014-04-09 15:52:47 +00:00
|
|
|
|
{
|
2014-03-09 17:36:01 +00:00
|
|
|
|
part_file_names.push_back(file_name);
|
2014-04-09 15:52:47 +00:00
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-04-18 11:05:30 +00:00
|
|
|
|
DataPartsVector broken_parts_to_remove;
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
Poco::RegularExpression::MatchVec matches;
|
2014-04-09 15:52:47 +00:00
|
|
|
|
for (const String & file_name : part_file_names)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-05-26 10:41:40 +00:00
|
|
|
|
if (!ActiveDataPartSet::isPartDirectory(file_name, matches))
|
2014-03-09 17:36:01 +00:00
|
|
|
|
continue;
|
|
|
|
|
|
2014-03-14 17:19:38 +00:00
|
|
|
|
MutableDataPartPtr part = std::make_shared<DataPart>(*this);
|
2014-05-26 10:41:40 +00:00
|
|
|
|
ActiveDataPartSet::parsePartName(file_name, *part, &matches);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
part->name = file_name;
|
|
|
|
|
|
2014-04-18 11:05:30 +00:00
|
|
|
|
bool broken = false;
|
|
|
|
|
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
part->loadIndex();
|
|
|
|
|
part->loadChecksums();
|
|
|
|
|
part->checkNotBroken();
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
broken = true;
|
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-09 15:52:47 +00:00
|
|
|
|
/// Игнорируем и, возможно, удаляем битые куски, которые могут образовываться после грубого перезапуска сервера.
|
2014-04-18 11:05:30 +00:00
|
|
|
|
if (broken)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
|
|
|
|
if (part->level == 0)
|
|
|
|
|
{
|
|
|
|
|
/// Восстановить куски нулевого уровня невозможно.
|
2014-03-13 12:48:07 +00:00
|
|
|
|
LOG_ERROR(log, "Removing broken part " << full_path + file_name << " because is't impossible to repair.");
|
2014-04-18 11:05:30 +00:00
|
|
|
|
broken_parts_to_remove.push_back(part);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2014-04-09 15:52:47 +00:00
|
|
|
|
/// Посмотрим, сколько кусков покрыты битым. Если хотя бы два, предполагаем, что битый кусок образован их
|
|
|
|
|
/// слиянием, и мы ничего не потеряем, если его удалим.
|
|
|
|
|
int contained_parts = 0;
|
|
|
|
|
|
|
|
|
|
LOG_ERROR(log, "Part " << full_path + file_name << " is broken. Looking for parts to replace it.");
|
|
|
|
|
|
|
|
|
|
for (const String & contained_name : part_file_names)
|
|
|
|
|
{
|
|
|
|
|
if (contained_name == file_name)
|
|
|
|
|
continue;
|
2014-05-26 10:41:40 +00:00
|
|
|
|
if (!ActiveDataPartSet::isPartDirectory(contained_name, matches))
|
2014-04-09 15:52:47 +00:00
|
|
|
|
continue;
|
|
|
|
|
DataPart contained_part(*this);
|
2014-05-26 10:41:40 +00:00
|
|
|
|
ActiveDataPartSet::parsePartName(contained_name, contained_part, &matches);
|
2014-04-09 15:52:47 +00:00
|
|
|
|
if (part->contains(contained_part))
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Found part " << full_path + contained_name);
|
|
|
|
|
++contained_parts;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (contained_parts >= 2)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Removing broken part " << full_path + file_name << " because it covers at least 2 other parts");
|
2014-04-18 11:05:30 +00:00
|
|
|
|
broken_parts_to_remove.push_back(part);
|
2014-04-09 15:52:47 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Not removing broken part " << full_path + file_name
|
|
|
|
|
<< " because it covers less than 2 parts. You need to resolve this manually");
|
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
part->modification_time = Poco::File(full_path + file_name).getLastModified().epochTime();
|
|
|
|
|
|
|
|
|
|
data_parts.insert(part);
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-18 11:05:30 +00:00
|
|
|
|
if (broken_parts_to_remove.size() > 2)
|
|
|
|
|
throw Exception("Suspiciously many (" + toString(broken_parts_to_remove.size()) + ") broken parts to remove.",
|
|
|
|
|
ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS);
|
|
|
|
|
|
|
|
|
|
for (const auto & part : broken_parts_to_remove)
|
|
|
|
|
part->remove();
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
all_data_parts = data_parts;
|
|
|
|
|
|
|
|
|
|
/** Удаляем из набора актуальных кусков куски, которые содержатся в другом куске (которые были склеены),
|
|
|
|
|
* но по каким-то причинам остались лежать в файловой системе.
|
|
|
|
|
* Удаление файлов будет произведено потом в методе clearOldParts.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
if (data_parts.size() >= 2)
|
|
|
|
|
{
|
|
|
|
|
DataParts::iterator prev_jt = data_parts.begin();
|
|
|
|
|
DataParts::iterator curr_jt = prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
while (curr_jt != data_parts.end())
|
|
|
|
|
{
|
|
|
|
|
/// Куски данных за разные месяцы рассматривать не будем
|
|
|
|
|
if ((*curr_jt)->left_month != (*curr_jt)->right_month
|
|
|
|
|
|| (*curr_jt)->right_month != (*prev_jt)->left_month
|
|
|
|
|
|| (*prev_jt)->left_month != (*prev_jt)->right_month)
|
|
|
|
|
{
|
|
|
|
|
++prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((*curr_jt)->contains(**prev_jt))
|
|
|
|
|
{
|
2014-04-09 15:52:47 +00:00
|
|
|
|
(*prev_jt)->remove_time = time(0);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
data_parts.erase(prev_jt);
|
|
|
|
|
prev_jt = curr_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
}
|
|
|
|
|
else if ((*prev_jt)->contains(**curr_jt))
|
|
|
|
|
{
|
2014-04-09 15:52:47 +00:00
|
|
|
|
(*curr_jt)->remove_time = time(0);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
data_parts.erase(curr_jt++);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
++prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOG_DEBUG(log, "Loaded data parts (" << data_parts.size() << " items)");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-04-09 15:52:47 +00:00
|
|
|
|
/** Deletes obsolete parts (no external references, past old_parts_lifetime)
  * and temporary directories older than one day.
  * Returns the names of the parts that were removed.
  * Non-blocking: if another thread already holds all_data_parts_mutex, returns immediately.
  */
Strings MergeTreeData::clearOldParts()
{
	Poco::ScopedTry<Poco::FastMutex> lock;
	Strings res;

	/// If this method is already running in another thread (or all_data_parts is being
	/// modified right now), there is nothing for us to do.
	if (!lock.lock(&all_data_parts_mutex))
		return res;

	const time_t now = time(0);
	DataParts::iterator it = all_data_parts.begin();
	while (it != all_data_parts.end())
	{
		/// Once the reference count drops to 1 (only the set owns the part), it cannot grow again.
		const bool unreferenced = (it->use_count() == 1);
		if (unreferenced && (*it)->remove_time + settings.old_parts_lifetime < now)
		{
			LOG_DEBUG(log, "Removing part " << (*it)->name);

			res.push_back((*it)->name);
			(*it)->remove();
			all_data_parts.erase(it++);
		}
		else
			++it;
	}

	/// Remove temporary directories older than one day.
	Strings dir_names;
	Poco::DirectoryIterator dir_end;
	for (Poco::DirectoryIterator dir_it(full_path); dir_it != dir_end; ++dir_it)
		dir_names.push_back(dir_it.name());

	for (const String & dir_name : dir_names)
	{
		if (0 != dir_name.compare(0, strlen("tmp_"), "tmp_"))
			continue;

		Poco::File tmp_dir(full_path + dir_name);

		if (tmp_dir.isDirectory() && tmp_dir.getLastModified().epochTime() + 86400 < time(0))
		{
			LOG_WARNING(log, "Removing temporary directory " << full_path << dir_name);
			Poco::File(full_path + dir_name).remove(true);
		}
	}

	return res;
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
void MergeTreeData::setPath(const String & new_full_path)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 19:14:25 +00:00
|
|
|
|
Poco::File(full_path).renameTo(new_full_path);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
full_path = new_full_path;
|
2014-03-13 19:14:25 +00:00
|
|
|
|
|
2014-04-05 19:54:00 +00:00
|
|
|
|
context.resetCaches();
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
void MergeTreeData::dropAllData()
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
|
|
|
|
data_parts.clear();
|
|
|
|
|
all_data_parts.clear();
|
|
|
|
|
|
2014-04-05 19:54:00 +00:00
|
|
|
|
context.resetCaches();
|
2014-03-13 19:14:25 +00:00
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
Poco::File(full_path).remove(true);
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-03 08:53:32 +00:00
|
|
|
|
/** Physically deletes the files of a dropped column from every part directory and
  * removes the corresponding entries from each part's checksums.txt.
  * @param column_name              column to remove; "a.b" nested names are handled.
  * @param remove_array_size_files  also remove the nested column's array size files.
  * Holds both part-set mutexes for the whole operation.
  */
void MergeTreeData::removeColumnFiles(String column_name, bool remove_array_size_files)
{
	Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
	Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);

	/// Nested column "a.b" is stored escaped as "a%2Eb"; optionally also match the
	/// bare nested name to pick up array size files.
	size_t dot_pos = column_name.find('.');
	if (dot_pos != std::string::npos)
	{
		std::string nested_column = column_name.substr(0, dot_pos);
		column_name = nested_column + "%2E" + column_name.substr(dot_pos + 1);

		if (remove_array_size_files)
			column_name = std::string("(?:") + nested_column + "|" + column_name + ")";
	}

	/// Regexp selecting the column's files for deletion.
	Poco::RegularExpression re(column_name + "(?:(?:\\.|\\%2E).+){0,1}" +"(?:\\.mrk|\\.bin|\\.size\\d+\\.bin|\\.size\\d+\\.mrk)");
	/// Loop over all part directories.
	Poco::RegularExpression::MatchVec matches;
	Poco::DirectoryIterator end;
	for (Poco::DirectoryIterator it_dir = Poco::DirectoryIterator(full_path); it_dir != end; ++it_dir)
	{
		std::string dir_name = it_dir.name();

		if (!ActiveDataPartSet::isPartDirectory(dir_name, matches))
			continue;

		/// Loop over every file in the part directory.
		String full_dir_name = full_path + dir_name + "/";
		for (Poco::DirectoryIterator it_file(full_dir_name); it_file != end; ++it_file)
		{
			if (re.match(it_file.name()))
			{
				Poco::File file(full_dir_name + it_file.name());
				if (file.exists())
					file.remove();
			}
		}

		/// Remove the stale column entries from checksums.txt and rewrite the file.
		/// NOTE(review): this loop over all_data_parts sits inside the directory loop,
		/// so checksums are rewritten once per part directory — looks redundant; confirm intent.
		for (auto & part : all_data_parts)
		{
			if (!part)
				continue;

			auto & files = const_cast<DataPart::Checksums::FileChecksums &>(part->checksums.files);
			for (auto it = files.lower_bound(column_name);
				(it != files.end()) && (it->first.substr(0, column_name.size()) == column_name);)
			{
				if (re.match(it->first))
					it = files.erase(it);	/// BUGFIX: erase invalidates the iterator; use the returned successor instead of ++it after erase.
				else
					++it;
			}

			/// Write the updated checksums file.
			WriteBufferFromFile out(full_path + part->name + "/" + "checksums.txt", 1024);
			part->checksums.writeText(out);
		}
	}
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
void MergeTreeData::createConvertExpression(
|
|
|
|
|
const String & in_column_name,
|
|
|
|
|
const String & out_type,
|
|
|
|
|
ExpressionActionsPtr & out_expression,
|
|
|
|
|
String & out_column)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-27 17:30:04 +00:00
|
|
|
|
Names out_names;
|
|
|
|
|
out_expression = new ExpressionActions(
|
|
|
|
|
NamesAndTypesList(1, NameAndTypePair(in_column_name, getDataTypeByName(in_column_name))), context.getSettingsRef());
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
FunctionPtr function = context.getFunctionFactory().get("to" + out_type, context);
|
2014-06-12 18:41:09 +00:00
|
|
|
|
out_expression->add(ExpressionAction::applyFunction(function, Names(1, in_column_name)), out_names);
|
|
|
|
|
out_expression->add(ExpressionAction::removeColumn(in_column_name));
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
out_column = out_names[0];
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 19:36:28 +00:00
|
|
|
|
/// Finds the type of column `name` in `columns`; throws NO_SUCH_COLUMN_IN_TABLE when absent.
static DataTypePtr getDataTypeByName(const String & name, const NamesAndTypesList & columns)
{
	for (const auto & column : columns)
		if (column.first == name)
			return column.second;

	throw Exception("No column " + name + " in table", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
}
|
|
|
|
|
|
2014-04-03 08:53:32 +00:00
|
|
|
|
/// одинаковыми считаются имена, вида "name.*"
|
2014-04-14 13:08:26 +00:00
|
|
|
|
static bool namesWithDotEqual(const String & name_with_dot, const NameAndTypePair & name_type)
|
2014-04-03 08:53:32 +00:00
|
|
|
|
{
|
|
|
|
|
return (name_with_dot == name_type.first.substr(0, name_with_dot.length()));
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
/** Applies a metadata ALTER. Column list is changed under both part-set mutexes;
  * for DROP COLUMN the column's files are then deleted and context caches reset.
  */
void MergeTreeData::alter(const ASTAlterQuery::Parameters & params)
{
	{
		Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
		Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
		alterColumns(params, columns, context);
	}

	if (params.type == ASTAlterQuery::DROP)
	{
		String column_name = dynamic_cast<const ASTIdentifier &>(*params.column).name;

		/// If no columns of the form nested_name.* remain, also remove the array size files.
		bool remove_array_size_files = false;
		size_t dot_pos = column_name.find('.');
		if (dot_pos != std::string::npos)
		{
			/// Modernized: C++11 lambda instead of boost::bind (file already uses C++11 features).
			const String nested_prefix = column_name.substr(0, dot_pos);
			remove_array_size_files = (columns->end() == std::find_if(columns->begin(), columns->end(),
				[&nested_prefix](const NameAndTypePair & name_type) { return namesWithDotEqual(nested_prefix, name_type); }));
		}
		removeColumnFiles(column_name, remove_array_size_files);

		context.resetCaches();
	}
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
/** First phase of ALTER MODIFY COLUMN: for every part, reads the column, converts it to
  * the new type and writes the converted column (plus a temporary checksums file) next
  * to the old data. Nothing is switched over here — that happens in commitAlterModify.
  * Throws when either the old or the new type is an array or nested type.
  */
void MergeTreeData::prepareAlterModify(const ASTAlterQuery::Parameters & params)
{
	/// Snapshot the active parts under the lock; process them without holding it.
	DataPartsVector parts;
	{
		Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
		parts = DataPartsVector(data_parts.begin(), data_parts.end());
	}

	Names column_name;
	const ASTNameTypePair & name_type = dynamic_cast<const ASTNameTypePair &>(*params.name_type);
	/// The target type is taken verbatim from the query text.
	StringRange type_range = name_type.type->range;
	String type(type_range.first, type_range.second - type_range.first);
	DataTypePtr old_type_ptr = DB::getDataTypeByName(name_type.name, *columns);
	DataTypePtr new_type_ptr = context.getDataTypeFactory().get(type);
	if (dynamic_cast<DataTypeNested *>(old_type_ptr.get()) || dynamic_cast<DataTypeArray *>(old_type_ptr.get()) ||
		dynamic_cast<DataTypeNested *>(new_type_ptr.get()) || dynamic_cast<DataTypeArray *>(new_type_ptr.get()))
		throw Exception("ALTER MODIFY not supported for nested and array types");

	column_name.push_back(name_type.name);
	ExpressionActionsPtr expr;
	String out_column;
	createConvertExpression(name_type.name, type, expr, out_column);

	ColumnNumbers num(1, 0);
	for (DataPartPtr & part : parts)
	{
		/// Read the whole part (all mark ranges) through the conversion expression.
		MarkRanges ranges(1, MarkRange(0, part->size));
		ExpressionBlockInputStream in(new MergeTreeBlockInputStream(full_path + part->name + '/',
			DEFAULT_MERGE_BLOCK_SIZE, column_name, *this, part, ranges, false, nullptr, ""), expr);
		MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true);
		in.readPrefix();
		out.writePrefix();

		try
		{
			while (Block b = in.read())
				out.write(b);

			in.readSuffix();
			DataPart::Checksums add_checksums = out.writeSuffixAndGetChecksums();

			/// Write the updated checksums to a temporary file named after the converted column.
			if (!part->checksums.empty())
			{
				DataPart::Checksums new_checksums = part->checksums;
				std::string escaped_name = escapeForFileName(name_type.name);
				std::string escaped_out_column = escapeForFileName(out_column);
				new_checksums.files[escaped_name + ".bin"] = add_checksums.files[escaped_out_column + ".bin"];
				new_checksums.files[escaped_name + ".mrk"] = add_checksums.files[escaped_out_column + ".mrk"];

				WriteBufferFromFile checksums_file(full_path + part->name + '/' + escaped_out_column + ".checksums.txt", 1024);
				new_checksums.writeText(checksums_file);
			}
		}
		catch (const Exception & e)
		{
			/// A part may legitimately not contain the column at all; anything else is fatal.
			if (e.code() != ErrorCodes::ALL_REQUESTED_COLUMNS_ARE_MISSING)
				throw;
		}
	}
}
|
|
|
|
|
|
|
|
|
|
/** Second phase of ALTER MODIFY COLUMN: switches every part over to the converted column
  * files prepared by prepareAlterModify. Three passes over the parts:
  *   1. rename the old column files (and checksums.txt) to *.old;
  *   2. rename the temporary converted files to the final column name;
  *   3. delete the *.old files.
  * Then resets context caches and updates the column list under both mutexes.
  */
void MergeTreeData::commitAlterModify(const ASTAlterQuery::Parameters & params)
{
	DataPartsVector parts;
	{
		Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
		parts = DataPartsVector(data_parts.begin(), data_parts.end());
	}

	const ASTNameTypePair & name_type = dynamic_cast<const ASTNameTypePair &>(*params.name_type);
	StringRange type_range = name_type.type->range;
	String type(type_range.first, type_range.second - type_range.first);

	ExpressionActionsPtr expr;
	String out_column;
	createConvertExpression(name_type.name, type, expr, out_column);

	/// Rename files:
	/// rename the old columns, adding the .old extension.
	for (DataPartPtr & part : parts)
	{
		std::string part_path = full_path + part->name + '/';
		std::string path = part_path + escapeForFileName(name_type.name);
		if (Poco::File(path + ".bin").exists())
		{
			LOG_TRACE(log, "Renaming " << path + ".bin" << " to " << path + ".bin" + ".old");
			Poco::File(path + ".bin").renameTo(path + ".bin" + ".old");
			LOG_TRACE(log, "Renaming " << path + ".mrk" << " to " << path + ".mrk" + ".old");
			Poco::File(path + ".mrk").renameTo(path + ".mrk" + ".old");

			if (Poco::File(part_path + "checksums.txt").exists())
			{
				LOG_TRACE(log, "Renaming " << part_path + "checksums.txt" << " to " << part_path + "checksums.txt" + ".old");
				Poco::File(part_path + "checksums.txt").renameTo(part_path + "checksums.txt" + ".old");
			}
		}
	}

	/// Rename the temporary (converted) columns to their final names.
	for (DataPartPtr & part : parts)
	{
		std::string part_path = full_path + part->name + '/';
		std::string name = escapeForFileName(out_column);
		std::string new_name = escapeForFileName(name_type.name);
		std::string path = part_path + name;
		std::string new_path = part_path + new_name;
		if (Poco::File(path + ".bin").exists())
		{
			LOG_TRACE(log, "Renaming " << path + ".bin" << " to " << new_path + ".bin");
			Poco::File(path + ".bin").renameTo(new_path + ".bin");
			LOG_TRACE(log, "Renaming " << path + ".mrk" << " to " << new_path + ".mrk");
			Poco::File(path + ".mrk").renameTo(new_path + ".mrk");

			if (Poco::File(path + ".checksums.txt").exists())
			{
				/// BUGFIX: the log message previously claimed the target was part_path + ".checksums.txt",
				/// while the actual rename target is part_path + "checksums.txt".
				LOG_TRACE(log, "Renaming " << path + ".checksums.txt" << " to " << part_path + "checksums.txt");
				Poco::File(path + ".checksums.txt").renameTo(part_path + "checksums.txt");
			}
		}
	}

	// Delete the old columns.
	for (DataPartPtr & part : parts)
	{
		std::string part_path = full_path + part->name + '/';
		std::string path = part_path + escapeForFileName(name_type.name);
		if (Poco::File(path + ".bin" + ".old").exists())
		{
			LOG_TRACE(log, "Removing old column " << path + ".bin" + ".old");
			Poco::File(path + ".bin" + ".old").remove();
			LOG_TRACE(log, "Removing old column " << path + ".mrk" + ".old");
			Poco::File(path + ".mrk" + ".old").remove();

			if (Poco::File(part_path + "checksums.txt" + ".old").exists())
			{
				LOG_TRACE(log, "Removing old checksums " << part_path + "checksums.txt" + ".old");
				Poco::File(part_path + "checksums.txt" + ".old").remove();
			}
		}
	}

	context.resetCaches();

	{
		Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
		Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
		alterColumns(params, columns, context);
	}
}
|
|
|
|
|
|
|
|
|
|
|
2014-04-07 15:45:46 +00:00
|
|
|
|
/** Renames a temporary part into place and adds it to the working set.
  * Wrapper over renameTempPartAndReplace that treats covering existing parts as an error
  * worth logging (the replace variant returns them silently).
  */
void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr part, Increment * increment)
{
	auto removed = renameTempPartAndReplace(part, increment);
	if (!removed.empty())
	{
		/// BUGFIX: removed a stray unary '+' before the " covers " literal.
		LOG_ERROR(log, "Added part " << part->name << " covers " << toString(removed.size())
			<< " existing part(s) (including " << removed[0]->name << ")");
	}
}
|
|
|
|
|
|
2014-04-07 15:45:46 +00:00
|
|
|
|
/** Assigns the part its final block number (if an increment is given), renames its
  * temporary directory to the final name, and inserts it into the working set,
  * removing from the set every part it covers.
  * Returns the covered parts, in ascending order. If the new part is itself covered
  * by an existing part, it is NOT added to the active set (only to all_data_parts).
  */
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(MutableDataPartPtr part, Increment * increment)
{
	LOG_TRACE(log, "Renaming.");

	Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
	Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);

	String old_path = getFullPath() + part->name + "/";

	/** For StorageMergeTree it is important that obtaining the part number happens
	  * atomically with adding the part to the set. Otherwise there is a race condition:
	  * a pair of parts whose number range contains a not-yet-added part could be merged.
	  */
	if (increment)
		part->left = part->right = increment->get(false);

	part->name = ActiveDataPartSet::getPartName(part->left_date, part->right_date, part->left, part->right, part->level);

	if (data_parts.count(part))
		throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART);

	String new_path = getFullPath() + part->name + "/";

	/// Rename the part directory to its final name.
	Poco::File(old_path).renameTo(new_path);

	bool obsolete = false; /// Whether `part` is covered by some existing part.
	DataPartsVector res;
	/// Parts contained in `part` are adjacent in data_parts, around the position where `part` itself would be inserted.
	DataParts::iterator it = data_parts.lower_bound(part);
	/// Walk left.
	while (it != data_parts.begin())
	{
		--it;
		if (!part->contains(**it))
		{
			/// The first non-contained part to the left may itself contain `part`.
			if ((*it)->contains(*part))
				obsolete = true;
			++it;
			break;
		}
		res.push_back(*it);
		(*it)->remove_time = time(0);
		data_parts.erase(it++); /// Yes, ++, not --: after erase, `it` is the successor; the loop steps back again.
	}
	std::reverse(res.begin(), res.end()); /// We need the parts in ascending order.
	/// Walk right.
	while (it != data_parts.end())
	{
		if (!part->contains(**it))
		{
			if ((*it)->name == part->name || (*it)->contains(*part))
				obsolete = true;
			break;
		}
		res.push_back(*it);
		(*it)->remove_time = time(0);
		data_parts.erase(it++);
	}

	if (obsolete)
	{
		LOG_WARNING(log, "Obsolete part " + part->name + " added");
	}
	else
	{
		data_parts.insert(part);
	}

	all_data_parts.insert(part);

	return res;
}
|
|
|
|
|
|
2014-04-02 10:10:37 +00:00
|
|
|
|
/// Removes the part from the working set and from the full set of parts, then renames
/// its directory on disk by prepending `prefix` (e.g. to "detach" it from the table).
/// Throws NO_SUCH_DATA_PART if the part is not known in all_data_parts.
void MergeTreeData::renameAndDetachPart(DataPartPtr part, const String & prefix)
{
	/// Lock order: data_parts_mutex before all_data_parts_mutex — keep consistent with other methods.
	Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
	Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
	if (!all_data_parts.erase(part))
		throw Exception("No such data part", ErrorCodes::NO_SUCH_DATA_PART);
	/// The part may or may not be active; erase is a no-op if it is not in data_parts.
	data_parts.erase(part);
	/// Rename on disk only after the part is no longer referenced by either set.
	part->renameAddPrefix(prefix);
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
/// Returns a snapshot copy of the set of active data parts.
MergeTreeData::DataParts MergeTreeData::getDataParts()
{
	/// Copy under the mutex so callers can iterate without holding any lock.
	Poco::ScopedLock<Poco::FastMutex> guard(data_parts_mutex);
	DataParts snapshot = data_parts;
	return snapshot;
}
|
|
|
|
|
|
2014-04-09 16:32:32 +00:00
|
|
|
|
/// Returns a snapshot copy of the full set of parts (active and inactive).
MergeTreeData::DataParts MergeTreeData::getAllDataParts()
{
	/// Copy under the mutex so callers can iterate without holding any lock.
	Poco::ScopedLock<Poco::FastMutex> guard(all_data_parts_mutex);
	DataParts snapshot = all_data_parts;
	return snapshot;
}
|
|
|
|
|
|
2014-05-16 15:55:57 +00:00
|
|
|
|
/// Returns the largest number of active parts that belong to a single month.
/// Relies on data_parts being ordered so that parts of one month are adjacent.
size_t MergeTreeData::getMaxPartsCountForMonth()
{
	Poco::ScopedLock<Poco::FastMutex> guard(data_parts_mutex);

	size_t max_count = 0;
	size_t count_in_month = 0;
	DayNum_t current_month = DayNum_t(0);

	for (const auto & part : data_parts)
	{
		/// A new month starts a fresh run.
		if (part->left_month != current_month)
		{
			current_month = part->left_month;
			count_in_month = 0;
		}

		++count_in_month;

		if (count_in_month > max_count)
			max_count = count_in_month;
	}

	return max_count;
}
|
|
|
|
|
|
2014-05-27 08:43:01 +00:00
|
|
|
|
/// Artificially slows down an insert when some month has accumulated too many parts,
/// giving background merges a chance to catch up.
void MergeTreeData::delayInsertIfNeeded()
{
	size_t parts_count = getMaxPartsCountForMonth();
	if (parts_count > settings.parts_to_delay_insert)
	{
		/// The delay grows exponentially with the number of parts above the threshold.
		double delay = std::pow(settings.insert_delay_step, parts_count - settings.parts_to_delay_insert);
		delay /= 1000;	/// NOTE(review): presumably converts the computed value to seconds — confirm against insert_delay_step units.
		delay = std::min(delay, DBMS_MAX_DELAY_OF_INSERT);	/// Cap the delay at the global maximum.

		LOG_INFO(log, "Delaying inserting block by "
			<< std::fixed << std::setprecision(4) << delay << "s because there are " << parts_count << " parts");

		/// Block the inserting thread for the computed fractional-second duration.
		std::this_thread::sleep_for(std::chrono::duration<double>(delay));
	}
}
|
|
|
|
|
|
2014-04-09 15:52:47 +00:00
|
|
|
|
/// Finds the active part that covers (or is exactly) the part with the given name.
/// If including_inactive is set, an exact name match is also looked up in all_data_parts first.
/// Returns nullptr when no covering part exists.
MergeTreeData::DataPartPtr MergeTreeData::getContainingPart(const String & part_name, bool including_inactive)
{
	/// Build a temporary part from the name only, used purely as a search key.
	MutableDataPartPtr tmp_part(new DataPart(*this));
	ActiveDataPartSet::parsePartName(part_name, *tmp_part);

	Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);

	if (including_inactive)
	{
		/// Only an exact name match is accepted among inactive parts.
		Poco::ScopedLock<Poco::FastMutex> lock(all_data_parts_mutex);
		DataParts::iterator it = all_data_parts.lower_bound(tmp_part);
		if (it != all_data_parts.end() && (*it)->name == part_name)
			return *it;
	}

	/// The part can only be covered by the previous or the next part in data_parts.
	DataParts::iterator it = data_parts.lower_bound(tmp_part);

	if (it != data_parts.end())
	{
		if ((*it)->name == part_name)
			return *it;
		if ((*it)->contains(*tmp_part))
			return *it;
	}

	if (it != data_parts.begin())
	{
		--it;
		if ((*it)->contains(*tmp_part))
			return *it;
	}

	return nullptr;
}
|
|
|
|
|
|
2014-03-27 11:29:40 +00:00
|
|
|
|
|
2014-04-14 13:08:26 +00:00
|
|
|
|
void MergeTreeData::DataPart::Checksums::Checksum::checkEqual(const Checksum & rhs, bool have_uncompressed, const String & name) const
|
|
|
|
|
{
|
|
|
|
|
if (is_compressed && have_uncompressed)
|
|
|
|
|
{
|
|
|
|
|
if (!rhs.is_compressed)
|
|
|
|
|
throw Exception("No uncompressed checksum for file " + name, ErrorCodes::CHECKSUM_DOESNT_MATCH);
|
|
|
|
|
if (rhs.uncompressed_size != uncompressed_size)
|
|
|
|
|
throw Exception("Unexpected size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
|
|
|
|
|
if (rhs.uncompressed_hash != uncompressed_hash)
|
|
|
|
|
throw Exception("Checksum mismatch for file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (rhs.file_size != file_size)
|
|
|
|
|
throw Exception("Unexpected size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
|
|
|
|
|
if (rhs.file_hash != file_hash)
|
|
|
|
|
throw Exception("Checksum mismatch for file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::DataPart::Checksums::Checksum::checkSize(const String & path) const
|
|
|
|
|
{
|
|
|
|
|
Poco::File file(path);
|
|
|
|
|
if (!file.exists())
|
|
|
|
|
throw Exception(path + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST);
|
|
|
|
|
size_t size = file.getSize();
|
|
|
|
|
if (size != file_size)
|
|
|
|
|
throw Exception(path + " has unexpected size: " + DB::toString(size) + " instead of " + DB::toString(file_size),
|
|
|
|
|
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::DataPart::Checksums::checkEqual(const Checksums & rhs, bool have_uncompressed) const
|
2014-03-27 11:29:40 +00:00
|
|
|
|
{
|
2014-03-27 17:30:04 +00:00
|
|
|
|
for (const auto & it : rhs.files)
|
2014-03-27 11:29:40 +00:00
|
|
|
|
{
|
|
|
|
|
const String & name = it.first;
|
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
if (!files.count(name))
|
2014-03-27 11:29:40 +00:00
|
|
|
|
throw Exception("Unexpected file " + name + " in data part", ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART);
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
for (const auto & it : files)
|
2014-03-27 11:29:40 +00:00
|
|
|
|
{
|
|
|
|
|
const String & name = it.first;
|
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
auto jt = rhs.files.find(name);
|
|
|
|
|
if (jt == rhs.files.end())
|
2014-03-27 11:29:40 +00:00
|
|
|
|
throw Exception("No file " + name + " in data part", ErrorCodes::NO_FILE_IN_DATA_PART);
|
|
|
|
|
|
2014-04-14 13:08:26 +00:00
|
|
|
|
it.second.checkEqual(jt->second, have_uncompressed, name);
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-04-14 13:13:20 +00:00
|
|
|
|
|
2014-04-14 13:08:26 +00:00
|
|
|
|
void MergeTreeData::DataPart::Checksums::checkSizes(const String & path) const
|
|
|
|
|
{
|
|
|
|
|
for (const auto & it : files)
|
|
|
|
|
{
|
|
|
|
|
const String & name = it.first;
|
|
|
|
|
it.second.checkSize(path + name);
|
2014-03-27 11:29:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-18 09:55:21 +00:00
|
|
|
|
/// Parses the text representation produced by writeText.
/// Returns false if the stream carries the obsolete format version 1 (nothing is read
/// past the version line in that case); returns true after a successful version-2 parse.
/// Throws UNKNOWN_FORMAT on an unsupported version; assertString throws on any
/// deviation from the expected layout.
bool MergeTreeData::DataPart::Checksums::readText(ReadBuffer & in)
{
	files.clear();
	size_t count;

	DB::assertString("checksums format version: ", in);
	int format_version;
	DB::readText(format_version, in);
	if (format_version < 1 || format_version > 2)
		throw Exception("Bad checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT);
	/// Version 1 is not parseable here; the caller is expected to fall back.
	if (format_version == 1)
		return false;
	DB::assertString("\n", in);
	DB::readText(count, in);
	DB::assertString(" files:\n", in);

	for (size_t i = 0; i < count; ++i)
	{
		String name;
		Checksum sum;

		/// Fixed per-file layout; each assertString consumes the literal separators
		/// that writeText emits, in the same order.
		DB::readString(name, in);
		DB::assertString("\n\tsize: ", in);
		DB::readText(sum.file_size, in);
		DB::assertString("\n\thash: ", in);
		DB::readText(sum.file_hash.first, in);
		DB::assertString(" ", in);
		DB::readText(sum.file_hash.second, in);
		DB::assertString("\n\tcompressed: ", in);
		DB::readText(sum.is_compressed, in);
		if (sum.is_compressed)
		{
			/// Uncompressed size/hash are present only for compressed files.
			DB::assertString("\n\tuncompressed size: ", in);
			DB::readText(sum.uncompressed_size, in);
			DB::assertString("\n\tuncompressed hash: ", in);
			DB::readText(sum.uncompressed_hash.first, in);
			DB::assertString(" ", in);
			DB::readText(sum.uncompressed_hash.second, in);
		}
		DB::assertString("\n", in);

		files.insert(std::make_pair(name, sum));
	}

	return true;
}
|
|
|
|
|
|
|
|
|
|
/// Serializes the checksums in the version-2 text format understood by readText:
/// a version line, a file count, then a fixed-layout record per file.
void MergeTreeData::DataPart::Checksums::writeText(WriteBuffer & out) const
{
	DB::writeString("checksums format version: 2\n", out);
	DB::writeText(files.size(), out);
	DB::writeString(" files:\n", out);

	for (const auto & it : files)
	{
		const String & name = it.first;
		const Checksum & sum = it.second;
		DB::writeString(name, out);
		DB::writeString("\n\tsize: ", out);
		DB::writeText(sum.file_size, out);
		DB::writeString("\n\thash: ", out);
		DB::writeText(sum.file_hash.first, out);
		DB::writeString(" ", out);
		DB::writeText(sum.file_hash.second, out);
		DB::writeString("\n\tcompressed: ", out);
		DB::writeText(sum.is_compressed, out);
		DB::writeString("\n", out);
		if (sum.is_compressed)
		{
			/// Uncompressed size/hash are written only for compressed files;
			/// readText mirrors this conditional layout exactly.
			DB::writeString("\tuncompressed size: ", out);
			DB::writeText(sum.uncompressed_size, out);
			DB::writeString("\n\tuncompressed hash: ", out);
			DB::writeText(sum.uncompressed_hash.first, out);
			DB::writeString(" ", out);
			DB::writeText(sum.uncompressed_hash.second, out);
			DB::writeString("\n", out);
		}
	}
}
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|