2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeData.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <Yandex/time2str.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <Poco/Ext/ScopedTry.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <DB/Interpreters/ExpressionAnalyzer.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Common/escapeForFileName.h>
|
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeReader.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeBlockInputStream.h>
|
|
|
|
|
#include <DB/Storages/MergeTree/MergedBlockOutputStream.h>
|
2014-03-14 07:05:43 +00:00
|
|
|
|
#include <DB/Parsers/ASTIdentifier.h>
|
|
|
|
|
#include <DB/Parsers/ASTNameTypePair.h>
|
|
|
|
|
#include <DB/DataStreams/ExpressionBlockInputStream.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2014-03-13 17:44:00 +00:00
|
|
|
|
static String lastTwoPathComponents(String path)
|
2014-03-13 12:48:07 +00:00
|
|
|
|
{
|
|
|
|
|
if (!path.empty() && *path.rbegin() == '/')
|
|
|
|
|
path.erase(path.end() - 1);
|
|
|
|
|
size_t slash = path.rfind('/');
|
|
|
|
|
if (slash == String::npos || slash == 0)
|
|
|
|
|
return path;
|
|
|
|
|
slash = path.rfind('/', slash - 1);
|
|
|
|
|
if (slash == String::npos)
|
|
|
|
|
return path;
|
|
|
|
|
return path.substr(slash + 1);
|
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
|
|
|
|
MergeTreeData::MergeTreeData(
|
2014-03-13 12:48:07 +00:00
|
|
|
|
const String & full_path_, NamesAndTypesListPtr columns_,
|
2014-03-09 17:36:01 +00:00
|
|
|
|
const Context & context_,
|
|
|
|
|
ASTPtr & primary_expr_ast_,
|
|
|
|
|
const String & date_column_name_, const ASTPtr & sampling_expression_,
|
|
|
|
|
size_t index_granularity_,
|
|
|
|
|
Mode mode_,
|
|
|
|
|
const String & sign_column_,
|
|
|
|
|
const MergeTreeSettings & settings_)
|
2014-03-13 17:44:00 +00:00
|
|
|
|
: context(context_),
|
2014-03-09 17:36:01 +00:00
|
|
|
|
date_column_name(date_column_name_), sampling_expression(sampling_expression_),
|
|
|
|
|
index_granularity(index_granularity_),
|
|
|
|
|
mode(mode_), sign_column(sign_column_),
|
2014-03-13 17:44:00 +00:00
|
|
|
|
settings(settings_), primary_expr_ast(primary_expr_ast_->clone()),
|
2014-03-13 12:48:07 +00:00
|
|
|
|
full_path(full_path_), columns(columns_),
|
|
|
|
|
log(&Logger::get("MergeTreeData: " + lastTwoPathComponents(full_path))),
|
2014-03-09 17:36:01 +00:00
|
|
|
|
file_name_regexp("^(\\d{8})_(\\d{8})_(\\d+)_(\\d+)_(\\d+)")
|
|
|
|
|
{
|
|
|
|
|
/// создаём директорию, если её нет
|
|
|
|
|
Poco::File(full_path).createDirectories();
|
|
|
|
|
|
|
|
|
|
/// инициализируем описание сортировки
|
|
|
|
|
sort_descr.reserve(primary_expr_ast->children.size());
|
|
|
|
|
for (ASTs::iterator it = primary_expr_ast->children.begin();
|
|
|
|
|
it != primary_expr_ast->children.end();
|
|
|
|
|
++it)
|
|
|
|
|
{
|
|
|
|
|
String name = (*it)->getColumnName();
|
|
|
|
|
sort_descr.push_back(SortColumnDescription(name, 1));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
primary_expr = ExpressionAnalyzer(primary_expr_ast, context, *columns).getActions(false);
|
|
|
|
|
|
|
|
|
|
ExpressionActionsPtr projected_expr = ExpressionAnalyzer(primary_expr_ast, context, *columns).getActions(true);
|
|
|
|
|
primary_key_sample = projected_expr->getSampleBlock();
|
|
|
|
|
|
|
|
|
|
loadDataParts();
|
|
|
|
|
clearOldParts();
|
2014-03-13 12:48:07 +00:00
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
UInt64 MergeTreeData::getMaxDataPartIndex()
|
|
|
|
|
{
|
2014-03-09 17:36:01 +00:00
|
|
|
|
UInt64 max_part_id = 0;
|
|
|
|
|
for (DataParts::iterator it = data_parts.begin(); it != data_parts.end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
max_part_id = std::max(max_part_id, (*it)->right);
|
|
|
|
|
}
|
2014-03-13 12:48:07 +00:00
|
|
|
|
return max_part_id;
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
std::string MergeTreeData::getModePrefix() const
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 12:48:07 +00:00
|
|
|
|
switch (mode)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 12:48:07 +00:00
|
|
|
|
case Ordinary: return "";
|
|
|
|
|
case Collapsing: return "Collapsing";
|
|
|
|
|
case Summing: return "Summing";
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
default:
|
|
|
|
|
throw Exception("Unknown mode of operation for MergeTreeData: " + toString(mode), ErrorCodes::LOGICAL_ERROR);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
String MergeTreeData::getPartName(DayNum_t left_date, DayNum_t right_date, UInt64 left_id, UInt64 right_id, UInt64 level)
|
|
|
|
|
{
|
|
|
|
|
DateLUTSingleton & date_lut = DateLUTSingleton::instance();
|
|
|
|
|
|
|
|
|
|
/// Имя директории для куска иммет вид: YYYYMMDD_YYYYMMDD_N_N_L.
|
|
|
|
|
String res;
|
|
|
|
|
{
|
|
|
|
|
unsigned left_date_id = Date2OrderedIdentifier(date_lut.fromDayNum(left_date));
|
|
|
|
|
unsigned right_date_id = Date2OrderedIdentifier(date_lut.fromDayNum(right_date));
|
|
|
|
|
|
|
|
|
|
WriteBufferFromString wb(res);
|
|
|
|
|
|
|
|
|
|
writeIntText(left_date_id, wb);
|
|
|
|
|
writeChar('_', wb);
|
|
|
|
|
writeIntText(right_date_id, wb);
|
|
|
|
|
writeChar('_', wb);
|
|
|
|
|
writeIntText(left_id, wb);
|
|
|
|
|
writeChar('_', wb);
|
|
|
|
|
writeIntText(right_id, wb);
|
|
|
|
|
writeChar('_', wb);
|
|
|
|
|
writeIntText(level, wb);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::parsePartName(const String & file_name, const Poco::RegularExpression::MatchVec & matches, DataPart & part)
|
|
|
|
|
{
|
|
|
|
|
DateLUTSingleton & date_lut = DateLUTSingleton::instance();
|
|
|
|
|
|
|
|
|
|
part.left_date = date_lut.toDayNum(OrderedIdentifier2Date(file_name.substr(matches[1].offset, matches[1].length)));
|
|
|
|
|
part.right_date = date_lut.toDayNum(OrderedIdentifier2Date(file_name.substr(matches[2].offset, matches[2].length)));
|
|
|
|
|
part.left = parse<UInt64>(file_name.substr(matches[3].offset, matches[3].length));
|
|
|
|
|
part.right = parse<UInt64>(file_name.substr(matches[4].offset, matches[4].length));
|
|
|
|
|
part.level = parse<UInt32>(file_name.substr(matches[5].offset, matches[5].length));
|
|
|
|
|
|
|
|
|
|
part.left_month = date_lut.toFirstDayNumOfMonth(part.left_date);
|
|
|
|
|
part.right_month = date_lut.toFirstDayNumOfMonth(part.right_date);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::loadDataParts()
|
|
|
|
|
{
|
|
|
|
|
LOG_DEBUG(log, "Loading data parts");
|
|
|
|
|
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
|
|
|
|
|
data_parts.clear();
|
|
|
|
|
|
|
|
|
|
Strings part_file_names;
|
|
|
|
|
Strings old_file_names;
|
|
|
|
|
Poco::DirectoryIterator end;
|
|
|
|
|
for (Poco::DirectoryIterator it(full_path); it != end; ++it)
|
|
|
|
|
{
|
|
|
|
|
String file_name = it.name();
|
|
|
|
|
|
|
|
|
|
/// Удаляем временные директории старше суток.
|
|
|
|
|
if (0 == file_name.compare(0, strlen("tmp_"), "tmp_"))
|
|
|
|
|
{
|
|
|
|
|
Poco::File tmp_dir(full_path + file_name);
|
|
|
|
|
|
|
|
|
|
if (tmp_dir.isDirectory() && tmp_dir.getLastModified().epochTime() + 86400 < time(0))
|
|
|
|
|
{
|
|
|
|
|
LOG_WARNING(log, "Removing temporary directory " << full_path << file_name);
|
|
|
|
|
Poco::File(full_path + file_name).remove(true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (0 == file_name.compare(0, strlen("old_"), "old_"))
|
|
|
|
|
old_file_names.push_back(file_name);
|
|
|
|
|
else
|
|
|
|
|
part_file_names.push_back(file_name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Poco::RegularExpression::MatchVec matches;
|
|
|
|
|
while (!part_file_names.empty())
|
|
|
|
|
{
|
|
|
|
|
String file_name = part_file_names.back();
|
|
|
|
|
part_file_names.pop_back();
|
|
|
|
|
|
|
|
|
|
if (!isPartDirectory(file_name, matches))
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-03-14 17:19:38 +00:00
|
|
|
|
MutableDataPartPtr part = std::make_shared<DataPart>(*this);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
parsePartName(file_name, matches, *part);
|
|
|
|
|
part->name = file_name;
|
|
|
|
|
|
|
|
|
|
/// Для битых кусков, которые могут образовываться после грубого перезапуска сервера, попытаться восстановить куски, из которых они сделаны.
|
|
|
|
|
if (isBrokenPart(full_path + file_name))
|
|
|
|
|
{
|
|
|
|
|
if (part->level == 0)
|
|
|
|
|
{
|
|
|
|
|
/// Восстановить куски нулевого уровня невозможно.
|
2014-03-13 12:48:07 +00:00
|
|
|
|
LOG_ERROR(log, "Removing broken part " << full_path + file_name << " because is't impossible to repair.");
|
2014-03-09 17:36:01 +00:00
|
|
|
|
part->remove();
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
Strings new_parts = tryRestorePart(full_path, file_name, old_file_names);
|
|
|
|
|
part_file_names.insert(part_file_names.begin(), new_parts.begin(), new_parts.end());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Размер - в количестве засечек.
|
|
|
|
|
part->size = Poco::File(full_path + file_name + "/" + escapeForFileName(columns->front().first) + ".mrk").getSize()
|
|
|
|
|
/ MERGE_TREE_MARK_SIZE;
|
|
|
|
|
|
|
|
|
|
part->modification_time = Poco::File(full_path + file_name).getLastModified().epochTime();
|
|
|
|
|
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
part->loadIndex();
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
/// Не будем вставлять в набор кусок с битым индексом. Пропустим кусок и позволим серверу запуститься.
|
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data_parts.insert(part);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
all_data_parts = data_parts;
|
|
|
|
|
|
|
|
|
|
/** Удаляем из набора актуальных кусков куски, которые содержатся в другом куске (которые были склеены),
|
|
|
|
|
* но по каким-то причинам остались лежать в файловой системе.
|
|
|
|
|
* Удаление файлов будет произведено потом в методе clearOldParts.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
if (data_parts.size() >= 2)
|
|
|
|
|
{
|
|
|
|
|
DataParts::iterator prev_jt = data_parts.begin();
|
|
|
|
|
DataParts::iterator curr_jt = prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
while (curr_jt != data_parts.end())
|
|
|
|
|
{
|
|
|
|
|
/// Куски данных за разные месяцы рассматривать не будем
|
|
|
|
|
if ((*curr_jt)->left_month != (*curr_jt)->right_month
|
|
|
|
|
|| (*curr_jt)->right_month != (*prev_jt)->left_month
|
|
|
|
|
|| (*prev_jt)->left_month != (*prev_jt)->right_month)
|
|
|
|
|
{
|
|
|
|
|
++prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((*curr_jt)->contains(**prev_jt))
|
|
|
|
|
{
|
|
|
|
|
LOG_WARNING(log, "Part " << (*curr_jt)->name << " contains " << (*prev_jt)->name);
|
|
|
|
|
data_parts.erase(prev_jt);
|
|
|
|
|
prev_jt = curr_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
}
|
|
|
|
|
else if ((*prev_jt)->contains(**curr_jt))
|
|
|
|
|
{
|
|
|
|
|
LOG_WARNING(log, "Part " << (*prev_jt)->name << " contains " << (*curr_jt)->name);
|
|
|
|
|
data_parts.erase(curr_jt++);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
++prev_jt;
|
|
|
|
|
++curr_jt;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOG_DEBUG(log, "Loaded data parts (" << data_parts.size() << " items)");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::clearOldParts()
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedTry<Poco::FastMutex> lock;
|
|
|
|
|
|
|
|
|
|
/// Если метод уже вызван из другого потока (или если all_data_parts прямо сейчас меняют), то можно ничего не делать.
|
|
|
|
|
if (!lock.lock(&all_data_parts_mutex))
|
|
|
|
|
{
|
|
|
|
|
LOG_TRACE(log, "Already clearing or modifying old parts");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOG_TRACE(log, "Clearing old parts");
|
|
|
|
|
for (DataParts::iterator it = all_data_parts.begin(); it != all_data_parts.end();)
|
|
|
|
|
{
|
2014-03-14 17:19:38 +00:00
|
|
|
|
int ref_count = it->use_count();
|
2014-03-09 17:36:01 +00:00
|
|
|
|
if (ref_count == 1) /// После этого ref_count не может увеличиться.
|
|
|
|
|
{
|
|
|
|
|
LOG_DEBUG(log, "'Removing' part " << (*it)->name << " (prepending old_ to its name)");
|
|
|
|
|
|
|
|
|
|
(*it)->renameToOld();
|
|
|
|
|
all_data_parts.erase(it++);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
++it;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Удалим старые old_ куски.
|
|
|
|
|
Poco::DirectoryIterator end;
|
|
|
|
|
for (Poco::DirectoryIterator it(full_path); it != end; ++it)
|
|
|
|
|
{
|
|
|
|
|
if (0 != it.name().compare(0, strlen("old_"), "old_"))
|
|
|
|
|
continue;
|
|
|
|
|
if (it->isDirectory() && it->getLastModified().epochTime() + settings.old_parts_lifetime < time(0))
|
|
|
|
|
{
|
|
|
|
|
it->remove(true);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
void MergeTreeData::setPath(const String & new_full_path)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-13 19:14:25 +00:00
|
|
|
|
Poco::File(full_path).renameTo(new_full_path);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
full_path = new_full_path;
|
2014-03-13 19:14:25 +00:00
|
|
|
|
|
2014-03-13 19:07:17 +00:00
|
|
|
|
context.getUncompressedCache()->reset();
|
|
|
|
|
context.getMarkCache()->reset();
|
2014-03-13 19:14:25 +00:00
|
|
|
|
|
2014-03-13 19:07:17 +00:00
|
|
|
|
log = &Logger::get(lastTwoPathComponents(full_path));
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
void MergeTreeData::dropAllData()
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
|
|
|
|
data_parts.clear();
|
|
|
|
|
all_data_parts.clear();
|
|
|
|
|
|
2014-03-13 19:14:25 +00:00
|
|
|
|
context.getUncompressedCache()->reset();
|
|
|
|
|
context.getMarkCache()->reset();
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
Poco::File(full_path).remove(true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::removeColumnFiles(String column_name)
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
|
|
|
|
|
/// Регэксп выбирает файлы столбца для удаления
|
|
|
|
|
Poco::RegularExpression re(column_name + "(?:(?:\\.|\\%2E).+){0,1}" +"(?:\\.mrk|\\.bin|\\.size\\d+\\.bin|\\.size\\d+\\.mrk)");
|
|
|
|
|
/// Цикл по всем директориям кусочков
|
|
|
|
|
Poco::RegularExpression::MatchVec matches;
|
|
|
|
|
Poco::DirectoryIterator end;
|
|
|
|
|
for (Poco::DirectoryIterator it_dir = Poco::DirectoryIterator(full_path); it_dir != end; ++it_dir)
|
|
|
|
|
{
|
|
|
|
|
std::string dir_name = it_dir.name();
|
|
|
|
|
|
|
|
|
|
if (!isPartDirectory(dir_name, matches))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/// Цикл по каждому из файлов в директории кусочков
|
|
|
|
|
String full_dir_name = full_path + dir_name + "/";
|
|
|
|
|
for (Poco::DirectoryIterator it_file(full_dir_name); it_file != end; ++it_file)
|
|
|
|
|
{
|
|
|
|
|
if (re.match(it_file.name()))
|
|
|
|
|
{
|
|
|
|
|
Poco::File file(full_dir_name + it_file.name());
|
|
|
|
|
if (file.exists())
|
|
|
|
|
file.remove();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::createConvertExpression(const String & in_column_name, const String & out_type, ExpressionActionsPtr & out_expression, String & out_column)
|
|
|
|
|
{
|
|
|
|
|
ASTFunction * function = new ASTFunction;
|
|
|
|
|
ASTPtr function_ptr = function;
|
|
|
|
|
|
|
|
|
|
ASTExpressionList * arguments = new ASTExpressionList;
|
|
|
|
|
ASTPtr arguments_ptr = arguments;
|
|
|
|
|
|
|
|
|
|
function->name = "to" + out_type;
|
|
|
|
|
function->arguments = arguments_ptr;
|
|
|
|
|
function->children.push_back(arguments_ptr);
|
|
|
|
|
|
|
|
|
|
ASTIdentifier * in_column = new ASTIdentifier;
|
|
|
|
|
ASTPtr in_column_ptr = in_column;
|
|
|
|
|
|
|
|
|
|
arguments->children.push_back(in_column_ptr);
|
|
|
|
|
|
|
|
|
|
in_column->name = in_column_name;
|
|
|
|
|
in_column->kind = ASTIdentifier::Column;
|
|
|
|
|
|
|
|
|
|
out_expression = ExpressionAnalyzer(function_ptr, context, *columns).getActions(false);
|
|
|
|
|
out_column = function->getColumnName();
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 19:36:28 +00:00
|
|
|
|
static DataTypePtr getDataTypeByName(const String & name, const NamesAndTypesList & columns)
|
|
|
|
|
{
|
|
|
|
|
for (const auto & it : columns)
|
|
|
|
|
{
|
|
|
|
|
if (it.first == name)
|
|
|
|
|
return it.second;
|
|
|
|
|
}
|
|
|
|
|
throw Exception("No column " + name + " in table", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
void MergeTreeData::alter(const ASTAlterQuery::Parameters & params)
|
|
|
|
|
{
|
|
|
|
|
{
|
2014-03-20 13:00:42 +00:00
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
alterColumns(params, columns, context);
|
|
|
|
|
}
|
|
|
|
|
if (params.type == ASTAlterQuery::DROP)
|
|
|
|
|
{
|
|
|
|
|
String column_name = dynamic_cast<const ASTIdentifier &>(*params.column).name;
|
|
|
|
|
removeColumnFiles(column_name);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
context.getUncompressedCache()->reset();
|
|
|
|
|
context.getMarkCache()->reset();
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
void MergeTreeData::prepareAlterModify(const ASTAlterQuery::Parameters & params)
|
|
|
|
|
{
|
|
|
|
|
DataPartsVector parts;
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
parts = DataPartsVector(data_parts.begin(), data_parts.end());
|
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
Names column_name;
|
|
|
|
|
const ASTNameTypePair & name_type = dynamic_cast<const ASTNameTypePair &>(*params.name_type);
|
|
|
|
|
StringRange type_range = name_type.type->range;
|
|
|
|
|
String type(type_range.first, type_range.second - type_range.first);
|
|
|
|
|
DataTypePtr old_type_ptr = DB::getDataTypeByName(name_type.name, *columns);
|
|
|
|
|
DataTypePtr new_type_ptr = context.getDataTypeFactory().get(type);
|
|
|
|
|
if (dynamic_cast<DataTypeNested *>(old_type_ptr.get()) || dynamic_cast<DataTypeArray *>(old_type_ptr.get()) ||
|
|
|
|
|
dynamic_cast<DataTypeNested *>(new_type_ptr.get()) || dynamic_cast<DataTypeArray *>(new_type_ptr.get()))
|
|
|
|
|
throw Exception("ALTER MODIFY not supported for nested and array types");
|
|
|
|
|
|
|
|
|
|
column_name.push_back(name_type.name);
|
|
|
|
|
ExpressionActionsPtr expr;
|
|
|
|
|
String out_column;
|
|
|
|
|
createConvertExpression(name_type.name, type, expr, out_column);
|
|
|
|
|
|
|
|
|
|
ColumnNumbers num(1, 0);
|
|
|
|
|
for (DataPartPtr & part : parts)
|
|
|
|
|
{
|
|
|
|
|
MarkRanges ranges(1, MarkRange(0, part->size));
|
|
|
|
|
ExpressionBlockInputStream in(new MergeTreeBlockInputStream(full_path + part->name + '/',
|
|
|
|
|
DEFAULT_MERGE_BLOCK_SIZE, column_name, *this, part, ranges, false, NULL, ""), expr);
|
|
|
|
|
MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true);
|
|
|
|
|
out.writePrefix();
|
2014-03-09 17:36:01 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
while(DB::Block b = in.read())
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-20 13:00:42 +00:00
|
|
|
|
/// оставляем только столбец с результатом
|
|
|
|
|
b.erase(0);
|
|
|
|
|
out.write(b);
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
2014-03-20 13:00:42 +00:00
|
|
|
|
LOG_TRACE(log, "Write Suffix");
|
|
|
|
|
out.writeSuffix();
|
|
|
|
|
}
|
|
|
|
|
catch (const Exception & e)
|
|
|
|
|
{
|
|
|
|
|
if (e.code() != ErrorCodes::ALL_REQUESTED_COLUMNS_ARE_MISSING)
|
|
|
|
|
throw;
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-03-20 13:00:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void MergeTreeData::commitAlterModify(const ASTAlterQuery::Parameters & params)
|
|
|
|
|
{
|
|
|
|
|
DataPartsVector parts;
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
2014-03-20 13:00:42 +00:00
|
|
|
|
parts = DataPartsVector(data_parts.begin(), data_parts.end());
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
2014-03-20 13:00:42 +00:00
|
|
|
|
|
|
|
|
|
const ASTNameTypePair & name_type = dynamic_cast<const ASTNameTypePair &>(*params.name_type);
|
|
|
|
|
StringRange type_range = name_type.type->range;
|
|
|
|
|
String type(type_range.first, type_range.second - type_range.first);
|
|
|
|
|
|
|
|
|
|
ExpressionActionsPtr expr;
|
|
|
|
|
String out_column;
|
|
|
|
|
createConvertExpression(name_type.name, type, expr, out_column);
|
|
|
|
|
|
|
|
|
|
/// переименовываем файлы
|
|
|
|
|
/// переименовываем старые столбцы, добавляя расширение .old
|
|
|
|
|
for (DataPartPtr & part : parts)
|
2014-03-09 17:36:01 +00:00
|
|
|
|
{
|
2014-03-20 13:00:42 +00:00
|
|
|
|
std::string path = full_path + part->name + '/' + escapeForFileName(name_type.name);
|
|
|
|
|
if (Poco::File(path + ".bin").exists())
|
|
|
|
|
{
|
|
|
|
|
LOG_TRACE(log, "Renaming " << path + ".bin" << " to " << path + ".bin" + ".old");
|
|
|
|
|
Poco::File(path + ".bin").renameTo(path + ".bin" + ".old");
|
|
|
|
|
LOG_TRACE(log, "Renaming " << path + ".mrk" << " to " << path + ".mrk" + ".old");
|
|
|
|
|
Poco::File(path + ".mrk").renameTo(path + ".mrk" + ".old");
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-03-13 19:36:28 +00:00
|
|
|
|
|
2014-03-20 13:00:42 +00:00
|
|
|
|
/// переименовываем временные столбцы
|
|
|
|
|
for (DataPartPtr & part : parts)
|
|
|
|
|
{
|
|
|
|
|
std::string path = full_path + part->name + '/' + escapeForFileName(out_column);
|
|
|
|
|
std::string new_path = full_path + part->name + '/' + escapeForFileName(name_type.name);
|
|
|
|
|
if (Poco::File(path + ".bin").exists())
|
|
|
|
|
{
|
|
|
|
|
LOG_TRACE(log, "Renaming " << path + ".bin" << " to " << new_path + ".bin");
|
|
|
|
|
Poco::File(path + ".bin").renameTo(new_path + ".bin");
|
|
|
|
|
LOG_TRACE(log, "Renaming " << path + ".mrk" << " to " << new_path + ".mrk");
|
|
|
|
|
Poco::File(path + ".mrk").renameTo(new_path + ".mrk");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// удаляем старые столбцы
|
|
|
|
|
for (DataPartPtr & part : parts)
|
|
|
|
|
{
|
|
|
|
|
std::string path = full_path + part->name + '/' + escapeForFileName(name_type.name);
|
|
|
|
|
if (Poco::File(path + ".bin" + ".old").exists())
|
|
|
|
|
{
|
|
|
|
|
LOG_TRACE(log, "Removing old column " << path + ".bin" + ".old");
|
|
|
|
|
Poco::File(path + ".bin" + ".old").remove();
|
|
|
|
|
LOG_TRACE(log, "Removing old column " << path + ".mrk" + ".old");
|
|
|
|
|
Poco::File(path + ".mrk" + ".old").remove();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
context.getUncompressedCache()->reset();
|
|
|
|
|
context.getMarkCache()->reset();
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
alterColumns(params, columns, context);
|
2014-03-13 19:36:28 +00:00
|
|
|
|
}
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeData::isPartDirectory(const String & dir_name, Poco::RegularExpression::MatchVec & matches) const
|
|
|
|
|
{
|
|
|
|
|
return (file_name_regexp.match(dir_name, 0, matches) && 6 == matches.size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeData::isBrokenPart(const String & path)
|
|
|
|
|
{
|
|
|
|
|
/// Проверяем, что первичный ключ непуст.
|
|
|
|
|
|
|
|
|
|
Poco::File index_file(path + "/primary.idx");
|
|
|
|
|
|
|
|
|
|
if (!index_file.exists() || index_file.getSize() == 0)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Part " << path << " is broken: primary key is empty.");
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Проверяем, что все засечки непусты и имеют одинаковый размер.
|
|
|
|
|
|
|
|
|
|
ssize_t marks_size = -1;
|
|
|
|
|
for (NamesAndTypesList::const_iterator it = columns->begin(); it != columns->end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
Poco::File marks_file(path + "/" + escapeForFileName(it->first) + ".mrk");
|
|
|
|
|
|
|
|
|
|
/// при добавлении нового столбца в таблицу файлы .mrk не создаются. Не будем ничего удалять.
|
|
|
|
|
if (!marks_file.exists())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (marks_size == -1)
|
|
|
|
|
{
|
|
|
|
|
marks_size = marks_file.getSize();
|
|
|
|
|
|
|
|
|
|
if (0 == marks_size)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Part " << path << " is broken: " << marks_file.path() << " is empty.");
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (static_cast<ssize_t>(marks_file.getSize()) != marks_size)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Part " << path << " is broken: marks have different sizes.");
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Strings MergeTreeData::tryRestorePart(const String & path, const String & file_name, Strings & old_parts)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Restoring all old_ parts covered by " << file_name);
|
|
|
|
|
|
|
|
|
|
Poco::RegularExpression::MatchVec matches;
|
|
|
|
|
Strings restored_parts;
|
|
|
|
|
|
|
|
|
|
isPartDirectory(file_name, matches);
|
|
|
|
|
DataPart broken_part(*this);
|
|
|
|
|
parsePartName(file_name, matches, broken_part);
|
|
|
|
|
|
|
|
|
|
for (int i = static_cast<int>(old_parts.size()) - 1; i >= 0; --i)
|
|
|
|
|
{
|
|
|
|
|
DataPart old_part(*this);
|
|
|
|
|
String name = old_parts[i].substr(strlen("old_"));
|
|
|
|
|
if (!isPartDirectory(name, matches))
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Strange file name: " << path + old_parts[i] << "; ignoring");
|
|
|
|
|
old_parts.erase(old_parts.begin() + i);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
parsePartName(name, matches, old_part);
|
|
|
|
|
if (broken_part.contains(old_part))
|
|
|
|
|
{
|
|
|
|
|
/// Восстанавливаем все содержащиеся куски. Если некоторые из них содержатся в других, их удалит loadDataParts.
|
|
|
|
|
LOG_ERROR(log, "Restoring part " << path + old_parts[i]);
|
|
|
|
|
Poco::File(path + old_parts[i]).renameTo(path + name);
|
|
|
|
|
old_parts.erase(old_parts.begin() + i);
|
|
|
|
|
restored_parts.push_back(name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (restored_parts.size() >= 2)
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Removing broken part " << path + file_name << " because at least 2 old_ parts were restored in its place");
|
|
|
|
|
Poco::File(path + file_name).remove(true);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
LOG_ERROR(log, "Not removing broken part " << path + file_name
|
|
|
|
|
<< " because less than 2 old_ parts were restored in its place. You need to resolve this manually");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return restored_parts;
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
void MergeTreeData::replaceParts(DataPartsVector old_parts, DataPartPtr new_part)
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> all_lock(all_data_parts_mutex);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < old_parts.size(); ++i)
|
|
|
|
|
if (data_parts.end() == data_parts.find(old_parts[i]))
|
|
|
|
|
throw Exception("Logical error: cannot find data part " + old_parts[i]->name + " in list", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
|
|
data_parts.insert(new_part);
|
|
|
|
|
all_data_parts.insert(new_part);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < old_parts.size(); ++i)
|
|
|
|
|
data_parts.erase(data_parts.find(old_parts[i]));
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-19 10:45:13 +00:00
|
|
|
|
void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr part, Increment * increment)
|
2014-03-13 17:44:00 +00:00
|
|
|
|
{
|
|
|
|
|
LOG_TRACE(log, "Renaming.");
|
|
|
|
|
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock_all(all_data_parts_mutex);
|
|
|
|
|
|
2014-03-19 10:45:13 +00:00
|
|
|
|
String old_path = getFullPath() + part->name + "/";
|
2014-03-13 17:44:00 +00:00
|
|
|
|
|
|
|
|
|
UInt64 part_id = part->left;
|
|
|
|
|
|
|
|
|
|
/** Важно, что получение номера куска происходит атомарно с добавлением этого куска в набор.
|
|
|
|
|
* Иначе есть race condition - может произойти слияние пары кусков, диапазоны номеров которых
|
|
|
|
|
* содержат ещё не добавленный кусок.
|
|
|
|
|
*/
|
|
|
|
|
if (increment)
|
|
|
|
|
part_id = increment->get(false);
|
|
|
|
|
|
|
|
|
|
part->left = part->right = part_id;
|
|
|
|
|
part->name = getPartName(part->left_date, part->right_date, part_id, part_id, 0);
|
|
|
|
|
|
2014-03-19 10:45:13 +00:00
|
|
|
|
String new_path = getFullPath() + part->name + "/";
|
2014-03-13 17:44:00 +00:00
|
|
|
|
|
|
|
|
|
/// Переименовываем кусок.
|
|
|
|
|
Poco::File(old_path).renameTo(new_path);
|
|
|
|
|
|
|
|
|
|
data_parts.insert(part);
|
|
|
|
|
all_data_parts.insert(part);
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
|
MergeTreeData::DataParts MergeTreeData::getDataParts()
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(data_parts_mutex);
|
|
|
|
|
|
|
|
|
|
return data_parts;
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
}
|