2015-09-29 19:19:54 +00:00
|
|
|
#include <common/JSON.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/WriteBufferFromFile.h>
|
|
|
|
#include <IO/ReadBufferFromFile.h>
|
|
|
|
#include <IO/WriteBufferFromString.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <Common/escapeForFileName.h>
|
|
|
|
|
|
|
|
#include <Common/FileChecker.h>
|
2015-09-29 14:09:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-12-31 00:32:39 +00:00
|
|
|
/// Creates a checker that operates on `disk_` and keeps its records in the file `file_info_path_`.
/// The path of the accompanying temporary file is derived inside `setPath`.
FileChecker::FileChecker(DiskPtr disk_, const String & file_info_path_)
    : disk(std::move(disk_))
{
    setPath(file_info_path_);
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
void FileChecker::setPath(const String & file_info_path_)
|
2015-09-29 14:09:01 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
files_info_path = file_info_path_;
|
2017-02-06 12:09:54 +00:00
|
|
|
|
2019-12-27 10:26:23 +00:00
|
|
|
tmp_files_info_path = parentPath(files_info_path) + "tmp_" + fileName(files_info_path);
|
2015-09-29 14:09:01 +00:00
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
void FileChecker::update(const String & file_path)
|
2015-09-29 14:09:01 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
initialize();
|
2019-12-12 08:57:25 +00:00
|
|
|
updateImpl(file_path);
|
2017-04-01 07:20:54 +00:00
|
|
|
save();
|
2015-09-29 14:09:01 +00:00
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
/// Records the current sizes of all files in the range [begin, end) and writes the records
/// back to disk once, after the whole range has been processed.
void FileChecker::update(const Strings::const_iterator & begin, const Strings::const_iterator & end)
{
    /// Load the previously saved records first (only done once), because `save` rewrites the whole file.
    initialize();

    auto current = begin;
    while (current != end)
    {
        updateImpl(*current);
        ++current;
    }

    save();
}
|
|
|
|
|
2019-07-03 13:17:19 +00:00
|
|
|
/// Compares the saved file sizes with the files actually present on the disk.
/// Returns one result per examined file. Examination stops at the first file that is missing
/// or has an unexpected size, so that failing entry is always the last one in the result.
/// If there are no saved records, the result is empty.
CheckResults FileChecker::check() const
{
    /// The records are re-read into a local map on every call, so that this const method
    /// does not modify the object. `check` is rarely called.
    Map saved_sizes;
    load(saved_sizes, files_info_path);

    if (saved_sizes.empty())
        return {};

    CheckResults results;

    for (auto it = saved_sizes.begin(); it != saved_sizes.end(); ++it)
    {
        const String & file_name = it->first;
        const auto expected_size = it->second;

        /// Recorded names are relative to the parent path of the records file.
        const String full_path = parentPath(files_info_path) + file_name;

        if (!disk->exists(full_path))
        {
            results.emplace_back(file_name, false, "File " + full_path + " doesn't exist");
            break;
        }

        const auto actual_size = disk->getFileSize(full_path);
        if (actual_size == expected_size)
        {
            results.emplace_back(file_name, true, "");
            continue;
        }

        results.emplace_back(file_name, false, "Size of " + full_path + " is wrong. Size is " + toString(actual_size) + " but should be " + toString(expected_size));
        break;
    }

    return results;
}
|
|
|
|
|
|
|
|
void FileChecker::initialize()
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
if (initialized)
|
|
|
|
return;
|
2015-09-29 14:09:01 +00:00
|
|
|
|
2018-08-26 02:08:35 +00:00
|
|
|
load(map, files_info_path);
|
2017-04-01 07:20:54 +00:00
|
|
|
initialized = true;
|
2015-09-29 14:09:01 +00:00
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
void FileChecker::updateImpl(const String & file_path)
|
2015-09-29 14:09:01 +00:00
|
|
|
{
|
2019-12-27 10:26:23 +00:00
|
|
|
map[fileName(file_path)] = disk->getFileSize(file_path);
|
2015-09-29 14:09:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void FileChecker::save() const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-12-26 14:28:22 +00:00
|
|
|
std::unique_ptr<WriteBuffer> out = disk->writeFile(tmp_files_info_path);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// So complex JSON structure - for compatibility with the old format.
|
2019-12-12 08:57:25 +00:00
|
|
|
writeCString("{\"yandex\":{", *out);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-08-07 17:04:39 +00:00
|
|
|
auto settings = FormatSettings();
|
2017-04-01 07:20:54 +00:00
|
|
|
for (auto it = map.begin(); it != map.end(); ++it)
|
|
|
|
{
|
|
|
|
if (it != map.begin())
|
2019-12-12 08:57:25 +00:00
|
|
|
writeString(",", *out);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// `escapeForFileName` is not really needed. But it is left for compatibility with the old code.
|
2019-12-12 08:57:25 +00:00
|
|
|
writeJSONString(escapeForFileName(it->first), *out, settings);
|
2020-03-08 21:04:10 +00:00
|
|
|
writeString(R"(:{"size":")", *out);
|
2019-12-12 08:57:25 +00:00
|
|
|
writeIntText(it->second, *out);
|
|
|
|
writeString("\"}", *out);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
writeCString("}}", *out);
|
|
|
|
out->next();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-12-17 13:45:53 +00:00
|
|
|
disk->replaceFile(tmp_files_info_path, files_info_path);
|
2015-09-29 14:09:01 +00:00
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
/// Reads the size records stored in the file `path` into `local_map`.
/// `local_map` is cleared first; if the file does not exist, it is left empty.
/// Expected format: {"yandex":{"<escaped name>":{"size":"<size>"},...}}
void FileChecker::load(Map & local_map, const String & path) const
{
    local_map.clear();

    if (!disk->exists(path))
        return;

    std::unique_ptr<ReadBuffer> in = disk->readFile(path);
    WriteBufferFromOwnString out;

    /// The JSON library does not support whitespace. We delete them. Inefficient.
    /// Note: every whitespace character is dropped, wherever it occurs in the file.
    while (!in->eof())
    {
        char c;
        readChar(c, *in);

        /// `isspace` requires a value representable as `unsigned char` (or EOF);
        /// passing a negative `char` (any byte >= 0x80 where `char` is signed) is undefined behaviour.
        if (!isspace(static_cast<unsigned char>(c)))
            writeChar(c, out);
    }
    JSON json(out.str());

    JSON files = json["yandex"];
    for (const JSON file : files) // NOLINT
        local_map[unescapeForFileName(file.getName())] = file.getValue()["size"].toUInt();
}
|
|
|
|
|
|
|
|
}
|