2016-01-15 02:47:19 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
2016-03-07 02:12:51 +00:00
|
|
|
|
#include <sparsehash/dense_hash_map>
|
2016-01-15 02:47:19 +00:00
|
|
|
|
|
|
|
|
|
#include <Poco/File.h>
|
|
|
|
|
#include <Poco/NumberParser.h>
|
|
|
|
|
#include <Poco/Util/Application.h>
|
|
|
|
|
#include <Poco/Exception.h>
|
|
|
|
|
|
|
|
|
|
#include <common/Common.h>
|
|
|
|
|
#include <common/logger_useful.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/Core/StringRef.h>
|
2016-10-27 17:53:47 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
2016-10-27 17:53:47 +00:00
|
|
|
|
#include <DB/IO/WriteHelpers.h>
|
2016-01-15 02:47:19 +00:00
|
|
|
|
#include <DB/IO/ReadBufferFromFile.h>
|
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
/** @brief Класс, позволяющий узнавать по id региона его текстовое название на одном из поддерживаемых языков: ru, en, ua, by, kz, tr.
|
2016-02-07 08:42:21 +00:00
|
|
|
|
*
|
2016-01-15 02:47:19 +00:00
|
|
|
|
* Информацию об именах регионов загружает из текстовых файлов с названиями следующего формата:
|
|
|
|
|
* regions_names_xx.txt,
|
|
|
|
|
* где xx - одно из двух буквенных обозначений следующих поддерживаемых языков:
|
|
|
|
|
* ru, en, ua, by, kz, tr.
|
2016-02-07 08:42:21 +00:00
|
|
|
|
*
|
2016-01-15 02:47:19 +00:00
|
|
|
|
* Умеет, по запросу, обновлять данные.
|
|
|
|
|
*/
|
|
|
|
|
class RegionsNames
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
enum class Language
|
|
|
|
|
{
|
|
|
|
|
RU = 0,
|
|
|
|
|
EN,
|
|
|
|
|
UA,
|
|
|
|
|
BY,
|
|
|
|
|
KZ,
|
|
|
|
|
TR,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
static const size_t ROOT_LANGUAGE = 0;
|
|
|
|
|
static const size_t SUPPORTED_LANGUAGES_COUNT = 6;
|
|
|
|
|
static const size_t LANGUAGE_ALIASES_COUNT = 7;
|
|
|
|
|
|
|
|
|
|
static const char ** getSupportedLanguages()
|
|
|
|
|
{
|
|
|
|
|
static const char * res[] { "ru", "en", "ua", "by", "kz", "tr" };
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct language_alias { const char * const name; const Language lang; };
|
|
|
|
|
static const language_alias * getLanguageAliases()
|
|
|
|
|
{
|
|
|
|
|
static constexpr const language_alias language_aliases[] {
|
|
|
|
|
{ "ru", Language::RU },
|
|
|
|
|
{ "en", Language::EN },
|
|
|
|
|
{ "ua", Language::UA },
|
|
|
|
|
{ "uk", Language::UA },
|
|
|
|
|
{ "by", Language::BY },
|
|
|
|
|
{ "kz", Language::KZ },
|
|
|
|
|
{ "tr", Language::TR }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
return language_aliases;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
using RegionID = UInt32;
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
using Chars = std::vector<char>;
|
|
|
|
|
using CharsForLanguageID = std::vector<Chars>;
|
|
|
|
|
using ModificationTimes = std::vector<time_t>;
|
|
|
|
|
using StringRefs = std::vector<StringRef>; /// Lookup table RegionID -> StringRef
|
|
|
|
|
using StringRefsForLanguageID = std::vector<StringRefs>;
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
public:
|
|
|
|
|
static constexpr auto required_key = "path_to_regions_names_files";
|
|
|
|
|
|
|
|
|
|
RegionsNames(const std::string & directory_ = Poco::Util::Application::instance().config().getString(required_key))
|
|
|
|
|
: directory(directory_)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** @brief Перезагружает, при необходимости, имена регионов.
|
|
|
|
|
*/
|
|
|
|
|
void reload()
|
|
|
|
|
{
|
|
|
|
|
LOG_DEBUG(log, "Reloading regions names");
|
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
RegionID max_region_id = 0;
|
2016-01-15 02:47:19 +00:00
|
|
|
|
for (size_t language_id = 0; language_id < SUPPORTED_LANGUAGES_COUNT; ++language_id)
|
|
|
|
|
{
|
|
|
|
|
const std::string & language = getSupportedLanguages()[language_id];
|
|
|
|
|
std::string path = directory + "/regions_names_" + language + ".txt";
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
Poco::File file(path);
|
|
|
|
|
time_t new_modification_time = file.getLastModified().epochTime();
|
|
|
|
|
if (new_modification_time <= file_modification_times[language_id])
|
|
|
|
|
continue;
|
|
|
|
|
file_modification_times[language_id] = new_modification_time;
|
|
|
|
|
|
|
|
|
|
LOG_DEBUG(log, "Reloading regions names for language: " << language);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
DB::ReadBufferFromFile in(path);
|
|
|
|
|
|
|
|
|
|
const size_t initial_size = 10000;
|
2016-10-27 17:53:47 +00:00
|
|
|
|
const size_t max_size = 1000000;
|
2016-01-15 02:47:19 +00:00
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
Chars new_chars;
|
|
|
|
|
StringRefs new_names_refs(initial_size, StringRef("", 0));
|
2016-01-15 02:47:19 +00:00
|
|
|
|
|
|
|
|
|
/// Выделим непрерывный кусок памяти, которого хватит для хранения всех имён.
|
|
|
|
|
new_chars.reserve(Poco::File(path).getSize());
|
|
|
|
|
|
|
|
|
|
while (!in.eof())
|
|
|
|
|
{
|
2016-11-01 12:57:58 +00:00
|
|
|
|
Int32 read_region_id;
|
2016-01-15 02:47:19 +00:00
|
|
|
|
std::string region_name;
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-11-01 12:57:58 +00:00
|
|
|
|
DB::readIntText(read_region_id, in);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
DB::assertChar('\t', in);
|
2016-01-15 02:47:19 +00:00
|
|
|
|
DB::readString(region_name, in);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
DB::assertChar('\n', in);
|
2016-01-15 02:47:19 +00:00
|
|
|
|
|
2016-11-01 12:57:58 +00:00
|
|
|
|
if (read_region_id <= 0)
|
2016-01-15 02:47:19 +00:00
|
|
|
|
continue;
|
|
|
|
|
|
2016-11-01 12:57:58 +00:00
|
|
|
|
RegionID region_id = read_region_id;
|
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
size_t old_size = new_chars.size();
|
|
|
|
|
|
|
|
|
|
if (new_chars.capacity() < old_size + region_name.length() + 1)
|
|
|
|
|
throw Poco::Exception("Logical error. Maybe size of file " + path + " is wrong.");
|
|
|
|
|
|
|
|
|
|
new_chars.resize(old_size + region_name.length() + 1);
|
|
|
|
|
memcpy(&new_chars[old_size], region_name.c_str(), region_name.length() + 1);
|
|
|
|
|
|
|
|
|
|
if (region_id > max_region_id)
|
2016-10-27 17:53:47 +00:00
|
|
|
|
{
|
2016-01-15 02:47:19 +00:00
|
|
|
|
max_region_id = region_id;
|
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
if (region_id > max_size)
|
|
|
|
|
throw DB::Exception("Region id is too large: " + DB::toString(region_id) + ", should be not more than " + DB::toString(max_size));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (region_id >= new_names_refs.size())
|
2016-01-15 02:47:19 +00:00
|
|
|
|
new_names_refs.resize(new_names_refs.size() * 2, StringRef("", 0));
|
|
|
|
|
|
|
|
|
|
new_names_refs[region_id] = StringRef(&new_chars[old_size], region_name.length());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chars[language_id].swap(new_chars);
|
|
|
|
|
names_refs[language_id].swap(new_names_refs);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t language_id = 0; language_id < SUPPORTED_LANGUAGES_COUNT; ++language_id)
|
|
|
|
|
names_refs[language_id].resize(max_region_id + 1, StringRef("", 0));
|
|
|
|
|
}
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
StringRef getRegionName(RegionID region_id, Language language = Language::RU) const
|
2016-01-15 02:47:19 +00:00
|
|
|
|
{
|
|
|
|
|
size_t language_id = static_cast<size_t>(language);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-10-27 17:53:47 +00:00
|
|
|
|
if (region_id > names_refs[language_id].size())
|
2016-01-15 02:47:19 +00:00
|
|
|
|
return StringRef("", 0);
|
|
|
|
|
|
|
|
|
|
StringRef ref = names_refs[language_id][region_id];
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
while (ref.size == 0 && language_id != ROOT_LANGUAGE)
|
|
|
|
|
{
|
|
|
|
|
static const size_t FALLBACK[] = { 0, 0, 0, 0, 0, 1 };
|
|
|
|
|
language_id = FALLBACK[language_id];
|
|
|
|
|
ref = names_refs[language_id][region_id];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static Language getLanguageEnum(const std::string & language)
|
|
|
|
|
{
|
|
|
|
|
if (language.size() == 2)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < LANGUAGE_ALIASES_COUNT; ++i)
|
|
|
|
|
{
|
|
|
|
|
const auto & alias = getLanguageAliases()[i];
|
|
|
|
|
if (language[0] == alias.name[0] && language[1] == alias.name[1])
|
|
|
|
|
return alias.lang;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
throw Poco::Exception("Unsupported language for region name. Supported languages are: " + dumpSupportedLanguagesNames() + ".");
|
|
|
|
|
}
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
static std::string dumpSupportedLanguagesNames()
|
|
|
|
|
{
|
|
|
|
|
std::string res = "";
|
|
|
|
|
for (size_t i = 0; i < LANGUAGE_ALIASES_COUNT; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i > 0)
|
|
|
|
|
res += ", ";
|
|
|
|
|
res += '\'';
|
|
|
|
|
res += getLanguageAliases()[i].name;
|
|
|
|
|
res += '\'';
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
const std::string directory;
|
2016-10-27 17:53:47 +00:00
|
|
|
|
ModificationTimes file_modification_times = ModificationTimes(SUPPORTED_LANGUAGES_COUNT);
|
2016-01-15 02:47:19 +00:00
|
|
|
|
Logger * log = &Logger::get("RegionsNames");
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
/// Байты имен для каждого языка, уложенные подряд, разделенные нулями
|
2016-10-27 17:53:47 +00:00
|
|
|
|
CharsForLanguageID chars = CharsForLanguageID(SUPPORTED_LANGUAGES_COUNT);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
|
2016-01-15 02:47:19 +00:00
|
|
|
|
/// Отображение для каждого языка из id региона в указатель на диапазон байт имени
|
2016-10-27 17:53:47 +00:00
|
|
|
|
StringRefsForLanguageID names_refs = StringRefsForLanguageID(SUPPORTED_LANGUAGES_COUNT);
|
2016-01-15 02:47:19 +00:00
|
|
|
|
};
|