ClickHouse/dbms/src/Dictionaries/Embedded/RegionsNames.h

117 lines
3.4 KiB
C++
Raw Normal View History

#pragma once
#include <string>
#include <vector>
#include <Poco/Exception.h>
#include <common/StringRef.h>
#include <common/Types.h>
#include "GeodataProviders/INamesProvider.h"
/** A class that allows you to look up the text name of a region by its id
  * in one of the supported languages: ru, en, ua, by, kz, tr.
  *
  * Information about region names is loaded from text files named
  *  regions_names_xx.txt,
  * where xx is the two-letter code of one of the supported languages:
  *  ru, en, ua, by, kz, tr.
  *
  * The data can be reloaded on request.
  */
class RegionsNames
{
public:
2019-11-11 20:54:57 +00:00
enum class Language : size_t
{
RU = 0,
EN,
UA,
BY,
KZ,
TR,
2019-11-11 20:54:57 +00:00
END
};
private:
2019-11-11 20:54:57 +00:00
static inline constexpr const char * supported_languages[] =
{
2019-11-11 20:54:57 +00:00
"ru",
"en",
"ua",
"by",
"kz",
"tr"
};
2019-11-11 20:54:57 +00:00
static inline constexpr std::pair<const char *, Language> language_aliases[] =
{
2019-11-11 20:54:57 +00:00
{"ru", Language::RU},
{"en", Language::EN},
{"ua", Language::UA},
{"uk", Language::UA},
{"by", Language::BY},
{"kz", Language::KZ},
{"tr", Language::TR}
};
2019-11-11 20:54:57 +00:00
static constexpr size_t ROOT_LANGUAGE = 0;
static constexpr size_t SUPPORTED_LANGUAGES_COUNT = size_t(Language::END);
static constexpr size_t LANGUAGE_ALIASES_COUNT = sizeof(language_aliases);
using NamesSources = std::vector<std::shared_ptr<ILanguageRegionsNamesDataSource>>;
using Chars = std::vector<char>;
using CharsForLanguageID = std::vector<Chars>;
using StringRefs = std::vector<StringRef>; /// Lookup table RegionID -> StringRef
using StringRefsForLanguageID = std::vector<StringRefs>;
public:
RegionsNames(IRegionsNamesDataProviderPtr data_provider);
StringRef getRegionName(RegionID region_id, Language language = Language::RU) const
{
size_t language_id = static_cast<size_t>(language);
2019-01-09 15:44:20 +00:00
if (region_id >= names_refs[language_id].size())
return StringRef("", 0);
StringRef ref = names_refs[language_id][region_id];
while (ref.size == 0 && language_id != ROOT_LANGUAGE)
{
static const size_t FALLBACK[] = {0, 0, 0, 0, 0, 1};
language_id = FALLBACK[language_id];
ref = names_refs[language_id][region_id];
}
return ref;
}
static Language getLanguageEnum(const std::string & language)
{
if (language.size() == 2)
{
for (size_t i = 0; i < LANGUAGE_ALIASES_COUNT; ++i)
{
2019-11-11 20:54:57 +00:00
const auto & alias = language_aliases[i];
if (language[0] == alias.first[0] && language[1] == alias.first[1])
return alias.second;
}
}
throw Poco::Exception("Unsupported language for region name. Supported languages are: " + dumpSupportedLanguagesNames() + ".");
}
void reload();
private:
static std::string dumpSupportedLanguagesNames();
NamesSources names_sources = NamesSources(SUPPORTED_LANGUAGES_COUNT);
2017-04-17 11:56:55 +00:00
/// Bytes of names for each language, laid out in a row, separated by zeros
CharsForLanguageID chars = CharsForLanguageID(SUPPORTED_LANGUAGES_COUNT);
2017-04-17 11:56:55 +00:00
/// Mapping for each language from the region id into a pointer to the byte range of the name
StringRefsForLanguageID names_refs = StringRefsForLanguageID(SUPPORTED_LANGUAGES_COUNT);
};