Merge pull request #1559 from rlipovsky/geodata

[clickhouse-yt] separate geo dictionaries from data providers
This commit is contained in:
alexey-milovidov 2017-12-01 19:54:08 +03:00 committed by GitHub
commit 0ac2f5b479
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 723 additions and 210 deletions

View File

@ -49,6 +49,7 @@ add_headers_and_sources(dbms src/DataTypes)
add_headers_and_sources(dbms src/Databases)
add_headers_and_sources(dbms src/Dictionaries)
add_headers_and_sources(dbms src/Dictionaries/Embedded)
add_headers_and_sources(dbms src/Dictionaries/Embedded/GeodataProviders)
add_headers_and_sources(dbms src/Interpreters)
add_headers_and_sources(dbms src/Interpreters/ClusterProxy)
add_headers_and_sources(dbms src/Columns)

View File

@ -0,0 +1,36 @@
#pragma once
#include <Poco/File.h>
#include <Poco/Timestamp.h>
#include <string>
class FileUpdatesTracker
{
private:
std::string path;
Poco::Timestamp known_time;
public:
FileUpdatesTracker(const std::string & path_)
: path(path_)
, known_time(0)
{}
bool isModified() const
{
return getLastModificationTime() > known_time;
}
void fixCurrentVersion()
{
known_time = getLastModificationTime();
}
private:
Poco::Timestamp getLastModificationTime() const
{
return Poco::File(path).getLastModified();
}
};

View File

@ -0,0 +1,31 @@
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
#include <Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h>
#include <Dictionaries/Embedded/GeodataProviders/NamesProvider.h>
/// Creates RegionsHierarchies backed by a file-based data provider.
/// The path of the default hierarchy file is taken from the
/// `path_to_regions_hierarchy_file` configuration key; when the key is absent
/// a null pointer is returned.
std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies(
    const Poco::Util::AbstractConfiguration & config)
{
    static constexpr auto config_key = "path_to_regions_hierarchy_file";

    if (config.has(config_key))
    {
        const std::string hierarchy_file_path = config.getString(config_key);
        return std::make_unique<RegionsHierarchies>(
            std::make_unique<RegionsHierarchiesDataProvider>(hierarchy_file_path));
    }

    return nullptr;
}
/// Creates RegionsNames backed by a file-based data provider.
/// The directory with names files is taken from the
/// `path_to_regions_names_files` configuration key; when the key is absent
/// a null pointer is returned.
std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames(
    const Poco::Util::AbstractConfiguration & config)
{
    static constexpr auto config_key = "path_to_regions_names_files";

    if (config.has(config_key))
    {
        const std::string names_directory = config.getString(config_key);
        return std::make_unique<RegionsNames>(
            std::make_unique<RegionsNamesDataProvider>(names_directory));
    }

    return nullptr;
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Dictionaries/Embedded/IGeoDictionariesLoader.h>
// Default implementation of the geo dictionaries loader, used by the native server application.
// Both methods read a path from `config` and build the dictionary on top of local files.
class GeoDictionariesLoader : public IGeoDictionariesLoader
{
public:
// Builds regions hierarchies; returns a null pointer when the configuration has no
// `path_to_regions_hierarchy_file` key.
std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(
const Poco::Util::AbstractConfiguration & config) override;
// Builds regions names; returns a null pointer when the configuration has no
// `path_to_regions_names_files` key.
std::unique_ptr<RegionsNames> reloadRegionsNames(
const Poco::Util::AbstractConfiguration & config) override;
};

View File

@ -0,0 +1,19 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/Types.h>
// One record of a regions hierarchy, as produced by IRegionsHierarchyReader::readNext.
struct RegionEntry
{
// Identifier of the region.
RegionID id;
// Identifier of the parent region; the file reader stores 0 when the source has a negative parent id.
RegionID parent_id;
RegionType type;
// NOTE(review): the file-based reader visible in this change does not assign `depth` — confirm who fills it.
RegionDepth depth;
RegionPopulation population;
};
// One record of a regions names source: a region identifier and its name in some language.
struct RegionNameEntry
{
RegionID id;
std::string name;
};

View File

@ -0,0 +1,143 @@
#include <Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/File.h>
#include <Poco/DirectoryIterator.h>
// Reads regions hierarchy entries from a local text file.
class RegionsHierarchyFileReader : public IRegionsHierarchyReader
{
private:
// Input over the file whose path was given to the constructor.
DB::ReadBufferFromFile in;
public:
// Constructs the input buffer from `path`.
RegionsHierarchyFileReader(const std::string & path)
: in(path)
{}
// Reads rows until a usable one is found; returns false when the input is exhausted.
bool readNext(RegionEntry & entry) override;
};
/// Reads rows of the form `region_id \t parent_id \t type [\t population] \n`
/// until a usable one is found.
/// Returns true and fills id, parent_id, type and population of `entry` for such a row;
/// returns false once the input is exhausted. `entry.depth` is not written here.
bool RegionsHierarchyFileReader::readNext(RegionEntry & entry)
{
    for (;;)
    {
        if (in.eof())
            return false;

        /// Parsed as signed values: the internal geobase uses negative numbers
        /// to mean "this is garbage, ignore this row".
        Int32 raw_region_id = 0;
        Int32 raw_parent_id = 0;
        Int8 raw_type = 0;

        DB::readIntText(raw_region_id, in);
        DB::assertChar('\t', in);
        DB::readIntText(raw_parent_id, in);
        DB::assertChar('\t', in);
        DB::readIntText(raw_type, in);

        /// Old format: a newline follows the type.
        /// New format: tab, the region's population, then the newline.
        RegionPopulation population = 0;
        if (!in.eof() && *in.position() == '\t')
        {
            ++in.position();

            UInt64 wide_population = 0;
            DB::readIntText(wide_population, in);

            /// Saturate values that do not fit into RegionPopulation.
            if (wide_population > std::numeric_limits<RegionPopulation>::max())
                population = std::numeric_limits<RegionPopulation>::max();
            else
                population = wide_population;
        }
        DB::assertChar('\n', in);

        const bool is_garbage_row = raw_region_id <= 0 || raw_type < 0;
        if (is_garbage_row)
            continue;

        /// `entry` is only touched once the whole row has been parsed and accepted.
        entry.id = raw_region_id;
        entry.parent_id = raw_parent_id >= 0 ? raw_parent_id : 0;
        entry.type = static_cast<RegionType>(raw_type);
        entry.population = population;
        return true;
    }
}
// Delegates to the file updates tracker: true when the file's modification time
// is newer than the one remembered by the tracker.
bool RegionsHierarchyDataSource::isModified() const
{
return updates_tracker.isModified();
}
// Returns a reader over the hierarchy file.
IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader()
{
// Remember the current modification time before the reader is created,
// so that isModified() compares against this version from now on.
updates_tracker.fixCurrentVersion();
return std::make_unique<RegionsHierarchyFileReader>(path);
}
// Stores the path of the default hierarchy file and scans its directory
// for files with custom hierarchies.
RegionsHierarchiesDataProvider::RegionsHierarchiesDataProvider(const std::string & path)
: path(path)
{
discoverFilesWithCustomHierarchies();
}
void RegionsHierarchiesDataProvider::discoverFilesWithCustomHierarchies()
{
std::string basename = Poco::Path(path).getBaseName();
Poco::Path dir_path = Poco::Path(path).absolute().parent();
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator dir_it(dir_path); dir_it != dir_end; ++dir_it)
{
std::string candidate_basename = dir_it.path().getBaseName();
if ((0 == candidate_basename.compare(0, basename.size(), basename)) &&
(candidate_basename.size() > basename.size() + 1) &&
(candidate_basename[basename.size()] == '_'))
{
const std::string suffix = candidate_basename.substr(basename.size() + 1);
hierarchy_files.emplace(suffix, dir_it->path());
}
}
}
/// Returns the keys (suffixes) of all discovered custom hierarchies.
/// The order follows the iteration order of `hierarchy_files`.
std::vector<std::string> RegionsHierarchiesDataProvider::listCustomHierarchies() const
{
    /// reserve(), not the count constructor: the latter would create
    /// hierarchy_files.size() empty names in front of the real ones.
    std::vector<std::string> names;
    names.reserve(hierarchy_files.size());

    for (const auto & it : hierarchy_files)
        names.push_back(it.first);

    return names;
}
// Returns a new data source for the default hierarchy file (the path given to the constructor).
IRegionsHierarchyDataSourcePtr RegionsHierarchiesDataProvider::getDefaultHierarchySource() const
{
return std::make_shared<RegionsHierarchyDataSource>(path);
}
/// Returns a new data source for the custom hierarchy registered under `name`.
/// Throws Poco::Exception when no such hierarchy was discovered.
IRegionsHierarchyDataSourcePtr RegionsHierarchiesDataProvider::getHierarchySource(const std::string & name) const
{
    const auto found = hierarchy_files.find(name);
    if (found == hierarchy_files.end())
        throw Poco::Exception("Regions hierarchy `" + name + "` not found");

    return std::make_shared<RegionsHierarchyDataSource>(found->second);
}

View File

@ -0,0 +1,59 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h>
#include <Common/FileUpdatesTracker.h>
#include <unordered_map>
// Represents a local file with a regions hierarchy dump.
class RegionsHierarchyDataSource
: public IRegionsHierarchyDataSource
{
private:
// Path to the hierarchy file.
std::string path;
// Compares the file's modification time with the one remembered at the last createReader() call.
FileUpdatesTracker updates_tracker;
public:
RegionsHierarchyDataSource(const std::string & path_)
: path(path_)
, updates_tracker(path_)
{}
// True when the file's modification time is newer than the remembered one.
bool isModified() const override;
// Remembers the file's current modification time and returns a reader over the file.
IRegionsHierarchyReaderPtr createReader() override;
};
// Provides access to a directory with multiple data source files: one file per regions hierarchy.
class RegionsHierarchiesDataProvider
: public IRegionsHierarchiesDataProvider
{
private:
// Path to the file with the default regions hierarchy.
std::string path;
// Maps the hierarchy key (file name suffix) to the path of the corresponding file.
using HierarchyFiles = std::unordered_map<std::string, std::string>;
HierarchyFiles hierarchy_files;
public:
/** `path` must point to the file with the default hierarchy of regions.
* In addition, the directory of that file is searched for files whose base name
* (the name before the extension, if any) is the default base name followed by an arbitrary `_suffix`.
* Each such file is registered as a custom hierarchy under the key `suffix`.
*
* For example, if /opt/geo/regions_hierarchy.txt is specified,
* then the /opt/geo/regions_hierarchy_ua.txt file, if present, is accessible by the `ua` key.
*/
RegionsHierarchiesDataProvider(const std::string & path);
// Keys of all custom hierarchies found by the constructor.
std::vector<std::string> listCustomHierarchies() const override;
// Data source for the default hierarchy file.
IRegionsHierarchyDataSourcePtr getDefaultHierarchySource() const override;
// Data source for the custom hierarchy `name`; throws Poco::Exception if it is unknown.
IRegionsHierarchyDataSourcePtr getHierarchySource(const std::string & name) const override;
private:
// Fills `hierarchy_files` by scanning the directory of the default hierarchy file.
void discoverFilesWithCustomHierarchies();
};

View File

@ -0,0 +1,50 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/Entries.h>
#include <memory>
#include <string>
#include <vector>
// Iterates over all regions in a data source.
class IRegionsHierarchyReader
{
public:
// Reads the next region into `entry`.
// Returns true when an entry was produced and false when there are no more entries.
virtual bool readNext(RegionEntry & entry) = 0;
virtual ~IRegionsHierarchyReader() {}
};
using IRegionsHierarchyReaderPtr = std::unique_ptr<IRegionsHierarchyReader>;
// Data source for a single regions hierarchy.
class IRegionsHierarchyDataSource
{
public:
// True if the data has been modified since the last createReader() invocation.
virtual bool isModified() const = 0;
// Creates a reader that iterates over the regions of this source.
virtual IRegionsHierarchyReaderPtr createReader() = 0;
virtual ~IRegionsHierarchyDataSource() {}
};
using IRegionsHierarchyDataSourcePtr = std::shared_ptr<IRegionsHierarchyDataSource>;
// Provides data sources for the different regions hierarchies.
class IRegionsHierarchiesDataProvider
{
public:
    // Names (keys) of the non-default hierarchies available from this provider.
    virtual std::vector<std::string> listCustomHierarchies() const = 0;

    // Data source of the default hierarchy.
    virtual IRegionsHierarchyDataSourcePtr getDefaultHierarchySource() const = 0;

    // Data source of the custom hierarchy called `name`.
    virtual IRegionsHierarchyDataSourcePtr getHierarchySource(const std::string & name) const = 0;

    virtual ~IRegionsHierarchiesDataProvider() {}
};
using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>;

View File

@ -0,0 +1,54 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/Entries.h>
#include <memory>
// Iterates over all name entries in a data source.
class ILanguageRegionsNamesReader
{
public:
// Reads the next (region id, name) pair into `entry`.
// Returns true when an entry was produced and false when there are no more entries.
virtual bool readNext(RegionNameEntry & entry) = 0;
virtual ~ILanguageRegionsNamesReader() {}
};
using ILanguageRegionsNamesReaderPtr = std::unique_ptr<ILanguageRegionsNamesReader>;
// Regions names data source for one language.
class ILanguageRegionsNamesDataSource
{
public:
// True if the data has been modified since the last createReader() invocation.
virtual bool isModified() const = 0;
// Upper bound on the total length of all names.
virtual size_t estimateTotalSize() const = 0;
// Creates a reader that iterates over the name entries of this source.
virtual ILanguageRegionsNamesReaderPtr createReader() = 0;
// Language the names of this source are written in.
virtual std::string getLanguage() const = 0;
// Human-readable identification of the source (the file-based implementation returns the file path).
virtual std::string getSourceName() const = 0;
virtual ~ILanguageRegionsNamesDataSource() {}
};
using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr<ILanguageRegionsNamesDataSource>;
// Provides regions names data sources for different languages.
class IRegionsNamesDataProvider
{
public:
    // Data source with the names of regions in the given `language`.
    virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(
        const std::string & language) const = 0;

    virtual ~IRegionsNamesDataProvider() {}
};
using IRegionsNamesDataProviderPtr = std::unique_ptr<IRegionsNamesDataProvider>;

View File

@ -0,0 +1,86 @@
#include <Dictionaries/Embedded/GeodataProviders/NamesProvider.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
// Reads (region id, name) entries from a local text file.
class LanguageRegionsNamesFileReader : public ILanguageRegionsNamesReader
{
private:
// Input over the file whose path was given to the constructor.
DB::ReadBufferFromFile in;
public:
// Constructs the input buffer from `path`.
LanguageRegionsNamesFileReader(const std::string & path)
: in(path)
{}
// Reads rows until a usable one is found; returns false when the input is exhausted.
bool readNext(RegionNameEntry & entry) override;
};
/// Reads rows of the form `region_id \t name \n` until one with a positive id is found.
/// Returns true and fills `entry` for such a row; returns false once the input is exhausted.
bool LanguageRegionsNamesFileReader::readNext(RegionNameEntry & entry)
{
    for (;;)
    {
        if (in.eof())
            return false;

        Int32 raw_region_id;
        std::string name;

        DB::readIntText(raw_region_id, in);
        DB::assertChar('\t', in);
        DB::readString(name, in);
        DB::assertChar('\n', in);

        /// Rows with a non-positive id are skipped.
        if (raw_region_id > 0)
        {
            entry.id = raw_region_id;
            entry.name = name;
            return true;
        }
    }
}
// Delegates to the file updates tracker: true when the file's modification time
// is newer than the one remembered by the tracker.
bool LanguageRegionsNamesDataSource::isModified() const
{
return updates_tracker.isModified();
}
// Uses the size of the names file as the estimate of the total length of all names.
size_t LanguageRegionsNamesDataSource::estimateTotalSize() const
{
return Poco::File(path).getSize();
}
// Returns a reader over the names file.
ILanguageRegionsNamesReaderPtr LanguageRegionsNamesDataSource::createReader()
{
// Remember the current modification time before the reader is created,
// so that isModified() compares against this version from now on.
updates_tracker.fixCurrentVersion();
return std::make_unique<LanguageRegionsNamesFileReader>(path);
}
// Returns the language given to the constructor.
std::string LanguageRegionsNamesDataSource::getLanguage() const
{
return language;
}
// The source is identified by the path of its file.
std::string LanguageRegionsNamesDataSource::getSourceName() const
{
return path;
}
// Stores the directory that contains the per-language names files.
RegionsNamesDataProvider::RegionsNamesDataProvider(const std::string & directory_)
: directory(directory_)
{}
/// Creates a data source for the names file of the given `language`
/// located in the provider's directory.
ILanguageRegionsNamesDataSourcePtr RegionsNamesDataProvider::getLanguageRegionsNamesSource(
    const std::string & language) const
{
    return std::make_unique<LanguageRegionsNamesDataSource>(getDataFilePath(language), language);
}
// Builds the path of the names file for `language`: <directory>/regions_names_<language>.txt
std::string RegionsNamesDataProvider::getDataFilePath(const std::string & language) const
{
return directory + "/regions_names_" + language + ".txt";
}

View File

@ -0,0 +1,51 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/INamesProvider.h>
#include <Common/FileUpdatesTracker.h>
// Represents a local file with a list of region ids and names for one language.
class LanguageRegionsNamesDataSource : public ILanguageRegionsNamesDataSource
{
private:
// Path to the names file.
std::string path;
// Compares the file's modification time with the one remembered at the last createReader() call.
FileUpdatesTracker updates_tracker;
// Language of the names stored in the file.
std::string language;
public:
LanguageRegionsNamesDataSource(const std::string & path_, const std::string & language_)
: path(path_)
, updates_tracker(path_)
, language(language_)
{}
// True when the file's modification time is newer than the remembered one.
bool isModified() const override;
// Size of the file, used as an upper bound on the total length of all names.
size_t estimateTotalSize() const override;
// Remembers the file's current modification time and returns a reader over the file.
ILanguageRegionsNamesReaderPtr createReader() override;
std::string getLanguage() const override;
// Returns the path of the file.
std::string getSourceName() const override;
};
using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr<ILanguageRegionsNamesDataSource>;
// Provides access to a directory with multiple data source files: one file per language.
class RegionsNamesDataProvider : public IRegionsNamesDataProvider
{
private:
// Directory that contains the regions_names_<language>.txt files.
std::string directory;
public:
RegionsNamesDataProvider(const std::string & directory_);
// Creates a data source for the names file of the given language.
ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(
const std::string& language) const override;
private:
// Returns <directory>/regions_names_<language>.txt
std::string getDataFilePath(const std::string & language) const;
};

View File

@ -0,0 +1,18 @@
#pragma once
#include <common/Types.h>
// Basic types shared by the geo dictionaries and their data providers.
// Identifier of a region.
using RegionID = UInt32;
// Depth of a region in the hierarchy tree.
using RegionDepth = UInt8;
// Population of a region; the file reader saturates larger values to the maximum of this type.
using RegionPopulation = UInt32;
// Kind of a region. The numeric values are the ones stored in the `type` column of the hierarchy data.
enum class RegionType : Int8
{
// Negative type; the file-based hierarchy reader skips rows with a negative type.
Hidden = -1,
Continent = 1,
Country = 3,
District = 4,
Area = 5,
City = 6,
};

View File

@ -0,0 +1,23 @@
#pragma once
#include <Dictionaries/Embedded/RegionsHierarchies.h>
#include <Dictionaries/Embedded/RegionsNames.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <memory>
// Provides up-to-date versions of the geo dictionaries (regions hierarchies, regions names).
// Binds the data structures (RegionsHierarchies, RegionsNames) to their data providers.
class IGeoDictionariesLoader
{
public:
// Builds the regions hierarchies dictionary from `config`.
// The default implementation returns a null pointer when the dictionary is not configured.
virtual std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(
const Poco::Util::AbstractConfiguration & config) = 0;
// Builds the regions names dictionary from `config`.
// The default implementation returns a null pointer when the dictionary is not configured.
virtual std::unique_ptr<RegionsNames> reloadRegionsNames(
const Poco::Util::AbstractConfiguration & config) = 0;
virtual ~IGeoDictionariesLoader() {}
};

View File

@ -5,53 +5,18 @@
#include <Poco/DirectoryIterator.h>
static constexpr auto config_key = "path_to_regions_hierarchy_file";
void RegionsHierarchies::reload(const Poco::Util::AbstractConfiguration & config)
{
reload(config.getString(config_key));
}
void RegionsHierarchies::reload(const std::string & path)
RegionsHierarchies::RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider)
{
Logger * log = &Logger::get("RegionsHierarchies");
LOG_DEBUG(log, "Adding default regions hierarchy from " << path);
LOG_DEBUG(log, "Adding default regions hierarchy");
data.emplace("", data_provider->getDefaultHierarchySource());
data.emplace(std::piecewise_construct,
std::forward_as_tuple(""),
std::forward_as_tuple(path));
std::string basename = Poco::Path(path).getBaseName();
Poco::Path dir_path = Poco::Path(path).absolute().parent();
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator dir_it(dir_path); dir_it != dir_end; ++dir_it)
for (const auto & name : data_provider->listCustomHierarchies())
{
std::string other_basename = dir_it.path().getBaseName();
if (0 == other_basename.compare(0, basename.size(), basename) && other_basename.size() > basename.size() + 1)
{
if (other_basename[basename.size()] != '_')
continue;
std::string suffix = other_basename.substr(basename.size() + 1);
LOG_DEBUG(log, "Adding regions hierarchy from " << dir_it->path() << ", key: " << suffix);
data.emplace(std::piecewise_construct,
std::forward_as_tuple(suffix),
std::forward_as_tuple(dir_it->path()));
}
LOG_DEBUG(log, "Adding regions hierarchy for " << name);
data.emplace(name, data_provider->getHierarchySource(name));
}
reload();
}
bool RegionsHierarchies::isConfigured(const Poco::Util::AbstractConfiguration & config)
{
return config.has(config_key);
}

View File

@ -1,14 +1,14 @@
#pragma once
#include <Dictionaries/Embedded/RegionsHierarchy.h>
#include <Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Exception.h>
#include <unordered_map>
/** Contains several hierarchies of regions, loaded from several different files.
/** Contains several hierarchies of regions.
* Used to support several different perspectives on the ownership of regions by countries.
* First of all, for the Crimea (Russian and Ukrainian points of view).
*/
@ -19,20 +19,7 @@ private:
Container data;
public:
/** path_to_regions_hierarchy_file in configuration file
* must point to the file with the hierarchy of regions "by default". It will be accessible by an empty key.
* In addition, a number of files are searched for, the name of which (before the extension, if any) is added arbitrary _suffix.
* Such files are loaded, and the hierarchy of regions is put on the `suffix` key.
*
* For example, if /opt/geo/regions_hierarchy.txt is specified,
* then the /opt/geo/regions_hierarchy_ua.txt file will also be loaded, if any, it will be accessible by the `ua` key.
*/
void reload(const Poco::Util::AbstractConfiguration & config);
void reload(const std::string & directory);
/// Has corresponding section in configuration file.
static bool isConfigured(const Poco::Util::AbstractConfiguration & config);
RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider);
/** Reloads, if necessary, all hierarchies of regions.
*/

View File

@ -1,21 +1,18 @@
#include <Dictionaries/Embedded/RegionsHierarchy.h>
#include <Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/File.h>
#include <common/logger_useful.h>
#include <ext/singleton.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
RegionsHierarchy::RegionsHierarchy(const std::string & path_)
RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_)
: data_source(data_source_)
{
path = path_;
}
@ -23,10 +20,8 @@ void RegionsHierarchy::reload()
{
Logger * log = &Logger::get("RegionsHierarchy");
time_t new_modification_time = Poco::File(path).getLastModified().epochTime();
if (new_modification_time <= file_modification_time)
if (!data_source->isModified())
return;
file_modification_time = new_modification_time;
LOG_DEBUG(log, "Reloading regions hierarchy");
@ -44,58 +39,22 @@ void RegionsHierarchy::reload()
RegionDepths new_depths(initial_size);
RegionTypes types(initial_size);
DB::ReadBufferFromFile in(path);
RegionID max_region_id = 0;
while (!in.eof())
auto regions_reader = data_source->createReader();
RegionEntry region_entry;
/// readNext returns true while it produces an entry and false once the source is exhausted,
/// so the loop must run while it succeeds (the negated condition never processed any entry).
while (regions_reader->readNext(region_entry))
{
/** Our internal geobase has negative numbers,
* that means "this is garbage, ignore this row".
*/
Int32 read_region_id = 0;
Int32 read_parent_id = 0;
Int8 read_type = 0;
DB::readIntText(read_region_id, in);
DB::assertChar('\t', in);
DB::readIntText(read_parent_id, in);
DB::assertChar('\t', in);
DB::readIntText(read_type, in);
/** Then there can be a newline (old version)
* or tab, the region's population, line feed (new version).
*/
RegionPopulation population = 0;
if (!in.eof() && *in.position() == '\t')
if (region_entry.id > max_region_id)
{
++in.position();
UInt64 population_big = 0;
DB::readIntText(population_big, in);
population = population_big > std::numeric_limits<RegionPopulation>::max()
? std::numeric_limits<RegionPopulation>::max()
: population_big;
}
DB::assertChar('\n', in);
if (region_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size));
if (read_region_id <= 0)
continue;
max_region_id = region_entry.id;
RegionID region_id = read_region_id;
RegionID parent_id = 0;
if (read_parent_id >= 0)
parent_id = read_parent_id;
RegionType type = static_cast<RegionType>(read_type);
if (region_id > max_region_id)
{
if (region_id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(region_id) + ", should be not more than " + DB::toString(max_size));
max_region_id = region_id;
while (region_id >= new_parents.size())
while (region_entry.id >= new_parents.size())
{
new_parents.resize(new_parents.size() * 2);
new_populations.resize(new_parents.size());
@ -103,9 +62,9 @@ void RegionsHierarchy::reload()
}
}
new_parents[region_id] = parent_id;
new_populations[region_id] = population;
types[region_id] = type;
new_parents[region_entry.id] = region_entry.parent_id;
new_populations[region_entry.id] = region_entry.population;
types[region_entry.id] = region_entry.type;
}
new_parents .resize(max_region_id + 1);

View File

@ -1,10 +1,14 @@
#pragma once
#include <Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h>
#include <vector>
#include <boost/noncopyable.hpp>
#include <common/Types.h>
class IRegionsHierarchyDataProvider;
/** A class that lets you find out whether the region with one RegionID belongs to the region with another RegionID.
* Information about the hierarchy of regions is loaded from a data source.
* Can update the data on request.
@ -12,22 +16,6 @@
class RegionsHierarchy : private boost::noncopyable
{
private:
time_t file_modification_time = 0;
using RegionID = UInt32;
using RegionDepth = UInt8;
using RegionPopulation = UInt32;
enum class RegionType : Int8
{
Hidden = -1,
Continent = 1,
Country = 3,
District = 4,
Area = 5,
City = 6,
};
/// Relationship parent; 0, if there are no parents, the usual lookup table.
using RegionParents = std::vector<RegionID>;
/// type of region
@ -58,11 +46,10 @@ private:
/// region - depth in the tree
RegionDepths depths;
/// path to file with data
std::string path;
IRegionsHierarchyDataSourcePtr data_source;
public:
RegionsHierarchy(const std::string & path_);
RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_);
/// Reloads, if necessary, the hierarchy of regions. Not threadsafe.
void reload();

View File

@ -1,18 +1,22 @@
#include <Dictionaries/Embedded/RegionsNames.h>
#include <Dictionaries/Embedded/GeodataProviders/INamesProvider.h>
#include <Poco/File.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <common/logger_useful.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromFile.h>
static constexpr auto config_key = "path_to_regions_names_files";
/// Asks the data provider for one names source per supported language
/// and stores it in the slot of that language.
RegionsNames::RegionsNames(IRegionsNamesDataProviderPtr data_provider)
{
    size_t language_id = 0;
    while (language_id < SUPPORTED_LANGUAGES_COUNT)
    {
        names_sources[language_id] = data_provider->getLanguageRegionsNamesSource(
            getSupportedLanguages()[language_id]);
        ++language_id;
    }
}
std::string RegionsNames::dumpSupportedLanguagesNames()
{
@ -28,12 +32,7 @@ std::string RegionsNames::dumpSupportedLanguagesNames()
return res;
}
void RegionsNames::reload(const Poco::Util::AbstractConfiguration & config)
{
reload(config.getString(config_key));
}
void RegionsNames::reload(const std::string & directory)
void RegionsNames::reload()
{
Logger * log = &Logger::get("RegionsNames");
LOG_DEBUG(log, "Reloading regions names");
@ -42,17 +41,15 @@ void RegionsNames::reload(const std::string & directory)
for (size_t language_id = 0; language_id < SUPPORTED_LANGUAGES_COUNT; ++language_id)
{
const std::string & language = getSupportedLanguages()[language_id];
std::string path = directory + "/regions_names_" + language + ".txt";
Poco::File file(path);
time_t new_modification_time = file.getLastModified().epochTime();
if (new_modification_time <= file_modification_times[language_id])
auto names_source = names_sources[language_id];
if (!names_source->isModified())
continue;
file_modification_times[language_id] = new_modification_time;
LOG_DEBUG(log, "Reloading regions names for language: " << language);
DB::ReadBufferFromFile in(path);
auto names_reader = names_source->createReader();
const size_t initial_size = 10000;
const size_t max_size = 15000000;
@ -61,43 +58,28 @@ void RegionsNames::reload(const std::string & directory)
StringRefs new_names_refs(initial_size, StringRef("", 0));
/// Allocate a continuous slice of memory, which is enough to store all names.
new_chars.reserve(Poco::File(path).getSize());
new_chars.reserve(names_source->estimateTotalSize());
while (!in.eof())
RegionNameEntry name_entry;
while (names_reader->readNext(name_entry))
{
Int32 read_region_id;
std::string region_name;
DB::readIntText(read_region_id, in);
DB::assertChar('\t', in);
DB::readString(region_name, in);
DB::assertChar('\n', in);
if (read_region_id <= 0)
continue;
RegionID region_id = read_region_id;
size_t old_size = new_chars.size();
if (new_chars.capacity() < old_size + region_name.length() + 1)
throw Poco::Exception("Logical error. Maybe size of file " + path + " is wrong.");
new_chars.resize(old_size + name_entry.name.length() + 1);
memcpy(&new_chars[old_size], name_entry.name.c_str(), name_entry.name.length() + 1);
new_chars.resize(old_size + region_name.length() + 1);
memcpy(&new_chars[old_size], region_name.c_str(), region_name.length() + 1);
if (region_id > max_region_id)
if (name_entry.id > max_region_id)
{
max_region_id = region_id;
max_region_id = name_entry.id;
if (region_id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(region_id) + ", should be not more than " + DB::toString(max_size));
if (name_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size));
}
while (region_id >= new_names_refs.size())
while (name_entry.id >= new_names_refs.size())
new_names_refs.resize(new_names_refs.size() * 2, StringRef("", 0));
new_names_refs[region_id] = StringRef(&new_chars[old_size], region_name.length());
new_names_refs[name_entry.id] = StringRef(&new_chars[old_size], name_entry.name.length());
}
chars[language_id].swap(new_chars);
@ -107,9 +89,3 @@ void RegionsNames::reload(const std::string & directory)
for (size_t language_id = 0; language_id < SUPPORTED_LANGUAGES_COUNT; ++language_id)
names_refs[language_id].resize(max_region_id + 1, StringRef("", 0));
}
bool RegionsNames::isConfigured(const Poco::Util::AbstractConfiguration & config)
{
return config.has(config_key);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Poco/Util/AbstractConfiguration.h>
#include <Dictionaries/Embedded/GeodataProviders/INamesProvider.h>
#include <Poco/Exception.h>
#include <common/Types.h>
@ -59,23 +60,15 @@ private:
return language_aliases;
}
using RegionID = UInt32;
using NamesSources = std::vector<std::shared_ptr<ILanguageRegionsNamesDataSource>>;
using Chars = std::vector<char>;
using CharsForLanguageID = std::vector<Chars>;
using ModificationTimes = std::vector<time_t>;
using StringRefs = std::vector<StringRef>; /// Lookup table RegionID -> StringRef
using StringRefsForLanguageID = std::vector<StringRefs>;
public:
/** Reload the names of regions if necessary.
*/
void reload(const Poco::Util::AbstractConfiguration & config);
void reload(const std::string & directory);
/// Has corresponding section in configuration file.
static bool isConfigured(const Poco::Util::AbstractConfiguration & config);
RegionsNames(IRegionsNamesDataProviderPtr data_provider);
StringRef getRegionName(RegionID region_id, Language language = Language::RU) const
{
@ -110,10 +103,12 @@ public:
throw Poco::Exception("Unsupported language for region name. Supported languages are: " + dumpSupportedLanguagesNames() + ".");
}
void reload();
private:
static std::string dumpSupportedLanguagesNames();
ModificationTimes file_modification_times = ModificationTimes(SUPPORTED_LANGUAGES_COUNT);
NamesSources names_sources = NamesSources(SUPPORTED_LANGUAGES_COUNT);
/// Bytes of names for each language, laid out in a row, separated by zeros
CharsForLanguageID chars = CharsForLanguageID(SUPPORTED_LANGUAGES_COUNT);

View File

@ -1099,7 +1099,14 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_
std::lock_guard<std::mutex> lock(shared->embedded_dictionaries_mutex);
if (!shared->embedded_dictionaries)
shared->embedded_dictionaries = std::make_shared<EmbeddedDictionaries>(*this->global_context, throw_on_error);
{
auto geo_dictionaries_loader = runtime_components_factory->createGeoDictionariesLoader();
shared->embedded_dictionaries = std::make_shared<EmbeddedDictionaries>(
std::move(geo_dictionaries_loader),
*this->global_context,
throw_on_error);
}
return *shared->embedded_dictionaries;
}

View File

@ -1,6 +1,7 @@
#include <Dictionaries/Embedded/RegionsHierarchies.h>
#include <Dictionaries/Embedded/RegionsNames.h>
#include <Dictionaries/Embedded/TechDataHierarchy.h>
#include <Dictionaries/Embedded/IGeoDictionariesLoader.h>
#include <Interpreters/Context.h>
#include <Interpreters/EmbeddedDictionaries.h>
@ -32,20 +33,23 @@ void EmbeddedDictionaries::handleException(const bool throw_on_error) const
template <typename Dictionary>
bool EmbeddedDictionaries::reloadDictionary(MultiVersion<Dictionary> & dictionary, const bool throw_on_error, const bool force_reload)
bool EmbeddedDictionaries::reloadDictionary(
MultiVersion<Dictionary> & dictionary,
DictionaryReloader<Dictionary> reload_dictionary,
const bool throw_on_error,
const bool force_reload)
{
const auto & config = context.getConfigRef();
bool defined_in_config = Dictionary::isConfigured(config);
bool not_initialized = dictionary.get() == nullptr;
if (defined_in_config && (force_reload || !is_fast_start_stage || not_initialized))
if (force_reload || !is_fast_start_stage || not_initialized)
{
try
{
auto new_dictionary = std::make_unique<Dictionary>();
new_dictionary->reload(config);
dictionary.set(new_dictionary.release());
auto new_dictionary = reload_dictionary(config);
if (new_dictionary)
dictionary.set(new_dictionary.release());
}
catch (...)
{
@ -73,14 +77,35 @@ bool EmbeddedDictionaries::reloadImpl(const bool throw_on_error, const bool forc
bool was_exception = false;
#if USE_MYSQL
if (!reloadDictionary<TechDataHierarchy>(tech_data_hierarchy, throw_on_error, force_reload))
DictionaryReloader<TechDataHierarchy> reload_tech_data = [=] (const Poco::Util::AbstractConfiguration & config)
-> std::unique_ptr<TechDataHierarchy>
{
if (!TechDataHierarchy::isConfigured(config))
return {};
auto dictionary = std::make_unique<TechDataHierarchy>();
dictionary->reload(config);
return dictionary;
};
if (!reloadDictionary<TechDataHierarchy>(tech_data_hierarchy, reload_tech_data, throw_on_error, force_reload))
was_exception = true;
#endif
if (!reloadDictionary<RegionsHierarchies>(regions_hierarchies, throw_on_error, force_reload))
DictionaryReloader<RegionsHierarchies> reload_regions_hierarchies = [=] (const Poco::Util::AbstractConfiguration & config)
{
return geo_dictionaries_loader->reloadRegionsHierarchies(config);
};
if (!reloadDictionary<RegionsHierarchies>(regions_hierarchies, std::move(reload_regions_hierarchies), throw_on_error, force_reload))
was_exception = true;
if (!reloadDictionary<RegionsNames>(regions_names, throw_on_error, force_reload))
DictionaryReloader<RegionsNames> reload_regions_names = [=] (const Poco::Util::AbstractConfiguration & config)
{
return geo_dictionaries_loader->reloadRegionsNames(config);
};
if (!reloadDictionary<RegionsNames>(regions_names, std::move(reload_regions_names), throw_on_error, force_reload))
was_exception = true;
if (!was_exception)
@ -115,9 +140,13 @@ void EmbeddedDictionaries::reloadPeriodically()
}
EmbeddedDictionaries::EmbeddedDictionaries(Context & context_, const bool throw_on_error)
EmbeddedDictionaries::EmbeddedDictionaries(
std::unique_ptr<IGeoDictionariesLoader> geo_dictionaries_loader_,
Context & context_,
const bool throw_on_error)
: log(&Logger::get("EmbeddedDictionaries"))
, context(context_)
, geo_dictionaries_loader(std::move(geo_dictionaries_loader_))
, reload_period(context_.getConfigRef().getInt("builtin_dictionaries_reload_interval", 3600))
{
reloadImpl(throw_on_error);

View File

@ -10,6 +10,7 @@ namespace Poco { class Logger; }
class RegionsHierarchies;
class TechDataHierarchy;
class RegionsNames;
class IGeoDictionariesLoader;
namespace DB
@ -30,6 +31,8 @@ private:
MultiVersion<TechDataHierarchy> tech_data_hierarchy;
MultiVersion<RegionsNames> regions_names;
std::unique_ptr<IGeoDictionariesLoader> geo_dictionaries_loader;
/// Dictionaries' update period (in seconds).
int reload_period;
int cur_reload_period = 1;
@ -53,11 +56,21 @@ private:
bool reloadImpl(const bool throw_on_error, const bool force_reload = false);
template <typename Dictionary>
bool reloadDictionary(MultiVersion<Dictionary> & dictionary, const bool throw_on_error, const bool force_reload);
using DictionaryReloader = std::function<std::unique_ptr<Dictionary>(const Poco::Util::AbstractConfiguration & config)>;
template <typename Dictionary>
bool reloadDictionary(
MultiVersion<Dictionary> & dictionary,
DictionaryReloader<Dictionary> reload_dictionary,
const bool throw_on_error,
const bool force_reload);
public:
/// Every reload_period seconds dictionaries are updated inside a separate thread.
EmbeddedDictionaries(Context & context, const bool throw_on_error);
EmbeddedDictionaries(
std::unique_ptr<IGeoDictionariesLoader> geo_dictionaries_loader,
Context & context,
const bool throw_on_error);
/// Forcibly reloads all dictionaries.
void reload();

View File

@ -1,5 +1,6 @@
#pragma once
#include <Dictionaries/Embedded/IGeoDictionariesLoader.h>
#include <Interpreters/IExternalLoaderConfigRepository.h>
#include <Interpreters/ISecurityManager.h>
@ -17,6 +18,8 @@ class IRuntimeComponentsFactory
public:
virtual std::unique_ptr<ISecurityManager> createSecurityManager() = 0;
virtual std::unique_ptr<IGeoDictionariesLoader> createGeoDictionariesLoader() = 0;
// Repositories with configurations of user-defined objects (dictionaries, models)
virtual std::unique_ptr<IExternalLoaderConfigRepository> createExternalDictionariesConfigRepository() = 0;

View File

@ -1,5 +1,6 @@
#pragma once
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
#include <Interpreters/IRuntimeComponentsFactory.h>
#include <Interpreters/ExternalLoaderConfigRepository.h>
#include <Interpreters/SecurityManager.h>
@ -18,6 +19,11 @@ public:
return std::make_unique<SecurityManager>();
}
// Creates the default geo dictionaries loader (GeoDictionariesLoader).
std::unique_ptr<IGeoDictionariesLoader> createGeoDictionariesLoader() override
{
return std::make_unique<GeoDictionariesLoader>();
}
std::unique_ptr<IExternalLoaderConfigRepository> createExternalDictionariesConfigRepository() override
{
return std::make_unique<ExternalLoaderConfigRepository>();