From c18934f45434fc779abbde8e72ea3ffbabd04548 Mon Sep 17 00:00:00 2001 From: Roman Lipovsky Date: Mon, 4 Dec 2017 15:30:21 +0300 Subject: [PATCH] separate format readers from files, move readers to separate headers --- .../GeodataProviders/HierarchiesProvider.cpp | 74 +------------------ .../HierarchyFormatReader.cpp | 58 +++++++++++++++ .../GeodataProviders/HierarchyFormatReader.h | 20 +++++ .../GeodataProviders/NamesFormatReader.cpp | 27 +++++++ .../GeodataProviders/NamesFormatReader.h | 19 +++++ .../GeodataProviders/NamesProvider.cpp | 43 +---------- debian/changelog | 2 +- 7 files changed, 131 insertions(+), 112 deletions(-) create mode 100644 dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp create mode 100644 dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h create mode 100644 dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp create mode 100644 dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp index 2e1f70e7f2c..fdd69d26198 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp @@ -1,82 +1,13 @@ #include +#include #include -#include -#include #include #include -#include #include -class RegionsHierarchyFileReader : public IRegionsHierarchyReader -{ -private: - DB::ReadBufferFromFile in; - -public: - RegionsHierarchyFileReader(const std::string & path) - : in(path) - {} - - bool readNext(RegionEntry & entry) override; -}; - -bool RegionsHierarchyFileReader::readNext(RegionEntry & entry) -{ - while (!in.eof()) - { - /** Our internal geobase has negative numbers, - * that means "this is garbage, ignore this row". - */ - Int32 read_region_id = 0; - Int32 read_parent_id = 0; - Int8 read_type = 0; - - DB::readIntText(read_region_id, in); - DB::assertChar('\t', in); - DB::readIntText(read_parent_id, in); - DB::assertChar('\t', in); - DB::readIntText(read_type, in); - - /** Then there can be a newline (old version) - * or tab, the region's population, line feed (new version). - */ - RegionPopulation population = 0; - if (!in.eof() && *in.position() == '\t') - { - ++in.position(); - UInt64 population_big = 0; - DB::readIntText(population_big, in); - population = population_big > std::numeric_limits::max() - ? std::numeric_limits::max() - : population_big; - } - DB::assertChar('\n', in); - - if (read_region_id <= 0 || read_type < 0) - continue; - - RegionID region_id = read_region_id; - RegionID parent_id = 0; - - if (read_parent_id >= 0) - parent_id = read_parent_id; - - RegionType type = static_cast(read_type); - - entry.id = region_id; - entry.parent_id = parent_id; - entry.type = type; - entry.population = population; - return true; - } - - return false; -} - - bool RegionsHierarchyDataSource::isModified() const { return updates_tracker.isModified(); @@ -85,7 +16,8 @@ bool RegionsHierarchyDataSource::isModified() const IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader() { updates_tracker.fixCurrentVersion(); - return std::make_unique(path); + auto file_reader = std::make_shared(path); + return std::make_unique(std::move(file_reader)); } diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp new file mode 100644 index 00000000000..5716879ce92 --- /dev/null +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp @@ -0,0 +1,58 @@ +#include + +#include +#include + + +bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry) +{ + while (!input->eof()) + { + /** Our internal geobase has negative numbers, + * that means "this is garbage, ignore this row". + */ + Int32 read_region_id = 0; + Int32 read_parent_id = 0; + Int8 read_type = 0; + + DB::readIntText(read_region_id, *input); + DB::assertChar('\t', *input); + DB::readIntText(read_parent_id, *input); + DB::assertChar('\t', *input); + DB::readIntText(read_type, *input); + + /** Then there can be a newline (old version) + * or tab, the region's population, line feed (new version). + */ + RegionPopulation population = 0; + if (!input->eof() && *input->position() == '\t') + { + ++input->position(); + UInt64 population_big = 0; + DB::readIntText(population_big, *input); + population = population_big > std::numeric_limits::max() + ? std::numeric_limits::max() + : population_big; + } + DB::assertChar('\n', *input); + + if (read_region_id <= 0 || read_type < 0) + continue; + + RegionID region_id = read_region_id; + RegionID parent_id = 0; + + if (read_parent_id >= 0) + parent_id = read_parent_id; + + RegionType type = static_cast(read_type); + + entry.id = region_id; + entry.parent_id = parent_id; + entry.type = type; + entry.population = population; + return true; + } + + return false; +} diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h new file mode 100644 index 00000000000..f66b32086fc --- /dev/null +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + + +class RegionsHierarchyFormatReader : public IRegionsHierarchyReader +{ +private: + DB::ReadBufferPtr input; + +public: + RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) + : input(std::move(input_)) + {} + + bool readNext(RegionEntry & entry) override; +}; + diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp new file mode 100644 index 00000000000..df198ebde54 --- /dev/null +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp @@ -0,0 +1,27 @@ +#include + +#include + + +bool LanguageRegionsNamesFormatReader::readNext(RegionNameEntry & entry) +{ + while (!input->eof()) + { + Int32 read_region_id; + std::string region_name; + + DB::readIntText(read_region_id, *input); + DB::assertChar('\t', *input); + DB::readString(region_name, *input); + DB::assertChar('\n', *input); + + if (read_region_id <= 0) + continue; + + entry.id = read_region_id; + entry.name = region_name; + return true; + } + + return false; +} diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h new file mode 100644 index 00000000000..4627f722e38 --- /dev/null +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +#include + + +class LanguageRegionsNamesFormatReader : public ILanguageRegionsNamesReader +{ +private: + DB::ReadBufferPtr input; + +public: + LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) + : input(std::move(input_)) + {} + + bool readNext(RegionNameEntry & entry) override; +}; diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp index 1effedc01d9..ef44c0a4e03 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp @@ -1,45 +1,7 @@ #include +#include #include -#include -#include - - -class LanguageRegionsNamesFileReader : public ILanguageRegionsNamesReader -{ -private: - DB::ReadBufferFromFile in; - -public: - LanguageRegionsNamesFileReader(const std::string & path) - : in(path) - {} - - bool readNext(RegionNameEntry & entry) override; -}; - -bool LanguageRegionsNamesFileReader::readNext(RegionNameEntry & entry) -{ - while (!in.eof()) - { - Int32 read_region_id; - std::string region_name; - - DB::readIntText(read_region_id, in); - DB::assertChar('\t', in); - DB::readString(region_name, in); - DB::assertChar('\n', in); - - if (read_region_id <= 0) - continue; - - entry.id = read_region_id; - entry.name = region_name; - return true; - } - - return false; -} bool LanguageRegionsNamesDataSource::isModified() const @@ -55,7 +17,8 @@ size_t LanguageRegionsNamesDataSource::estimateTotalSize() const ILanguageRegionsNamesReaderPtr LanguageRegionsNamesDataSource::createReader() { updates_tracker.fixCurrentVersion(); - return std::make_unique(path); + auto file_reader = std::make_shared(path); + return std::make_unique(std::move(file_reader)); } std::string LanguageRegionsNamesDataSource::getLanguage() const diff --git a/debian/changelog b/debian/changelog index 5223e81a8b8..c92f31dbe6e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,4 +2,4 @@ clickhouse (1.1.54319) unstable; urgency=low * Modified source code - -- Fri, 01 Dec 2017 09:36:27 +0300 + -- rlipovsky Mon, 04 Dec 2017 13:44:27 +0300