mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Merge pull request #1587 from rlipovsky/geodata_readers
[clickhouse-yt] separate geoexport format readers from data files
This commit is contained in:
commit
20a8812eba
@ -1,82 +1,13 @@
|
||||
#include <Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h>
|
||||
#include <Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h>
|
||||
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Poco/Exception.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/DirectoryIterator.h>
|
||||
|
||||
|
||||
class RegionsHierarchyFileReader : public IRegionsHierarchyReader
|
||||
{
|
||||
private:
|
||||
DB::ReadBufferFromFile in;
|
||||
|
||||
public:
|
||||
RegionsHierarchyFileReader(const std::string & path)
|
||||
: in(path)
|
||||
{}
|
||||
|
||||
bool readNext(RegionEntry & entry) override;
|
||||
};
|
||||
|
||||
/** Parses rows from the file buffer until one is accepted, and stores it into `entry`.
  * A row is "<region id> \t <parent id> \t <type>", optionally followed by "\t <population>",
  * and terminated by a line feed.
  * Returns true when `entry` has been filled, false when the buffer has no more data.
  */
bool RegionsHierarchyFileReader::readNext(RegionEntry & entry)
{
    for (;;)
    {
        if (in.eof())
            return false;

        /// Fields are parsed as signed numbers: the internal geobase uses
        /// negative values to mark a row as garbage that must be ignored.
        Int32 raw_id = 0;
        Int32 raw_parent = 0;
        Int8 raw_type = 0;

        DB::readIntText(raw_id, in);
        DB::assertChar('\t', in);
        DB::readIntText(raw_parent, in);
        DB::assertChar('\t', in);
        DB::readIntText(raw_type, in);

        /// Old version of the format: the line ends right here.
        /// New version: a tab and the region's population come first.
        RegionPopulation population = 0;
        const bool has_population = !in.eof() && *in.position() == '\t';
        if (has_population)
        {
            ++in.position();

            UInt64 wide_population = 0;
            DB::readIntText(wide_population, in);

            /// Saturate at the largest value RegionPopulation can hold.
            const auto population_limit = std::numeric_limits<RegionPopulation>::max();
            if (wide_population > population_limit)
                population = population_limit;
            else
                population = wide_population;
        }
        DB::assertChar('\n', in);

        const bool is_garbage = raw_id <= 0 || raw_type < 0;
        if (is_garbage)
            continue;

        /// A negative parent id is stored as 0.
        const RegionID region_id = raw_id;
        const RegionID parent_id = raw_parent >= 0 ? raw_parent : 0;

        entry.id = region_id;
        entry.parent_id = parent_id;
        entry.type = static_cast<RegionType>(raw_type);
        entry.population = population;
        return true;
    }
}
|
||||
|
||||
|
||||
bool RegionsHierarchyDataSource::isModified() const
|
||||
{
|
||||
return updates_tracker.isModified();
|
||||
@ -85,7 +16,8 @@ bool RegionsHierarchyDataSource::isModified() const
|
||||
/** Calls updates_tracker.fixCurrentVersion() and then returns a reader over the file at `path`.
  * The file buffer only supplies bytes; the row parsing is done by RegionsHierarchyFormatReader.
  */
IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader()
{
    updates_tracker.fixCurrentVersion();

    /// The superseded early return of a RegionsHierarchyFileReader is dropped here:
    /// it made the two statements below unreachable.
    auto file_reader = std::make_shared<DB::ReadBufferFromFile>(path);
    return std::make_unique<RegionsHierarchyFormatReader>(std::move(file_reader));
}
|
||||
|
||||
|
||||
|
@ -0,0 +1,58 @@
|
||||
#include <Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
/** Parses rows from `input` until one is accepted, and stores it into `entry`.
  * A row is "<region id> \t <parent id> \t <type>", optionally followed by "\t <population>",
  * and terminated by a line feed.
  * Returns true when `entry` has been filled, false when `input` has no more data.
  */
bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
{
    for (;;)
    {
        if (input->eof())
            return false;

        /// Fields are parsed as signed numbers: the internal geobase uses
        /// negative values to mark a row as garbage that must be ignored.
        Int32 raw_id = 0;
        Int32 raw_parent = 0;
        Int8 raw_type = 0;

        DB::readIntText(raw_id, *input);
        DB::assertChar('\t', *input);
        DB::readIntText(raw_parent, *input);
        DB::assertChar('\t', *input);
        DB::readIntText(raw_type, *input);

        /// Old version of the format: the line ends right here.
        /// New version: a tab and the region's population come first.
        RegionPopulation population = 0;
        const bool has_population = !input->eof() && *input->position() == '\t';
        if (has_population)
        {
            ++input->position();

            UInt64 wide_population = 0;
            DB::readIntText(wide_population, *input);

            /// Saturate at the largest value RegionPopulation can hold.
            const auto population_limit = std::numeric_limits<RegionPopulation>::max();
            if (wide_population > population_limit)
                population = population_limit;
            else
                population = wide_population;
        }
        DB::assertChar('\n', *input);

        const bool is_garbage = raw_id <= 0 || raw_type < 0;
        if (is_garbage)
            continue;

        /// A negative parent id is stored as 0.
        const RegionID region_id = raw_id;
        const RegionID parent_id = raw_parent >= 0 ? raw_parent : 0;

        entry.id = region_id;
        entry.parent_id = parent_id;
        entry.type = static_cast<RegionType>(raw_type);
        entry.population = population;
        return true;
    }
}
|
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h>
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
|
||||
|
||||
// Reads regions hierarchy in geoexport format
|
||||
class RegionsHierarchyFormatReader : public IRegionsHierarchyReader
|
||||
{
|
||||
private:
|
||||
DB::ReadBufferPtr input;
|
||||
|
||||
public:
|
||||
RegionsHierarchyFormatReader(DB::ReadBufferPtr input_)
|
||||
: input(std::move(input_))
|
||||
{}
|
||||
|
||||
bool readNext(RegionEntry & entry) override;
|
||||
};
|
||||
|
@ -0,0 +1,27 @@
|
||||
#include <Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
/** Parses rows of the form "<region id> \t <name> \n" from `input`
  * until a row with a positive region id is found, and stores that row into `entry`.
  * Rows whose id is zero or negative are skipped.
  * Returns true when `entry` has been filled, false when `input` has no more data.
  */
bool LanguageRegionsNamesFormatReader::readNext(RegionNameEntry & entry)
{
    while (!input->eof())
    {
        /// Parsed as a signed number so that non-positive ids can be recognised and skipped.
        /// Initialised so that the variable never holds an indeterminate value.
        Int32 read_region_id = 0;
        std::string region_name;

        DB::readIntText(read_region_id, *input);
        DB::assertChar('\t', *input);
        DB::readString(region_name, *input);
        DB::assertChar('\n', *input);

        if (read_region_id <= 0)
            continue;

        entry.id = read_region_id;
        entry.name = region_name;
        return true;
    }

    return false;
}
|
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Dictionaries/Embedded/GeodataProviders/INamesProvider.h>
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
|
||||
|
||||
// Reads regions names list in geoexport format
|
||||
class LanguageRegionsNamesFormatReader : public ILanguageRegionsNamesReader
|
||||
{
|
||||
private:
|
||||
DB::ReadBufferPtr input;
|
||||
|
||||
public:
|
||||
LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_)
|
||||
: input(std::move(input_))
|
||||
{}
|
||||
|
||||
bool readNext(RegionNameEntry & entry) override;
|
||||
};
|
@ -1,45 +1,7 @@
|
||||
#include <Dictionaries/Embedded/GeodataProviders/NamesProvider.h>
|
||||
#include <Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h>
|
||||
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
class LanguageRegionsNamesFileReader : public ILanguageRegionsNamesReader
|
||||
{
|
||||
private:
|
||||
DB::ReadBufferFromFile in;
|
||||
|
||||
public:
|
||||
LanguageRegionsNamesFileReader(const std::string & path)
|
||||
: in(path)
|
||||
{}
|
||||
|
||||
bool readNext(RegionNameEntry & entry) override;
|
||||
};
|
||||
|
||||
/** Parses rows of the form "<region id> \t <name> \n" from the file buffer
  * until a row with a positive region id is found, and stores that row into `entry`.
  * Rows whose id is zero or negative are skipped.
  * Returns true when `entry` has been filled, false when the buffer has no more data.
  */
bool LanguageRegionsNamesFileReader::readNext(RegionNameEntry & entry)
{
    while (!in.eof())
    {
        /// Parsed as a signed number so that non-positive ids can be recognised and skipped.
        /// Initialised so that the variable never holds an indeterminate value.
        Int32 read_region_id = 0;
        std::string region_name;

        DB::readIntText(read_region_id, in);
        DB::assertChar('\t', in);
        DB::readString(region_name, in);
        DB::assertChar('\n', in);

        if (read_region_id <= 0)
            continue;

        entry.id = read_region_id;
        entry.name = region_name;
        return true;
    }

    return false;
}
|
||||
|
||||
|
||||
bool LanguageRegionsNamesDataSource::isModified() const
|
||||
@ -55,7 +17,8 @@ size_t LanguageRegionsNamesDataSource::estimateTotalSize() const
|
||||
/** Calls updates_tracker.fixCurrentVersion() and then returns a reader over the file at `path`.
  * The file buffer only supplies bytes; the row parsing is done by LanguageRegionsNamesFormatReader.
  */
ILanguageRegionsNamesReaderPtr LanguageRegionsNamesDataSource::createReader()
{
    updates_tracker.fixCurrentVersion();

    /// The superseded early return of a LanguageRegionsNamesFileReader is dropped here:
    /// it made the two statements below unreachable.
    auto file_reader = std::make_shared<DB::ReadBufferFromFile>(path);
    return std::make_unique<LanguageRegionsNamesFormatReader>(std::move(file_reader));
}
|
||||
|
||||
std::string LanguageRegionsNamesDataSource::getLanguage() const
|
||||
|
Loading…
Reference in New Issue
Block a user