2017-10-30 14:38:14 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Core/Defines.h>
|
|
|
|
#include <ext/shared_ptr_helper.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-12-30 03:49:02 +00:00
|
|
|
/// Storage whose engine name is "CatBoostPool".
/// It is configured with two file names (a column description file and a data description file),
/// both taken by the constructor. How these files are read is implemented outside this header.
/// NOTE(review): the column type names below look like the ones used in CatBoost column
/// description files - confirm against the implementation file.
class StorageCatBoostPool : public ext::shared_ptr_helper<StorageCatBoostPool>, public IStorage
{
public:
    /// Name of the storage engine.
    std::string getName() const override
    {
        return "CatBoostPool";
    }

    /// Returns the stored table name.
    std::string getTableName() const override
    {
        return table_name;
    }

    /// Overrides IStorage::read; the definition is not in this header.
    BlockInputStreams read(
        const Names & column_names,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned threads) override;

private:
    String table_name;

    String column_description_file_name;
    String data_description_file_name;
    Block sample_block;

    /// Kinds of dataset columns recognised by this storage.
    enum class DatasetColumnType
    {
        Target,
        Num,
        Categ,
        Auxiliary,
        DocId,
        Weight,
        Baseline
    };

    using ColumnTypesMap = std::map<std::string, DatasetColumnType>;

    /// Builds the mapping from the textual name of a column type to its enum value.
    /// A new map is constructed on every call.
    ColumnTypesMap getColumnTypesMap() const
    {
        ColumnTypesMap types_by_name
        {
            {"Target", DatasetColumnType::Target},
            {"Num", DatasetColumnType::Num},
            {"Categ", DatasetColumnType::Categ},
            {"Auxiliary", DatasetColumnType::Auxiliary},
            {"DocId", DatasetColumnType::DocId},
            {"Weight", DatasetColumnType::Weight},
            {"Baseline", DatasetColumnType::Baseline},
        };
        return types_by_name;
    }

    /// Produces a string from the given map; defined outside this header.
    /// NOTE(review): presumably a list of the known type names for error messages - confirm.
    std::string getColumnTypesString(const ColumnTypesMap & columnTypesMap);

    /// Description of a single dataset column: its name, an alias and its type.
    struct ColumnDescription
    {
        std::string column_name;
        std::string alias;
        /// A default-constructed description has type Num.
        DatasetColumnType column_type = DatasetColumnType::Num;

        ColumnDescription() = default;

        ColumnDescription(std::string column_name_, std::string alias_, DatasetColumnType column_type_)
            : column_name(std::move(column_name_))
            , alias(std::move(alias_))
            , column_type(column_type_)
        {
        }
    };

    std::vector<ColumnDescription> columns_description;

    /// The three helpers below are defined outside this header.
    void checkDatasetDescription();
    void parseColumnDescription();
    void createSampleBlockAndColumns();

protected:
    /// Protected constructor; the class derives from ext::shared_ptr_helper.
    /// NOTE(review): instances are presumably created through that helper - confirm.
    StorageCatBoostPool(const Context & context, String column_description_file_name, String data_description_file_name);
};
|
|
|
|
|
|
|
|
}
|