mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-22 17:50:47 +00:00
dbms: improved performance on short queries from wide tables [#METR-2944].
This commit is contained in:
parent
a8f155cb06
commit
68dd2317ce
@ -6,6 +6,7 @@
|
||||
#include <set>
|
||||
|
||||
#include <Poco/SharedPtr.h>
|
||||
#include <sparsehash/dense_hash_map>
|
||||
|
||||
#include <DB/DataTypes/IDataType.h>
|
||||
#include <DB/DataTypes/DataTypeFactory.h>
|
||||
@ -44,121 +45,29 @@ class NamesAndTypesList : public std::list<NameAndTypePair>
|
||||
public:
|
||||
using std::list<NameAndTypePair>::list;
|
||||
|
||||
void readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory)
|
||||
{
|
||||
DB::assertString("columns format version: 1\n", buf);
|
||||
size_t count;
|
||||
DB::readText(count, buf);
|
||||
DB::assertString(" columns:\n", buf);
|
||||
resize(count);
|
||||
for (NameAndTypePair & it : *this)
|
||||
{
|
||||
DB::readBackQuotedString(it.name, buf);
|
||||
DB::assertString(" ", buf);
|
||||
String type_name;
|
||||
DB::readString(type_name, buf);
|
||||
it.type = data_type_factory.get(type_name);
|
||||
DB::assertString("\n", buf);
|
||||
}
|
||||
}
|
||||
void readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory);
|
||||
void writeText(WriteBuffer & buf) const;
|
||||
|
||||
void writeText(WriteBuffer & buf) const
|
||||
{
|
||||
DB::writeString("columns format version: 1\n", buf);
|
||||
DB::writeText(size(), buf);
|
||||
DB::writeString(" columns:\n", buf);
|
||||
for (const auto & it : *this)
|
||||
{
|
||||
DB::writeBackQuotedString(it.name, buf);
|
||||
DB::writeChar(' ', buf);
|
||||
DB::writeString(it.type->getName(), buf);
|
||||
DB::writeChar('\n', buf);
|
||||
}
|
||||
}
|
||||
|
||||
String toString() const
|
||||
{
|
||||
String s;
|
||||
{
|
||||
WriteBufferFromString out(s);
|
||||
writeText(out);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static NamesAndTypesList parse(const String & s, const DataTypeFactory & data_type_factory)
|
||||
{
|
||||
ReadBufferFromString in(s);
|
||||
NamesAndTypesList res;
|
||||
res.readText(in, data_type_factory);
|
||||
assertEOF(in);
|
||||
return res;
|
||||
}
|
||||
String toString() const;
|
||||
static NamesAndTypesList parse(const String & s, const DataTypeFactory & data_type_factory);
|
||||
|
||||
/// Все элементы rhs должны быть различны.
|
||||
bool isSubsetOf(const NamesAndTypesList & rhs) const
|
||||
{
|
||||
NamesAndTypes vector(rhs.begin(), rhs.end());
|
||||
vector.insert(vector.end(), begin(), end());
|
||||
std::sort(vector.begin(), vector.end());
|
||||
return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size();
|
||||
}
|
||||
bool isSubsetOf(const NamesAndTypesList & rhs) const;
|
||||
|
||||
/// Расстояние Хемминга между множествами
|
||||
/// (иными словами, добавленные и удаленные столбцы считаются один раз; столбцы, изменившие тип, - дважды).
|
||||
size_t sizeOfDifference(const NamesAndTypesList & rhs) const
|
||||
{
|
||||
NamesAndTypes vector(rhs.begin(), rhs.end());
|
||||
vector.insert(vector.end(), begin(), end());
|
||||
std::sort(vector.begin(), vector.end());
|
||||
return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size();
|
||||
}
|
||||
size_t sizeOfDifference(const NamesAndTypesList & rhs) const;
|
||||
|
||||
Names getNames() const
|
||||
{
|
||||
Names res;
|
||||
res.reserve(size());
|
||||
for (const NameAndTypePair & column : *this)
|
||||
{
|
||||
res.push_back(column.name);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
Names getNames() const;
|
||||
|
||||
/// Оставить только столбцы, имена которых есть в names. В names могут быть лишние столбцы.
|
||||
NamesAndTypesList filter(const NameSet & names) const
|
||||
{
|
||||
NamesAndTypesList res;
|
||||
for (const NameAndTypePair & column : *this)
|
||||
{
|
||||
if (names.count(column.name))
|
||||
res.push_back(column);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
NamesAndTypesList filter(const NameSet & names) const;
|
||||
|
||||
/// Оставить только столбцы, имена которых есть в names. В names могут быть лишние столбцы.
|
||||
NamesAndTypesList filter(const Names & names) const
|
||||
{
|
||||
return filter(NameSet(names.begin(), names.end()));
|
||||
}
|
||||
NamesAndTypesList filter(const Names & names) const;
|
||||
|
||||
/// В отличие от filter, возвращает столбцы в том порядке, в котором они идут в names.
|
||||
NamesAndTypesList addTypes(const Names & names) const
|
||||
{
|
||||
std::map<String, DataTypePtr> types;
|
||||
for (const NameAndTypePair & column : *this)
|
||||
types[column.name] = column.type;
|
||||
NamesAndTypesList res;
|
||||
for (const String & name : names)
|
||||
{
|
||||
auto it = types.find(name);
|
||||
if (it == types.end())
|
||||
throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
res.push_back(NameAndTypePair(name, it->second));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
NamesAndTypesList addTypes(const Names & names) const;
|
||||
};
|
||||
|
||||
typedef SharedPtr<NamesAndTypesList> NamesAndTypesListPtr;
|
||||
|
120
dbms/src/Core/NamesAndTypes.cpp
Normal file
120
dbms/src/Core/NamesAndTypes.cpp
Normal file
@ -0,0 +1,120 @@
|
||||
#include <DB/Core/NamesAndTypes.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Reads a textual column list from `buf` into this list.
/// Expected layout: the line "columns format version: 1", then "<count> columns:",
/// then `count` lines of the form "`name` type_name".
/// Every type name is turned into a data type object through `data_type_factory`.
void NamesAndTypesList::readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory)
{
    DB::assertString("columns format version: 1\n", buf);

    size_t count;
    DB::readText(count, buf);
    DB::assertString(" columns:\n", buf);

    /// Bring the list to `count` entries first, then fill every entry in place.
    resize(count);

    for (auto column = begin(); column != end(); ++column)
    {
        DB::readBackQuotedString(column->name, buf);
        DB::assertString(" ", buf);

        String type_name;
        DB::readString(type_name, buf);
        column->type = data_type_factory.get(type_name);

        DB::assertString("\n", buf);
    }
}
|
||||
|
||||
/// Writes this column list to `buf` as text:
/// the line "columns format version: 1", then "<size> columns:",
/// then one line per column holding the back-quoted name, a space and the type name.
void NamesAndTypesList::writeText(WriteBuffer & buf) const
{
    DB::writeString("columns format version: 1\n", buf);
    DB::writeText(size(), buf);
    DB::writeString(" columns:\n", buf);

    for (const NameAndTypePair & column : *this)
    {
        DB::writeBackQuotedString(column.name, buf);
        DB::writeChar(' ', buf);
        DB::writeString(column.type->getName(), buf);
        DB::writeChar('\n', buf);
    }
}
|
||||
|
||||
/// Returns the text produced by writeText() as a string.
String NamesAndTypesList::toString() const
{
    String text;

    {
        /// The buffer lives in its own scope, so it is destroyed before `text` is returned
        /// (presumably its destructor finalizes the string - confirm against WriteBufferFromString).
        WriteBufferFromString out(text);
        writeText(out);
    }

    return text;
}
|
||||
|
||||
/// Builds a column list from the string `s` by running readText() over it.
/// The whole string must be consumed: assertEOF() is called once the list has been read.
NamesAndTypesList NamesAndTypesList::parse(const String & s, const DataTypeFactory & data_type_factory)
{
    ReadBufferFromString in(s);

    NamesAndTypesList columns;
    columns.readText(in, data_type_factory);

    /// Nothing may follow the column list.
    assertEOF(in);

    return columns;
}
|
||||
|
||||
bool NamesAndTypesList::isSubsetOf(const NamesAndTypesList & rhs) const
|
||||
{
|
||||
NamesAndTypes vector(rhs.begin(), rhs.end());
|
||||
vector.insert(vector.end(), begin(), end());
|
||||
std::sort(vector.begin(), vector.end());
|
||||
return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size();
|
||||
}
|
||||
|
||||
size_t NamesAndTypesList::sizeOfDifference(const NamesAndTypesList & rhs) const
|
||||
{
|
||||
NamesAndTypes vector(rhs.begin(), rhs.end());
|
||||
vector.insert(vector.end(), begin(), end());
|
||||
std::sort(vector.begin(), vector.end());
|
||||
return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size();
|
||||
}
|
||||
|
||||
/// Returns the names of all columns, in list order.
Names NamesAndTypesList::getNames() const
{
    Names names;
    names.reserve(size());

    for (auto column = begin(); column != end(); ++column)
        names.push_back(column->name);

    return names;
}
|
||||
|
||||
/// Keeps only the columns whose names are present in `names`, preserving list order.
/// `names` may contain names that match no column.
NamesAndTypesList NamesAndTypesList::filter(const NameSet & names) const
{
    NamesAndTypesList kept;

    for (auto column = begin(); column != end(); ++column)
    {
        if (names.count(column->name) == 0)
            continue;

        kept.push_back(*column);
    }

    return kept;
}
|
||||
|
||||
/// Keeps only the columns whose names are present in `names`.
/// `names` may contain names that match no column.
/// Builds a NameSet from `names` and delegates to the NameSet overload.
NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
{
    const NameSet name_set(names.begin(), names.end());
    return filter(name_set);
}
|
||||
|
||||
/// Returns the columns listed in `names`, paired with their types.
/// Unlike filter(), the result follows the order of `names`.
/// Throws Exception with ErrorCodes::THERE_IS_NO_COLUMN if a name is not found in this list.
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{
    /// NOTE It would be better to keep this map in IStorage than to build it here anew on every call.
    /// The map stores pointers to the types held by this list instead of copying them.
    google::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> types;

    /// NOTE(review): the empty key is a default StringRef; a column with an empty name
    /// would presumably collide with it - confirm against StringRef equality.
    types.set_empty_key(StringRef());

    for (auto column = begin(); column != end(); ++column)
        types[column->name] = &column->type;

    NamesAndTypesList res;

    for (const String & name : names)
    {
        auto found = types.find(name);

        if (found == types.end())
            throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);

        res.push_back(NameAndTypePair(name, *found->second));
    }

    return res;
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user