From 68dd2317ce304097cd7f335d455f9666ef112b76 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 15 Feb 2015 16:30:07 +0300
Subject: [PATCH] dbms: improved performance on short queries from wide tables [#METR-2944].

---
Reviewer notes (placed between the "---" separator and the diffstat, so they
are not part of the commit message and are skipped when the patch is applied):

* NamesAndTypes.h: the inline bodies of the NamesAndTypesList members
  readText, writeText, toString, parse, isSubsetOf, sizeOfDifference,
  getNames, filter (both overloads) and addTypes are removed, leaving only
  declarations. One #include line is added.
* NamesAndTypes.cpp (new file): receives those bodies inside namespace DB.
  Nine of them are moved unchanged. addTypes is the only one rewritten: its
  name-to-type lookup table changes from std::map holding column.type by
  value to google::dense_hash_map holding &column.type, with StringRef()
  registered as the empty key, and results are built from *it->second.
* Serialized form handled by readText/writeText, as spelled out by the string
  literals: "columns format version: 1\n", the column count followed by
  " columns:\n", then one line per column made of the name (through
  readBackQuotedString/writeBackQuotedString), a space, the type name and "\n".
* addTypes throws Exception with ErrorCodes::THERE_IS_NO_COLUMN and the
  message "No column <name>" when a requested name is not found.
* NOTE(review): every #include target and template argument list is absent
  in this copy of the patch (bare "#include", "std::list", "std::map",
  "google::dense_hash_map", "SharedPtr"); presumably the angle-bracketed
  text was stripped during extraction - confirm against the repository
  before applying.

 dbms/include/DB/Core/NamesAndTypes.h | 113 +++---------------------
 dbms/src/Core/NamesAndTypes.cpp      | 120 +++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 102 deletions(-)
 create mode 100644 dbms/src/Core/NamesAndTypes.cpp

diff --git a/dbms/include/DB/Core/NamesAndTypes.h b/dbms/include/DB/Core/NamesAndTypes.h
index 96e2cf6b2da..ad3c60defa7 100644
--- a/dbms/include/DB/Core/NamesAndTypes.h
+++ b/dbms/include/DB/Core/NamesAndTypes.h
@@ -6,6 +6,7 @@
 #include
 
 #include
+#include
 
 #include
 #include
@@ -44,121 +45,29 @@ class NamesAndTypesList : public std::list
 public:
     using std::list::list;
 
-    void readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory)
-    {
-        DB::assertString("columns format version: 1\n", buf);
-        size_t count;
-        DB::readText(count, buf);
-        DB::assertString(" columns:\n", buf);
-        resize(count);
-        for (NameAndTypePair & it : *this)
-        {
-            DB::readBackQuotedString(it.name, buf);
-            DB::assertString(" ", buf);
-            String type_name;
-            DB::readString(type_name, buf);
-            it.type = data_type_factory.get(type_name);
-            DB::assertString("\n", buf);
-        }
-    }
+    void readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory);
+    void writeText(WriteBuffer & buf) const;
 
-    void writeText(WriteBuffer & buf) const
-    {
-        DB::writeString("columns format version: 1\n", buf);
-        DB::writeText(size(), buf);
-        DB::writeString(" columns:\n", buf);
-        for (const auto & it : *this)
-        {
-            DB::writeBackQuotedString(it.name, buf);
-            DB::writeChar(' ', buf);
-            DB::writeString(it.type->getName(), buf);
-            DB::writeChar('\n', buf);
-        }
-    }
-
-    String toString() const
-    {
-        String s;
-        {
-            WriteBufferFromString out(s);
-            writeText(out);
-        }
-        return s;
-    }
-
-    static NamesAndTypesList parse(const String & s, const DataTypeFactory & data_type_factory)
-    {
-        ReadBufferFromString in(s);
-        NamesAndTypesList res;
-        res.readText(in, data_type_factory);
-        assertEOF(in);
-        return res;
-    }
+    String toString() const;
+    static NamesAndTypesList parse(const String & s, const DataTypeFactory & data_type_factory);
 
     /// All elements of rhs must be distinct.
-    bool isSubsetOf(const NamesAndTypesList & rhs) const
-    {
-        NamesAndTypes vector(rhs.begin(), rhs.end());
-        vector.insert(vector.end(), begin(), end());
-        std::sort(vector.begin(), vector.end());
-        return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size();
-    }
+    bool isSubsetOf(const NamesAndTypesList & rhs) const;
 
     /// Hamming distance between the sets
     ///  (in other words, added and removed columns are counted once; columns whose type changed are counted twice).
-    size_t sizeOfDifference(const NamesAndTypesList & rhs) const
-    {
-        NamesAndTypes vector(rhs.begin(), rhs.end());
-        vector.insert(vector.end(), begin(), end());
-        std::sort(vector.begin(), vector.end());
-        return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size();
-    }
+    size_t sizeOfDifference(const NamesAndTypesList & rhs) const;
 
-    Names getNames() const
-    {
-        Names res;
-        res.reserve(size());
-        for (const NameAndTypePair & column : *this)
-        {
-            res.push_back(column.name);
-        }
-        return res;
-    }
+    Names getNames() const;
 
     /// Keep only the columns whose names are present in names. names may contain extra columns.
-    NamesAndTypesList filter(const NameSet & names) const
-    {
-        NamesAndTypesList res;
-        for (const NameAndTypePair & column : *this)
-        {
-            if (names.count(column.name))
-                res.push_back(column);
-        }
-        return res;
-    }
+    NamesAndTypesList filter(const NameSet & names) const;
 
     /// Keep only the columns whose names are present in names. names may contain extra columns.
-    NamesAndTypesList filter(const Names & names) const
-    {
-        return filter(NameSet(names.begin(), names.end()));
-    }
+    NamesAndTypesList filter(const Names & names) const;
 
     /// Unlike filter, returns the columns in the order in which they appear in names.
-    NamesAndTypesList addTypes(const Names & names) const
-    {
-        std::map types;
-        for (const NameAndTypePair & column : *this)
-            types[column.name] = column.type;
-        NamesAndTypesList res;
-        for (const String & name : names)
-        {
-            auto it = types.find(name);
-            if (it == types.end())
-                throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);
-            res.push_back(NameAndTypePair(name, it->second));
-        }
-        return res;
-    }
+    NamesAndTypesList addTypes(const Names & names) const;
 };
 
 typedef SharedPtr NamesAndTypesListPtr;
diff --git a/dbms/src/Core/NamesAndTypes.cpp b/dbms/src/Core/NamesAndTypes.cpp
new file mode 100644
index 00000000000..d70e6a9932a
--- /dev/null
+++ b/dbms/src/Core/NamesAndTypes.cpp
@@ -0,0 +1,120 @@
+#include
+
+namespace DB
+{
+
+void NamesAndTypesList::readText(ReadBuffer & buf, const DataTypeFactory & data_type_factory)
+{
+    DB::assertString("columns format version: 1\n", buf);
+    size_t count;
+    DB::readText(count, buf);
+    DB::assertString(" columns:\n", buf);
+    resize(count);
+    for (NameAndTypePair & it : *this)
+    {
+        DB::readBackQuotedString(it.name, buf);
+        DB::assertString(" ", buf);
+        String type_name;
+        DB::readString(type_name, buf);
+        it.type = data_type_factory.get(type_name);
+        DB::assertString("\n", buf);
+    }
+}
+
+void NamesAndTypesList::writeText(WriteBuffer & buf) const
+{
+    DB::writeString("columns format version: 1\n", buf);
+    DB::writeText(size(), buf);
+    DB::writeString(" columns:\n", buf);
+    for (const auto & it : *this)
+    {
+        DB::writeBackQuotedString(it.name, buf);
+        DB::writeChar(' ', buf);
+        DB::writeString(it.type->getName(), buf);
+        DB::writeChar('\n', buf);
+    }
+}
+
+String NamesAndTypesList::toString() const
+{
+    String s;
+    {
+        WriteBufferFromString out(s);
+        writeText(out);
+    }
+    return s;
+}
+
+NamesAndTypesList NamesAndTypesList::parse(const String & s, const DataTypeFactory & data_type_factory)
+{
+    ReadBufferFromString in(s);
+    NamesAndTypesList res;
+    res.readText(in, data_type_factory);
+    assertEOF(in);
+    return res;
+}
+
+bool NamesAndTypesList::isSubsetOf(const NamesAndTypesList & rhs) const
+{
+    NamesAndTypes vector(rhs.begin(), rhs.end());
+    vector.insert(vector.end(), begin(), end());
+    std::sort(vector.begin(), vector.end());
+    return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size();
+}
+
+size_t NamesAndTypesList::sizeOfDifference(const NamesAndTypesList & rhs) const
+{
+    NamesAndTypes vector(rhs.begin(), rhs.end());
+    vector.insert(vector.end(), begin(), end());
+    std::sort(vector.begin(), vector.end());
+    return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size();
+}
+
+Names NamesAndTypesList::getNames() const
+{
+    Names res;
+    res.reserve(size());
+    for (const NameAndTypePair & column : *this)
+    {
+        res.push_back(column.name);
+    }
+    return res;
+}
+
+NamesAndTypesList NamesAndTypesList::filter(const NameSet & names) const
+{
+    NamesAndTypesList res;
+    for (const NameAndTypePair & column : *this)
+    {
+        if (names.count(column.name))
+            res.push_back(column);
+    }
+    return res;
+}
+
+NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
+{
+    return filter(NameSet(names.begin(), names.end()));
+}
+
+NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
+{
+    /// NOTE It would be better to keep the map in IStorage than to rebuild it here on every call.
+    google::dense_hash_map types;
+    types.set_empty_key(StringRef());
+
+    for (const NameAndTypePair & column : *this)
+        types[column.name] = &column.type;
+
+    NamesAndTypesList res;
+    for (const String & name : names)
+    {
+        auto it = types.find(name);
+        if (it == types.end())
+            throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);
+        res.push_back(NameAndTypePair(name, *it->second));
+    }
+    return res;
+}
+
+}