From f24bedd2b8e98d44aa8a5b49c22891cbf78e0e51 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 12 May 2019 14:36:02 +0300 Subject: [PATCH 001/181] Added constraints to parsers --- dbms/src/Parsers/ASTConstraintDeclaration.h | 38 ++++++++++++++ dbms/src/Parsers/ASTCreateQuery.cpp | 12 +++++ dbms/src/Parsers/ASTCreateQuery.h | 1 + dbms/src/Parsers/ParserCreateQuery.cpp | 55 ++++++++++++++++++--- dbms/src/Parsers/ParserCreateQuery.h | 22 +++++++-- 5 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 dbms/src/Parsers/ASTConstraintDeclaration.h diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.h b/dbms/src/Parsers/ASTConstraintDeclaration.h new file mode 100644 index 00000000000..cfa4f8f98ab --- /dev/null +++ b/dbms/src/Parsers/ASTConstraintDeclaration.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +namespace DB +{ + +/** name CHECK logical_expr + */ +class ASTConstraintDeclaration : public IAST { +public: + String name; + IAST *expr; + + String getID(char) const override { return "Constraint"; } + + ASTPtr clone() const override { + auto res = std::make_shared(); + + res->name = name; + + if (expr) + res->set(res->expr, expr->clone()); + + return res; + } + + void formatImpl(const FormatSettings &s, FormatState &state, FormatStateStacked frame) const override { + frame.need_parens = false; + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str; + s.ostr << backQuoteIfNeed(name); + s.ostr << (s.hilite ? hilite_keyword : "") << " CHECK " << (s.hilite ? hilite_none : ""); + expr->formatImpl(s, state, frame); + } +}; +} diff --git a/dbms/src/Parsers/ASTCreateQuery.cpp b/dbms/src/Parsers/ASTCreateQuery.cpp index e99c543f5ec..b60eceb5167 100644 --- a/dbms/src/Parsers/ASTCreateQuery.cpp +++ b/dbms/src/Parsers/ASTCreateQuery.cpp @@ -128,6 +128,8 @@ ASTPtr ASTColumns::clone() const res->set(res->columns, columns->clone()); if (indices) res->set(res->indices, indices->clone()); + if (constraints) + res->set(res->constraints, constraints->clone()); return res; } @@ -156,6 +158,16 @@ void ASTColumns::formatImpl(const FormatSettings & s, FormatState & state, Forma list.children.push_back(elem); } } + if (constraints) + { + for (const auto & constraint : constraints->children) + { + auto elem = std::make_shared(); + elem->prefix = "CONSTRAINT"; + elem->set(elem->elem, constraint->clone()); + list.children.push_back(elem); + } + } if (!list.children.empty()) list.formatImpl(s, state, frame); diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 2755e1a3d78..b6948c19146 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -36,6 +36,7 @@ class ASTColumns : public IAST public: ASTExpressionList * columns = nullptr; ASTExpressionList * indices = nullptr; + ASTExpressionList * constraints = nullptr; String getID(char) const override { return "Columns definition"; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index fd6665a5a2c..9f584dbbf8c 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -137,12 +138,41 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } +bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_check("CHECK"); -bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) + ParserIdentifier name_p; + ParserLogicalOrExpression expression_p; + + ASTPtr name; + ASTPtr expr; + + if (!name_p.parse(pos, name, expected)) + return false; + + if (!s_check.ignore(pos, expected)) + return false; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + auto constraint = std::make_shared(); + constraint->name = name->as().name; + constraint->set(constraint->expr, expr); + node = constraint; + + return true; +} + + +bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_index("INDEX"); + ParserKeyword s_constraint("CONSTRAINT"); ParserIndexDeclaration index_p; + ParserConstraintDeclaration constraint_p; ParserColumnDeclaration column_p; ASTPtr new_node = nullptr; @@ -152,6 +182,11 @@ bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, ASTPtr & node, Expecte if (!index_p.parse(pos, new_node, expected)) return false; } + else if (s_constraint.ignore(pos, expected)) + { + if (!constraint_p.parse(pos, new_node, expected)) + return false; + } else { if (!column_p.parse(pos, new_node, expected)) @@ -168,16 +203,18 @@ bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & .parse(pos, node, expected); } - -bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list; - if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + if (!ParserList( + std::make_unique(), + std::make_unique(TokenType::Comma), false) .parse(pos, list, expected)) return false; ASTPtr columns = std::make_shared(); ASTPtr indices = std::make_shared(); + ASTPtr constraints = std::make_shared(); for (const auto & elem : list->children) { @@ -185,6 +222,8 @@ bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, columns->children.push_back(elem); else if (elem->as()) indices->children.push_back(elem); + else if (elem->as()) + constraints->children.push_back(elem); else return false; } @@ -195,6 +234,8 @@ bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, res->set(res->columns, columns); if (!indices->children.empty()) res->set(res->indices, indices); + if (!constraints->children.empty()) + res->set(res->constraints, constraints); node = res; @@ -317,7 +358,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserStorage storage_p; ParserIdentifier name_p; - ParserColumnsOrIndicesDeclarationList columns_or_indices_p; + ParserTablePropertiesDeclarationList table_properties_p; ParserSelectWithUnionQuery select_p; ASTPtr database; @@ -391,7 +432,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// List of columns. if (s_lparen.ignore(pos, expected)) { - if (!columns_or_indices_p.parse(pos, columns_list, expected)) + if (!table_properties_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -498,7 +539,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Optional - a list of columns can be specified. It must fully comply with SELECT. 
if (s_lparen.ignore(pos, expected)) { - if (!columns_or_indices_p.parse(pos, columns_list, expected)) + if (!table_properties_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index bd3c8f671f0..42583d8dd19 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -244,11 +244,20 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserConstraintDeclaration : public IParserBase +{ +public: + ParserConstraintDeclaration() {} -class ParserColumnAndIndexDeclaraion : public IParserBase +protected: + const char * getName() const override { return "constraint declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserTablePropertyDeclaration : public IParserBase { protected: - const char * getName() const override { return "column or index declaration"; } + const char * getName() const override { return "table propery (column, index, constraint) declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; @@ -260,8 +269,15 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserConstraintDeclarationList : public IParserBase +{ +protected: + const char * getName() const override { return "constraint declaration list"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; -class ParserColumnsOrIndicesDeclarationList : public IParserBase + +class ParserTablePropertiesDeclarationList : public IParserBase { protected: const char * getName() const override { return "columns or indices declaration list"; } From 502d86bd022bc53f8f4f9452cd4d1201e849aa4c Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 17 May 2019 07:08:03 +0300 Subject: [PATCH 002/181] Added constraints to InterpreterCreateQuery and storages --- .../Interpreters/InterpreterCreateQuery.cpp | 23 +++++++++++ .../src/Interpreters/InterpreterCreateQuery.h | 2 + dbms/src/Storages/ConstraintsDescription.cpp | 39 +++++++++++++++++++ dbms/src/Storages/ConstraintsDescription.h | 23 +++++++++++ dbms/src/Storages/ITableDeclaration.cpp | 5 +++ dbms/src/Storages/ITableDeclaration.h | 5 +++ dbms/src/Storages/StorageMergeTree.h | 3 ++ 7 files changed, 100 insertions(+) create mode 100644 dbms/src/Storages/ConstraintsDescription.cpp create mode 100644 dbms/src/Storages/ConstraintsDescription.h diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 4bc35f1e378..e4e5a8e7f83 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -252,6 +252,16 @@ ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) return res; } +ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & constraints) +{ + auto res = std::make_shared(); + + for (const auto & constraint : constraints.constraints) + res->children.push_back(constraint->clone()); + + return res; +} + ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns_ast, const Context & context) { /// First, deduce implicit types. 
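[A note on what these interpreter hunks round-trip: the user-facing clause is the CONSTRAINT ... CHECK element that patch 001 taught the column-list parser to accept. Taken verbatim from the test introduced in patch 016 of this series, a table using it looks like:

CREATE TABLE test_constraints
(
    a UInt32,
    b UInt32,
    CONSTRAINT b_constraint CHECK b > 0
)
ENGINE = MergeTree ORDER BY (a);

formatConstraints() mirrors the existing formatColumns()/formatIndices() helpers: it clones each ASTConstraintDeclaration into an ASTExpressionList, presumably so the canonicalized CREATE query written to the table's stored metadata reproduces the CONSTRAINT clause when the table is reattached.]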
@@ -370,6 +380,8 @@ ColumnsDescription InterpreterCreateQuery::setColumns( { ColumnsDescription columns; IndicesDescription indices; + ConstraintsDescription constraints; + if (create.columns_list) { @@ -379,11 +391,16 @@ ColumnsDescription InterpreterCreateQuery::setColumns( for (const auto & index : create.columns_list->indices->children) indices.indices.push_back( std::dynamic_pointer_cast(index->clone())); + if (create.columns_list->constraints) + for (const auto & constraint : create.columns_list->constraints->children) + constraints.constraints.push_back( + std::dynamic_pointer_cast(constraint->clone())); } else if (!create.as_table.empty()) { columns = as_storage->getColumns(); indices = as_storage->getIndicesDescription(); + constraints = as_storage->getConstraintsDescription(); } else if (create.select) { @@ -395,6 +412,7 @@ ColumnsDescription InterpreterCreateQuery::setColumns( /// Even if query has list of columns, canonicalize it (unfold Nested columns). ASTPtr new_columns = formatColumns(columns); ASTPtr new_indices = formatIndices(indices); + ASTPtr new_constraints = formatConstraints(constraints); if (!create.columns_list) { @@ -412,6 +430,11 @@ ColumnsDescription InterpreterCreateQuery::setColumns( else if (new_indices) create.columns_list->set(create.columns_list->indices, new_indices); + if (new_constraints && create.columns_list->constraints) + create.columns_list->replace(create.columns_list->constraints, new_constraints); + else if (new_constraints) + create.columns_list->set(create.columns_list->constraints, new_constraints); + /// Check for duplicates std::set all_columns; for (const auto & column : columns) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 2f124e7df9b..a7886f644ad 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -31,6 +32,7 @@ public: static ASTPtr formatColumns(const ColumnsDescription & columns); static ASTPtr formatIndices(const IndicesDescription & indices); + static ASTPtr formatConstraints(const ConstraintsDescription & constraints); void setDatabaseLoadingThreadpool(ThreadPool & thread_pool_) { diff --git a/dbms/src/Storages/ConstraintsDescription.cpp b/dbms/src/Storages/ConstraintsDescription.cpp new file mode 100644 index 00000000000..042ee06ff59 --- /dev/null +++ b/dbms/src/Storages/ConstraintsDescription.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +String ConstraintsDescription::toString() const +{ + if (constraints.empty()) + return {}; + + ASTExpressionList list; + for (const auto & constraint : constraints) + list.children.push_back(constraint); + + return serializeAST(list, true); +} + +ConstraintsDescription ConstraintsDescription::parse(const String & str) +{ + if (str.empty()) + return {}; + + ConstraintsDescription res; + ParserConstraintDeclarationList parser; + ASTPtr list = parseQuery(parser, str, 0); + + for (const auto & constraint : list->children) + res.constraints.push_back(std::dynamic_pointer_cast(constraint)); + + return res; +} + +} diff --git a/dbms/src/Storages/ConstraintsDescription.h b/dbms/src/Storages/ConstraintsDescription.h new file mode 100644 index 00000000000..c2954d94428 --- /dev/null +++ b/dbms/src/Storages/ConstraintsDescription.h @@ -0,0 +1,23 @@ +#pragma once + +#include + + +namespace DB +{ + +using ConstraintsASTs = std::vector>; + +struct 
ConstraintsDescription +{ + ConstraintsASTs constraints; + + ConstraintsDescription() = default; + + bool empty() const { return constraints.empty(); } + String toString() const; + + static ConstraintsDescription parse(const String & str); +}; + +} diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index c9385c24cbe..b24072f0de8 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -36,6 +36,11 @@ void ITableDeclaration::setIndicesDescription(IndicesDescription indices_) indices = std::move(indices_); } +void ITableDeclaration::setConstraintsDescription(ConstraintsDescription constraints_) +{ + constraints = std::move(constraints_); +} + bool ITableDeclaration::hasColumn(const String & column_name) const { diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h index e2ac3b1d6c4..e1e6d88a68f 100644 --- a/dbms/src/Storages/ITableDeclaration.h +++ b/dbms/src/Storages/ITableDeclaration.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -19,6 +20,9 @@ public: virtual const IndicesDescription & getIndicesDescription() const { return indices; } virtual void setIndicesDescription(IndicesDescription indices_); + virtual const ConstraintsDescription & getConstraintsDescription() const { return constraints; } + virtual void setConstraintsDescription(ConstraintsDescription constraints_); + /// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns /// (they are treated separately). virtual NameAndTypePair getColumn(const String & column_name) const; @@ -57,6 +61,7 @@ public: private: ColumnsDescription columns; IndicesDescription indices; + ConstraintsDescription constraints; }; } diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index c5ea2d8c3a0..0b4b00b7cb7 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -46,6 +46,9 @@ public: virtual const IndicesDescription & getIndicesDescription() const override { return data.getIndicesDescription(); } virtual void setIndicesDescription(IndicesDescription indices_) override { data.setIndicesDescription(std::move(indices_)); } + virtual const ConstraintsDescription & getConstraintsDescription() const override { return data.getConstraintsDescription(); } + virtual void setConstraintsDescription(ConstraintsDescription constraints_) override { data.setConstraintsDescription(constraints_; )} + NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); } bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); } From 2f8864a6ea96f124af79914fd351fe998d47b538 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 17 May 2019 07:14:13 +0300 Subject: [PATCH 003/181] Some style fixes --- dbms/src/Parsers/ASTConstraintDeclaration.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.h b/dbms/src/Parsers/ASTConstraintDeclaration.h index cfa4f8f98ab..d72358be498 100644 --- a/dbms/src/Parsers/ASTConstraintDeclaration.h +++ b/dbms/src/Parsers/ASTConstraintDeclaration.h @@ -7,14 +7,16 @@ namespace DB /** name CHECK logical_expr */ -class ASTConstraintDeclaration : public IAST { +class ASTConstraintDeclaration : public IAST +{ public: String name; IAST *expr; String getID(char) const override { return "Constraint"; } - ASTPtr clone() const override { + ASTPtr clone() const 
override + { auto res = std::make_shared(); res->name = name; @@ -25,7 +27,8 @@ public: return res; } - void formatImpl(const FormatSettings &s, FormatState &state, FormatStateStacked frame) const override { + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + { frame.need_parens = false; std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); From 72d5a4634c10d4d1e846867c22da3061fe20417e Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 17 May 2019 07:27:09 +0300 Subject: [PATCH 004/181] Syntax fix --- dbms/src/Storages/StorageMergeTree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0b4b00b7cb7..184db1a858b 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -47,7 +47,7 @@ public: virtual void setIndicesDescription(IndicesDescription indices_) override { data.setIndicesDescription(std::move(indices_)); } virtual const ConstraintsDescription & getConstraintsDescription() const override { return data.getConstraintsDescription(); } - virtual void setConstraintsDescription(ConstraintsDescription constraints_) override { data.setConstraintsDescription(constraints_; )} + virtual void setConstraintsDescription(ConstraintsDescription constraints_) override { data.setConstraintsDescription(constraints_); } NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); } bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); } From 07abed6d31b2a024527f34601c457de40bdcc220 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 17 May 2019 08:05:19 +0300 Subject: [PATCH 005/181] Renamed constraints getter and setter --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 +- dbms/src/Storages/ITableDeclaration.cpp | 2 +- dbms/src/Storages/ITableDeclaration.h | 4 ++-- dbms/src/Storages/StorageMergeTree.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index e9027f473e5..15968d58ac5 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -400,7 +400,7 @@ ColumnsDescription InterpreterCreateQuery::setColumns( { columns = as_storage->getColumns(); indices = as_storage->getIndices(); - constraints = as_storage->getConstraintsDescription(); + constraints = as_storage->getConstraints(); } else if (create.select) { diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index cb9ba9daf9a..47d24300452 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -46,7 +46,7 @@ void ITableDeclaration::setIndices(IndicesDescription indices_) indices = std::move(indices_); } -void ITableDeclaration::setConstraintsDescription(ConstraintsDescription constraints_) +void ITableDeclaration::setConstraints(ConstraintsDescription constraints_) { constraints = std::move(constraints_); } diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h index 6d50f8acc0f..52db660b19b 100644 --- a/dbms/src/Storages/ITableDeclaration.h +++ b/dbms/src/Storages/ITableDeclaration.h @@ -20,8 +20,8 @@ public: const IndicesDescription & getIndices() const; void setIndices(IndicesDescription indices_); - virtual const 
ConstraintsDescription & getConstraintsDescription() const { return constraints; } - virtual void setConstraintsDescription(ConstraintsDescription constraints_); + virtual const ConstraintsDescription & getConstraints() const { return constraints; } + virtual void setConstraints(ConstraintsDescription constraints_); /// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns /// (they are treated separately). diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 2c42bbd35c5..18386d78b8c 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -32,8 +32,8 @@ public: std::string getDatabaseName() const override { return database_name; } bool supportsIndexForIn() const override { return true; } - virtual const ConstraintsDescription & getConstraintsDescription() const override { return data.getConstraintsDescription(); } - virtual void setConstraintsDescription(ConstraintsDescription constraints_) override { data.setConstraintsDescription(constraints_); } + virtual const ConstraintsDescription & getConstraints() const override { return data.getConstraints(); } + virtual void setConstraints(ConstraintsDescription constraints_) override { data.setConstraints(constraints_); } BlockInputStreams read( const Names & column_names, From 7919a62198ce97493f1f151914ea69765423037c Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 17 May 2019 09:21:29 +0300 Subject: [PATCH 006/181] Removed constraints getter and setter from StorageMergeTree --- dbms/src/Storages/StorageMergeTree.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 18386d78b8c..b5156ce7137 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -32,8 +32,6 @@ public: std::string getDatabaseName() const override { return database_name; } bool supportsIndexForIn() const override { return true; } - virtual const ConstraintsDescription & getConstraints() const override { return data.getConstraints(); } - virtual void setConstraints(ConstraintsDescription constraints_) override { data.setConstraints(constraints_); } BlockInputStreams read( const Names & column_names, From 1f5715b985125d8820d06c5a23a98d290d278cdb Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 18 May 2019 08:16:33 +0300 Subject: [PATCH 007/181] Removed double whitespace --- dbms/src/Parsers/ParserCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 9f584dbbf8c..76150b95b07 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -143,7 +143,7 @@ bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_check("CHECK"); ParserIdentifier name_p; - ParserLogicalOrExpression expression_p; + ParserLogicalOrExpression expression_p; ASTPtr name; ASTPtr expr; From ce9389660853759cb7566c7bf2a932894ccd06de Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 18 May 2019 11:05:52 +0300 Subject: [PATCH 008/181] Added constraints description to MergeTree and related storages --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + .../Storages/MergeTree/registerStorageMergeTree.cpp | 10 +++++++--- dbms/src/Storages/StorageMergeTree.cpp | 3 ++- dbms/src/Storages/StorageMergeTree.h | 1 + 
dbms/src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- dbms/src/Storages/StorageReplicatedMergeTree.h | 1 + 7 files changed, 16 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index ea51159d9ba..4f09547a578 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -93,6 +93,7 @@ MergeTreeData::MergeTreeData( const String & database_, const String & table_, const String & full_path_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, @@ -121,6 +122,7 @@ MergeTreeData::MergeTreeData( data_parts_by_state_and_info(data_parts_indexes.get()) { setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_); + setConstraints(constraints_); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index fecddb28540..294588721ae 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -332,6 +332,7 @@ public: const String & full_path_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index b23a2eedc0e..b255f16c327 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -574,6 +574,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr sample_by_ast; ASTPtr ttl_table_ast; IndicesDescription indices_description; + ConstraintsDescription constraints_description; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -602,7 +603,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) indices_description.indices.push_back( std::dynamic_pointer_cast(index->clone())); - + if (args.query.columns_list && args.query.columns_list->constraints) + for (const auto & constraint : args.query.columns_list->constraints->children) + constraints_description.constraints.push_back( + std::dynamic_pointer_cast(constraint->clone())); storage_settings.loadFromQuery(*args.storage_def); } else @@ -639,14 +643,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, - args.columns, indices_description, + args.columns, indices_description, constraints_description, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, sample_by_ast, ttl_table_ast, merging_params, storage_settings, args.has_force_restore_data_flag); else return StorageMergeTree::create( args.data_path, args.database_name, args.table_name, args.columns, indices_description, - args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, + constraints_description, args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, 
sample_by_ast, ttl_table_ast, merging_params, storage_settings, args.has_force_restore_data_flag); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 8feb2d1fe81..12e2ffc77af 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -53,6 +53,7 @@ StorageMergeTree::StorageMergeTree( const String & table_name_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, bool attach, Context & context_, const String & date_column_name, @@ -66,7 +67,7 @@ StorageMergeTree::StorageMergeTree( bool has_force_restore_data_flag) : MergeTreeData(database_name_, table_name_, path_ + escapeForFileName(table_name_) + '/', - columns_, indices_, + columns_, indices_, constraints_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, sample_by_ast_, ttl_table_ast_, merging_params_, settings_, false, attach), diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index b5156ce7137..74e0d85bfb8 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -139,6 +139,7 @@ protected: const String & table_name_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, bool attach, Context & context_, const String & date_column_name, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index daa1b0d10e2..8d467418f12 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -199,6 +199,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const String & table_name_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, @@ -211,7 +212,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool has_force_restore_data_flag) : MergeTreeData(database_name_, table_name_, path_ + escapeForFileName(table_name_) + '/', - columns_, indices_, + columns_, indices_, constraints_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, sample_by_ast_, ttl_table_ast_, merging_params_, settings_, true, attach, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index eba0511e15e..cd7d043a54a 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -513,6 +513,7 @@ protected: const String & path_, const String & database_name_, const String & name_, const ColumnsDescription & columns_, const IndicesDescription & indices_, + const ConstraintsDescription & constraints_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, From 300ec160f40fffb48bf160fe82a11781819ba30e Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 08:27:00 +0300 Subject: [PATCH 009/181] Constraints MVP --- .../CheckConstraintsBlockOutputStream.cpp | 43 +++++++++++++++++ .../CheckConstraintsBlockOutputStream.h | 48 +++++++++++++++++++ .../Interpreters/InterpreterInsertQuery.cpp | 4 ++ dbms/src/Storages/ConstraintsDescription.h | 14 +++++- 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp create mode 100644 
dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp new file mode 100644 index 00000000000..99f9f9bc90d --- /dev/null +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -0,0 +1,43 @@ +#include + + +namespace DB +{ + +void CheckConstraintsBlockOutputStream::write(const Block & block) +{ + for (auto & constraint_expr: expressions) + if (!checkConstraintOnBlock(block, constraint_expr)) + throw Exception("Some constraints are not satisfied", ErrorCodes::QUERY_WAS_CANCELLED); + output->write(block); +} + +void CheckConstraintsBlockOutputStream::flush() +{ + output->flush(); +} + +void CheckConstraintsBlockOutputStream::writePrefix() +{ + output->writePrefix(); +} + +void CheckConstraintsBlockOutputStream::writeSuffix() +{ + output->writeSuffix(); +} + +bool CheckConstraintsBlockOutputStream::checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint) +{ + Block res = block; + constraint->execute(res); + assert(block.columns() == res.columns() - 1); + ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1); + size_t column_size = res_column.column->size(); + for (size_t i = 0; i < column_size; ++i) + if (!res_column.column->getBool(i)) + return false; + return true; +} + +} diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h new file mode 100644 index 00000000000..623eccc8172 --- /dev/null +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CONSTRAINTS_ARE_NOT_SATISFIED; +} + +class CheckConstraintsBlockOutputStream : public IBlockOutputStream +{ +public: + CheckConstraintsBlockOutputStream( + const BlockOutputStreamPtr & output_, + const Block & header_, + const ConstraintsDescription & constraints_, + const Context & context_) + : output(output_), + header(header_), + constraints(constraints_), + expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())), + context(context_) + { } + + Block getHeader() const override { return header; } + void write(const Block & block) override; + + void flush() override; + + void writePrefix() override; + void writeSuffix() override; + + bool checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint); + +private: + BlockOutputStreamPtr output; + Block header; + const ConstraintsDescription constraints; + const ConstraintsExpressions expressions; + const Context & context; +}; +} diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e4391f52247..fa6df1599ea 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,9 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared( out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); + out = std::make_shared( + out, query_sample_block, table->getConstraints(), context); + auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); out = std::move(out_wrapper); diff --git a/dbms/src/Storages/ConstraintsDescription.h 
b/dbms/src/Storages/ConstraintsDescription.h index c2954d94428..fbb0f5167fc 100644 --- a/dbms/src/Storages/ConstraintsDescription.h +++ b/dbms/src/Storages/ConstraintsDescription.h @@ -1,12 +1,13 @@ #pragma once #include - +#include namespace DB { using ConstraintsASTs = std::vector>; +using ConstraintsExpressions = std::vector; struct ConstraintsDescription { @@ -18,6 +19,17 @@ struct ConstraintsDescription String toString() const; static ConstraintsDescription parse(const String & str); + + ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const { + ConstraintsExpressions res; + res.reserve(constraints.size()); + for (const auto & constraint : constraints) { + ASTPtr expr = constraint->expr->clone(); + auto syntax_result = SyntaxAnalyzer(context).analyze(expr, source_columns_); + res.push_back(ExpressionAnalyzer(constraint->expr->clone(), syntax_result, context).getActions(false)); + } + return res; + } }; } From 60a5b94ba47ee4050dbb5d38822ceb2d15a479f2 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 08:44:31 +0300 Subject: [PATCH 010/181] Style fix --- dbms/src/Storages/ConstraintsDescription.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/ConstraintsDescription.h b/dbms/src/Storages/ConstraintsDescription.h index fbb0f5167fc..0f565379204 100644 --- a/dbms/src/Storages/ConstraintsDescription.h +++ b/dbms/src/Storages/ConstraintsDescription.h @@ -20,10 +20,12 @@ struct ConstraintsDescription static ConstraintsDescription parse(const String & str); - ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const { + ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const + { ConstraintsExpressions res; res.reserve(constraints.size()); - for (const auto & constraint : constraints) { + for (const auto & constraint : constraints) + { ASTPtr expr = constraint->expr->clone(); auto syntax_result = SyntaxAnalyzer(context).analyze(expr, source_columns_); res.push_back(ExpressionAnalyzer(constraint->expr->clone(), syntax_result, context).getActions(false)); From 19d099f90a147fc385721a525a39249bfb682304 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 09:03:18 +0300 Subject: [PATCH 011/181] Removed ITableDeclaration --- dbms/src/Storages/ITableDeclaration.cpp | 0 dbms/src/Storages/ITableDeclaration.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dbms/src/Storages/ITableDeclaration.cpp delete mode 100644 dbms/src/Storages/ITableDeclaration.h diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h deleted file mode 100644 index e69de29bb2d..00000000000 From 773849a43d9ab25df77c38e00de5d6b914c0b0f2 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 09:08:25 +0300 Subject: [PATCH 012/181] Added constraints getter and setter to IStorage --- dbms/src/Storages/IStorage.cpp | 10 ++++++++++ dbms/src/Storages/IStorage.h | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 06320cc1f30..7c19fd94aea 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -47,6 +47,16 @@ void IStorage::setIndices(IndicesDescription indices_) indices = 
std::move(indices_); } +const ConstraintsDescription & IStorage::getConstraints() const +{ + return constraints; +} + +void IStorage::setConstraints(ConstraintsDescription constraints_) +{ + constraints = std::move(constraints_); +} + NameAndTypePair IStorage::getColumn(const String & column_name) const { /// By default, we assume that there are no virtual columns in the storage. diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index f18592ebce5..b01244ba111 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,9 @@ public: /// thread-unsafe part. lockStructure must be acquired const IndicesDescription & getIndices() const; void setIndices(IndicesDescription indices_); + const ConstraintsDescription & getConstraints() const; + void setConstraints(ConstraintsDescription constraints_); + /// NOTE: these methods should include virtual columns, /// but should NOT include ALIAS columns (they are treated separately). virtual NameAndTypePair getColumn(const String & column_name) const; @@ -115,6 +119,7 @@ public: /// thread-unsafe part. lockStructure must be acquired private: ColumnsDescription columns; IndicesDescription indices; + ConstraintsDescription constraints; public: /// Acquire this lock if you need the table structure to remain constant during the execution of From d1492fc05d534034d1a0ed1322a9e86f1020cc62 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 10:17:06 +0300 Subject: [PATCH 013/181] Removed context from CheckConstraintsBlockOutputStream --- dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h | 7 ++----- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h index 623eccc8172..e1e15f8e454 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h @@ -19,13 +19,11 @@ public: CheckConstraintsBlockOutputStream( const BlockOutputStreamPtr & output_, const Block & header_, - const ConstraintsDescription & constraints_, - const Context & context_) + const ConstraintsDescription & constraints_) : output(output_), header(header_), constraints(constraints_), - expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())), - context(context_) + expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) { } Block getHeader() const override { return header; } @@ -43,6 +41,5 @@ private: Block header; const ConstraintsDescription constraints; const ConstraintsExpressions expressions; - const Context & context; }; } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index fa6df1599ea..d47dd978b3a 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -118,8 +118,7 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared( out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); - out = std::make_shared( - out, query_sample_block, table->getConstraints(), context); + out = std::make_shared(out, query_sample_block, table->getConstraints()); auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); From 
9e6625441c936a4fef236c34cefa0b0654645364 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 19 May 2019 10:19:44 +0300 Subject: [PATCH 014/181] Returned context to CheckConstraintsBlockOutputStream constructor --- dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h | 3 ++- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h index e1e15f8e454..16b240eb758 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h @@ -19,7 +19,8 @@ public: CheckConstraintsBlockOutputStream( const BlockOutputStreamPtr & output_, const Block & header_, - const ConstraintsDescription & constraints_) + const ConstraintsDescription & constraints_, + const Context & context_) : output(output_), header(header_), constraints(constraints_), diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index d47dd978b3a..fa6df1599ea 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -118,7 +118,8 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared( out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); - out = std::make_shared(out, query_sample_block, table->getConstraints()); + out = std::make_shared( + out, query_sample_block, table->getConstraints(), context); auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); From c926cf65627dfbbc724b8b469c254399e6fb8486 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 25 May 2019 17:07:45 +0300 Subject: [PATCH 015/181] Minor review fixes --- dbms/src/Parsers/ASTConstraintDeclaration.cpp | 28 +++++++++++++++++++ dbms/src/Parsers/ASTConstraintDeclaration.h | 25 ++--------------- dbms/src/Parsers/ParserCreateQuery.cpp | 6 ++++ dbms/src/Parsers/ParserCreateQuery.h | 3 -- dbms/src/Storages/ConstraintsDescription.cpp | 15 ++++++++++ dbms/src/Storages/ConstraintsDescription.h | 13 +-------- 6 files changed, 53 insertions(+), 37 deletions(-) create mode 100644 dbms/src/Parsers/ASTConstraintDeclaration.cpp diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.cpp b/dbms/src/Parsers/ASTConstraintDeclaration.cpp new file mode 100644 index 00000000000..834ac81891b --- /dev/null +++ b/dbms/src/Parsers/ASTConstraintDeclaration.cpp @@ -0,0 +1,28 @@ +#include + +namespace DB { + +ASTPtr ASTConstraintDeclaration::clone() const +{ + auto res = std::make_shared(); + + res->name = name; + + if (expr) + res->set(res->expr, expr->clone()); + + return res; +} + +void ASTConstraintDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + frame.need_parens = false; + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str; + s.ostr << backQuoteIfNeed(name); + s.ostr << (s.hilite ? hilite_keyword : "") << " CHECK " << (s.hilite ? 
hilite_none : ""); + expr->formatImpl(s, state, frame); +} + +} \ No newline at end of file diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.h b/dbms/src/Parsers/ASTConstraintDeclaration.h index d72358be498..3a8ad75f54b 100644 --- a/dbms/src/Parsers/ASTConstraintDeclaration.h +++ b/dbms/src/Parsers/ASTConstraintDeclaration.h @@ -11,31 +11,12 @@ class ASTConstraintDeclaration : public IAST { public: String name; - IAST *expr; + IAST * expr; String getID(char) const override { return "Constraint"; } - ASTPtr clone() const override - { - auto res = std::make_shared(); + ASTPtr clone() const override; - res->name = name; - - if (expr) - res->set(res->expr, expr->clone()); - - return res; - } - - void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override - { - frame.need_parens = false; - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - - s.ostr << s.nl_or_ws << indent_str; - s.ostr << backQuoteIfNeed(name); - s.ostr << (s.hilite ? hilite_keyword : "") << " CHECK " << (s.hilite ? hilite_none : ""); - expr->formatImpl(s, state, frame); - } + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; }; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 76150b95b07..c828cd0d780 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -203,6 +203,12 @@ bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & .parse(pos, node, expected); } +bool ParserConstraintDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, node, expected); +} + bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 42583d8dd19..bc921773605 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -246,9 +246,6 @@ protected: class ParserConstraintDeclaration : public IParserBase { -public: - ParserConstraintDeclaration() {} - protected: const char * getName() const override { return "constraint declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/dbms/src/Storages/ConstraintsDescription.cpp b/dbms/src/Storages/ConstraintsDescription.cpp index 042ee06ff59..ad0cd76733a 100644 --- a/dbms/src/Storages/ConstraintsDescription.cpp +++ b/dbms/src/Storages/ConstraintsDescription.cpp @@ -36,4 +36,19 @@ ConstraintsDescription ConstraintsDescription::parse(const String & str) return res; } +ConstraintsExpressions ConstraintsDescription::getExpressions(const DB::Context & context, + const DB::NamesAndTypesList & source_columns_) const +{ + ConstraintsExpressions res; + res.reserve(constraints.size()); + for (const auto & constraint : constraints) + { + // SyntaxAnalyzer::analyze has query as non-const argument so to avoid accidental query changes we clone it + ASTPtr expr = constraint->expr->clone(); + auto syntax_result = SyntaxAnalyzer(context).analyze(expr, source_columns_); + res.push_back(ExpressionAnalyzer(constraint->expr->clone(), syntax_result, context).getActions(false)); + } + return res; +} + } diff --git a/dbms/src/Storages/ConstraintsDescription.h b/dbms/src/Storages/ConstraintsDescription.h index 0f565379204..3ced0e8ddc9 100644 
--- a/dbms/src/Storages/ConstraintsDescription.h +++ b/dbms/src/Storages/ConstraintsDescription.h @@ -20,18 +20,7 @@ struct ConstraintsDescription static ConstraintsDescription parse(const String & str); - ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const - { - ConstraintsExpressions res; - res.reserve(constraints.size()); - for (const auto & constraint : constraints) - { - ASTPtr expr = constraint->expr->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(expr, source_columns_); - res.push_back(ExpressionAnalyzer(constraint->expr->clone(), syntax_result, context).getActions(false)); - } - return res; - } + ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const; }; } From 94db808cd7f73c9ad5ec3dfcd2f348061c38b8e3 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 25 May 2019 18:54:49 +0300 Subject: [PATCH 016/181] Basic test for constraints parsers and failures --- .../0_stateless/00951_basic_constraints.sh | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00951_basic_constraints.sh diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh new file mode 100644 index 00000000000..43bf274a82b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh @@ -0,0 +1,45 @@ +exec 2>&1 + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints +( + a UInt32, + b UInt32, + CONSTRAINT b_constraint CHECK b > 0 +) +ENGINE = MergeTree ORDER BY (a);" + +# This one must succeed +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" + +# This one must throw and exception +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" + +$CLICKHOUSE_CLIENT --query="DROP TABLE test_constraints;" + +# Test two constraints on one table +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints +( + a UInt32, + b UInt32, + CONSTRAINT b_constraint CHECK b > 10, + CONSTRAINT a_constraint CHECK a < 10 +) +ENGINE = MergeTree ORDER BY (a);" + +# This one must throw an exception +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" + +# This one must throw an exception +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" + +# This one must succeed +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (7, 18), (0, 11);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" + +$CLICKHOUSE_CLIENT --query="DROP TABLE test_constraints;" \ No newline at end of file From a3535c69b23aff266dcbdc8ed856024efff7fa69 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 25 May 2019 18:57:35 +0300 Subject: [PATCH 017/181] Minor style fix --- dbms/src/Parsers/ASTConstraintDeclaration.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.cpp b/dbms/src/Parsers/ASTConstraintDeclaration.cpp index 834ac81891b..a1b063fc44a 100644 --- a/dbms/src/Parsers/ASTConstraintDeclaration.cpp +++ b/dbms/src/Parsers/ASTConstraintDeclaration.cpp @@ -1,6 +1,7 @@ #include -namespace DB { 
+namespace DB +{ ASTPtr ASTConstraintDeclaration::clone() const { @@ -25,4 +26,4 @@ void ASTConstraintDeclaration::formatImpl(const FormatSettings & s, FormatState expr->formatImpl(s, state, frame); } -} \ No newline at end of file +} From c185f5741c116f6038979e56f220e8f2b632ab2a Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sat, 25 May 2019 21:13:43 +0300 Subject: [PATCH 018/181] Fixed test --- dbms/tests/queries/0_stateless/00951_basic_constraints.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh index 43bf274a82b..cf622293688 100644 --- a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh +++ b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + exec 2>&1 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;" From b0e3315ac7855bae85dbb8adc21b826e971f6ce0 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 26 May 2019 08:32:22 +0300 Subject: [PATCH 019/181] Added tests reference --- .../0_stateless/00951_basic_constraints.reference | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00951_basic_constraints.reference diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.reference b/dbms/tests/queries/0_stateless/00951_basic_constraints.reference new file mode 100644 index 00000000000..28bf65f8d48 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_basic_constraints.reference @@ -0,0 +1,10 @@ +1 2 +Received exception from server (version 19.8.1): +Code: 394. DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied. +1 2 +Received exception from server (version 19.8.1): +Code: 394. DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied. +Received exception from server (version 19.8.1): +Code: 394. DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied. +0 11 +7 18 From f87bb846586a848d5c24a793d8474783008f4cd8 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 26 May 2019 11:43:45 +0300 Subject: [PATCH 020/181] Added curdir to 00951 test --- dbms/tests/queries/0_stateless/00951_basic_constraints.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh index cf622293688..49bf6771ab4 100644 --- a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh +++ b/dbms/tests/queries/0_stateless/00951_basic_constraints.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + exec 2>&1 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;" From 94e7521beae08dafcc700bf8c9991f786a6b39ec Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 26 May 2019 12:37:35 +0300 Subject: [PATCH 021/181] Changed number of tests --- ...ic_constraints.reference => 00952_basic_constraints.reference} | 0 .../{00951_basic_constraints.sh => 00952_basic_constraints.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00951_basic_constraints.reference => 00952_basic_constraints.reference} (100%) rename dbms/tests/queries/0_stateless/{00951_basic_constraints.sh => 00952_basic_constraints.sh} (100%) diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.reference b/dbms/tests/queries/0_stateless/00952_basic_constraints.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00951_basic_constraints.reference rename to dbms/tests/queries/0_stateless/00952_basic_constraints.reference diff --git a/dbms/tests/queries/0_stateless/00951_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh similarity index 100% rename from dbms/tests/queries/0_stateless/00951_basic_constraints.sh rename to dbms/tests/queries/0_stateless/00952_basic_constraints.sh From af50b1ff5cef7bea4c35d25067c914174acc7b89 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 26 May 2019 14:08:37 +0300 Subject: [PATCH 022/181] Made 00952 test executable --- dbms/tests/queries/0_stateless/00952_basic_constraints.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 dbms/tests/queries/0_stateless/00952_basic_constraints.sh diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh old mode 100644 new mode 100755 From 1aaab0745969277b9e9fab643d10832a571ff469 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Mon, 27 May 2019 02:57:18 +0300 Subject: [PATCH 023/181] Fixed exception handling in 00952 test --- .../CheckConstraintsBlockOutputStream.cpp | 2 +- .../0_stateless/00952_basic_constraints.reference | 9 +++------ .../queries/0_stateless/00952_basic_constraints.sh | 14 +++++++++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 99f9f9bc90d..1cc271e5578 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -8,7 +8,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) { for (auto & constraint_expr: expressions) if (!checkConstraintOnBlock(block, constraint_expr)) - throw Exception("Some constraints are not satisfied", ErrorCodes::QUERY_WAS_CANCELLED); + throw Exception{"Some constraints are not satisfied", ErrorCodes::QUERY_WAS_CANCELLED}; output->write(block); } diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.reference b/dbms/tests/queries/0_stateless/00952_basic_constraints.reference index 28bf65f8d48..1bede18351d 100644 --- a/dbms/tests/queries/0_stateless/00952_basic_constraints.reference +++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.reference @@ -1,10 +1,7 @@ 1 2 -Received exception from server (version 19.8.1): -Code: 394. DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied. +Exception ok 1 2 -Received exception from server (version 19.8.1): -Code: 394. 
DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied.
-Received exception from server (version 19.8.1):
-Code: 394. DB::Exception: Received from localhost:9001, ::1. DB::Exception: Some constraints are not satisfied.
+Exception ok
+Exception ok
 0 11
 7 18
diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
index 49bf6771ab4..b214982da5e 100755
--- a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
+++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
@@ -3,8 +3,6 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
-exec 2>&1
-
 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;"
 
 $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
@@ -20,7 +18,9 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
-$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);"
+EXCEPTION_TEXT="Some constraints are not satisfied"
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);" 2>&1 \
+    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 $CLICKHOUSE_CLIENT --query="DROP TABLE test_constraints;"
@@ -36,11 +36,15 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
 ENGINE = MergeTree ORDER BY (a);"
 
 # This one must throw an exception
-$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);"
+EXCEPTION_TEXT="Some constraints are not satisfied"
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" 2>&1 \
+    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
-$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);"
+EXCEPTION_TEXT="Some constraints are not satisfied"
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);" 2>&1 \
+    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must succeed
From ff6cdaeb9846ae63ddba399e215809f0be641793 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Mon, 27 May 2019 09:30:18 +0300
Subject: [PATCH 024/181] Removed word "exception" from test reference

---
 .../queries/0_stateless/00952_basic_constraints.reference | 6 +++---
 dbms/tests/queries/0_stateless/00952_basic_constraints.sh | 8 +++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.reference b/dbms/tests/queries/0_stateless/00952_basic_constraints.reference
index 1bede18351d..4d98efd8939 100644
--- a/dbms/tests/queries/0_stateless/00952_basic_constraints.reference
+++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.reference
@@ -1,7 +1,7 @@
 1 2
-Exception ok
+ok
 1 2
-Exception ok
-Exception ok
+ok
+ok
 0 11
 7 18
diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
index b214982da5e..93fa16ce4af 100755
--- a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
+++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
@@ -3,6 +3,8 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
+EXCEPTION_SUCCESS_TEXT=ok
+
 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;"
 
 $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
@@ -20,7 +22,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 # This one must throw an exception
 EXCEPTION_TEXT="Some constraints are not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);" 2>&1 \
-    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
+    | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 $CLICKHOUSE_CLIENT --query="DROP TABLE test_constraints;"
@@ -38,13 +40,13 @@ ENGINE = MergeTree ORDER BY (a);"
 
 # This one must throw an exception
 EXCEPTION_TEXT="Some constraints are not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" 2>&1 \
-    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
+    | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
 EXCEPTION_TEXT="Some constraints are not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);" 2>&1 \
-    | grep -q "$EXCEPTION_TEXT" && echo "Exception ok" || echo "Did not throw an exception"
+    | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must succeed
From e7293486bd39103a25d4d51ca4e8e8597c1c11ce Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sun, 2 Jun 2019 17:41:12 +0300
Subject: [PATCH 025/181] Added ALTER TABLE support to constraints (ADD
 CONSTRAINT, DROP CONSTRAINT)

---
 dbms/src/Databases/DatabaseDictionary.cpp     |  1 +
 dbms/src/Databases/DatabaseDictionary.h       |  1 +
 dbms/src/Databases/DatabaseMemory.cpp         |  1 +
 dbms/src/Databases/DatabaseMemory.h           |  1 +
 dbms/src/Databases/DatabaseOrdinary.cpp       |  7 ++
 dbms/src/Databases/DatabaseOrdinary.h         |  1 +
 dbms/src/Databases/IDatabase.h                |  2 +
 dbms/src/Parsers/ASTAlterQuery.cpp            | 11 +++
 dbms/src/Parsers/ASTAlterQuery.h              | 13 ++-
 dbms/src/Parsers/ParserAlterQuery.cpp         | 25 ++++++
 dbms/src/Storages/AlterCommands.cpp           | 80 ++++++++++++++++++-
 dbms/src/Storages/AlterCommands.h             | 17 +++-
 dbms/src/Storages/IStorage.cpp                |  3 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 13 +--
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  3 +-
 .../ReplicatedMergeTreeTableMetadata.cpp      | 16 ++++
 .../ReplicatedMergeTreeTableMetadata.h        |  6 +-
 dbms/src/Storages/StorageBuffer.cpp           |  3 +-
 dbms/src/Storages/StorageDistributed.cpp      |  3 +-
 dbms/src/Storages/StorageMerge.cpp            |  3 +-
 dbms/src/Storages/StorageMergeTree.cpp        | 16 ++--
 dbms/src/Storages/StorageNull.cpp             |  3 +-
 .../Storages/StorageReplicatedMergeTree.cpp   | 14 +++-
 .../00953_constraints_operations.reference    |  4 +
 .../00953_constraints_operations.sh           | 40 ++++++++++
 25 files changed, 255 insertions(+), 32 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00953_constraints_operations.reference
 create mode 100755 dbms/tests/queries/0_stateless/00953_constraints_operations.sh

diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp
index 195dcea5287..8add0fa8911 100644
--- a/dbms/src/Databases/DatabaseDictionary.cpp
+++ 
b/dbms/src/Databases/DatabaseDictionary.cpp @@ -131,6 +131,7 @@ void DatabaseDictionary::alterTable( const String &, const ColumnsDescription &, const IndicesDescription &, + const ConstraintsDescription &, const ASTModifier &) { throw Exception("DatabaseDictionary: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 3bff84c36b8..d0ea33c6e4e 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -72,6 +72,7 @@ public: const String & name, const ColumnsDescription & columns, const IndicesDescription & indices, + const ConstraintsDescription & constraints, const ASTModifier & engine_modifier) override; time_t getTableMetadataModificationTime( diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 3eea0bc666a..c53309ca6c1 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -54,6 +54,7 @@ void DatabaseMemory::alterTable( const String &, const ColumnsDescription &, const IndicesDescription &, + const ConstraintsDescription &, const ASTModifier &) { throw Exception("DatabaseMemory: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index fe7cc783ba3..dc770373360 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -49,6 +49,7 @@ public: const String & name, const ColumnsDescription & columns, const IndicesDescription & indices, + const ConstraintsDescription & constraints, const ASTModifier & engine_modifier) override; time_t getTableMetadataModificationTime( diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 9fa7d1b1196..1ef67dfd9f4 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -516,6 +516,7 @@ void DatabaseOrdinary::alterTable( const String & table_name, const ColumnsDescription & columns, const IndicesDescription & indices, + const ConstraintsDescription & constraints, const ASTModifier & storage_modifier) { /// Read the definition of the table and replace the necessary parts with new ones. 
@@ -538,6 +539,7 @@ void DatabaseOrdinary::alterTable(
 
     ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns);
     ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices);
+    ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(constraints);
 
     ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
 
@@ -546,6 +548,11 @@ void DatabaseOrdinary::alterTable(
     else
         ast_create_query.columns_list->set(ast_create_query.columns_list->indices, new_indices);
 
+    if (ast_create_query.columns_list->constraints)
+        ast_create_query.columns_list->replace(ast_create_query.columns_list->constraints, new_constraints);
+    else
+        ast_create_query.columns_list->set(ast_create_query.columns_list->constraints, new_constraints);
+
     if (storage_modifier)
         storage_modifier(*ast_create_query.storage);
 
diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h
index 887bf101d62..2ed1a426d64 100644
--- a/dbms/src/Databases/DatabaseOrdinary.h
+++ b/dbms/src/Databases/DatabaseOrdinary.h
@@ -43,6 +43,7 @@ public:
         const String & name,
         const ColumnsDescription & columns,
         const IndicesDescription & indices,
+        const ConstraintsDescription & constraints,
         const ASTModifier & engine_modifier) override;
 
     time_t getTableMetadataModificationTime(
diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h
index d53de1dfcb5..37ee1e676e9 100644
--- a/dbms/src/Databases/IDatabase.h
+++ b/dbms/src/Databases/IDatabase.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -114,6 +115,7 @@ public:
         const String & name,
         const ColumnsDescription & columns,
         const IndicesDescription & indices,
+        const ConstraintsDescription & constraints,
         const ASTModifier & engine_modifier) = 0;
 
     /// Returns time of table's metadata change, 0 if there is no corresponding metadata file.
diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp
index e614f64d208..a85890d3cd0 100644
--- a/dbms/src/Parsers/ASTAlterQuery.cpp
+++ b/dbms/src/Parsers/ASTAlterQuery.cpp
@@ -105,6 +105,17 @@ void ASTAlterCommand::formatImpl(
             << "DROP INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
         index->formatImpl(settings, state, frame);
     }
+    else if (type == ASTAlterCommand::ADD_CONSTRAINT)
+    {
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
+        constraint_decl->formatImpl(settings, state, frame);
+    }
+    else if (type == ASTAlterCommand::DROP_CONSTRAINT)
+    {
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
+            << "DROP CONSTRAINT " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
+        constraint->formatImpl(settings, state, frame);
+    }
     else if (type == ASTAlterCommand::DROP_PARTITION)
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << " PARTITION "
diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h
index 2c4b3ddbaf1..2ca88f3145d 100644
--- a/dbms/src/Parsers/ASTAlterQuery.h
+++ b/dbms/src/Parsers/ASTAlterQuery.h
@@ -32,6 +32,9 @@ public:
         ADD_INDEX,
         DROP_INDEX,
 
+        ADD_CONSTRAINT,
+        DROP_CONSTRAINT,
+
         DROP_PARTITION,
         ATTACH_PARTITION,
         REPLACE_PARTITION,
@@ -69,7 +72,15 @@ public:
     /** The ADD INDEX query stores the name of the index following AFTER.
      *  The DROP INDEX query stores the name for deletion.
*/ - ASTPtr index; + ASTPtr index; + + /** The ADD CONSTRAINT query stores the ConstraintDeclaration there. + */ + ASTPtr constraint_decl; + + /** The DROP CONSTRAINT query stores the name for deletion. + */ + ASTPtr constraint; /** Used in DROP PARTITION and ATTACH PARTITION FROM queries. * The value or ID of the partition is stored here. diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 98891bbdf5f..e4220fda868 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -32,6 +32,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_add_index("ADD INDEX"); ParserKeyword s_drop_index("DROP INDEX"); + ParserKeyword s_add_constraint("ADD CONSTRAINT"); + ParserKeyword s_drop_constraint("DROP CONSTRAINT"); + ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION"); @@ -57,6 +60,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserStringLiteral parser_string_literal; ParserCompoundColumnDeclaration parser_col_decl; ParserIndexDeclaration parser_idx_decl; + ParserConstraintDeclaration parser_constraint_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false); ParserPartition parser_partition; ParserExpression parser_exp_elem; @@ -125,6 +129,27 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::DROP_INDEX; command->detach = false; } + else if (s_add_constraint.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected)) + return false; + + command->type = ASTAlterCommand::ADD_CONSTRAINT; + } + else if (s_drop_constraint.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->constraint, expected)) + return false; + + command->type = ASTAlterCommand::DROP_CONSTRAINT; + command->detach = false; + } else if (s_clear_column.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 88f3e909f49..22095ec4ae3 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -165,6 +166,32 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } + else if (command_ast->type == ASTAlterCommand::ADD_CONSTRAINT) + { + AlterCommand command; + command.constraint_decl = command_ast->constraint_decl; + command.type = AlterCommand::ADD_CONSTRAINT; + + const auto & ast_constraint_decl = command_ast->constraint_decl->as(); + + command.constraint_name = ast_constraint_decl.name; + + command.if_not_exists = command_ast->if_not_exists; + + return command; + } + else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT) + { + if (command_ast->clear_column) + throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. 
Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); + + AlterCommand command; + command.type = AlterCommand::DROP_CONSTRAINT; + command.constraint_name = command_ast->constraint->as().name; + command.if_exists = command_ast->if_exists; + + return command; + } else if (command_ast->type == ASTAlterCommand::MODIFY_TTL) { AlterCommand command; @@ -178,7 +205,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const + ConstraintsDescription & constraints_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const { if (type == ADD_COLUMN) { @@ -298,6 +326,44 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri indices_description.indices.erase(erase_it); } + else if (type == ADD_CONSTRAINT) + { + if (std::any_of( + constraints_description.constraints.cbegin(), + constraints_description.constraints.cend(), + [this](const ASTPtr & constraint_ast) + { + return constraint_ast->as().name == constraint_name; + })) + { + if (if_not_exists) + return; + throw Exception("Cannot add constraint " + constraint_name + ": constraint with this name already exists", + ErrorCodes::ILLEGAL_COLUMN); + } + + auto insert_it = constraints_description.constraints.end(); + + constraints_description.constraints.emplace(insert_it, std::dynamic_pointer_cast(constraint_decl)); + } + else if (type == DROP_CONSTRAINT) + { + auto erase_it = std::find_if( + constraints_description.constraints.begin(), + constraints_description.constraints.end(), + [this](const ASTPtr & constraint_ast) + { + return constraint_ast->as().name == constraint_name; + }); + + if (erase_it == constraints_description.constraints.end()) { + if (if_exists) + return; + throw Exception("Wrong constraint name. 
Cannot find constraint `" + constraint_name + "` to drop.", + ErrorCodes::LOGICAL_ERROR); + } + constraints_description.constraints.erase(erase_it); + } else if (type == MODIFY_TTL) { ttl_table_ast = ttl; @@ -317,20 +383,23 @@ bool AlterCommand::isMutable() const } void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const + ConstraintsDescription & constraints_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const { auto new_columns_description = columns_description; auto new_indices_description = indices_description; + auto new_constraints_description = constraints_description; auto new_order_by_ast = order_by_ast; auto new_primary_key_ast = primary_key_ast; auto new_ttl_table_ast = ttl_table_ast; for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + command.apply(new_columns_description, new_indices_description, new_constraints_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); columns_description = std::move(new_columns_description); indices_description = std::move(new_indices_description); + constraints_description = std::move(new_constraints_description); order_by_ast = std::move(new_order_by_ast); primary_key_ast = std::move(new_primary_key_ast); ttl_table_ast = std::move(new_ttl_table_ast); @@ -518,10 +587,11 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const { auto out_columns_description = columns_description; IndicesDescription indices_description; + ConstraintsDescription constraints_description; ASTPtr out_order_by; ASTPtr out_primary_key; ASTPtr out_ttl_table; - apply(out_columns_description, indices_description, out_order_by, out_primary_key, out_ttl_table); + apply(out_columns_description, indices_description, constraints_description, out_order_by, out_primary_key, out_ttl_table); if (out_order_by) throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED); @@ -529,6 +599,8 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED); if (!indices_description.indices.empty()) throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED); + if (!constraints_description.constraints.empty()) + throw Exception("Storage doesn't support modifying constraints", ErrorCodes::NOT_IMPLEMENTED); if (out_ttl_table) throw Exception("Storage doesn't support modifying TTL expression", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 4905b80f92f..1dfd46f9617 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -25,6 +26,8 @@ struct AlterCommand MODIFY_ORDER_BY, ADD_INDEX, DROP_INDEX, + ADD_CONSTRAINT, + DROP_CONSTRAINT, MODIFY_TTL, UKNOWN_TYPE, }; @@ -62,6 +65,12 @@ struct AlterCommand /// For ADD/DROP INDEX String index_name; + // For ADD CONSTRAINT + ASTPtr constraint_decl; + + // For ADD/DROP CONSTRAINT + String constraint_name; + /// For MODIFY TTL ASTPtr ttl; @@ -84,7 +93,8 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); void 
apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; + ConstraintsDescription & constraints_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; /// Checks that not only metadata touched by that command bool isMutable() const; @@ -95,8 +105,9 @@ class Context; class AlterCommands : public std::vector { public: - void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, - ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ConstraintsDescription & constraints_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; /// For storages that don't support MODIFY_ORDER_BY. void apply(ColumnsDescription & columns_description) const; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 7c19fd94aea..07f52749d2d 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -346,8 +346,9 @@ void IStorage::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, new_constraints, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 8427982efd7..144c6591ed9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -121,7 +121,7 @@ MergeTreeData::MergeTreeData( data_parts_by_info(data_parts_indexes.get()), data_parts_by_state_and_info(data_parts_indexes.get()) { - setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_); + setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_, constraints_); setConstraints(constraints_); /// NOTE: using the same columns list as is read when performing actual merges. @@ -255,7 +255,8 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam void MergeTreeData::setPrimaryKeyIndicesAndColumns( const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, - const ColumnsDescription & new_columns, const IndicesDescription & indices_description, bool only_check) + const ColumnsDescription & new_columns, const IndicesDescription & indices_description, + const ConstraintsDescription & constraints_description, bool only_check) { if (!new_order_by_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); @@ -425,6 +426,8 @@ void MergeTreeData::setPrimaryKeyIndicesAndColumns( setIndices(indices_description); skip_indices = std::move(new_indices); + setConstraints(constraints_description); + primary_key_and_skip_indices_expr = new_indices_with_primary_key_expr; sorting_key_and_skip_indices_expr = new_indices_with_sorting_key_expr; } @@ -1180,11 +1183,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c /// Check that needed transformations can be applied to the list of columns without considering type conversions. 
auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); - + commands.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); if (getIndices().empty() && !new_indices.empty() && !context.getSettingsRef().allow_experimental_data_skipping_indices) throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ @@ -1267,7 +1270,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c } setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, - new_columns, new_indices, /* only_check = */ true); + new_columns, new_indices, new_constraints, /* only_check = */ true); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast, /* only_check = */ true); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d589bb77013..bfcfbaba9a1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -778,7 +778,8 @@ protected: void setPrimaryKeyIndicesAndColumns(const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, const ColumnsDescription & new_columns, - const IndicesDescription & indices_description, bool only_check = false); + const IndicesDescription & indices_description, + const ConstraintsDescription & constraints_description, bool only_check = false); void initPartitionKey(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index b122785c5fd..8549264e2c2 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -46,6 +46,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); skip_indices = data.getIndices().toString(); + constraints = data.getConstraints().toString(); index_granularity_bytes = data.index_granularity_info.index_granularity_bytes; ttl_table = formattedAST(data.ttl_table_ast); } @@ -229,6 +230,21 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl ErrorCodes::METADATA_MISMATCH); } + if (constraints != from_zk.constraints) + { + if (allow_alter) + { + diff.constraints_changed = true; + diff.new_constraints = from_zk.constraints; + } + else + throw Exception( + "Existing table metadata in ZooKeeper differs in constraints." + " Stored in ZooKeeper: " + from_zk.constraints + + ", local: " + constraints, + ErrorCodes::METADATA_MISMATCH); + } + if (index_granularity_bytes != from_zk.index_granularity_bytes) throw Exception("Existing table metadata in ZooKeeper differs in index granularity bytes." 
" Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity_bytes) + diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 55cfdb1494d..b28a7306e33 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -26,6 +26,7 @@ struct ReplicatedMergeTreeTableMetadata String partition_key; String sorting_key; String skip_indices; + String constraints; UInt64 index_granularity_bytes; String ttl_table; @@ -46,10 +47,13 @@ struct ReplicatedMergeTreeTableMetadata bool skip_indices_changed = false; String new_skip_indices; + bool constraints_changed = false; + String new_constraints; + bool ttl_table_changed = false; String new_ttl_table; - bool empty() const { return !sorting_key_changed && !skip_indices_changed; } + bool empty() const { return !sorting_key_changed && !skip_indices_changed && !constraints_changed; } }; Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 24e1ecef2e3..5a4409600d8 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -701,8 +701,9 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_ auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, new_constraints, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 4440d2b96ee..42e769019d6 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -346,8 +346,9 @@ void StorageDistributed::alter( auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); + context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 1fbde64fd85..9f3331a033e 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -417,8 +417,9 @@ void StorageMerge::alter( auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, new_constraints, {}); setColumns(new_columns); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 01049bfbf76..10709cd8181 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -242,8 +242,9 @@ void StorageMergeTree::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); + auto 
new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, {}); setColumns(std::move(new_columns)); return; } @@ -252,15 +253,14 @@ void StorageMergeTree::alter( auto merge_blocker = merger_mutator.actions_blocker.cancel(); lockNewDataStructureExclusively(table_lock_holder, context.getCurrentQueryId()); - checkAlter(params, context); - auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); auto transactions = prepareAlterTransactions(new_columns, new_indices, context); @@ -279,11 +279,10 @@ void StorageMergeTree::alter( if (new_ttl_table_ast.get() != ttl_table_ast.get()) storage_ast.set(storage_ast.ttl_table, new_ttl_table_ast); }; - - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); /// Reinitialize primary key because primary key column types might have changed. - setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices); + setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); @@ -834,10 +833,11 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; ASTPtr ignored_ttl_table_ast; - alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); + alter_command.apply(new_columns, new_indices, new_constraints, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) diff --git a/dbms/src/Storages/StorageNull.cpp b/dbms/src/Storages/StorageNull.cpp index 1762c8372f5..73cb0243c11 100644 --- a/dbms/src/Storages/StorageNull.cpp +++ b/dbms/src/Storages/StorageNull.cpp @@ -38,8 +38,9 @@ void StorageNull::alter( ColumnsDescription new_columns = getColumns(); IndicesDescription new_indices = getIndices(); + ConstraintsDescription new_constraints = getConstraints(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index aaf8f18b65b..250a9dc96e8 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp 
@@ -422,6 +422,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_order_by_ast = order_by_ast; auto new_indices = getIndices(); + auto new_constraints = getConstraints(); ASTPtr new_ttl_table_ast = ttl_table_ast; IDatabase::ASTModifier storage_modifier; if (!metadata_diff.empty()) @@ -451,6 +452,9 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column if (metadata_diff.skip_indices_changed) new_indices = IndicesDescription::parse(metadata_diff.new_skip_indices); + if (metadata_diff.constraints_changed) + new_constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); + if (metadata_diff.ttl_table_changed) { ParserExpression parser; @@ -476,11 +480,11 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column }; } - global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, new_indices, storage_modifier); + global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, new_indices, new_constraints, storage_modifier); /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. - setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices); + setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); } @@ -1507,10 +1511,11 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & auto new_columns = getColumns(); auto new_indices = getIndices(); + auto new_constraints = getConstraints(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; ASTPtr ignored_ttl_table_ast; - alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); + alter_command.apply(new_columns, new_indices, new_constraints, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); size_t modified_parts = 0; auto parts = getDataParts(); @@ -3114,10 +3119,11 @@ void StorageReplicatedMergeTree::alter( ColumnsDescription new_columns = getColumns(); IndicesDescription new_indices = getIndices(); + ConstraintsDescription new_constraints = getConstraints(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); String new_columns_str = new_columns.toString(); if (new_columns_str != getColumns().toString()) diff --git a/dbms/tests/queries/0_stateless/00953_constraints_operations.reference b/dbms/tests/queries/0_stateless/00953_constraints_operations.reference new file mode 100644 index 00000000000..5713da9fef5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00953_constraints_operations.reference @@ -0,0 +1,4 @@ +1 2 +ok +1 2 +ok diff --git a/dbms/tests/queries/0_stateless/00953_constraints_operations.sh b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh new file mode 100755 index 00000000000..917719c3e46 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) 
+. $CURDIR/../shell_config.sh
+
+EXCEPTION_SUCCESS_TEXT=ok
+
+$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;"
+
+$CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
+(
+    a       UInt32,
+    b       UInt32,
+    CONSTRAINT b_constraint CHECK b > 0
+)
+ENGINE = MergeTree ORDER BY (a);"
+
+# This one must succeed
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);"
+$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
+
+# This one must throw an exception
+EXCEPTION_TEXT="Some constraints are not satisfied"
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 0);" 2>&1 \
+    | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
+$CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
+
+$CLICKHOUSE_CLIENT --query="ALTER TABLE test_constraints DROP CONSTRAINT b_constraint;"
+
+# This one must succeed now
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 0);"
+
+$CLICKHOUSE_CLIENT --query="ALTER TABLE test_constraints ADD CONSTRAINT b_constraint CHECK b > 10;"
+
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 10);" 2>&1 \
+    | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
+
+$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 11);"
+
+$CLICKHOUSE_CLIENT --query="DROP TABLE test_constraints;"
\ No newline at end of file
From 33f0ebd8ab83ef61990a66f7fb04d424f25cff5f Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sun, 2 Jun 2019 18:08:28 +0300
Subject: [PATCH 026/181] Brace style fix

---
 dbms/src/Storages/AlterCommands.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index 22095ec4ae3..0d20847727a 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -356,7 +356,8 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
                 return constraint_ast->as().name == constraint_name;
             });
 
-        if (erase_it == constraints_description.constraints.end()) {
+        if (erase_it == constraints_description.constraints.end())
+        {
             if (if_exists)
                 return;
             throw Exception("Wrong constraint name. Cannot find constraint `" + constraint_name + "` to drop.",
From b63623d0146965191cc5dcec728d25ec7d84ab46 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Wed, 5 Jun 2019 10:33:34 +0300
Subject: [PATCH 027/181] Extended constraint exception with constraint name
 and expression

---
 .../DataStreams/CheckConstraintsBlockOutputStream.cpp | 10 +++++++---
 .../queries/0_stateless/00952_basic_constraints.sh    |  8 ++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
index 1cc271e5578..da77e4a1c2e 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
@@ -1,14 +1,18 @@
 #include
-
+#include
 
 namespace DB
 {
 
 void CheckConstraintsBlockOutputStream::write(const Block & block)
 {
-    for (auto & constraint_expr: expressions)
+    for (size_t i = 0; i < expressions.size(); ++i)
+    {
+        auto constraint_expr = expressions[i];
         if (!checkConstraintOnBlock(block, constraint_expr))
-            throw Exception{"Some constraints are not satisfied", ErrorCodes::QUERY_WAS_CANCELLED};
+            throw Exception{"Constraint " + constraints.constraints[i]->name + " is not satisfied at, constraint expression: " +
+                            serializeAST(*(constraints.constraints[i]->expr), true), ErrorCodes::LOGICAL_ERROR};
+    }
     output->write(block);
 }
 
diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
index 93fa16ce4af..1d2a46dae61 100755
--- a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
+++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh
@@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
 EXCEPTION_SUCCESS_TEXT=ok
-
+$CLICKHOUSE_CLIENT --query="CREATE DATABASE IF NOT EXISTS test;"
 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_constraints;"
 
 $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
@@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
-EXCEPTION_TEXT="Some constraints are not satisfied"
+EXCEPTION_TEXT="Constraint b_constraint is not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);" 2>&1 \
     | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
@@ -38,13 +38,13 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints
 ENGINE = MergeTree ORDER BY (a);"
 
 # This one must throw an exception
-EXCEPTION_TEXT="Some constraints are not satisfied"
+EXCEPTION_TEXT="Constraint b_constraint is not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" 2>&1 \
     | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
-EXCEPTION_TEXT="Some constraints are not satisfied"
+EXCEPTION_TEXT="Constraint a_constraint is not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);" 2>&1 \
     | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must succeed
From f413b1e346270e732988826e84e548857dc8174e Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Wed, 5 Jun 2019 11:05:46 +0300
Subject: [PATCH 028/181] Implemented memoryIsByte, replaced memoryIsZero with
 it, implemented memory check in CheckConstraintsBlockOutputStream

---
 dbms/src/Columns/ColumnsCommon.cpp        | 43 ++++---------------
 dbms/src/Columns/ColumnsCommon.h          |  2 +-
 .../CheckConstraintsBlockOutputStream.cpp | 21 ++++++++-
 .../CheckConstraintsBlockOutputStream.h   |  2 +
 4 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp
index 6ad3d0907ab..0745a3d5b9f 100644
--- a/dbms/src/Columns/ColumnsCommon.cpp
+++ b/dbms/src/Columns/ColumnsCommon.cpp
@@ -61,43 +61,18 @@ std::vector countColumnsSizeInSelector(IColumn::ColumnIndex num_columns,
     return counts;
 }
 
-/** clang 4 generates better code than gcc 6.
-  * And both gcc and clang could not vectorize trivial loop by bytes automatically.
-  */
-bool memoryIsZero(const void * data, size_t size)
+bool memoryIsByte(const void * data, size_t size, uint8_t byte)
 {
-    const Int8 * pos = reinterpret_cast(data);
-    const Int8 * end = pos + size;
-
-#ifdef __SSE2__
-    const __m128 zero16 = _mm_setzero_ps();
-    const Int8 * end64 = pos + size / 64 * 64;
-
-    for (; pos < end64; pos += 64)
-        if (_mm_movemask_ps(_mm_cmpneq_ps(
-                _mm_loadu_ps(reinterpret_cast(pos)),
-                zero16))
-            | _mm_movemask_ps(_mm_cmpneq_ps(
-                _mm_loadu_ps(reinterpret_cast(pos + 16)),
-                zero16))
-            | _mm_movemask_ps(_mm_cmpneq_ps(
-                _mm_loadu_ps(reinterpret_cast(pos + 32)),
-                zero16))
-            | _mm_movemask_ps(_mm_cmpneq_ps(
-                _mm_loadu_ps(reinterpret_cast(pos + 48)),
-                zero16)))
-            return false;
-
-    /// TODO Add duff device for tail?
-#endif
-
-    for (; pos < end; ++pos)
-        if (*pos)
-            return false;
-
-    return true;
+    if (size == 0)
+        return true;
+    auto ptr = reinterpret_cast(data);
+    return *ptr == byte && memcmp(ptr, ptr + 1, size - 1) == 0;
 }
 
+bool memoryIsZero(const void * data, size_t size)
+{
+    return memoryIsByte(data, size, 0x0);
+}
 
 namespace ErrorCodes
 {
diff --git a/dbms/src/Columns/ColumnsCommon.h b/dbms/src/Columns/ColumnsCommon.h
index 0b14b76ad39..46c6c0e9df3 100644
--- a/dbms/src/Columns/ColumnsCommon.h
+++ b/dbms/src/Columns/ColumnsCommon.h
@@ -22,7 +22,7 @@ std::vector countColumnsSizeInSelector(IColumn::ColumnIndex num_columns,
 
 /// Returns true, if the memory contains only zeros.
 bool memoryIsZero(const void * data, size_t size);
-
+bool memoryIsByte(const void * data, size_t size, uint8_t byte);
 
 /// The general implementation of `filter` function for ColumnArray and ColumnString.
 template
 
diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
index da77e4a1c2e..ac432694d83 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
@@ -1,5 +1,8 @@
 #include
+#include
+#include
 #include
+#include
 
 namespace DB
 {
@@ -31,17 +34,33 @@ void CheckConstraintsBlockOutputStream::writeSuffix()
     output->writeSuffix();
 }
 
-bool CheckConstraintsBlockOutputStream::checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint)
+bool CheckConstraintsBlockOutputStream::checkImplMemory(const Block & block, const ExpressionActionsPtr & constraint)
+{
+    Block res = block;
+    constraint->execute(res);
+    assert(block.columns() == res.columns() - 1);
+    ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1);
+    auto res_column_uint8 = checkAndGetColumn(res_column.column.get());
+    return memoryIsByte(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize(), 0x1);
+}
+
+bool CheckConstraintsBlockOutputStream::checkImplBool(const Block & block, const ExpressionActionsPtr & constraint)
 {
     Block res = block;
     constraint->execute(res);
     assert(block.columns() == res.columns() - 1);
     ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1);
     size_t column_size = res_column.column->size();
+    // std::cerr << "Sizes of constraints: " << res_column.column->size() << ' ' << res_column.column->get << '\n';
     for (size_t i = 0; i < column_size; ++i)
         if (!res_column.column->getBool(i))
             return false;
     return true;
 }
 
+bool CheckConstraintsBlockOutputStream::checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint)
+{
+    return checkImplMemory(block, constraint);
+}
+
 }
diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
index 16b240eb758..6ea42cf44af 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
@@ -35,6 +35,8 @@ public:
     void writePrefix() override;
     void writeSuffix() override;
 
+    bool checkImplMemory(const Block & block, const ExpressionActionsPtr & constraint);
+    bool checkImplBool(const Block & block, const ExpressionActionsPtr & constraint);
    bool checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint);
 
 private:
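A note on the overlapping memcmp used in memoryIsByte above: if the first byte equals `byte`, and memcmp reports that bytes 0..size-2 equal bytes 1..size-1, then by induction every byte equals `byte`; memcmp only reads its arguments, so the overlap is safe. A minimal standalone sketch of the same idea (illustrative, not part of the patch):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Sketch: true if all `size` bytes starting at `data` equal `byte`.
    static bool memory_is_byte_sketch(const void * data, size_t size, uint8_t byte)
    {
        if (size == 0)
            return true;
        auto ptr = static_cast<const uint8_t *>(data);
        // ptr[0] == byte anchors the chain; the self-overlapping compare
        // checks ptr[i] == ptr[i + 1] for every i in [0, size - 2].
        return *ptr == byte && memcmp(ptr, ptr + 1, size - 1) == 0;
    }

    int main()
    {
        uint8_t buf[8] = {1, 1, 1, 1, 1, 1, 1, 1};
        assert(memory_is_byte_sketch(buf, sizeof(buf), 1));
        buf[3] = 0;
        assert(!memory_is_byte_sketch(buf, sizeof(buf), 1));
        return 0;
    }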
From bba3b33bdca89da53a8ac1707234b02959e8486c Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Wed, 5 Jun 2019 15:17:53 +0300
Subject: [PATCH 029/181] Fixed exception text in 00953_constraints_operations

---
 dbms/tests/queries/0_stateless/00953_constraints_operations.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/queries/0_stateless/00953_constraints_operations.sh b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh
index 917719c3e46..f0fc5b71fbf 100755
--- a/dbms/tests/queries/0_stateless/00953_constraints_operations.sh
+++ b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh
@@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
 # This one must throw an exception
-EXCEPTION_TEXT="Some constraints are not satisfied"
+EXCEPTION_TEXT="Constraint b_constraint is not satisfied"
 $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 0);" 2>&1 \
     | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not throw an exception"
 $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;"
 
From 375e4640867c0318b47fbab3149a35a53d0d46bd Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Thu, 6 Jun 2019 00:25:48 +0300
Subject: [PATCH 030/181] Fixed exception text

---
 dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
index ac432694d83..75067b6afa7 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
@@ -13,7 +13,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
     {
         auto constraint_expr = expressions[i];
         if (!checkConstraintOnBlock(block, constraint_expr))
-            throw Exception{"Constraint " + constraints.constraints[i]->name + " is not satisfied at, constraint expression: " +
+            throw Exception{"Constraint " + constraints.constraints[i]->name + " is not satisfied, constraint expression: " +
                             serializeAST(*(constraints.constraints[i]->expr), true), ErrorCodes::LOGICAL_ERROR};
     }
     output->write(block);
From 76772d1de095ff3c41174a0a914d544162d60c02 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Thu, 6 Jun 2019 01:25:57 +0300
Subject: [PATCH 031/181] Documentation on constraints (RU, EN)

---
 docs/en/query_language/alter.md       | 16 ++++++++++++++++
 docs/en/query_language/create.md      | 20 ++++++++++++++++++++
 docs/en/query_language/insert_into.md |  3 +++
 docs/ru/query_language/alter.md       | 16 ++++++++++++++++
 docs/ru/query_language/create.md      | 20 ++++++++++++++++++++
 docs/ru/query_language/insert_into.md |  4 ++++
 6 files changed, 79 insertions(+)

diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index 85941987ce9..fc42fc636e7 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -166,6 +166,22 @@ are available:
 These commands are lightweight in a sense that they only change metadata or remove files.
 Also, they are replicated (syncing indices metadata through ZooKeeper).
 
+### Manipulations with constraints
+
+See more on [constraints](create.md#constraints).
+
+Constraints can be added or deleted using the following syntax:
+```
+ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
+ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
+```
+
+These queries only add or remove metadata describing the constraints, so they are processed immediately.
+
+A newly added constraint *is not checked* against the data that is already in the table. If that check matters, we recommend creating a new table with the constraint and filling it with an `INSERT SELECT` query.
+
+For replicated tables, constraint changes are broadcast through ZooKeeper and applied on the other replicas.
 ### Manipulations With Partitions and Parts {#alter_manipulations-with-partitions}
 
 The following operations with [partitions](../operations/table_engines/custom_partitioning_key.md) are available:
diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md
index 9ebd50839d3..573388195e3 100644
--- a/docs/en/query_language/create.md
+++ b/docs/en/query_language/create.md
@@ -80,6 +80,26 @@ If you add a new column to a table but later change its default expression, the
 
 It is not possible to set default values for elements in nested data structures.
 
+### Constraints {#constraints}
+
+WARNING: This feature is experimental. Correct behavior is guaranteed only for MergeTree-family engines.
+
+Constraints can be defined along with the column descriptions:
+
+```sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1],
+    ...
+    CONSTRAINT constraint_name_1 CHECK boolean_expr_1,
+    ...
+) ENGINE = engine
+```
+
+`boolean_expr_1` can be any boolean expression. If constraints are defined for the table, each of them is checked for every row of an `INSERT` query. If any constraint is not satisfied, the server raises an exception with the constraint name and the checked expression.
+
+Adding a large number of constraints can negatively affect the performance of big `INSERT` queries.
+
 ### TTL expression
 
 Can be specified only for MergeTree-family tables. An expression for setting storage time for values. It must depend on a `Date` or `DateTime` column and have one `Date` or `DateTime` column as a result. Example:
diff --git a/docs/en/query_language/insert_into.md b/docs/en/query_language/insert_into.md
index 914c3b2917f..c0cb9f8c3b1 100644
--- a/docs/en/query_language/insert_into.md
+++ b/docs/en/query_language/insert_into.md
@@ -40,6 +40,9 @@ INSERT INTO t FORMAT TabSeparated
 
 You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section "[Interfaces](../interfaces/index.md#interfaces)".
 
+### Constraints
+
+If a table has [constraints](create.md#constraints), their expressions are checked for each row of inserted data. If any of those constraints is not satisfied, the server raises an exception containing the constraint name and expression, and the query is stopped.
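+
+For example, a minimal sketch (the table, column and constraint names are illustrative):
+
+``` sql
+CREATE TABLE t
+(
+    a UInt32,
+    b UInt32,
+    CONSTRAINT b_is_positive CHECK b > 0
+) ENGINE = MergeTree ORDER BY (a);
+
+INSERT INTO t VALUES (1, 0); -- raises: Constraint b_is_positive is not satisfied, constraint expression: b > 0
+```
+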
+
+### Ограничения (constraints) {#constraints}
+
+WARNING: Находится в экспериментальном режиме, поддержано в MergeTree (работоспособность на других типах движков таблиц не гарантируется).
+
+Наряду с объявлением столбцов можно объявить ограничения на значения в столбцах таблицы:
+
+```sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1],
+    ...
+    CONSTRAINT constraint_name_1 CHECK boolean_expr_1,
+    ...
+) ENGINE = engine
+```
+
+`boolean_expr_1` может быть любым булевым выражением, состоящим из операторов сравнения или функций. При наличии одного или нескольких ограничений в момент вставки данных выражения ограничений будут проверяться на истинность для каждой вставляемой строки данных. В случае, если в теле INSERT запроса придут некорректные данные, клиенту будет выброшено исключение с именем нарушенного ограничения.
+
+Добавление большого числа ограничений может негативно повлиять на производительность объёмных `INSERT` запросов.
+
 ### Выражение для TTL
 
 Может быть указано только для таблиц семейства MergeTree. Выражение для указания времени хранения значений. Оно должно зависеть от стобца типа `Date` или `DateTime` и в качестве результата вычислять столбец типа `Date` или `DateTime`. Пример:
diff --git a/docs/ru/query_language/insert_into.md b/docs/ru/query_language/insert_into.md
index 356b720e157..454339ebcdb 100644
--- a/docs/ru/query_language/insert_into.md
+++ b/docs/ru/query_language/insert_into.md
@@ -40,6 +40,10 @@ INSERT INTO t FORMAT TabSeparated
 
 С помощью консольного клиента или HTTP интерфейса можно вставлять данные отдельно от запроса. Как это сделать, читайте в разделе "[Интерфейсы](../interfaces/index.md#interfaces)".
 
+### Ограничения (constraints)
+
+Если в таблице объявлены [ограничения](create.md#constraints), то их выполнимость будет проверена для каждой вставляемой строки. Если хотя бы для одной строки ограничения не будут выполнены, запрос будет остановлен.
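+
+Минимальный пример (таблица и имена в нём гипотетические и приведены исключительно для иллюстрации):
+
+```sql
+CREATE TABLE t (x UInt64, CONSTRAINT x_is_positive CHECK x > 0) ENGINE = MergeTree ORDER BY x;
+
+INSERT INTO t VALUES (1); -- вставка выполнится успешно
+INSERT INTO t VALUES (0); -- сервер выбросит исключение, и запрос будет остановлен
+```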
+ ### Вставка результатов `SELECT` {#insert_query_insert-select} ``` sql From bb78012cf91e21d7394d001eba6480918ab4fb01 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 30 Jun 2019 12:09:06 +0200 Subject: [PATCH 032/181] Removed unused method and assertions --- .../CheckConstraintsBlockOutputStream.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 75067b6afa7..ec4a7bd45b8 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -38,26 +38,11 @@ bool CheckConstraintsBlockOutputStream::checkImplMemory(const Block & block, con { Block res = block; constraint->execute(res); - assert(block.columns() == res.columns() - 1); ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1); auto res_column_uint8 = checkAndGetColumn(res_column.column.get()); return memoryIsByte(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize(), 0x1); } -bool CheckConstraintsBlockOutputStream::checkImplBool(const Block & block, const ExpressionActionsPtr & constraint) -{ - Block res = block; - constraint->execute(res); - assert(block.columns() == res.columns() - 1); - ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1); - size_t column_size = res_column.column->size(); - // std::cerr << "Sizes of constraints: " << res_column.column->size() << ' ' << res_column.column->get << '\n'; - for (size_t i = 0; i < column_size; ++i) - if (!res_column.column->getBool(i)) - return false; - return true; -} - bool CheckConstraintsBlockOutputStream::checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint) { return checkImplMemory(block, constraint); From ea4d42c3aca78eb6d114ea3703042649cc0db110 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 30 Jun 2019 12:09:58 +0200 Subject: [PATCH 033/181] Merged DROP_(COLUMN|INDEX|CONSTRAINT) AST parsing into one block --- dbms/src/Storages/AlterCommands.cpp | 39 ++++++++++------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 80c01a0028e..1ce70723238 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -74,17 +74,6 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } - else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition) - { - if (command_ast->clear_column) - throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); - - AlterCommand command; - command.type = AlterCommand::DROP_COLUMN; - command.column_name = *getIdentifierName(command_ast->column); - command.if_exists = command_ast->if_exists; - return command; - } else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) { AlterCommand command; @@ -154,18 +143,6 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } - else if (command_ast->type == ASTAlterCommand::DROP_INDEX) - { - if (command_ast->clear_column) - throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. 
Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); - - AlterCommand command; - command.type = AlterCommand::DROP_INDEX; - command.index_name = command_ast->index->as().name; - command.if_exists = command_ast->if_exists; - - return command; - } else if (command_ast->type == ASTAlterCommand::ADD_CONSTRAINT) { AlterCommand command; @@ -180,15 +157,25 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } - else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT) + else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT + || command_ast->type == ASTAlterCommand::DROP_INDEX + || (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition)) { if (command_ast->clear_column) throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); AlterCommand command; - command.type = AlterCommand::DROP_CONSTRAINT; - command.constraint_name = command_ast->constraint->as().name; command.if_exists = command_ast->if_exists; + if (command_ast->type == ASTAlterCommand::DROP_INDEX) { + command.type = AlterCommand::DROP_INDEX; + command.index_name = command_ast->index->as().name; + } else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT) { + command.type = AlterCommand::DROP_CONSTRAINT; + command.constraint_name = command_ast->constraint->as().name; + } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN) { + command.type = AlterCommand::DROP_COLUMN; + command.column_name = *getIdentifierName(command_ast->column); + } return command; } From 06e92e14b828c1457410d04aa8682acbda11d182 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 5 Jul 2019 10:10:15 +0300 Subject: [PATCH 034/181] alterTable fix in MySQL --- dbms/src/Databases/DatabaseMySQL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 7ce836d6a64..01a8c1df0d2 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -71,7 +71,7 @@ public: throw Exception("MySQL database engine does not support create table.", ErrorCodes::NOT_IMPLEMENTED); } - void alterTable(const Context &, const String &, const ColumnsDescription &, const IndicesDescription &, const ASTModifier &) override + void alterTable(const Context &, const String &, const ColumnsDescription &, const IndicesDescription &, const ConstraintsDescription &, const ASTModifier &) override { throw Exception("MySQL database engine does not support alter table.", ErrorCodes::NOT_IMPLEMENTED); } From 430400c3c5206c383a1e0870f00a77f5923147a3 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Fri, 5 Jul 2019 10:16:34 +0300 Subject: [PATCH 035/181] Style fix --- dbms/src/Storages/AlterCommands.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 8b3713ae31f..c90d5ca2c25 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -166,13 +166,16 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.if_exists = command_ast->if_exists; - if (command_ast->type == ASTAlterCommand::DROP_INDEX) { + if (command_ast->type == ASTAlterCommand::DROP_INDEX) + { command.type = AlterCommand::DROP_INDEX; command.index_name = command_ast->index->as().name; - } else if (command_ast->type == 
ASTAlterCommand::DROP_CONSTRAINT) {
+    } else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT)
+    {
         command.type = AlterCommand::DROP_CONSTRAINT;
         command.constraint_name = command_ast->constraint->as().name;
-    } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN) {
+    } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN)
+    {
         command.type = AlterCommand::DROP_COLUMN;
         command.column_name = *getIdentifierName(command_ast->column);
     }

From 3757aa9d020046d0a825a27546b43066ced2ffec Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sat, 6 Jul 2019 23:30:48 +0300
Subject: [PATCH 036/181] More style fix

---
 dbms/src/Storages/AlterCommands.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index c90d5ca2c25..b250452c683 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -170,11 +170,13 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_
         {
             command.type = AlterCommand::DROP_INDEX;
             command.index_name = command_ast->index->as().name;
-        } else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT)
+        }
+        else if (command_ast->type == ASTAlterCommand::DROP_CONSTRAINT)
         {
             command.type = AlterCommand::DROP_CONSTRAINT;
             command.constraint_name = command_ast->constraint->as().name;
-        } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN)
+        }
+        else if (command_ast->type == ASTAlterCommand::DROP_COLUMN)
         {
             command.type = AlterCommand::DROP_COLUMN;
             command.column_name = *getIdentifierName(command_ast->column);

From c1d91222d50e368529e0c6675b4a6a6bb6681103 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sat, 13 Jul 2019 13:42:52 +0300
Subject: [PATCH 037/181] Renamed setPrimaryKeyIndicesAndColumns to setProperties

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp    | 6 +++---
 dbms/src/Storages/MergeTree/MergeTreeData.h      | 2 +-
 dbms/src/Storages/StorageMergeTree.cpp           | 2 +-
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 3c30e7ce778..c2aa0a61855 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -121,7 +121,7 @@ MergeTreeData::MergeTreeData(
     data_parts_by_info(data_parts_indexes.get()),
     data_parts_by_state_and_info(data_parts_indexes.get())
 {
-    setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_, constraints_);
+    setProperties(order_by_ast_, primary_key_ast_, columns_, indices_, constraints_);
     setConstraints(constraints_);
 
     /// NOTE: using the same columns list as is read when performing actual merges.
@@ -232,7 +232,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam } -void MergeTreeData::setPrimaryKeyIndicesAndColumns( +void MergeTreeData::setProperties( const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, const ColumnsDescription & new_columns, const IndicesDescription & indices_description, const ConstraintsDescription & constraints_description, bool only_check) @@ -1262,7 +1262,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c } } - setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, + setProperties(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints, /* only_check = */ true); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast, /* only_check = */ true); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 7572e9f2856..d4848462df0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -757,7 +757,7 @@ protected: /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; - void setPrimaryKeyIndicesAndColumns(const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, + void setProperties(const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, const ColumnsDescription & new_columns, const IndicesDescription & indices_description, const ConstraintsDescription & constraints_description, bool only_check = false); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 7cf8facb5f2..b7adc8c5bff 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -282,7 +282,7 @@ void StorageMergeTree::alter( context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); /// Reinitialize primary key because primary key column types might have changed. - setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); + setProperties(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index f1f4a4049f8..ae0ccfaaf1e 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -484,7 +484,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. - setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); + setProperties(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); } From 1feb20d9e01b17876e768c8db2672f9d28e4eac9 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 23 Jul 2019 11:01:08 +0300 Subject: [PATCH 038/181] DOCAPI-7460: The histogram function docs. 
--- .../agg_functions/parametric_functions.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index c6a9694ed0c..cefc9e6777f 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -2,6 +2,42 @@ Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. +## histogram + +Calculates a histogram. + +``` +histogram(number_of_bins)(values) +``` + +**Parameters** + +`number_of_bins` — Number of bins for the histogram. +`values` — [Expression](../syntax.md#expressions) resulting in a data sample. + +**Returned values** + +- [Array](../../data_types/array.md) of [Tuples](../../data_types/tuple.md) of the following format: + + ``` + [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] + ``` + + - `lower` — Lower bound of the bin. + - `upper` — Upper bound of the bin. + - `height` — Calculated height of the bin. + +**Example** + +```sql +SELECT histogram(5)(number + 1) FROM (SELECT * FROM system.numbers LIMIT 20) +``` +```text +┌─histogram(5)(plus(number, 1))───────────────────────────────────────────┐ +│ [(1,4.5,4),(4.5,8.5,4),(8.5,12.75,4.125),(12.75,17,4.625),(17,20,3.25)] │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + ## sequenceMatch(pattern)(time, cond1, cond2, ...) Pattern matching for event chains. From 221ab6a04f32b3be40cf80c123b8b67d5609fd01 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 23 Jul 2019 11:18:09 +0300 Subject: [PATCH 039/181] DOCAPI-7460: Link fix. --- docs/en/query_language/agg_functions/parametric_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index cefc9e6777f..da6052545dc 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -13,7 +13,7 @@ histogram(number_of_bins)(values) **Parameters** `number_of_bins` — Number of bins for the histogram. -`values` — [Expression](../syntax.md#expressions) resulting in a data sample. +`values` — [Expression](../syntax.md#syntax-expressions) resulting in a data sample. 
**Returned values** From 3c2172b750989b72d9290615aeb669a6b90dc096 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 22 Jul 2019 14:23:11 +0300 Subject: [PATCH 040/181] parse and interpret query --- dbms/src/Core/Settings.h | 3 ++- dbms/src/Interpreters/InterpreterAlterQuery.cpp | 7 +++++++ dbms/src/Parsers/ASTAlterQuery.cpp | 6 ++++++ dbms/src/Parsers/ASTAlterQuery.h | 3 ++- dbms/src/Parsers/ParserAlterQuery.cpp | 17 +++++++++++++++++ dbms/src/Storages/PartitionCommands.cpp | 11 +++++++++++ dbms/src/Storages/PartitionCommands.h | 3 ++- dbms/src/Storages/StorageMergeTree.cpp | 4 ++++ .../src/Storages/StorageReplicatedMergeTree.cpp | 4 ++++ 9 files changed, 55 insertions(+), 3 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index b1182cae9bf..4bb76039cab 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -336,7 +336,8 @@ struct Settings : public SettingsCollection \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ - M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") + M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \ + M(SettingBool, allow_drop_detached_part, false, "Allow ALTER TABLE ... DROP DETACHED PART ... queries") DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 8751ff067b1..074fbb7d4c2 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; + extern const int SUPPORT_IS_DISABLED; } @@ -53,7 +54,13 @@ BlockIO InterpreterAlterQuery::execute() if (auto alter_command = AlterCommand::parse(command_ast)) alter_commands.emplace_back(std::move(*alter_command)); else if (auto partition_command = PartitionCommand::parse(command_ast)) + { + if (partition_command->type == PartitionCommand::DROP_DETACHED_PARTITION + && !context.getSettingsRef().allow_drop_detached_part) + throw DB::Exception("Cannot execute query: DROP DETACHED PART is disabled " + "(see allow_drop_detached setting)", ErrorCodes::SUPPORT_IS_DISABLED); partition_commands.emplace_back(std::move(*partition_command)); + } else if (auto mut_command = MutationCommand::parse(command_ast)) mutation_commands.emplace_back(std::move(*mut_command)); else diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index c7cd100b415..6d87156a19b 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -118,6 +118,12 @@ void ASTAlterCommand::formatImpl( << (settings.hilite ? hilite_none : ""); partition->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::DROP_DETACHED_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP DETACHED" << (part ? " PART " : " PARTITION ") + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::ATTACH_PARTITION) { settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str << "ATTACH " diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 2c4b3ddbaf1..d6a54812960 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -33,6 +33,7 @@ public: DROP_INDEX, DROP_PARTITION, + DROP_DETACHED_PARTITION, ATTACH_PARTITION, REPLACE_PARTITION, FETCH_PARTITION, @@ -90,7 +91,7 @@ public: bool detach = false; /// true for DETACH PARTITION - bool part = false; /// true for ATTACH PART + bool part = false; /// true for ATTACH PART and DROP DETACHED PART bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 98891bbdf5f..75c6f6291a8 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -35,6 +35,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION"); + ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION"); + ParserKeyword s_drop_detached_part("DROP DETACHED PART"); ParserKeyword s_attach_part("ATTACH PART"); ParserKeyword s_fetch_partition("FETCH PARTITION"); ParserKeyword s_replace_partition("REPLACE PARTITION"); @@ -87,6 +89,21 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::DROP_PARTITION; } + else if (s_drop_detached_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; + } + else if (s_drop_detached_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; + command->part = true; + } else if (s_drop_column.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index f6aaee4c70e..bab3f6ced24 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -23,6 +23,17 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.detach = command_ast->detach; return res; } + else if (command_ast->type == ASTAlterCommand::DROP_DETACHED_PARTITION) + { + if (!command_ast->part) // TODO + throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + + PartitionCommand res; + res.type = DROP_DETACHED_PARTITION; + res.partition = command_ast->partition; + res.part = command_ast->part; + return res; + } else if (command_ast->type == ASTAlterCommand::ATTACH_PARTITION) { PartitionCommand res; diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h index 1f66c3f0c30..cb71a02548c 100644 --- a/dbms/src/Storages/PartitionCommands.h +++ b/dbms/src/Storages/PartitionCommands.h @@ -21,6 +21,7 @@ struct PartitionCommand ATTACH_PARTITION, CLEAR_COLUMN, DROP_PARTITION, + DROP_DETACHED_PARTITION, FETCH_PARTITION, FREEZE_ALL_PARTITIONS, FREEZE_PARTITION, @@ -35,7 +36,7 @@ struct PartitionCommand /// true for DETACH PARTITION. 
bool detach = false; - /// true for ATTACH PART (and false for PARTITION) + /// true for ATTACH PART and DROP DETACHED PART (and false for PARTITION) bool part = false; /// For ATTACH PARTITION partition FROM db.table diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index d021866487c..6e527c0c6c1 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -920,6 +920,10 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma dropPartition(command.partition, command.detach, context); break; + case PartitionCommand::DROP_DETACHED_PARTITION: + // TODO + throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + case PartitionCommand::ATTACH_PARTITION: attachPartition(command.partition, command.part, context); break; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b51da168192..5f91c304e98 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3348,6 +3348,10 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part dropPartition(query, command.partition, command.detach, query_context); break; + case PartitionCommand::DROP_DETACHED_PARTITION: + // TODO + throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + case PartitionCommand::ATTACH_PARTITION: attachPartition(command.partition, command.part, query_context); break; From ad787938f5fa247901da7003c5717fed6a838445 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 Jul 2019 22:43:33 +0300 Subject: [PATCH 041/181] better detached part name parsing --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 17 +------------- .../Storages/MergeTree/MergeTreePartInfo.cpp | 22 +++++++++++++++++++ .../Storages/MergeTree/MergeTreePartInfo.h | 6 +++++ .../System/StorageSystemDetachedParts.cpp | 1 + 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b32470f9f77..6a7b6d5405e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2579,22 +2579,7 @@ MergeTreeData::getDetachedParts() const res.emplace_back(); auto & part = res.back(); - /// First, try to parse as . - if (MergeTreePartInfo::tryParsePartName(dir_name, &part, format_version)) - continue; - - /// Next, as _. Use entire name as prefix if it fails. 
- part.prefix = dir_name; - const auto first_separator = dir_name.find_first_of('_'); - if (first_separator == String::npos) - continue; - - const auto part_name = dir_name.substr(first_separator + 1, - dir_name.size() - first_separator - 1); - if (!MergeTreePartInfo::tryParsePartName(part_name, &part, format_version)) - continue; - - part.prefix = dir_name.substr(0, first_separator); + DetachedPartInfo::tryParseDetachedPartName(dir_name, &part, format_version); } return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 19f77448110..732cc3436f4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -188,4 +188,26 @@ String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) con return wb.str(); } +bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, + MergeTreeDataFormatVersion format_version) +{ + /// First, try to parse as . + if (MergeTreePartInfo::tryParsePartName(dir_name, part_info, format_version)) + return part_info->valid_name = true; + + /// Next, as _. Use entire name as prefix if it fails. + part_info->prefix = dir_name; + const auto first_separator = dir_name.find_first_of('_'); + if (first_separator == String::npos) + return part_info->valid_name = false; + + // TODO what if contains '_'? + const auto part_name = dir_name.substr(first_separator + 1, + dir_name.size() - first_separator - 1); + if (!MergeTreePartInfo::tryParsePartName(part_name, part_info, format_version)) + return part_info->valid_name = false; + + part_info->prefix = dir_name.substr(0, first_separator); + return part_info->valid_name = true; +} } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index e80664c3dd9..2a168086a1c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -93,6 +93,12 @@ struct MergeTreePartInfo struct DetachedPartInfo : public MergeTreePartInfo { String prefix; + + /// If false, prefix contains full directory name and MergeTreePartInfo may be in invalid state + /// (directory name was not successfully parsed). 
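+    /// For example, a plain part name such as "0_1_1_0" parses successfully and leaves prefix empty,
+    /// while a directory name such as "any_other_name", which cannot be parsed at all, is kept
+    /// entirely in prefix with valid_name set to false.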
+ bool valid_name; + + static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, MergeTreeDataFormatVersion format_version); }; } diff --git a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp index 9ae6f7b607a..9f33a60b84a 100644 --- a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp @@ -28,6 +28,7 @@ public: protected: explicit StorageSystemDetachedParts() { + // TODO add column "directory_name" or "is_valid_name" setColumns(ColumnsDescription{{ {"database", std::make_shared()}, {"table", std::make_shared()}, From 6e4aabbb1a4074d1a979190b063ffbe959894ebc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 Jul 2019 23:18:18 +0300 Subject: [PATCH 042/181] draft for StorageMergeTree --- dbms/src/Storages/StorageMergeTree.cpp | 33 ++++++++++++++++++++++++-- dbms/src/Storages/StorageMergeTree.h | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 6e527c0c6c1..bd09588981b 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -921,8 +921,8 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma break; case PartitionCommand::DROP_DETACHED_PARTITION: - // TODO - throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + dropDetached(command.partition, command.part, context); + break; case PartitionCommand::ATTACH_PARTITION: attachPartition(command.partition, command.part, context); @@ -993,6 +993,34 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons } +void StorageMergeTree::dropDetached(const ASTPtr & partition, bool part, const Context & /*context*/) +{ + if (!part) // TODO + throw DB::Exception("DROP DETACHED PARTITION is not implemented, use DROP DETACHED PART", ErrorCodes::NOT_IMPLEMENTED); + + String part_id = partition->as().value.safeGet(); + Poco::Path part_path(part_id); + const bool file_zero_depth = part_path.isFile() && part_path.depth() == 0 && part_path.getFileName() != ".."; + const bool dir_zero_depth = part_path.isDirectory() && part_path.depth() == 1 && part_path.directory(0) != ".."; + const bool zero_depth = file_zero_depth || dir_zero_depth; + if (!part_path.isRelative() || !zero_depth) + throw DB::Exception("Part name must contain exactly one path component: name of detached part", ErrorCodes::INCORRECT_FILE_NAME); + + part_id = part_path.isFile() ? 
part_path.getFileName() : part_path.directory(0); + Poco::Path base_dir(full_path + "detached"); + Poco::File detached_part_dir(Poco::Path(base_dir, part_id)); + if (!detached_part_dir.exists()) + throw DB::Exception("Detached part \"" + part_id + "\" not found" , ErrorCodes::INCORRECT_FILE_NAME); + + DetachedPartInfo info; + DetachedPartInfo::tryParseDetachedPartName(part_id, &info, format_version); + MergeTreeDataPart detached_part(*this, part_id, info); + detached_part.relative_path = "detached/" + part_id; + + // TODO make sure it's ok + detached_part.remove(); +} + void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { // TODO: should get some locks to prevent race with 'alter … modify column' @@ -1039,6 +1067,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par LOG_DEBUG(log, "Checking data"); MutableDataPartPtr part = loadPartAndFixMetadata(source_path); + // TODO fix race with DROP DETACHED PARTITION LOG_INFO(log, "Attaching part " << source_part_name << " from " << source_path); renameTempPartAndAdd(part, &increment); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0de9618d915..fa2561e4ab2 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -120,6 +120,7 @@ private: // Partition helpers void dropPartition(const ASTPtr & partition, bool detach, const Context & context); + void dropDetached(const ASTPtr & partition, bool part, const Context & context); void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); void attachPartition(const ASTPtr & partition, bool part, const Context & context); void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); From 856c8ef0e8dedf6a0166673c2a1b292b4648c655 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 24 Jul 2019 18:53:41 +0300 Subject: [PATCH 043/181] test for bug in ATTACH PART --- .../0_stateless/00974_attach_active_part.reference | 5 +++++ .../0_stateless/00974_attach_active_part.sh | 14 ++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00974_attach_active_part.reference create mode 100755 dbms/tests/queries/0_stateless/00974_attach_active_part.sh diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.reference b/dbms/tests/queries/0_stateless/00974_attach_active_part.reference new file mode 100644 index 00000000000..3a90499810c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00974_attach_active_part.reference @@ -0,0 +1,5 @@ +0_1_1_0 +1_2_2_0 +2_3_3_0 +3_4_4_0 +16 \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.sh b/dbms/tests/queries/0_stateless/00974_attach_active_part.sh new file mode 100755 index 00000000000..a3b2505f197 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00974_attach_active_part.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_bug"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_bug (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n"; +$CLICKHOUSE_CLIENT --query="INSERT INTO attach_bug SELECT number FROM system.numbers LIMIT 16"; +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_bug ATTACH PART '../1_2_2_0'" 2> /dev/null; # | grep "" +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_bug' ORDER BY name FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SElECT count() FROM attach_bug FORMAT TSV"; # will fail +$CLICKHOUSE_CLIENT --query="DROP TABLE attach_bug"; + + From 1805ab5736ec6c934af98dbda8c50a00fb6bb165 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2019 13:46:07 +0300 Subject: [PATCH 044/181] attach --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + .../Storages/MergeTree/MergeTreePartInfo.cpp | 9 +++ .../Storages/MergeTree/MergeTreePartInfo.h | 2 + dbms/src/Storages/StorageMergeTree.cpp | 63 +++++++++++++------ dbms/src/Storages/StorageMergeTree.h | 1 + 6 files changed, 62 insertions(+), 22 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6a7b6d5405e..94e42c34d0f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2336,6 +2336,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St { MutableDataPartPtr part = std::make_shared(*this, Poco::Path(relative_path).getFileName()); part->relative_path = relative_path; + loadPartAndFixMetadata(part); + return part; +} + +void MergeTreeData::loadPartAndFixMetadata(MutableDataPartPtr part) +{ String full_part_path = part->getFullPath(); /// Earlier the list of columns was written incorrectly. Delete it and re-create. @@ -2357,8 +2363,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St Poco::File(full_part_path + "checksums.txt.tmp").renameTo(full_part_path + "checksums.txt"); } - - return part; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 29962382749..cec3651652b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -533,6 +533,7 @@ public: /// Check that the part is not broken and calculate the checksums for it if they are not present. MutableDataPartPtr loadPartAndFixMetadata(const String & relative_path); + void loadPartAndFixMetadata(MutableDataPartPtr part); /** Create local backup (snapshot) for parts with specified prefix. 
* Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 732cc3436f4..45a0e1d488c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -210,4 +210,13 @@ bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, Detache part_info->prefix = dir_name.substr(0, first_separator); return part_info->valid_name = true; } + +String DetachedPartInfo::fullDirName() const +{ + if (!valid_name) + return prefix; + if (prefix.empty()) + return getPartName(); + return prefix + "_" + fullDirName(); +} } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index 2a168086a1c..7d0fb446ee3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -98,6 +98,8 @@ struct DetachedPartInfo : public MergeTreePartInfo /// (directory name was not successfully parsed). bool valid_name; + String fullDirName() const; + static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, MergeTreeDataFormatVersion format_version); }; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index bd09588981b..2f437e6d46b 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -36,6 +36,7 @@ namespace ErrorCodes extern const int INCORRECT_FILE_NAME; extern const int CANNOT_ASSIGN_OPTIMIZE; extern const int INCOMPATIBLE_COLUMNS; + extern const int BAD_DATA_PART_NAME; } namespace ActionLocks @@ -999,25 +1000,13 @@ void StorageMergeTree::dropDetached(const ASTPtr & partition, bool part, const C throw DB::Exception("DROP DETACHED PARTITION is not implemented, use DROP DETACHED PART", ErrorCodes::NOT_IMPLEMENTED); String part_id = partition->as().value.safeGet(); - Poco::Path part_path(part_id); - const bool file_zero_depth = part_path.isFile() && part_path.depth() == 0 && part_path.getFileName() != ".."; - const bool dir_zero_depth = part_path.isDirectory() && part_path.depth() == 1 && part_path.directory(0) != ".."; - const bool zero_depth = file_zero_depth || dir_zero_depth; - if (!part_path.isRelative() || !zero_depth) - throw DB::Exception("Part name must contain exactly one path component: name of detached part", ErrorCodes::INCORRECT_FILE_NAME); - - part_id = part_path.isFile() ? 
part_path.getFileName() : part_path.directory(0); - Poco::Path base_dir(full_path + "detached"); - Poco::File detached_part_dir(Poco::Path(base_dir, part_id)); - if (!detached_part_dir.exists()) - throw DB::Exception("Detached part \"" + part_id + "\" not found" , ErrorCodes::INCORRECT_FILE_NAME); + validateDetachedPartName(part_id); DetachedPartInfo info; DetachedPartInfo::tryParseDetachedPartName(part_id, &info, format_version); MergeTreeDataPart detached_part(*this, part_id, info); detached_part.relative_path = "detached/" + part_id; - // TODO make sure it's ok detached_part.remove(); } @@ -1038,6 +1027,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par Strings parts; if (attach_part) { + validateDetachedPartName(partition_id); parts.push_back(partition_id); } else @@ -1048,6 +1038,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par { const String & name = it.name(); MergeTreePartInfo part_info; + /// Parts with prefix in name (e.g. attaching_1_3_3_0, delete_tmp_1_3_3_0) will be ignored if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version) || part_info.partition_id != partition_id) { @@ -1062,16 +1053,38 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par for (const auto & source_part_name : parts) { - String source_path = source_dir + source_part_name; + MutableDataPartPtr part; + try + { + part = std::make_shared(*this, source_part_name); + part->relative_path = "detached/" + source_part_name; + part->renameTo("detached/attaching_" + source_part_name, false); - LOG_DEBUG(log, "Checking data"); - MutableDataPartPtr part = loadPartAndFixMetadata(source_path); + LOG_DEBUG(log, "Checking data in " << part->relative_path); + loadPartAndFixMetadata(part); - // TODO fix race with DROP DETACHED PARTITION - LOG_INFO(log, "Attaching part " << source_part_name << " from " << source_path); - renameTempPartAndAdd(part, &increment); + LOG_INFO(log, "Attaching part " << source_part_name << " from " << part->relative_path); + renameTempPartAndAdd(part, &increment); - LOG_INFO(log, "Finished attaching part"); + LOG_INFO(log, "Finished attaching part"); + } + catch (...) + { + tryLogCurrentException(log, String(__PRETTY_FUNCTION__) + ": cannot attach part " + source_part_name); + + if (part->relative_path == "detached/attaching_" + source_part_name) + { + try + { + part->renameTo("detached/" + source_part_name, false); + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } + } + + } } /// New parts with other data may appear in place of deleted parts. @@ -1150,6 +1163,16 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con } } +void StorageMergeTree::validateDetachedPartName(const String & name) const +{ + if (name.find('/') != std::string::npos || name == "." 
|| name == "..") + throw DB::Exception("Invalid part name", ErrorCodes::INCORRECT_FILE_NAME); + + Poco::File detached_part_dir(full_path + "detached/" + name); + if (!detached_part_dir.exists()) + throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME); +} + ActionLock StorageMergeTree::getActionLock(StorageActionBlockType action_type) { if (action_type == ActionLocks::PartsMerge) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index fa2561e4ab2..42061894a8e 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -124,6 +124,7 @@ private: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); void attachPartition(const ASTPtr & partition, bool part, const Context & context); void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); + void validateDetachedPartName(const String & name) const; friend class MergeTreeBlockOutputStream; friend class MergeTreeData; From 6ac950c6dc710fed7e1daf0a4bbdb09d3c0feb86 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2019 14:09:01 +0300 Subject: [PATCH 045/181] test attach active part fails --- .../queries/0_stateless/00974_attach_active_part.reference | 3 ++- dbms/tests/queries/0_stateless/00974_attach_active_part.sh | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.reference b/dbms/tests/queries/0_stateless/00974_attach_active_part.reference index 3a90499810c..fc0fce0a541 100644 --- a/dbms/tests/queries/0_stateless/00974_attach_active_part.reference +++ b/dbms/tests/queries/0_stateless/00974_attach_active_part.reference @@ -1,5 +1,6 @@ +OK 0_1_1_0 1_2_2_0 2_3_3_0 3_4_4_0 -16 \ No newline at end of file +16 diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.sh b/dbms/tests/queries/0_stateless/00974_attach_active_part.sh index a3b2505f197..32e2b21608f 100755 --- a/dbms/tests/queries/0_stateless/00974_attach_active_part.sh +++ b/dbms/tests/queries/0_stateless/00974_attach_active_part.sh @@ -6,9 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_bug"; $CLICKHOUSE_CLIENT --query="CREATE TABLE attach_bug (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n"; $CLICKHOUSE_CLIENT --query="INSERT INTO attach_bug SELECT number FROM system.numbers LIMIT 16"; -$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_bug ATTACH PART '../1_2_2_0'" 2> /dev/null; # | grep "" +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_bug ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' $CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_bug' ORDER BY name FORMAT TSV"; -$CLICKHOUSE_CLIENT --query="SElECT count() FROM attach_bug FORMAT TSV"; # will fail +$CLICKHOUSE_CLIENT --query="SElECT count() FROM attach_bug FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE attach_bug"; - - From e4212bfe593f3e28b42603c4dfeffcb2f60702ce Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2019 14:57:16 +0300 Subject: [PATCH 046/181] add full part name to detached_parts --- dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp | 2 +- dbms/src/Storages/System/StorageSystemDetachedParts.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 45a0e1d488c..de7150e4cea 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -217,6 +217,6 @@ String DetachedPartInfo::fullDirName() const return prefix; if (prefix.empty()) return getPartName(); - return prefix + "_" + fullDirName(); + return prefix + "_" + getPartName(); } } diff --git a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp index 9f33a60b84a..9b32f1fb29b 100644 --- a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp @@ -28,7 +28,6 @@ public: protected: explicit StorageSystemDetachedParts() { - // TODO add column "directory_name" or "is_valid_name" setColumns(ColumnsDescription{{ {"database", std::make_shared()}, {"table", std::make_shared()}, @@ -37,7 +36,8 @@ protected: {"reason", std::make_shared()}, {"min_block_number", std::make_shared()}, {"max_block_number", std::make_shared()}, - {"level", std::make_shared()} + {"level", std::make_shared()}, + {"directory_name", std::make_shared()} }}); } @@ -63,12 +63,13 @@ protected: int i = 0; columns[i++]->insert(info.database); columns[i++]->insert(info.table); - columns[i++]->insert(p.partition_id); - columns[i++]->insert(p.getPartName()); + columns[i++]->insert(p.valid_name ? p.partition_id : ""); + columns[i++]->insert(p.valid_name ? p.getPartName() : ""); columns[i++]->insert(p.prefix); columns[i++]->insert(p.min_block); columns[i++]->insert(p.max_block); columns[i++]->insert(p.level); + columns[i++]->insert(p.fullDirName()); } } From 3ba26aba43e30aa92fed80fb6f842dc3eb064ef1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2019 19:28:08 +0300 Subject: [PATCH 047/181] tests --- .../Storages/MergeTree/MergeTreeDataPart.cpp | 9 +++- .../Storages/MergeTree/MergeTreeDataPart.h | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 6 +-- .../00974_attach_active_part.reference | 6 --- .../0_stateless/00974_attach_active_part.sh | 12 ------ .../00974_attach_invalid_parts.reference | 17 ++++++++ .../0_stateless/00974_attach_invalid_parts.sh | 41 +++++++++++++++++++ .../0_stateless/00975_drop_detached.reference | 2 + .../0_stateless/00975_drop_detached.sh | 30 ++++++++++++++ 9 files changed, 102 insertions(+), 23 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00974_attach_active_part.reference delete mode 100755 dbms/tests/queries/0_stateless/00974_attach_active_part.sh create mode 100644 dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference create mode 100755 dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh create mode 100644 dbms/tests/queries/0_stateless/00975_drop_detached.reference create mode 100755 dbms/tests/queries/0_stateless/00975_drop_detached.sh diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 7b8be970e1d..865aaf80ed1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -347,7 +347,7 @@ UInt64 MergeTreeDataPart::calculateTotalSizeOnDisk(const String & from) return res; } -void MergeTreeDataPart::remove() const +void MergeTreeDataPart::remove(bool force_recursive /*= false*/) const { if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. 
This is bug.", ErrorCodes::LOGICAL_ERROR); @@ -398,6 +398,13 @@ void MergeTreeDataPart::remove() const return; } + if (force_recursive) + { + /// Part is not loaded (we don't know which files are there), so remove dir recursively. + to_dir.remove(true); + return; + } + try { /// Remove each expected file in directory, then remove directory itself. diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index f41ea8af424..98af00c071a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -241,7 +241,7 @@ struct MergeTreeDataPart /// Calculate the total size of the entire directory with all the files static UInt64 calculateTotalSizeOnDisk(const String & from); - void remove() const; + void remove(bool force_recursive = false) const; /// Makes checks and move part to new directory /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 2f437e6d46b..0c1503347c0 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -1007,7 +1007,7 @@ void StorageMergeTree::dropDetached(const ASTPtr & partition, bool part, const C MergeTreeDataPart detached_part(*this, part_id, info); detached_part.relative_path = "detached/" + part_id; - detached_part.remove(); + detached_part.remove(true); } void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) @@ -1070,9 +1070,9 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par } catch (...) { - tryLogCurrentException(log, String(__PRETTY_FUNCTION__) + ": cannot attach part " + source_part_name); + LOG_INFO(log, "Cannot attach part " << source_part_name << " :" << getCurrentExceptionMessage(false)); - if (part->relative_path == "detached/attaching_" + source_part_name) + if (part && part->relative_path == "detached/attaching_" + source_part_name) { try { diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.reference b/dbms/tests/queries/0_stateless/00974_attach_active_part.reference deleted file mode 100644 index fc0fce0a541..00000000000 --- a/dbms/tests/queries/0_stateless/00974_attach_active_part.reference +++ /dev/null @@ -1,6 +0,0 @@ -OK -0_1_1_0 -1_2_2_0 -2_3_3_0 -3_4_4_0 -16 diff --git a/dbms/tests/queries/0_stateless/00974_attach_active_part.sh b/dbms/tests/queries/0_stateless/00974_attach_active_part.sh deleted file mode 100755 index 32e2b21608f..00000000000 --- a/dbms/tests/queries/0_stateless/00974_attach_active_part.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_bug"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_bug (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n"; -$CLICKHOUSE_CLIENT --query="INSERT INTO attach_bug SELECT number FROM system.numbers LIMIT 16"; -$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_bug ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' -$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_bug' ORDER BY name FORMAT TSV"; -$CLICKHOUSE_CLIENT --query="SElECT count() FROM attach_bug FORMAT TSV"; -$CLICKHOUSE_CLIENT --query="DROP TABLE attach_bug"; diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference new file mode 100644 index 00000000000..d44f46779ca --- /dev/null +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference @@ -0,0 +1,17 @@ +=== cannot attach active === +OK +0_1_1_0 +1_2_2_0 +2_3_3_0 +3_4_4_0 +16 120 +=== attach all valid parts === +0_5_5_0 +0_6_6_0 +1_2_2_0 +1_4_4_0 +16 120 +=== detached === +0_5_5_0 +delete_tmp_0_7_7 +attaching_0_6_6 diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh new file mode 100755 index 00000000000..89a6be183d2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +ch_dir=`${CLICKHOUSE_EXTRACT_CONFIG} -k path` +cur_db=`${CLICKHOUSE_CLIENT} --query "SELECT currentDatabase()"` + +echo '=== cannot attach active ==='; +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_active"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n"; +$CLICKHOUSE_CLIENT --query="INSERT INTO attach_active SELECT number FROM system.numbers LIMIT 16"; +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_active ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_active' AND database='${cur_db}' ORDER BY name FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_active FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="DROP TABLE attach_active"; + + + +echo '=== attach all valid parts ==='; +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES"; +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_partitions"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_partitions (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n"; +$CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"; +$CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; + +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions DETACH PARTITION 0"; +mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part +cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ +cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/delete_tmp_0_7_7_0/ +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0"; + +$CLICKHOUSE_CLIENT 
--query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_partitions FORMAT TSV"; +echo '=== detached ==='; +$CLICKHOUSE_CLIENT --query="SELECT directory_name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' FORMAT TSV"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE attach_partitions"; +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.reference b/dbms/tests/queries/0_stateless/00975_drop_detached.reference new file mode 100644 index 00000000000..40732c908ab --- /dev/null +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.reference @@ -0,0 +1,2 @@ +OK +0_3_3_0 diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.sh b/dbms/tests/queries/0_stateless/00975_drop_detached.sh new file mode 100755 index 00000000000..9f831560bdc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +ch_dir=`${CLICKHOUSE_EXTRACT_CONFIG} -k path` +cur_db=`${CLICKHOUSE_CLIENT} --query "SELECT currentDatabase()"` + +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES"; +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS drop_detached"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n"; +$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"; +$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; + +$CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 0"; +mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ +mkdir $ch_dir/data/$cur_db/drop_detached/detached/delete_tmp_0_7_7_0/ +mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ + +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '0_1_1_0'" +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'delete_tmp_0_7_7_0'" +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'any_other_name'" + +$CLICKHOUSE_CLIENT --query="SElECT directory_name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="DROP TABLE drop_detached"; +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; From 8f4883b0d2d84f324c530068116dabc6f5c05146 Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Sun, 28 Jul 2019 15:33:40 +0300 Subject: [PATCH 048/181] Better constraint exception --- .../CheckConstraintsBlockOutputStream.cpp | 46 +++++++++++++++---- .../CheckConstraintsBlockOutputStream.h | 16 ++++--- .../Interpreters/InterpreterInsertQuery.cpp | 2 +- .../0_stateless/00952_basic_constraints.sh | 14 +++--- .../00953_constraints_operations.sh | 2 +- 5 files changed, 56 insertions(+), 24 deletions(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp 
b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index ec4a7bd45b8..cb9b8871a68 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -12,11 +12,23 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) for (size_t i = 0; i < expressions.size(); ++i) { auto constraint_expr = expressions[i]; - if (!checkConstraintOnBlock(block, constraint_expr)) - throw Exception{"Constraint " + constraints.constraints[i]->name + " is not satisfied, constraint expression: " + - serializeAST(*(constraints.constraints[i]->expr), true), ErrorCodes::LOGICAL_ERROR}; + auto res_column_uint8 = executeOnBlock(block, constraint_expr); + if (!memoryIsByte(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize(), 0x1)) + { + auto indices_wrong = findAllWrong(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize()); + std::string indices_str = "{"; + for (size_t j = 0; j < indices_wrong.size(); ++j) { + indices_str += std::to_string(indices_wrong[j]); + indices_str += (j != indices_wrong.size() - 1) ? ", " : "}"; + } + + throw Exception{"Violated constraint " + constraints.constraints[i]->name + + " in table " + table + " at indices " + indices_str + ", constraint expression: " + + serializeAST(*(constraints.constraints[i]->expr), true), ErrorCodes::LOGICAL_ERROR}; + } } output->write(block); + rows_written += block.rows(); } void CheckConstraintsBlockOutputStream::flush() @@ -34,18 +46,34 @@ void CheckConstraintsBlockOutputStream::writeSuffix() output->writeSuffix(); } -bool CheckConstraintsBlockOutputStream::checkImplMemory(const Block & block, const ExpressionActionsPtr & constraint) +const ColumnUInt8 *CheckConstraintsBlockOutputStream::executeOnBlock( + const Block & block, + const ExpressionActionsPtr & constraint) { Block res = block; + constraint->execute(res); ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1); - auto res_column_uint8 = checkAndGetColumn<ColumnUInt8>(res_column.column.get()); - return memoryIsByte(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize(), 0x1); + return checkAndGetColumn<ColumnUInt8>(res_column.column.get()); } -bool CheckConstraintsBlockOutputStream::checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint) +std::vector<size_t> CheckConstraintsBlockOutputStream::findAllWrong(const void *data, size_t size) { - return checkImplMemory(block, constraint); -} + std::vector<size_t> res; + if (size == 0) + return res; + + auto ptr = reinterpret_cast<const UInt8 *>(data); + + for (size_t i = 0; i < size; ++i) + { + if (*(ptr + i) == 0x0) + { + res.push_back(i); + } + } + + return res; +} }
rows_written(0) { } Block getHeader() const override { return header; } @@ -35,14 +38,15 @@ public: void writePrefix() override; void writeSuffix() override; - bool checkImplMemory(const Block & block, const ExpressionActionsPtr & constraint); - bool checkImplBool(const Block & block, const ExpressionActionsPtr & constraint); - bool checkConstraintOnBlock(const Block & block, const ExpressionActionsPtr & constraint); - private: + const ColumnUInt8* executeOnBlock(const Block & block, const ExpressionActionsPtr & constraint); + std::vector<size_t> findAllWrong(const void *data, size_t size); + + String table; BlockOutputStreamPtr output; Block header; const ConstraintsDescription constraints; const ConstraintsExpressions expressions; + size_t rows_written; }; } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 9c0cc31cb8e..8454df97f08 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -119,7 +119,7 @@ BlockIO InterpreterInsertQuery::execute() out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); out = std::make_shared<CheckConstraintsBlockOutputStream>( - out, query_sample_block, table->getConstraints(), context); + query.table, out, query_sample_block, table->getConstraints(), context); auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out); out_wrapper->setProcessListElement(context.getProcessListElement()); diff --git a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh index 1d2a46dae61..b6aa28c46bf 100755 --- a/dbms/tests/queries/0_stateless/00952_basic_constraints.sh +++ b/dbms/tests/queries/0_stateless/00952_basic_constraints.sh @@ -20,8 +20,8 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" # This one must throw and exception -EXCEPTION_TEXT="Constraint b_constraint is not satisfied" +EXCEPTION_TEXT="Violated constraint b_constraint in table test_constraints at indices {1, 3}" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0);" 2>&1 \ +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (3, 4), (1, 0), (3, 4), (6, 0);" 2>&1 \ | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not thrown an exception" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" @@ -32,20 +32,20 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE test_constraints ( a UInt32, b UInt32, - CONSTRAINT b_constraint CHECK b > 10, - CONSTRAINT a_constraint CHECK a < 10 + CONSTRAINT a_constraint CHECK a < 10, + CONSTRAINT b_constraint CHECK b > 10 ) ENGINE = MergeTree ORDER BY (a);" # This one must throw an exception -EXCEPTION_TEXT="Constraint b_constraint is not satisfied" +EXCEPTION_TEXT="Violated constraint b_constraint in table test_constraints at indices {0}" $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" 2>&1 \ | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not thrown an exception" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" # This one must throw an exception -EXCEPTION_TEXT="Constraint a_constraint is not satisfied" +EXCEPTION_TEXT="Violated constraint a_constraint in table test_constraints at indices {1}" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11);" 2>&1 \ +$CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (5, 16), (10, 11), (9, 11), (8, 12);" 2>&1 \ |
grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not thrown an exception" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" diff --git a/dbms/tests/queries/0_stateless/00953_constraints_operations.sh b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh index f0fc5b71fbf..8a563a21e02 100755 --- a/dbms/tests/queries/0_stateless/00953_constraints_operations.sh +++ b/dbms/tests/queries/0_stateless/00953_constraints_operations.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 2);" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" # This one must throw and exception -EXCEPTION_TEXT="Constraint b_constraint is not satisfied" +EXCEPTION_TEXT="Violated constraint b_constraint in table test_constraints at indices" $CLICKHOUSE_CLIENT --query="INSERT INTO test_constraints VALUES (1, 0);" 2>&1 \ | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" || echo "Did not thrown an exception" $CLICKHOUSE_CLIENT --query="SELECT * FROM test_constraints;" From 0fc47fbbe4b70b5ffc254024798d8f9ed45b0418 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 26 Jul 2019 23:04:45 +0300 Subject: [PATCH 049/181] fixes --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++++- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../Storages/MergeTree/MergeTreeDataPart.cpp | 8 ++++-- .../Storages/MergeTree/MergeTreePartInfo.cpp | 28 ++++++++----------- .../Storages/MergeTree/MergeTreePartInfo.h | 9 +++--- dbms/src/Storages/StorageMergeTree.cpp | 15 ++-------- dbms/src/Storages/StorageMergeTree.h | 1 - .../Storages/StorageReplicatedMergeTree.cpp | 2 ++ .../System/StorageSystemDetachedParts.cpp | 25 ++++++++--------- .../00974_attach_invalid_parts.reference | 4 +-- .../0_stateless/00974_attach_invalid_parts.sh | 11 +++++--- .../0_stateless/00975_drop_detached.sh | 13 +++++---- 12 files changed, 69 insertions(+), 63 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 94e42c34d0f..9d12a9ee6ea 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -87,6 +87,8 @@ namespace ErrorCodes extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; extern const int BAD_TTL_EXPRESSION; + extern const int INCORRECT_FILE_NAME; + extern const int BAD_DATA_PART_NAME; } @@ -2583,11 +2585,21 @@ MergeTreeData::getDetachedParts() const res.emplace_back(); auto & part = res.back(); - DetachedPartInfo::tryParseDetachedPartName(dir_name, &part, format_version); + DetachedPartInfo::tryParseDetachedPartName(dir_name, part, format_version); } return res; } +void MergeTreeData::validateDetachedPartName(const String & name) const +{ + if (name.find('/') != std::string::npos || name == "." 
|| name == "..") + throw DB::Exception("Invalid part name", ErrorCodes::INCORRECT_FILE_NAME); + + Poco::File detached_part_dir(full_path + "detached/" + name); + if (!detached_part_dir.exists()) + throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME); +} + MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const { DataParts res; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index cec3651652b..2333135d53e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -389,6 +389,8 @@ public: /// Returns all detached parts std::vector getDetachedParts() const; + void validateDetachedPartName(const String & name) const; + /// Returns Committed parts DataParts getDataParts() const; DataPartsVector getDataPartsVector() const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 865aaf80ed1..24bc5cd2463 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -356,16 +356,18 @@ void MergeTreeDataPart::remove(bool force_recursive /*= false*/) const * - rename directory to temporary name; * - remove it recursive. * - * For temporary name we use "delete_tmp_" prefix. + * For temporary name we use "detached/deleting_" prefix. * - * NOTE: We cannot use "tmp_delete_" prefix, because there is a second thread, + * NOTE: We cannot use "tmp_*" prefix, because there is a second thread, * that calls "clearOldTemporaryDirectories" and removes all directories, that begin with "tmp_" and are old enough. * But when we removing data part, it can be old enough. And rename doesn't change mtime. * And a race condition can happen that will lead to "File not found" error here. + * We move directory to detached/, because if an attempt to remove directory after renaming failed for some reason + * there would be no way to remove directory from storage.full_path (except manually). */ String from = storage.full_path + relative_path; - String to = storage.full_path + "delete_tmp_" + name; + String to = storage.full_path + getRelativePathForDetachedPart("deleting_"); Poco::File from_dir{from}; Poco::File to_dir{to}; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index de7150e4cea..a9e31a988b3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -188,35 +188,29 @@ String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) con return wb.str(); } -bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, +bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info, MergeTreeDataFormatVersion format_version) { + part_info.dir_name = dir_name; + /// First, try to parse as . - if (MergeTreePartInfo::tryParsePartName(dir_name, part_info, format_version)) - return part_info->valid_name = true; + if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version)) + return part_info.valid_name = true; /// Next, as _. Use entire name as prefix if it fails. 
- part_info->prefix = dir_name; + part_info.prefix = dir_name; const auto first_separator = dir_name.find_first_of('_'); if (first_separator == String::npos) - return part_info->valid_name = false; + return part_info.valid_name = false; // TODO what if contains '_'? const auto part_name = dir_name.substr(first_separator + 1, dir_name.size() - first_separator - 1); - if (!MergeTreePartInfo::tryParsePartName(part_name, part_info, format_version)) - return part_info->valid_name = false; + if (!MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version)) + return part_info.valid_name = false; - part_info->prefix = dir_name.substr(0, first_separator); - return part_info->valid_name = true; + part_info.prefix = dir_name.substr(0, first_separator); + return part_info.valid_name = true; } -String DetachedPartInfo::fullDirName() const -{ - if (!valid_name) - return prefix; - if (prefix.empty()) - return getPartName(); - return prefix + "_" + getPartName(); -} } diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index 7d0fb446ee3..25cf46ad46d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -92,15 +92,14 @@ struct MergeTreePartInfo /// addition to the above fields. struct DetachedPartInfo : public MergeTreePartInfo { + /// Suddenly, name of detached part may contain suffix (such as _tryN), which is ignored by MergeTreePartInfo::tryParsePartName(...) + String dir_name; String prefix; - /// If false, prefix contains full directory name and MergeTreePartInfo may be in invalid state - /// (directory name was not successfully parsed). + /// If false, MergeTreePartInfo is in invalid state (directory name was not successfully parsed). bool valid_name; - String fullDirName() const; - - static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo * part_info, MergeTreeDataFormatVersion format_version); + static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info, MergeTreeDataFormatVersion format_version); }; } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0c1503347c0..ad4d0cd933f 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -36,7 +36,6 @@ namespace ErrorCodes extern const int INCORRECT_FILE_NAME; extern const int CANNOT_ASSIGN_OPTIMIZE; extern const int INCOMPATIBLE_COLUMNS; - extern const int BAD_DATA_PART_NAME; } namespace ActionLocks @@ -1003,7 +1002,7 @@ void StorageMergeTree::dropDetached(const ASTPtr & partition, bool part, const C validateDetachedPartName(part_id); DetachedPartInfo info; - DetachedPartInfo::tryParseDetachedPartName(part_id, &info, format_version); + DetachedPartInfo::tryParseDetachedPartName(part_id, info, format_version); MergeTreeDataPart detached_part(*this, part_id, info); detached_part.relative_path = "detached/" + part_id; @@ -1038,7 +1037,8 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par { const String & name = it.name(); MergeTreePartInfo part_info; - /// Parts with prefix in name (e.g. attaching_1_3_3_0, delete_tmp_1_3_3_0) will be ignored + /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored + // TODO what if name contains "_tryN" suffix? 
if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version) || part_info.partition_id != partition_id) { continue; } LOG_DEBUG(log, "Found part " << name); active_parts.add(name); } LOG_DEBUG(log, active_parts.size() << " of them are active"); parts = active_parts.getParts(); @@ -1163,15 +1163,6 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con } } -void StorageMergeTree::validateDetachedPartName(const String & name) const -{ - if (name.find('/') != std::string::npos || name == "." || name == "..") - throw DB::Exception("Invalid part name", ErrorCodes::INCORRECT_FILE_NAME); - - Poco::File detached_part_dir(full_path + "detached/" + name); - if (!detached_part_dir.exists()) - throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME); -} ActionLock StorageMergeTree::getActionLock(StorageActionBlockType action_type) { diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 42061894a8e..fa2561e4ab2 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -124,7 +124,6 @@ private: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); void attachPartition(const ASTPtr & partition, bool part, const Context & context); void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); - void validateDetachedPartName(const String & name) const; friend class MergeTreeBlockOutputStream; friend class MergeTreeData; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 5f91c304e98..192384602eb 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3554,6 +3554,7 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool Strings parts; if (attach_part) { + validateDetachedPartName(partition_id); parts.push_back(partition_id); } else @@ -3566,6 +3567,7 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool { String name = it.name(); MergeTreePartInfo part_info; + // TODO what if name contains "_tryN" suffix? if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)) continue; if (part_info.partition_id != partition_id) diff --git a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp index 9b32f1fb29b..e27c7945670 100644 --- a/dbms/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/dbms/src/Storages/System/StorageSystemDetachedParts.cpp @@ -2,6 +2,7 @@ #include #include +#include <DataTypes/DataTypeNullable.h> #include #include #include @@ -31,13 +32,12 @@ protected: setColumns(ColumnsDescription{{ {"database", std::make_shared<DataTypeString>()}, {"table", std::make_shared<DataTypeString>()}, - {"partition_id", std::make_shared<DataTypeString>()}, + {"partition_id", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())}, {"name", std::make_shared<DataTypeString>()}, - {"reason", std::make_shared<DataTypeString>()}, - {"min_block_number", std::make_shared<DataTypeInt64>()}, - {"max_block_number", std::make_shared<DataTypeInt64>()}, - {"level", std::make_shared<DataTypeUInt32>()}, - {"directory_name", std::make_shared<DataTypeString>()} + {"reason", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())}, + {"min_block_number", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())}, + {"max_block_number", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())}, + {"level", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt32>())} }}); } @@ -63,13 +63,12 @@ protected: int i = 0; columns[i++]->insert(info.database); columns[i++]->insert(info.table); - columns[i++]->insert(p.valid_name ? p.partition_id : ""); - columns[i++]->insert(p.valid_name ?
p.getPartName() : ""); - columns[i++]->insert(p.prefix); - columns[i++]->insert(p.min_block); - columns[i++]->insert(p.max_block); - columns[i++]->insert(p.level); - columns[i++]->insert(p.fullDirName()); + columns[i++]->insert(p.valid_name ? p.partition_id : Field()); + columns[i++]->insert(p.dir_name); + columns[i++]->insert(p.valid_name ? p.prefix : Field()); + columns[i++]->insert(p.valid_name ? p.min_block : Field()); + columns[i++]->insert(p.valid_name ? p.max_block : Field()); + columns[i++]->insert(p.valid_name ? p.level : Field()); } } diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference index d44f46779ca..42a04fe5666 100644 --- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference @@ -13,5 +13,5 @@ OK 16 120 === detached === 0_5_5_0 -delete_tmp_0_7_7 -attaching_0_6_6 +deleting_0_7_7_0 +attaching_0_6_6_0 diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh index 89a6be183d2..4e9efa64ad1 100755 --- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh @@ -27,15 +27,18 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM sys $CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; $CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions DETACH PARTITION 0"; -mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part -cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ -cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/delete_tmp_0_7_7_0/ +sudo -n mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \ + mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part +sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ 2>/dev/null || \ + cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ +sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ 2>/dev/null || \ + cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ $CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0"; $CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; $CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_partitions FORMAT TSV"; echo '=== detached ==='; -$CLICKHOUSE_CLIENT --query="SELECT directory_name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE attach_partitions"; $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.sh 
b/dbms/tests/queries/0_stateless/00975_drop_detached.sh index 9f831560bdc..3a5e920da75 100755 --- a/dbms/tests/queries/0_stateless/00975_drop_detached.sh +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.sh @@ -15,16 +15,19 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system. $CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; $CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 0"; -mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ -mkdir $ch_dir/data/$cur_db/drop_detached/detached/delete_tmp_0_7_7_0/ -mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ +sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ 2>/dev/null || \ + mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ +sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ 2>/dev/null || \ + mkdir $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ +sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ 2>/dev/null || \ + mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '0_1_1_0'" $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" -$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'delete_tmp_0_7_7_0'" +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'deleting_0_7_7_0'" $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'any_other_name'" -$CLICKHOUSE_CLIENT --query="SElECT directory_name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE drop_detached"; $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; From a3ebe3153537170e344dc8766d48a2630e63146c Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Mon, 29 Jul 2019 13:05:43 +0300 Subject: [PATCH 050/181] Brace style fix --- dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index cb9b8871a68..5adf344cf0b 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -17,7 +17,8 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) { auto indices_wrong = findAllWrong(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize()); std::string indices_str = "{"; - for (size_t j = 0; j < indices_wrong.size(); ++j) { + for (size_t j = 0; j < indices_wrong.size(); ++j) + { indices_str += std::to_string(indices_wrong[j]); indices_str += (j != indices_wrong.size() - 1) ? 
", " : "}"; } From 2f33df1b2ef01ba19db8d465400f08a4d532e060 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 30 Jul 2019 20:24:40 +0300 Subject: [PATCH 051/181] rename all parts before attaching --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 40 ++++++++++++ dbms/src/Storages/MergeTree/MergeTreeData.h | 15 +++++ .../Storages/MergeTree/MergeTreeDataPart.cpp | 24 ++----- .../Storages/MergeTree/MergeTreeDataPart.h | 2 +- .../Storages/MergeTree/MergeTreePartInfo.cpp | 1 + dbms/src/Storages/StorageMergeTree.cpp | 65 ++++++------------- dbms/src/Storages/StorageMergeTree.h | 1 - .../Storages/StorageReplicatedMergeTree.cpp | 6 +- .../00974_attach_invalid_parts.reference | 17 +++-- .../0_stateless/00974_attach_invalid_parts.sh | 21 ++++-- .../0_stateless/00975_drop_detached.reference | 6 +- .../0_stateless/00975_drop_detached.sh | 20 +++--- 12 files changed, 130 insertions(+), 88 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 9d12a9ee6ea..4b13ebaa99f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1718,6 +1718,29 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction() } } +void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name) +{ + Poco::File(base_dir + old_name).renameTo(base_dir + new_name); + old_and_new_names.push_back({old_name, new_name}); +} + +MergeTreeData::PartsTemporaryRename::~PartsTemporaryRename() +{ + for (const auto & names : old_and_new_names) + { + if (names.first.empty()) + continue; + try + { + Poco::File(base_dir + names.second).renameTo(base_dir + names.first); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( const MergeTreePartInfo & new_part_info, @@ -2600,6 +2623,23 @@ void MergeTreeData::validateDetachedPartName(const String & name) const throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME); } +void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Context &) +{ + if (!part) // TODO + throw DB::Exception("DROP DETACHED PARTITION is not implemented, use DROP DETACHED PART", ErrorCodes::NOT_IMPLEMENTED); + + String part_id = partition->as().value.safeGet(); + validateDetachedPartName(part_id); + if (startsWith(part_id, "attaching_") || startsWith(part_id, "deleting_")) + throw DB::Exception("Cannot drop part " + part_id + ": " + "most likely it is used by another DROP or ATTACH query.", ErrorCodes::BAD_DATA_PART_NAME); + + PartsTemporaryRename renamed_parts(full_path + "detached/"); + renamed_parts.addPart(part_id, "deleting_" + part_id); + Poco::File(renamed_parts.base_dir + renamed_parts.old_and_new_names.front().second).remove(true); + renamed_parts.old_and_new_names.front().first.clear(); +} + MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const { DataParts res; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 099591a97e4..62cebf32f76 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -249,6 +249,19 @@ public: using AlterDataPartTransactionPtr = std::unique_ptr; + struct PartsTemporaryRename : private boost::noncopyable + { + PartsTemporaryRename(const String & base_dir_) : base_dir(base_dir_) {} + + /// Renames part from 
old_name to new_name + void addPart(const String & old_name, const String & new_name); + + /// Renames all added parts from new_name to old_name if old name is not empty + ~PartsTemporaryRename(); + + String base_dir; + std::vector<std::pair<String, String>> old_and_new_names; + }; /// Parameters for various modes. struct MergingParams @@ -392,6 +405,8 @@ public: void validateDetachedPartName(const String & name) const; + void dropDetached(const ASTPtr & partition, bool part, const Context & context); + /// Returns Committed parts DataParts getDataParts() const; DataPartsVector getDataPartsVector() const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index a0888732495..fa2847aa301 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -142,10 +142,7 @@ MergeTreeDataPart::MergeTreeDataPart(MergeTreeData & storage_, const String & na { } -MergeTreeDataPart::MergeTreeDataPart( - const MergeTreeData & storage_, - const String & name_, - const MergeTreePartInfo & info_) +MergeTreeDataPart::MergeTreeDataPart(const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_) : storage(storage_) , name(name_) , info(info_) @@ -350,7 +347,7 @@ UInt64 MergeTreeDataPart::calculateTotalSizeOnDisk(const String & from) return res; } -void MergeTreeDataPart::remove(bool force_recursive /*= false*/) const +void MergeTreeDataPart::remove() const { if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. This is bug.", ErrorCodes::LOGICAL_ERROR); @@ -359,18 +356,18 @@ void MergeTreeDataPart::remove(bool force_recursive /*= false*/) const * - rename directory to temporary name; * - remove it recursive. * - * For temporary name we use "detached/deleting_" prefix. + * For temporary name we use "delete_tmp_" prefix. * - * NOTE: We cannot use "tmp_*" prefix, because there is a second thread, + * NOTE: We cannot use "tmp_delete_" prefix, because there is a second thread, * that calls "clearOldTemporaryDirectories" and removes all directories, that begin with "tmp_" and are old enough. * But when we removing data part, it can be old enough. And rename doesn't change mtime. * And a race condition can happen that will lead to "File not found" error here. - * We move directory to detached/, because if an attempt to remove directory after renaming failed for some reason - * there would be no way to remove directory from storage.full_path (except manually). */ + // TODO directory delete_tmp_ is never removed if server crashes before returning from this function + String from = storage.full_path + relative_path; - String to = storage.full_path + getRelativePathForDetachedPart("deleting_"); + String to = storage.full_path + "delete_tmp_" + name; Poco::File from_dir{from}; Poco::File to_dir{to}; @@ -403,13 +400,6 @@ void MergeTreeDataPart::remove(bool force_recursive /*= false*/) const return; } - if (force_recursive) - { - /// Part is not loaded (we don't know which files are there), so remove dir recursively. - to_dir.remove(true); - return; - } - try { /// Remove each expected file in directory, then remove directory itself.
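The PartsTemporaryRename struct introduced above is a scope guard over directory renames: addPart() renames a part to its temporary name and records the pair, and the destructor renames everything back unless the caller has committed an entry by clearing its old name. The following is a minimal self-contained sketch of the same idiom, using std::filesystem instead of Poco and illustrative names that are not part of ClickHouse's API:

```cpp
// Sketch of the rename-guard idiom behind PartsTemporaryRename.
// Assumes std::filesystem (C++17); class and method names are illustrative.
#include <filesystem>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace fs = std::filesystem;

class RenameGuard
{
public:
    explicit RenameGuard(fs::path base_dir) : base_dir_(std::move(base_dir)) {}

    /// Rename old_name -> new_name and remember the pair for rollback.
    void addPart(const std::string & old_name, const std::string & new_name)
    {
        fs::rename(base_dir_ / old_name, base_dir_ / new_name);
        old_and_new_names_.push_back({old_name, new_name});
    }

    /// Mark one entry as committed so the destructor will not undo it.
    void commit(size_t i) { old_and_new_names_[i].first.clear(); }

    /// Roll back every rename that was not committed.
    ~RenameGuard()
    {
        for (const auto & [old_name, new_name] : old_and_new_names_)
        {
            if (old_name.empty())
                continue;            // committed: keep the new name
            std::error_code ec;      // a destructor must not throw
            fs::rename(base_dir_ / new_name, base_dir_ / old_name, ec);
            if (ec)
                std::cerr << "cannot roll back " << new_name << ": " << ec.message() << '\n';
        }
    }

private:
    fs::path base_dir_;
    std::vector<std::pair<std::string, std::string>> old_and_new_names_;
};

int main()
{
    fs::create_directories("detached/0_1_1_0");
    {
        RenameGuard guard("detached");
        guard.addPart("0_1_1_0", "attaching_0_1_1_0");
        // If the work between rename and commit throws, the destructor
        // renames attaching_0_1_1_0 back to 0_1_1_0 automatically.
        guard.commit(0);
    }
}
```

Using the cleared old name as the commit marker keeps rollback idempotent: only entries whose operation never completed are renamed back, which matches how the attach path below clears old_and_new_names[i].first once a part is successfully attached.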
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index 98af00c071a..f41ea8af424 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -241,7 +241,7 @@ struct MergeTreeDataPart /// Calculate the total size of the entire directory with all the files static UInt64 calculateTotalSizeOnDisk(const String & from); - void remove(bool force_recursive = false) const; + void remove() const; /// Makes checks and move part to new directory /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index a9e31a988b3..449ea143e17 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -194,6 +194,7 @@ bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, Detache part_info.dir_name = dir_name; /// First, try to parse as <part_name>. + // TODO what if tryParsePartName will parse prefix as partition_id? if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version)) return part_info.valid_name = true; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 98b3ae6ad6c..db5632c3fe9 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -994,6 +994,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons /// TODO: should we include PreComitted parts like in Replicated case? auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + // TODO should we throw an exception if parts_to_remove is empty? removePartsFromWorkingSet(parts_to_remove, true); if (detach) @@ -1013,22 +1014,6 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons } -void StorageMergeTree::dropDetached(const ASTPtr & partition, bool part, const Context & /*context*/) -{ - if (!part) // TODO - throw DB::Exception("DROP DETACHED PARTITION is not implemented, use DROP DETACHED PART", ErrorCodes::NOT_IMPLEMENTED); - - String part_id = partition->as<ASTLiteral &>().value.safeGet<String>(); - validateDetachedPartName(part_id); - - DetachedPartInfo info; - DetachedPartInfo::tryParseDetachedPartName(part_id, info, format_version); - MergeTreeDataPart detached_part(*this, part_id, info); - detached_part.relative_path = "detached/" + part_id; - - detached_part.remove(true); -} - void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { // TODO: should get some locks to prevent race with 'alter … modify column' @@ -1069,42 +1054,30 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par } LOG_DEBUG(log, active_parts.size() << " of them are active"); parts = active_parts.getParts(); + + // TODO should we rename inactive parts?
(see StorageReplicatedMergeTree::attachPartition) } + PartsTemporaryRename renamed_parts(full_path + source_dir); for (const auto & source_part_name : parts) + renamed_parts.addPart(source_part_name, "attaching_" + source_part_name); + + std::vector<MutableDataPartPtr> loaded_parts; + for (const auto & part_names : renamed_parts.old_and_new_names) { - MutableDataPartPtr part; - try - { - part = std::make_shared<MergeTreeDataPart>(*this, source_part_name); - part->relative_path = "detached/" + source_part_name; - part->renameTo("detached/attaching_" + source_part_name, false); - - LOG_DEBUG(log, "Checking data in " << part->relative_path); + LOG_DEBUG(log, "Checking data in " << part_names.second); + MutableDataPartPtr part = std::make_shared<MergeTreeDataPart>(*this, part_names.first); + part->relative_path = source_dir + part_names.second; loadPartAndFixMetadata(part); + loaded_parts.push_back(part); + } - LOG_INFO(log, "Attaching part " << source_part_name << " from " << part->relative_path); - renameTempPartAndAdd(part, &increment); - - LOG_INFO(log, "Finished attaching part"); - } - catch (...) - { - LOG_INFO(log, "Cannot attach part " << source_part_name << " :" << getCurrentExceptionMessage(false)); - - if (part && part->relative_path == "detached/attaching_" + source_part_name) - { - try - { - part->renameTo("detached/" + source_part_name, false); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } - - } + for (size_t i = 0; i < loaded_parts.size(); ++i) + { + LOG_INFO(log, "Attaching part " << loaded_parts[i]->name << " from " << renamed_parts.old_and_new_names[i].second); + renameTempPartAndAdd(loaded_parts[i], &increment); + renamed_parts.old_and_new_names[i].first.clear(); + LOG_INFO(log, "Finished attaching part"); } /// New parts with other data may appear in place of deleted parts. diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index fa2561e4ab2..0de9618d915 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -120,7 +120,6 @@ private: // Partition helpers void dropPartition(const ASTPtr & partition, bool detach, const Context & context); - void dropDetached(const ASTPtr & partition, bool part, const Context & context); void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); void attachPartition(const ASTPtr & partition, bool part, const Context & context); void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 531678decc3..67577dee2b6 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3353,8 +3353,8 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part break; case PartitionCommand::DROP_DETACHED_PARTITION: - // TODO - throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + dropDetached(command.partition, command.part, query_context); + break; case PartitionCommand::ATTACH_PARTITION: attachPartition(command.partition, command.part, query_context); @@ -3601,6 +3601,8 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool loaded_parts.push_back(loadPartAndFixMetadata(source_dir + part)); } + // TODO fix race with DROP DETACHED + ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, false); /// TODO Allow to use quorum here.
for (auto & part : loaded_parts) { diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference index 42a04fe5666..f30fc160dfb 100644 --- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference @@ -1,17 +1,26 @@ === cannot attach active === -OK +OK1 0_1_1_0 1_2_2_0 2_3_3_0 3_4_4_0 16 120 -=== attach all valid parts === +=== check all parts before attaching === +OK2 +1_2_2_0 +1_4_4_0 +=== detached === +0_1_1_0 +0_3_3_0 +0_5_5_0 +attaching_0_6_6_0 +deleting_0_7_7_0 +=== attach === 0_5_5_0 0_6_6_0 1_2_2_0 1_4_4_0 16 120 === detached === -0_5_5_0 -deleting_0_7_7_0 attaching_0_6_6_0 +deleting_0_7_7_0 diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh index 4e9efa64ad1..a4afbe8f817 100755 --- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh @@ -12,14 +12,13 @@ echo '=== cannot attach active ==='; $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_active"; $CLICKHOUSE_CLIENT --query="CREATE TABLE attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n"; $CLICKHOUSE_CLIENT --query="INSERT INTO attach_active SELECT number FROM system.numbers LIMIT 16"; -$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_active ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_active ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK1' $CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_active' AND database='${cur_db}' ORDER BY name FORMAT TSV"; $CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_active FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE attach_active"; -echo '=== attach all valid parts ==='; $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES"; $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_partitions"; $CLICKHOUSE_CLIENT --query="CREATE TABLE attach_partitions (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n"; @@ -27,18 +26,28 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM sys $CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; $CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions DETACH PARTITION 0"; -sudo -n mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \ - mkdir $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part +sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \ + mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ 2>/dev/null || \ cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ 2>/dev/null || \ cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ -$CLICKHOUSE_CLIENT --query="ALTER TABLE 
attach_partitions ATTACH PARTITION 0"; +echo '=== check all parts before attaching ==='; +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0" 2>&1 | grep "No columns in part 0_5_5_0" > /dev/null && echo 'OK2'; +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; +echo '=== detached ==='; +$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; + +echo '=== attach ==='; +sudo -n rm -r $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \ + rm -r $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ +$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0"; $CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; $CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_partitions FORMAT TSV"; + echo '=== detached ==='; -$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE attach_partitions"; $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.reference b/dbms/tests/queries/0_stateless/00975_drop_detached.reference index 40732c908ab..2a355138980 100644 --- a/dbms/tests/queries/0_stateless/00975_drop_detached.reference +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.reference @@ -1,2 +1,6 @@ -OK +OK1 +OK2 +OK3 0_3_3_0 +attaching_0_6_6_0 +deleting_0_7_7_0 diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.sh b/dbms/tests/queries/0_stateless/00975_drop_detached.sh index 3a5e920da75..71c0b5681fd 100755 --- a/dbms/tests/queries/0_stateless/00975_drop_detached.sh +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.sh @@ -15,19 +15,19 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system. 
$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; $CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 0"; -sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ 2>/dev/null || \ - mkdir $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ -sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ 2>/dev/null || \ - mkdir $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ -sudo -n mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ 2>/dev/null || \ - mkdir $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ +sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ 2>/dev/null || \ + mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ +sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ 2>/dev/null || \ + mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ +sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ 2>/dev/null || \ + mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ -$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK' +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK1' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '0_1_1_0'" -$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" -$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'deleting_0_7_7_0'" +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK2' +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'deleting_0_7_7_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK3' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'any_other_name'" -$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' ORDER BY name FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE drop_detached"; $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; From c6717e0d3f977e23aa9b778d00bcf64456e893cb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 30 Jul 2019 22:11:15 +0300 Subject: [PATCH 052/181] refactor attachPartition --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 71 +++++++++++++++++++ dbms/src/Storages/MergeTree/MergeTreeData.h | 3 + dbms/src/Storages/StorageMergeTree.cpp | 55 +------------- .../Storages/StorageReplicatedMergeTree.cpp | 68 ++---------------- 4 files changed, 83 insertions(+), 114 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 4b13ebaa99f..11ad7835b51 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2640,6 +2640,77 @@ void 
MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Cont renamed_parts.old_and_new_names.front().first.clear(); } +MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, + const Context & context, PartsTemporaryRename & renamed_parts) +{ + String partition_id; + + if (attach_part) + partition_id = partition->as<ASTLiteral &>().value.safeGet<String>(); + else + partition_id = getPartitionIDFromQuery(partition, context); + + String source_dir = "detached/"; + + /// Let's compose a list of parts that should be added. + Strings parts; + if (attach_part) + { + validateDetachedPartName(partition_id); + parts.push_back(partition_id); + } + else + { + LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); + ActiveDataPartSet active_parts(format_version); + + std::set<String> part_names; + for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) + { + String name = it.name(); + MergeTreePartInfo part_info; + // TODO what if name contains "_tryN" suffix? + if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)) + continue; + if (part_info.partition_id != partition_id) + continue; + LOG_DEBUG(log, "Found part " << name); + active_parts.add(name); + part_names.insert(name); + } + LOG_DEBUG(log, active_parts.size() << " of them are active"); + parts = active_parts.getParts(); + + /// Inactive parts rename so they can not be attached in case of repeated ATTACH. + for (const auto & name : part_names) + { + // TODO maybe use PartsTemporaryRename here? + String containing_part = active_parts.getContainingPart(name); + if (!containing_part.empty() && containing_part != name) + Poco::File(full_path + source_dir + name).renameTo(full_path + source_dir + "inactive_" + name); + } + } + + /// Try to rename all parts before attaching to prevent race with DROP DETACHED and another ATTACH. + for (const auto & source_part_name : parts) + renamed_parts.addPart(source_part_name, "attaching_" + source_part_name); + + /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist.
+ LOG_DEBUG(log, "Checking parts"); + MutableDataPartsVector loaded_parts; + loaded_parts.reserve(parts.size()); + for (const auto & part_names : renamed_parts.old_and_new_names) + { + LOG_DEBUG(log, "Checking part " << part_names.second); + MutableDataPartPtr part = std::make_shared<MergeTreeDataPart>(*this, part_names.first); + part->relative_path = source_dir + part_names.second; + loadPartAndFixMetadata(part); + loaded_parts.push_back(part); + } + + return loaded_parts; +} + MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const { DataParts res; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 62cebf32f76..3592164fed5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -407,6 +407,9 @@ public: void dropDetached(const ASTPtr & partition, bool part, const Context & context); + MutableDataPartsVector tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, + const Context & context, PartsTemporaryRename & renamed_parts); + /// Returns Committed parts DataParts getDataParts() const; DataPartsVector getDataPartsVector() const; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index db5632c3fe9..3464255e1b8 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -1018,59 +1018,8 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par { // TODO: should get some locks to prevent race with 'alter … modify column' - String partition_id; - - if (attach_part) - partition_id = partition->as<ASTLiteral &>().value.safeGet<String>(); - else - partition_id = getPartitionIDFromQuery(partition, context); - - String source_dir = "detached/"; - - /// Let's make a list of parts to add. - Strings parts; - if (attach_part) - { - validateDetachedPartName(partition_id); - parts.push_back(partition_id); - } - else - { - LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); - ActiveDataPartSet active_parts(format_version); - for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) - { - const String & name = it.name(); - MergeTreePartInfo part_info; - /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored - // TODO what if name contains "_tryN" suffix? - if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version) - || part_info.partition_id != partition_id) - { - continue; - } - LOG_DEBUG(log, "Found part " << name); - active_parts.add(name); - } - LOG_DEBUG(log, active_parts.size() << " of them are active"); - parts = active_parts.getParts(); - - // TODO should we rename inactive parts?
(see StorageReplicatedMergeTree::attachPartition) - } - - PartsTemporaryRename renamed_parts(full_path + source_dir); for (const auto & source_part_name : parts) - renamed_parts.addPart(source_part_name, "attaching_" + source_part_name); - - std::vector<MutableDataPartPtr> loaded_parts; - for (const auto & part_names : renamed_parts.old_and_new_names) - { - LOG_DEBUG(log, "Checking data in " << part_names.second); - MutableDataPartPtr part = std::make_shared<MergeTreeDataPart>(*this, part_names.first); - part->relative_path = source_dir + part_names.second; - loadPartAndFixMetadata(part); - loaded_parts.push_back(part); - } + PartsTemporaryRename renamed_parts(full_path + "detached/"); + MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, context, renamed_parts); for (size_t i = 0; i < loaded_parts.size(); ++i) { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 67577dee2b6..7e192d77a33 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3545,70 +3545,16 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool assertNotReadonly(); - String partition_id; - - if (attach_part) - partition_id = partition->as<ASTLiteral &>().value.safeGet<String>(); - else - partition_id = getPartitionIDFromQuery(partition, query_context); - - String source_dir = "detached/"; - - /// Let's compose a list of parts that should be added. - Strings parts; - if (attach_part) - { - validateDetachedPartName(partition_id); - parts.push_back(partition_id); - } - else - { - LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); - ActiveDataPartSet active_parts(format_version); - - std::set<String> part_names; - for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it) - { - String name = it.name(); - MergeTreePartInfo part_info; - // TODO what if name contains "_tryN" suffix? - if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)) - continue; - if (part_info.partition_id != partition_id) - continue; - LOG_DEBUG(log, "Found part " << name); - active_parts.add(name); - part_names.insert(name); - } - LOG_DEBUG(log, active_parts.size() << " of them are active"); - parts = active_parts.getParts(); - - /// Inactive parts rename so they can not be attached in case of repeated ATTACH. - for (const auto & name : part_names) - { - String containing_part = active_parts.getContainingPart(name); - if (!containing_part.empty() && containing_part != name) - Poco::File(full_path + source_dir + name).renameTo(full_path + source_dir + "inactive_" + name); - } - } - - /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist. - LOG_DEBUG(log, "Checking parts"); - std::vector<MutableDataPartPtr> loaded_parts; - for (const String & part : parts) - { - LOG_DEBUG(log, "Checking part " << part); - loaded_parts.push_back(loadPartAndFixMetadata(source_dir + part)); - } - - // TODO fix race with DROP DETACHED + PartsTemporaryRename renamed_parts(full_path + "detached/"); + MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, false); /// TODO Allow to use quorum here.
- for (auto & part : loaded_parts)
+ for (size_t i = 0; i < loaded_parts.size(); ++i)
 {
- String old_name = part->name;
- output.writeExistingPart(part);
- LOG_DEBUG(log, "Attached part " << old_name << " as " << part->name);
+ String old_name = loaded_parts[i]->name;
+ output.writeExistingPart(loaded_parts[i]);
+ renamed_parts.old_and_new_names[i].first.clear();
+ LOG_DEBUG(log, "Attached part " << old_name << " as " << loaded_parts[i]->name);
 }
 }

From bd493727b655d95bcadcce9bff7a3a4aad8cf304 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 31 Jul 2019 08:55:10 +0300
Subject: [PATCH 053/181] DOCAPI-7460: Added link to algorithm.

---
 docs/en/query_language/agg_functions/parametric_functions.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md
index da6052545dc..d27cb5d9431 100644
--- a/docs/en/query_language/agg_functions/parametric_functions.md
+++ b/docs/en/query_language/agg_functions/parametric_functions.md
@@ -10,6 +10,8 @@ Calculates a histogram.
 histogram(number_of_bins)(values)
 ```
 
+The function uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). It calculates the borders of the histogram bins automatically, and in the general case the widths of the bins are not equal.
+
 **Parameters**
 
 `number_of_bins` — Number of bins for the histogram.
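To see the documented function in action, a toy query along these lines can be used (a sketch with arbitrary input, not part of the patch; the exact bin borders depend on the data):

```sql
-- Compute a 5-bin histogram over the values 1..20.
SELECT histogram(5)(number + 1) AS hist
FROM (SELECT * FROM system.numbers LIMIT 20)
```

The result is an array of `(lower, upper, height)` tuples, one tuple per bin.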
From f0836553d449368cac474e9ed9f60d3120ea79c4 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 31 Jul 2019 17:44:55 +0300
Subject: [PATCH 054/181] drop detached partition

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 94 +++++++++++++------
 dbms/src/Storages/MergeTree/MergeTreeData.h | 10 +-
 .../Storages/MergeTree/MergeTreePartInfo.cpp | 2 +-
 .../Storages/MergeTree/MergeTreePartInfo.h | 3 +-
 dbms/src/Storages/PartitionCommands.cpp | 3 -
 dbms/src/Storages/StorageMergeTree.cpp | 2 +-
 .../Storages/StorageReplicatedMergeTree.cpp | 2 +-
 .../0_stateless/00974_attach_invalid_parts.sh | 8 +-
 .../0_stateless/00975_drop_detached.reference | 9 ++
 .../0_stateless/00975_drop_detached.sh | 12 +++
 10 files changed, 103 insertions(+), 42 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 11ad7835b51..32cd3ad508e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1720,12 +1720,35 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction()
 
 void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name)
 {
- Poco::File(base_dir + old_name).renameTo(base_dir + new_name);
 old_and_new_names.push_back({old_name, new_name});
 }
 
+void MergeTreeData::PartsTemporaryRename::tryRenameAll()
+{
+ renamed = true;
+ for (size_t i = 0; i < old_and_new_names.size(); ++i)
+ {
+ try
+ {
+ const auto & names = old_and_new_names[i];
+ if (names.first.empty() || names.second.empty())
+ throw DB::Exception("Empty part name. Most likely it's a bug.", ErrorCodes::INCORRECT_FILE_NAME);
+ Poco::File(base_dir + names.first).renameTo(base_dir + names.second);
+ }
+ catch (...)
+ {
+ old_and_new_names.resize(i);
+ LOG_WARNING(storage.log, "Cannot rename parts to perform operation on them: " << getCurrentExceptionMessage(false));
+ throw;
+ }
+ }
+}
+
 MergeTreeData::PartsTemporaryRename::~PartsTemporaryRename()
 {
+ // TODO what if server had crashed before this destructor was called?
+ if (!renamed)
+ return;
 for (const auto & names : old_and_new_names)
 {
 if (names.first.empty())
@@ -2621,46 +2644,60 @@ void MergeTreeData::validateDetachedPartName(const String & name) const
 Poco::File detached_part_dir(full_path + "detached/" + name);
 if (!detached_part_dir.exists())
 throw DB::Exception("Detached part \"" + name + "\" not found" , ErrorCodes::BAD_DATA_PART_NAME);
+
+ if (startsWith(name, "attaching_") || startsWith(name, "deleting_"))
+ throw DB::Exception("Cannot drop part " + name + ": "
+ "most likely it is used by another DROP or ATTACH query.",
+ ErrorCodes::BAD_DATA_PART_NAME);
 }
 
-void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Context &)
+void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Context & context)
 {
- if (!part) // TODO
- throw DB::Exception("DROP DETACHED PARTITION is not implemented, use DROP DETACHED PART", ErrorCodes::NOT_IMPLEMENTED);
+ PartsTemporaryRename renamed_parts(*this, full_path + "detached/");
 
- String part_id = partition->as().value.safeGet();
- validateDetachedPartName(part_id);
- if (startsWith(part_id, "attaching_") || startsWith(part_id, "deleting_"))
- throw DB::Exception("Cannot drop part " + part_id + ": "
- "most likely it is used by another DROP or ATTACH query.", ErrorCodes::BAD_DATA_PART_NAME);
+ if (part)
+ {
+ String part_name = partition->as().value.safeGet();
+ validateDetachedPartName(part_name);
+ renamed_parts.addPart(part_name, "deleting_" + part_name);
+ }
+ else
+ {
+ String partition_id = getPartitionIDFromQuery(partition, context);
+ DetachedPartsInfo detached_parts = getDetachedParts();
+ for (const auto & part_info : detached_parts)
+ if (part_info.valid_name && part_info.partition_id == partition_id
+ && part_info.prefix != "attaching" && part_info.prefix != "deleting")
+ renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name);
+ }
 
- PartsTemporaryRename renamed_parts(full_path + "detached/");
- renamed_parts.addPart(part_id, "deleting_" + part_id);
- Poco::File(renamed_parts.base_dir + renamed_parts.old_and_new_names.front().second).remove(true);
- renamed_parts.old_and_new_names.front().first.clear();
+ LOG_DEBUG(log, "Will drop " << renamed_parts.old_and_new_names.size() << " detached parts.");
+
+ renamed_parts.tryRenameAll();
+
+ for (auto & names : renamed_parts.old_and_new_names)
+ {
+ Poco::File(renamed_parts.base_dir + names.second).remove(true);
+ LOG_DEBUG(log, "Dropped detached part " << names.first);
+ names.first.clear();
+ }
 }
 
 MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part,
 const Context & context, PartsTemporaryRename & renamed_parts)
 {
- String partition_id;
-
- if (attach_part)
- partition_id = partition->as().value.safeGet();
- else
- partition_id = getPartitionIDFromQuery(partition, context);
-
 String source_dir = "detached/";
 
 /// Let's compose a list of parts that should be added.
- Strings parts; if (attach_part) { - validateDetachedPartName(partition_id); - parts.push_back(partition_id); + String part_id = partition->as().value.safeGet(); + validateDetachedPartName(part_id); + renamed_parts.addPart(part_id, "attaching_" + part_id); } else { + String partition_id = getPartitionIDFromQuery(partition, context); LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); ActiveDataPartSet active_parts(format_version); @@ -2670,6 +2707,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const String name = it.name(); MergeTreePartInfo part_info; // TODO what if name contains "_tryN" suffix? + /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)) continue; if (part_info.partition_id != partition_id) @@ -2679,26 +2717,26 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const part_names.insert(name); } LOG_DEBUG(log, active_parts.size() << " of them are active"); - parts = active_parts.getParts(); /// Inactive parts rename so they can not be attached in case of repeated ATTACH. for (const auto & name : part_names) { - // TODO maybe use PartsTemporaryRename here? String containing_part = active_parts.getContainingPart(name); if (!containing_part.empty() && containing_part != name) + // TODO maybe use PartsTemporaryRename here? Poco::File(full_path + source_dir + name).renameTo(full_path + source_dir + "inactive_" + name); + else + renamed_parts.addPart(name, "attaching_" + name); } } /// Try to rename all parts before attaching to prevent race with DROP DETACHED and another ATTACH. - for (const auto & source_part_name : parts) - renamed_parts.addPart(source_part_name, "attaching_" + source_part_name); + renamed_parts.tryRenameAll(); /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist. LOG_DEBUG(log, "Checking parts"); MutableDataPartsVector loaded_parts; - loaded_parts.reserve(parts.size()); + loaded_parts.reserve(renamed_parts.old_and_new_names.size()); for (const auto & part_names : renamed_parts.old_and_new_names) { LOG_DEBUG(log, "Checking part " << part_names.second); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 3592164fed5..9f5d0961d27 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -251,16 +251,20 @@ public: struct PartsTemporaryRename : private boost::noncopyable { - PartsTemporaryRename(const String & base_dir_) : base_dir(base_dir_) {} + PartsTemporaryRename(const MergeTreeData & storage_, const String & base_dir_) : storage(storage_), base_dir(base_dir_) {} + + void addPart(const String & old_name, const String & new_name); /// Renames part from old_name to new_name - void addPart(const String & old_name, const String & new_name); + void tryRenameAll(); /// Renames all added parts from new_name to old_name if old name is not empty ~PartsTemporaryRename(); + const MergeTreeData & storage; String base_dir; std::vector> old_and_new_names; + bool renamed = false; }; /// Parameters for various modes. 
@@ -401,7 +405,7 @@ public: DataPartsVector getAllDataPartsVector(DataPartStateVector * out_states = nullptr) const; /// Returns all detached parts - std::vector getDetachedParts() const; + DetachedPartsInfo getDetachedParts() const; void validateDetachedPartName(const String & name) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 449ea143e17..3ee330b6d1a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -194,7 +194,7 @@ bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, Detache part_info.dir_name = dir_name; /// First, try to parse as . - // TODO what if tryParsePartName will parse prefix as partition_id? + // TODO what if tryParsePartName will parse prefix as partition_id? It can happen if dir_name doesn't contain mutation number at the end if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version)) return part_info.valid_name = true; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index 25cf46ad46d..9fe0fbab533 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -92,7 +92,6 @@ struct MergeTreePartInfo /// addition to the above fields. struct DetachedPartInfo : public MergeTreePartInfo { - /// Suddenly, name of detached part may contain suffix (such as _tryN), which is ignored by MergeTreePartInfo::tryParsePartName(...) String dir_name; String prefix; @@ -102,4 +101,6 @@ struct DetachedPartInfo : public MergeTreePartInfo static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info, MergeTreeDataFormatVersion format_version); }; +using DetachedPartsInfo = std::vector; + } diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index bab3f6ced24..0537482dbc1 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -25,9 +25,6 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * } else if (command_ast->type == ASTAlterCommand::DROP_DETACHED_PARTITION) { - if (!command_ast->part) // TODO - throw DB::Exception("Not implemented yet", ErrorCodes::NOT_IMPLEMENTED); - PartitionCommand res; res.type = DROP_DETACHED_PARTITION; res.partition = command_ast->partition; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 3464255e1b8..c2ee4854c39 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -1018,7 +1018,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par { // TODO: should get some locks to prevent race with 'alter … modify column' - PartsTemporaryRename renamed_parts(full_path + "detached/"); + PartsTemporaryRename renamed_parts(*this, full_path + "detached/"); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, context, renamed_parts); for (size_t i = 0; i < loaded_parts.size(); ++i) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 7e192d77a33..5109d9f7e54 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3545,7 +3545,7 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool assertNotReadonly(); - PartsTemporaryRename 
renamed_parts(full_path + "detached/"); + PartsTemporaryRename renamed_parts(*this, full_path + "detached/"); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, false); /// TODO Allow to use quorum here. diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh index a4afbe8f817..db45cfe7f21 100755 --- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh +++ b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh @@ -28,10 +28,10 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM sys $CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions DETACH PARTITION 0"; sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \ mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part -sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ 2>/dev/null || \ - cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ -sudo -n cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ 2>/dev/null || \ - cp -r $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ +sudo -n cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ 2>/dev/null || \ + cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ +sudo -n cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ 2>/dev/null || \ + cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ echo '=== check all parts before attaching ==='; $CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0" 2>&1 | grep "No columns in part 0_5_5_0" > /dev/null && echo 'OK2'; diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.reference b/dbms/tests/queries/0_stateless/00975_drop_detached.reference index 2a355138980..414ac4b1927 100644 --- a/dbms/tests/queries/0_stateless/00975_drop_detached.reference +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.reference @@ -1,6 +1,15 @@ +=== validate part name === OK1 OK2 OK3 +=== drop detached part === +0_3_3_0 +1_2_2_0 +1_4_4_0 +attaching_0_6_6_0 +deleting_0_7_7_0 +prefix_1_2_2_0_0 +=== drop detached partition === 0_3_3_0 attaching_0_6_6_0 deleting_0_7_7_0 diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.sh b/dbms/tests/queries/0_stateless/00975_drop_detached.sh index 71c0b5681fd..8da831b019a 100755 --- a/dbms/tests/queries/0_stateless/00975_drop_detached.sh +++ b/dbms/tests/queries/0_stateless/00975_drop_detached.sh @@ -15,19 +15,31 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system. 
$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8"; $CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 0"; +$CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 1"; sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ 2>/dev/null || \ mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ 2>/dev/null || \ mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ 2>/dev/null || \ mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ +sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0_0/ 2>/dev/null || \ + mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0_0/ +#sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0/ 2>/dev/null || \ +# mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0/ +echo '=== validate part name ===' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK1' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '0_1_1_0'" $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK2' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'deleting_0_7_7_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK3' $CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'any_other_name'" +echo '=== drop detached part ===' $CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' ORDER BY name FORMAT TSV"; + +echo '=== drop detached partition ===' +$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PARTITION 1" +$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' ORDER BY name FORMAT TSV"; + $CLICKHOUSE_CLIENT --query="DROP TABLE drop_detached"; $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES"; From 8e535a9cb0fe66b44de1be0f5537827aad8d7767 Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Mon, 8 Jul 2019 12:41:28 +0300 Subject: [PATCH 055/181] Implement nextInBlock function --- dbms/src/Functions/nextInBlock.cpp | 159 ++++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + .../0_stateless/00957_next_in_block.reference | 12 ++ .../0_stateless/00957_next_in_block.sql | 22 +++ 4 files changed, 195 insertions(+) create mode 100644 dbms/src/Functions/nextInBlock.cpp create mode 100644 dbms/tests/queries/0_stateless/00957_next_in_block.reference create mode 100644 dbms/tests/queries/0_stateless/00957_next_in_block.sql diff --git a/dbms/src/Functions/nextInBlock.cpp b/dbms/src/Functions/nextInBlock.cpp new file mode 100644 index 00000000000..e672e539f25 --- /dev/null +++ b/dbms/src/Functions/nextInBlock.cpp @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int 
ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +// Implements function, giving value for column in next row +// Example: +// | c1 | +// | 10 | +// | 20 | +// SELECT c1, nextInBlock(c1, 1) as c2: +// | c1 | c2 | +// | 10 | 20 | +// | 20 | 0 | +class FunctionNextInBlock : public IFunction +{ +public: + static constexpr auto name = "nextInBlock"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + /// Get the name of the function. + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isVariadic() const override { return true; } + + bool isDeterministic() const override { return false; } + + bool isDeterministicInScopeOfQuery() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + size_t number_of_arguments = arguments.size(); + + if (number_of_arguments < 1 || number_of_arguments > 3) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments) + + ", should be from 1 to 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + // second argument must be a positive, constant column + if (number_of_arguments == 2 && !isUnsignedInteger(arguments[1])) + throw Exception( + "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + + " - should be positive integer", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + // check that default value has supertype with first argument + if (number_of_arguments == 3) + { + DataTypes types = {arguments[0], arguments[2]}; + try + { + return getLeastSupertype(types); + } + catch (const Exception &) + { + throw Exception( + "Illegal types of arguments (" + types[0]->getName() + ", " + types[1]->getName() + + ")" + " of function " + + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + return arguments[0]; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + size_t offset_value = 1; + + if (arguments.size() > 1) + { + auto offset_column = block.getByPosition(arguments[1]); + if (!isColumnConst(*offset_column.column)) + throw Exception("Second argument of function " + getName() + " should be constant", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + Field offset_field = (*block.getByPosition(arguments[1]).column)[0]; + auto raw_value = safeGet(offset_field); + + if (raw_value == 0) + throw Exception( + "Second argument of function " + getName() + " should be positive integer, " + toString(raw_value) + " given", + ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + offset_value = raw_value; + } + + auto has_column_for_missing = arguments.size() == 3; + + DataTypes types = {block.getByPosition(arguments[0]).type}; + if (has_column_for_missing) + { + types.push_back(block.getByPosition(arguments[2]).type); + } + const DataTypePtr & result_type = getLeastSupertype(types); + + auto column = result_type->createColumn(); + column->reserve(input_rows_count); + + auto source_column = block.getByPosition(arguments[0]).column; + + for (size_t i = offset_value; i < input_rows_count; i++) + { + column->insertFrom(*source_column, i); + } + + if (has_column_for_missing) + { + auto default_values_column = block.getByPosition(arguments[2]).column; + size_t starting_pos = offset_value > input_rows_count ? 
0 : input_rows_count - offset_value; + if (isColumnConst(*default_values_column)) + { + Field constant_value = (*default_values_column)[0]; + for (size_t i = starting_pos; i < input_rows_count; i++) + { + column->insert(constant_value); + } + } + else + { + for (size_t i = starting_pos; i < input_rows_count; i++) + { + column->insertFrom(*default_values_column, i); + } + } + } + else + { + for (size_t i = 0; i < std::min(offset_value, input_rows_count); i++) + { + column->insertDefault(); + } + } + + block.getByPosition(result).column = std::move(column); + } +}; + +void registerFunctionNextInBlock(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} \ No newline at end of file diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index 6d201d65bd3..57ccfcd11c9 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -17,6 +17,7 @@ void registerFunctionBlockSize(FunctionFactory &); void registerFunctionBlockNumber(FunctionFactory &); void registerFunctionRowNumberInBlock(FunctionFactory &); void registerFunctionRowNumberInAllBlocks(FunctionFactory &); +void registerFunctionNextInBlock(FunctionFactory &); void registerFunctionSleep(FunctionFactory &); void registerFunctionSleepEachRow(FunctionFactory &); void registerFunctionMaterialize(FunctionFactory &); @@ -67,6 +68,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionBlockNumber(factory); registerFunctionRowNumberInBlock(factory); registerFunctionRowNumberInAllBlocks(factory); + registerFunctionNextInBlock(factory); registerFunctionSleep(factory); registerFunctionSleepEachRow(factory); registerFunctionMaterialize(factory); diff --git a/dbms/tests/queries/0_stateless/00957_next_in_block.reference b/dbms/tests/queries/0_stateless/00957_next_in_block.reference new file mode 100644 index 00000000000..860ce6dc1ba --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_next_in_block.reference @@ -0,0 +1,12 @@ +0 1 +1 0 +0 2 +1 0 +2 0 +0 0 +1 0 +0 2 +1 3 +2 4 +3 1000 +4 1000 diff --git a/dbms/tests/queries/0_stateless/00957_next_in_block.sql b/dbms/tests/queries/0_stateless/00957_next_in_block.sql new file mode 100644 index 00000000000..7cbd932cf1a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_next_in_block.sql @@ -0,0 +1,22 @@ +-- no arguments +select nextInBlock(); -- { serverError 42 } +-- greater than 3 arguments +select nextInBlock(1,2,3,4); -- { serverError 42 } +-- zero offset value +select nextInBlock(dummy, 0); -- { serverError 69 } +-- negative offset value +select nextInBlock(dummy, -1); -- { serverError 43 } +-- non-constant offset value +select nextInBlock(dummy, dummy); -- { serverError 43 } +-- bad default value +select nextInBlock(dummy, 1, 'hello'); -- { serverError 43 } +-- single argument test +select number, nextInBlock(number) from numbers(2); +-- filling by column's default value +select number, nextInBlock(number, 2) from numbers(3); +-- offset is greater that block - should fill everything with defaults +select number, nextInBlock(number, 5) from numbers(2); +-- substitution by constant for missing values +select number, nextInBlock(number, 2, 1000) from numbers(5); +-- substitution by expression +-- select number, nextInBlock(number, 2, number % 2) from numbers(5); \ No newline at end of file From cfec857f2c5685aafc1aeaa061497baf9ab39c53 Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Mon, 8 Jul 2019 17:53:02 
+0300
Subject: [PATCH 056/181] Add trailing newline

---
 dbms/src/Functions/nextInBlock.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Functions/nextInBlock.cpp b/dbms/src/Functions/nextInBlock.cpp
index e672e539f25..eeb33e28146 100644
--- a/dbms/src/Functions/nextInBlock.cpp
+++ b/dbms/src/Functions/nextInBlock.cpp
@@ -156,4 +156,4 @@ void registerFunctionNextInBlock(FunctionFactory & factory)
 factory.registerFunction();
 }
 
-} \ No newline at end of file
+}

From c5a778934ee82d1c80db6eca7e432644ebd6362f Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Thu, 8 Aug 2019 11:37:08 +0300
Subject: [PATCH 057/181] DOCAPI-7984: ASOF JOIN ... ON syntax

---
 docs/en/query_language/select.md | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md
index b75524274e1..048284faa6a 100644
--- a/docs/en/query_language/select.md
+++ b/docs/en/query_language/select.md
@@ -547,18 +547,29 @@ ClickHouse doesn't directly support syntax with commas, so we don't recommend us
 
 Tables for `ASOF JOIN` must have an ordered sequence column. This column cannot be alone in a table, and should be one of the data types: `UInt32`, `UInt64`, `Float32`, `Float64`, `Date`, and `DateTime`.
 
-Use the following syntax for `ASOF JOIN`:
+You can use the following types of syntax:
 
-```
-SELECT expression_list FROM table_1 ASOF JOIN table_2 USING(equi_column1, ... equi_columnN, asof_column)
-```
+- `ASOF JOIN ... ON`
 
-`ASOF JOIN` uses `equi_columnX` for joining on equality (`user_id` in our example) and `asof_column` for joining on the closest match.
+  ```sql
+  SELECT expressions_list FROM table_1 ASOF LEFT JOIN table_2 ON equi_cond AND closest_match_cond
+  ```
+
+  You can use any number of equality conditions and exactly one closest match condition. For example, `SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t`. Only conditions of the types `table_2.some_col <= table_1.some_col` and `table_1.some_col >= table_2.some_col` are available. You cannot apply other conditions like `>` or `!=`.
+
+- `ASOF JOIN ... USING`
+
+  ```sql
+  SELECT expressions_list FROM table_1 ASOF JOIN table_2 USING(equi_column1, ... equi_columnN, asof_column)
+  ```
+
+  `ASOF JOIN` uses `equi_columnX` for joining on equality and `asof_column` for joining on the closest match with the `table_1.asof_column >= table_2.asof_column` condition. The `asof_column` column must be the last in the `USING` clause.
 
 For example, consider the following tables:
 
```
-     table_1                           table_2
+     table_1                           table_2
+
   event   | ev_time | user_id       event   | ev_time | user_id
----------|---------|----------   ----------|---------|----------
               ...                               ...
 event_1_1 |  12:00  |  42         event_2_1 |  11:59  |  42
               ...                 event_2_2 |  12:30  |  42
 event_1_2 |  13:00  |  42         event_2_3 |  13:00  |  42
               ...                               ...
```
 
-`ASOF JOIN` takes the timestamp of a user event from `table_1` and finds an event in `table_2` where the timestamp is closest (equal or less) to the timestamp of the event from `table_1`. Herewith the `user_id` column is used for joining on equality and the `ev_time` column is used for joining on the closest match.
-
-In our example, `event_1_1` can be joined with `event_2_1`, `event_1_2` can be joined with `event_2_3`, but `event_2_2` cannot be joined.
+`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest (equal or less) to the timestamp of the event from `table_1`. Here the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1`, `event_1_2` can be joined with `event_2_3`, but `event_2_2` cannot be joined.
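To make the syntax above concrete, a query over the hypothetical `table_1` and `table_2` from the example might look like this (a sketch, not part of the patch):

```sql
-- For each event in table_1, find the latest table_2 event
-- with the same user_id that is not newer than it.
SELECT table_1.event, table_1.ev_time, table_2.event AS matched
FROM table_1
ASOF LEFT JOIN table_2
ON table_1.user_id == table_2.user_id AND table_2.ev_time <= table_1.ev_time
```

The `LEFT` variant keeps the rows of `table_1` for which no match is found in `table_2`.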
 
-Implementation details:
-
-- `asof_column` should be last in the `USING` clause.
- `ASOF` join is not supported in the [Join](../operations/table_engines/join.md) table engine.
 
 To set the default strictness value, use the session configuration parameter [join_default_strictness](../operations/settings/settings.md#settings-join_default_strictness).

From bb725eb5c42531819763bce4dcf7b08d269fe224 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Thu, 8 Aug 2019 14:57:44 +0300
Subject: [PATCH 058/181] DOCAPI-7442: Started to write.

---
 docs/en/operations/system_tables.md | 88 +++++++++++++++++++----------
 1 file changed, 57 insertions(+), 31 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 14fa1ace01d..6f1ebebdff3 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -252,55 +252,81 @@ This is similar to the DUAL table found in other DBMSs.
 
 Contains information about parts of [MergeTree](table_engines/mergetree.md) tables.
 
-Each row describes one part of the data.
+Each row describes one data part.
 
 Columns:
 
-- partition (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../query_language/alter.md#query_language_queries_alter) query.
+- `partition` (`String`) – The partition name. To learn what a partition is, see the description of the [ALTER](../query_language/alter.md#query_language_queries_alter) query.
 
-Formats:
-- `YYYYMM` for automatic partitioning by month.
-- `any_string` when partitioning manually.
+ Formats:
 
-- name (String) – Name of the data part.
+ - `YYYYMM` for automatic partitioning by month.
+ - `any_string` when partitioning manually.
 
-- active (UInt8) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging.
+- `name` (`String`) – Name of the data part.
 
-- marks (UInt64) – The number of marks. To get the approximate number of rows in a data part, multiply ``marks`` by the index granularity (usually 8192).
+- `active` (`UInt8`) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging.
 
-- marks_size (UInt64) – The size of the file with marks.
+- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192).
 
-- rows (UInt64) – The number of rows.
+- `rows` (`UInt64`) – The number of rows.
 
-- bytes (UInt64) – The number of bytes when compressed.
+- `bytes_on_disk` (`UInt64`) – The number of bytes when compressed.
 
-- modification_time (DateTime) – The modification time of the directory with the data part. This usually corresponds to the time of data part creation.|
+- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part, not including auxiliary files (for example, the file with marks).
 
-- remove_time (DateTime) – The time when the data part became inactive.
+- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part, not including auxiliary files.
 
-- refcount (UInt32) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
+- `marks_bytes` (`UInt64`) – The size of the file with marks.
-- min_date (Date) – The minimum value of the date key in the data part.
+- `modification_time` (`DateTime`) – The modification time of the directory with the data part. This usually corresponds to the time of data part creation.
 
-- max_date (Date) – The maximum value of the date key in the data part.
+- `remove_time` (`DateTime`) – The time when the data part became inactive.
 
-- min_block_number (UInt64) – The minimum number of data parts that make up the current part after merging.
+- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
 
-- max_block_number (UInt64) – The maximum number of data parts that make up the current part after merging.
+- `min_date` (`Date`) – The minimum value of the date key in the data part.
 
-- level (UInt32) – Depth of the merge tree. If a merge was not performed, ``level=0``.
+- `max_date` (`Date`) – The maximum value of the date key in the data part.
 
-- primary_key_bytes_in_memory (UInt64) – The amount of memory (in bytes) used by primary key values.
+- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part.
 
-- primary_key_bytes_in_memory_allocated (UInt64) – The amount of memory (in bytes) reserved for primary key values.
+- `max_time` (`DateTime`) – The maximum value of the date and time key in the data part.
 
-- database (String) – Name of the database.
+- `partition_id` (`String`) – ID of the partition.
 
-- table (String) – Name of the table.
+- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging.
 
-- engine (String) – Name of the table engine without parameters.
+- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging.
+
+- `level` (`UInt32`) – Depth of the merge tree. If a merge was not performed, `level=0`.
+
+- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
+
+- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values.
+
+- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values.
+
+- `is_frozen` (`UInt8`) – Flag that shows partition data backup existence. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../query_language/alter.md#alter_freeze-partition)
+
+- `database` (`String`) – Name of the database.
+
+- `table` (`String`) – Name of the table.
+
+- `engine` (`String`) – Name of the table engine without parameters.
+
+- `path` (`String`) – Absolute path to the folder with data part files.
+
+- `hash_of_all_files` (`String`) – Hash of compressed files.
+
+- `hash_of_uncompressed_files` (`String`) – Hash of uncompressed data.
+
+- `uncompressed_hash_of_compressed_files` (`String`) – Hash of the data in the compressed files as if they were uncompressed.
+
+- `bytes` (`UInt64`) – Alias for `bytes_on_disk`.
+
+- `marks_size` (`UInt64`) – Alias for `marks_bytes`.
 
-- is_frozen (UInt8) – Flag that shows partition data backup existence. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../query_language/alter.md#alter_freeze-partition)
 
 ## system.part_log {#system_tables-part-log}
 
@@ -360,15 +386,15 @@ Contains information about execution of queries. For each query, you can see pro
 
 !!! note
     The table doesn't contain input data for `INSERT` queries.
- 
+ 
 ClickHouse creates this table only if the [query_log](server_settings/settings.md#server_settings-query-log) server parameter is specified.
This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. To enable query logging, set the [log_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section. The `system.query_log` table registers two kinds of queries: - + 1. Initial queries that were run directly by the client. -2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns. +2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns. Columns: @@ -380,7 +406,7 @@ Columns: - `event_date` (Date) — Event date. - `event_time` (DateTime) — Event time. - `query_start_time` (DateTime) — Start time of query processing. -- `query_duration_ms` (UInt64) — Duration of query processing. +- `query_duration_ms` (UInt64) — Duration of query processing. - `read_rows` (UInt64) — Number of read rows. - `read_bytes` (UInt64) — Number of read bytes. - `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. @@ -391,7 +417,7 @@ Columns: - `query` (String) — Query string. - `exception` (String) — Exception message. - `stack_trace` (String) — Stack trace (a list of methods called before the error occurred). An empty string, if the query is completed successfully. -- `is_initial_query` (UInt8) — Kind of query. Possible values: +- `is_initial_query` (UInt8) — Kind of query. Possible values: - 1 — Query was initiated by the client. - 0 — Query was initiated by another query for distributed query execution. - `user` (String) — Name of the user who initiated the current query. @@ -413,7 +439,7 @@ Columns: - `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md). - `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) version. - `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. + - 0 — The query was launched from the TCP interface. - 1 — `GET` method was used. - 2 — `POST` method was used. - `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. 
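As a quick way to exercise the two tables documented in this patch, queries along these lines can be used (a sketch, not part of the patch; the table name `visits` is a placeholder, and `log_queries` must be enabled for `system.query_log` to be populated):

```sql
-- Active parts of one table, with the columns described above.
SELECT partition, name, active, rows, bytes_on_disk, modification_time
FROM system.parts
WHERE table = 'visits' AND active;

-- The ten most recent client-initiated queries.
SELECT event_time, query_duration_ms, read_rows, query
FROM system.query_log
WHERE is_initial_query AND event_date = today()
ORDER BY event_time DESC
LIMIT 10;
```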
From 2f36d80705d5a62c8efc15c623de20250cee1537 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 8 Aug 2019 18:51:17 +0300 Subject: [PATCH 059/181] move tests with sudo to integration tests --- .../integration/test_partition/__init__.py | 0 dbms/tests/integration/test_partition/test.py | 244 ++++++++++++++++++ .../0_stateless/00428_partition.reference | 54 ---- .../queries/0_stateless/00428_partition.sh | 60 ----- .../00974_attach_invalid_parts.reference | 26 -- .../0_stateless/00974_attach_invalid_parts.sh | 53 ---- .../0_stateless/00975_drop_detached.reference | 15 -- .../0_stateless/00975_drop_detached.sh | 45 ---- 8 files changed, 244 insertions(+), 253 deletions(-) create mode 100644 dbms/tests/integration/test_partition/__init__.py create mode 100644 dbms/tests/integration/test_partition/test.py delete mode 100644 dbms/tests/queries/0_stateless/00428_partition.reference delete mode 100755 dbms/tests/queries/0_stateless/00428_partition.sh delete mode 100644 dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference delete mode 100755 dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh delete mode 100644 dbms/tests/queries/0_stateless/00975_drop_detached.reference delete mode 100755 dbms/tests/queries/0_stateless/00975_drop_detached.sh diff --git a/dbms/tests/integration/test_partition/__init__.py b/dbms/tests/integration/test_partition/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_partition/test.py b/dbms/tests/integration/test_partition/test.py new file mode 100644 index 00000000000..59c48e5d9e9 --- /dev/null +++ b/dbms/tests/integration/test_partition/test.py @@ -0,0 +1,244 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance') +q = instance.query +path_to_data = '/var/lib/clickhouse/' + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + q('CREATE DATABASE test') + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture +def partition_table_simple(started_cluster): + q("DROP TABLE IF EXISTS test.partition") + q("CREATE TABLE test.partition (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) " + "ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) " + "SETTINGS index_granularity=8192, index_granularity_bytes=0") + q("INSERT INTO test.partition ( x ) VALUES ( now() )") + q("INSERT INTO test.partition ( x ) VALUES ( now()+1 )") + + yield + + q('DROP TABLE test.partition') + + +def test_partition_simple(partition_table_simple): + q("ALTER TABLE test.partition DETACH PARTITION 197001") + q("ALTER TABLE test.partition ATTACH PARTITION 197001") + q("OPTIMIZE TABLE test.partition") + + +def exec_bash(cmd): + cmd = '/bin/bash -c "{}"'.format(cmd.replace('"', '\\"')) + return instance.exec_in_container(cmd) + + +def partition_complex_assert_columns_txt(): + path_to_parts = path_to_data + 'data/test/partition/' + parts = TSV(q("SELECT name FROM system.parts WHERE database='test' AND table='partition'")) + for part_name in parts.lines: + path_to_columns = path_to_parts + part_name + '/columns.txt' + # 2 header lines + 3 columns + assert exec_bash('cat {} | wc -l'.format(path_to_columns)) == u'5\n' + + +def partition_complex_assert_checksums(): + # Do `cd` for consistent output for reference + # Do not check increment.txt - it can be changed by other tests with FREEZE + 
cmd = 'cd ' + path_to_data + " && find shadow -type f -exec md5sum {} \\;" \ + " | grep partition" \ + " | sed 's!shadow/[0-9]*/data/[a-z0-9_-]*/!shadow/1/data/test/!g'" \ + " | sort" \ + " | uniq" + + checksums = "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition/19700102_2_2_0/k.bin\n" \ + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition/19700201_1_1_0/v1.bin\n" \ + "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition/19700102_2_2_0/minmax_p.idx\n" \ + "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition/19700102_2_2_0/partition.dat\n" \ + "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition/19700201_1_1_0/partition.dat\n" \ + "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition/19700102_2_2_0/checksums.txt\n" \ + "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition/19700102_2_2_0/v1.bin\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700102_2_2_0/k.mrk\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700102_2_2_0/p.mrk\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700102_2_2_0/v1.mrk\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700201_1_1_0/k.mrk\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700201_1_1_0/p.mrk\n" \ + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition/19700201_1_1_0/v1.mrk\n" \ + "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition/19700201_1_1_0/primary.idx\n" \ + "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition/19700201_1_1_0/checksums.txt\n" \ + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition/19700102_2_2_0/columns.txt\n" \ + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition/19700201_1_1_0/columns.txt\n" \ + "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition/19700201_1_1_0/p.bin\n" \ + "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition/19700102_2_2_0/primary.idx\n" \ + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition/19700102_2_2_0/count.txt\n" \ + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition/19700201_1_1_0/count.txt\n" \ + "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition/19700102_2_2_0/p.bin\n" \ + "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition/19700201_1_1_0/k.bin\n" \ + "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition/19700201_1_1_0/minmax_p.idx\n" + + assert TSV(exec_bash(cmd).replace(' ', '\t')) == TSV(checksums) + + +@pytest.fixture +def partition_table_complex(started_cluster): + q("DROP TABLE IF EXISTS test.partition") + q("CREATE TABLE test.partition (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) " + "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0") + q("INSERT INTO test.partition (p, k) VALUES(toDate(31), 1)") + q("INSERT INTO test.partition (p, k) VALUES(toDate(1), 2)") + + yield + + q("DROP TABLE test.partition") + + +def test_partition_complex(partition_table_complex): + + partition_complex_assert_columns_txt() + + q("ALTER TABLE test.partition FREEZE") + + partition_complex_assert_checksums() + + q("ALTER TABLE test.partition DETACH PARTITION 197001") + q("ALTER TABLE test.partition ATTACH PARTITION 197001") + + partition_complex_assert_columns_txt() + + q("ALTER TABLE test.partition MODIFY COLUMN v1 Int8") + + # Check the backup hasn't changed + partition_complex_assert_checksums() + + q("OPTIMIZE TABLE test.partition") 
+ + expected = TSV('31\t1\t2\n' + '1\t2\t3') + res = q("SELECT toUInt16(p), k, v1 FROM test.partition ORDER BY k") + assert(TSV(res) == expected) + + +@pytest.fixture +def cannot_attach_active_part_table(started_cluster): + q("DROP TABLE IF EXISTS test.attach_active") + q("CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n") + q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16") + + yield + + q("DROP TABLE test.attach_active") + + +def test_cannot_attach_active_part(cannot_attach_active_part_table): + error = instance.client.query_and_get_error("ALTER TABLE test.attach_active ATTACH PART '../1_2_2_0'") + print error + assert 0 <= error.find('Invalid part name') + + res = q("SElECT name FROM system.parts WHERE table='attach_active' AND database='test' ORDER BY name") + assert TSV(res) == TSV('0_1_1_0\n1_2_2_0\n2_3_3_0\n3_4_4_0') + assert TSV(q("SElECT count(), sum(n) FROM test.attach_active")) == TSV('16\t120') + + +@pytest.fixture +def attach_check_all_parts_table(started_cluster): + q("SYSTEM STOP MERGES") + q("DROP TABLE IF EXISTS test.attach_partition") + q("CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n") + q("INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8") + q("INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8") + + yield + + q("DROP TABLE test.attach_partition") + q("SYSTEM START MERGES") + + +def test_attach_check_all_parts(attach_check_all_parts_table): + q("ALTER TABLE test.attach_partition DETACH PARTITION 0") + + path_to_detached = path_to_data + 'data/test/attach_partition/detached/' + exec_bash('mkdir {}'.format(path_to_detached + '0_5_5_0')) + exec_bash('cp -pr {} {}'.format(path_to_detached + '0_1_1_0', path_to_detached + 'attaching_0_6_6_0')) + exec_bash('cp -pr {} {}'.format(path_to_detached + '0_3_3_0', path_to_detached + 'deleting_0_7_7_0')) + + error = instance.client.query_and_get_error("ALTER TABLE test.attach_partition ATTACH PARTITION 0") + assert 0 <= error.find('No columns in part 0_5_5_0') + + parts = q("SElECT name FROM system.parts WHERE table='attach_partition' AND database='test' ORDER BY name") + assert TSV(parts) == TSV('1_2_2_0\n1_4_4_0') + detached = q("SELECT name FROM system.detached_parts " + "WHERE table='attach_partition' AND database='test' ORDER BY name") + assert TSV(detached) == TSV('0_1_1_0\n0_3_3_0\n0_5_5_0\nattaching_0_6_6_0\ndeleting_0_7_7_0') + + exec_bash('rm -r {}'.format(path_to_detached + '0_5_5_0')) + + q("ALTER TABLE test.attach_partition ATTACH PARTITION 0") + parts = q("SElECT name FROM system.parts WHERE table='attach_partition' AND database='test' ORDER BY name") + expected = '0_5_5_0\n0_6_6_0\n1_2_2_0\n1_4_4_0' + assert TSV(parts) == TSV(expected) + assert TSV(q("SElECT count(), sum(n) FROM test.attach_partition")) == TSV('16\t120') + + detached = q("SELECT name FROM system.detached_parts " + "WHERE table='attach_partition' AND database='test' ORDER BY name") + assert TSV(detached) == TSV('attaching_0_6_6_0\ndeleting_0_7_7_0') + + +@pytest.fixture +def drop_detached_parts_table(started_cluster): + q("SYSTEM STOP MERGES") + q("DROP TABLE IF EXISTS test.drop_detached") + q("CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n") + q("INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8") + q("INSERT INTO 
test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8") + + yield + + q("DROP TABLE test.drop_detached") + q("SYSTEM START MERGES") + + +def test_drop_detached_parts(drop_detached_parts_table): + s = {"allow_drop_detached_part": 1} + q("ALTER TABLE test.drop_detached DETACH PARTITION 0") + q("ALTER TABLE test.drop_detached DETACH PARTITION 1") + + path_to_detached = path_to_data + 'data/test/drop_detached/detached/' + exec_bash('mkdir {}'.format(path_to_detached + 'attaching_0_6_6_0')) + exec_bash('mkdir {}'.format(path_to_detached + 'deleting_0_7_7_0')) + exec_bash('mkdir {}'.format(path_to_detached + 'any_other_name')) + exec_bash('mkdir {}'.format(path_to_detached + 'prefix_1_2_2_0_0')) + + error = instance.client.query_and_get_error("ALTER TABLE test.drop_detached DROP DETACHED PART '../1_2_2_0'", settings=s) + assert 0 <= error.find('Invalid part name') + + q("ALTER TABLE test.drop_detached DROP DETACHED PART '0_1_1_0'", settings=s) + + error = instance.client.query_and_get_error("ALTER TABLE test.drop_detached DROP DETACHED PART 'attaching_0_6_6_0'", settings=s) + assert 0 <= error.find('Cannot drop part') + + error = instance.client.query_and_get_error("ALTER TABLE test.drop_detached DROP DETACHED PART 'deleting_0_7_7_0'", settings=s) + assert 0 <= error.find('Cannot drop part') + + q("ALTER TABLE test.drop_detached DROP DETACHED PART 'any_other_name'", settings=s) + + detached = q("SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='test' ORDER BY name") + assert TSV(detached) == TSV('0_3_3_0\n1_2_2_0\n1_4_4_0\nattaching_0_6_6_0\ndeleting_0_7_7_0\nprefix_1_2_2_0_0') + + q("ALTER TABLE test.drop_detached DROP DETACHED PARTITION 1", settings=s) + detached = q("SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='test' ORDER BY name") + assert TSV(detached) == TSV('0_3_3_0\nattaching_0_6_6_0\ndeleting_0_7_7_0') + diff --git a/dbms/tests/queries/0_stateless/00428_partition.reference b/dbms/tests/queries/0_stateless/00428_partition.reference deleted file mode 100644 index c777fd7a5c3..00000000000 --- a/dbms/tests/queries/0_stateless/00428_partition.reference +++ /dev/null @@ -1,54 +0,0 @@ -5 -5 -082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_2_2_0/k.bin -082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_1_1_0/v1.bin -13cae8e658e0ca4f75c56b1fc424e150 shadow/1/data/test/partition_428/19700102_2_2_0/minmax_p.idx -25daad3d9e60b45043a70c4ab7d3b1c6 shadow/1/data/test/partition_428/19700102_2_2_0/partition.dat -3726312af62aec86b64a7708d5751787 shadow/1/data/test/partition_428/19700201_1_1_0/partition.dat -37855b06a39b79a67ea4e86e4a3299aa shadow/1/data/test/partition_428/19700102_2_2_0/checksums.txt -38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_2_2_0/v1.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/k.mrk -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/p.mrk -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/v1.mrk -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/k.mrk -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/p.mrk -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/v1.mrk -55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_1_1_0/primary.idx -5f087cb3e7071bf9407e095821e2af8f 
shadow/1/data/test/partition_428/19700201_1_1_0/checksums.txt
-77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_2_2_0/columns.txt
-77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_1_1_0/columns.txt
-88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_1_1_0/p.bin
-9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_2_2_0/primary.idx
-c4ca4238a0b923820dcc509a6f75849b shadow/1/data/test/partition_428/19700102_2_2_0/count.txt
-c4ca4238a0b923820dcc509a6f75849b shadow/1/data/test/partition_428/19700201_1_1_0/count.txt
-cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_2_2_0/p.bin
-e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_1_1_0/k.bin
-f2312862cc01adf34a93151377be2ddf shadow/1/data/test/partition_428/19700201_1_1_0/minmax_p.idx
-5
-5
-082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_2_2_0/k.bin
-082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_1_1_0/v1.bin
-13cae8e658e0ca4f75c56b1fc424e150 shadow/1/data/test/partition_428/19700102_2_2_0/minmax_p.idx
-25daad3d9e60b45043a70c4ab7d3b1c6 shadow/1/data/test/partition_428/19700102_2_2_0/partition.dat
-3726312af62aec86b64a7708d5751787 shadow/1/data/test/partition_428/19700201_1_1_0/partition.dat
-37855b06a39b79a67ea4e86e4a3299aa shadow/1/data/test/partition_428/19700102_2_2_0/checksums.txt
-38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_2_2_0/v1.bin
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/k.mrk
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/p.mrk
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_2_2_0/v1.mrk
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/k.mrk
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/p.mrk
-4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_1_1_0/v1.mrk
-55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_1_1_0/primary.idx
-5f087cb3e7071bf9407e095821e2af8f shadow/1/data/test/partition_428/19700201_1_1_0/checksums.txt
-77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_2_2_0/columns.txt
-77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_1_1_0/columns.txt
-88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_1_1_0/p.bin
-9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_2_2_0/primary.idx
-c4ca4238a0b923820dcc509a6f75849b shadow/1/data/test/partition_428/19700102_2_2_0/count.txt
-c4ca4238a0b923820dcc509a6f75849b shadow/1/data/test/partition_428/19700201_1_1_0/count.txt
-cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_2_2_0/p.bin
-e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_1_1_0/k.bin
-f2312862cc01adf34a93151377be2ddf shadow/1/data/test/partition_428/19700201_1_1_0/minmax_p.idx
-31,1,2
-1,2,3
diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh
deleted file mode 100755
index 033d5e24c13..00000000000
--- a/dbms/tests/queries/0_stateless/00428_partition.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-. $CURDIR/../shell_config.sh
-
-# Not found column date in block. There are only columns: x.
-
-# Test 1. Complex test checking columns.txt
-
-chl="$CLICKHOUSE_CLIENT -q"
-ch_dir=`${CLICKHOUSE_EXTRACT_CONFIG} -k path`
-
-$chl "DROP TABLE IF EXISTS test.partition_428"
-$chl "CREATE TABLE test.partition_428 (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0"
-$chl "INSERT INTO test.partition_428 (p, k) VALUES(toDate(31), 1)"
-$chl "INSERT INTO test.partition_428 (p, k) VALUES(toDate(1), 2)"
-
-for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table='partition_428'"`; do
-    # 2 header lines + 3 columns
-    (sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt 2>/dev/null || \
-        cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l
-done
-
-$chl "ALTER TABLE test.partition_428 FREEZE"
-
-# Do `cd` for consistent output for reference
-# Do not check increment.txt - it can be changed by other tests with FREEZE
-cd $ch_dir && find shadow -type f -exec md5sum {} \; | grep "partition_428" | sed 's!shadow/[0-9]*/data/[a-z0-9_-]*/!shadow/1/data/test/!g' | sort | uniq
-
-$chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001"
-$chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001"
-
-for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table='partition_428'"`; do
-    # 2 header lines + 3 columns
-    (sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt 2>/dev/null || \
-        cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l
-done
-
-$chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8"
-
-# Check the backup hasn't changed
-cd $ch_dir && find shadow -type f -exec md5sum {} \; | grep "partition_428" | sed 's!shadow/[0-9]*/data/[a-z0-9_-]*/!shadow/1/data/test/!g' | sort | uniq
-
-$chl "OPTIMIZE TABLE test.partition_428"
-
-$chl "SELECT toUInt16(p), k, v1 FROM test.partition_428 ORDER BY k FORMAT CSV"
-$chl "DROP TABLE test.partition_428"
-
-# Test 2. Simple test
-
-$chl "drop table if exists test.partition_428"
-$chl "create table test.partition_428 (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) SETTINGS index_granularity=8192, index_granularity_bytes=0"
-$chl "insert into test.partition_428 ( x ) VALUES ( now() )"
-$chl "insert into test.partition_428 ( x ) VALUES ( now()+1 )"
-$chl "alter table test.partition_428 detach partition 197001"
-$chl "alter table test.partition_428 attach partition 197001"
-$chl "optimize table test.partition_428"
-$chl "drop table test.partition_428"
diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference
deleted file mode 100644
index f30fc160dfb..00000000000
--- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.reference
+++ /dev/null
@@ -1,26 +0,0 @@
-=== cannot attach active ===
-OK1
-0_1_1_0
-1_2_2_0
-2_3_3_0
-3_4_4_0
-16 120
-=== check all parts before attaching ===
-OK2
-1_2_2_0
-1_4_4_0
-=== detached ===
-0_1_1_0
-0_3_3_0
-0_5_5_0
-attaching_0_6_6_0
-deleting_0_7_7_0
-=== attach ===
-0_5_5_0
-0_6_6_0
-1_2_2_0
-1_4_4_0
-16 120
-=== detached ===
-attaching_0_6_6_0
-deleting_0_7_7_0
diff --git a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh b/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh
deleted file mode 100755
index db45cfe7f21..00000000000
--- a/dbms/tests/queries/0_stateless/00974_attach_invalid_parts.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-. $CURDIR/../shell_config.sh
-
-ch_dir=`${CLICKHOUSE_EXTRACT_CONFIG} -k path`
-cur_db=`${CLICKHOUSE_CLIENT} --query "SELECT currentDatabase()"`
-
-echo '=== cannot attach active ===';
-$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_active";
-$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n";
-$CLICKHOUSE_CLIENT --query="INSERT INTO attach_active SELECT number FROM system.numbers LIMIT 16";
-$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_active ATTACH PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK1'
-$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_active' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-$CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_active FORMAT TSV";
-$CLICKHOUSE_CLIENT --query="DROP TABLE attach_active";
-
-
-
-$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES";
-$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS attach_partitions";
-$CLICKHOUSE_CLIENT --query="CREATE TABLE attach_partitions (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n";
-$CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8";
-$CLICKHOUSE_CLIENT --query="INSERT INTO attach_partitions SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8";
-
-$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions DETACH PARTITION 0";
-sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \
-    mkdir --mode=777 $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ # broken part
-sudo -n cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/ 2>/dev/null || \
-    cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_1_1_0/ $ch_dir/data/$cur_db/attach_partitions/detached/attaching_0_6_6_0/
-sudo -n cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/ 2>/dev/null || \
-    cp -pr $ch_dir/data/$cur_db/attach_partitions/detached/0_3_3_0/ $ch_dir/data/$cur_db/attach_partitions/detached/deleting_0_7_7_0/
-
-echo '=== check all parts before attaching ===';
-$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0" 2>&1 | grep "No columns in part 0_5_5_0" > /dev/null && echo 'OK2';
-$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-echo '=== detached ===';
-$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-
-echo '=== attach ===';
-sudo -n rm -r $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/ 2>/dev/null || \
-    rm -r $ch_dir/data/$cur_db/attach_partitions/detached/0_5_5_0/
-$CLICKHOUSE_CLIENT --query="ALTER TABLE attach_partitions ATTACH PARTITION 0";
-$CLICKHOUSE_CLIENT --query="SElECT name FROM system.parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-$CLICKHOUSE_CLIENT --query="SElECT count(), sum(n) FROM attach_partitions FORMAT TSV";
-
-echo '=== detached ===';
-$CLICKHOUSE_CLIENT --query="SELECT name FROM system.detached_parts WHERE table='attach_partitions' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-
-$CLICKHOUSE_CLIENT --query="DROP TABLE attach_partitions";
-$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES";
diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.reference b/dbms/tests/queries/0_stateless/00975_drop_detached.reference
deleted file mode 100644
index 414ac4b1927..00000000000
--- a/dbms/tests/queries/0_stateless/00975_drop_detached.reference
+++ /dev/null
@@ -1,15 +0,0 @@
-=== validate part name ===
-OK1
-OK2
-OK3
-=== drop detached part ===
-0_3_3_0
-1_2_2_0
-1_4_4_0
-attaching_0_6_6_0
-deleting_0_7_7_0
-prefix_1_2_2_0_0
-=== drop detached partition ===
-0_3_3_0
-attaching_0_6_6_0
-deleting_0_7_7_0
diff --git a/dbms/tests/queries/0_stateless/00975_drop_detached.sh b/dbms/tests/queries/0_stateless/00975_drop_detached.sh
deleted file mode 100755
index 8da831b019a..00000000000
--- a/dbms/tests/queries/0_stateless/00975_drop_detached.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-. $CURDIR/../shell_config.sh
-
-ch_dir=`${CLICKHOUSE_EXTRACT_CONFIG} -k path`
-cur_db=`${CLICKHOUSE_CLIENT} --query "SELECT currentDatabase()"`
-
-$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES";
-$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS drop_detached";
-$CLICKHOUSE_CLIENT --query="CREATE TABLE drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n";
-$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8";
-$CLICKHOUSE_CLIENT --query="INSERT INTO drop_detached SELECT number FROM system.numbers WHERE number % 2 = 1 LIMIT 8";
-
-$CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 0";
-$CLICKHOUSE_CLIENT --query="ALTER TABLE drop_detached DETACH PARTITION 1";
-sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/ 2>/dev/null || \
-    mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/attaching_0_6_6_0/
-sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/ 2>/dev/null || \
-    mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/deleting_0_7_7_0/
-sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/ 2>/dev/null || \
-    mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/any_other_name/
-sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0_0/ 2>/dev/null || \
-    mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0_0/
-#sudo -n mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0/ 2>/dev/null || \
-#    mkdir --mode=777 $ch_dir/data/$cur_db/drop_detached/detached/prefix_1_2_2_0/
-
-echo '=== validate part name ==='
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '../1_2_2_0'" 2>&1 | grep "Invalid part name" > /dev/null && echo 'OK1'
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART '0_1_1_0'"
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'attaching_0_6_6_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK2'
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'deleting_0_7_7_0'" 2>&1 | grep "Cannot drop part" > /dev/null && echo 'OK3'
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PART 'any_other_name'"
-
-echo '=== drop detached part ==='
-$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-
-echo '=== drop detached partition ==='
-$CLICKHOUSE_CLIENT --allow_drop_detached=1 --query="ALTER TABLE drop_detached DROP DETACHED PARTITION 1"
-$CLICKHOUSE_CLIENT --query="SElECT name FROM system.detached_parts WHERE table='drop_detached' AND database='${cur_db}' ORDER BY name FORMAT TSV";
-
-$CLICKHOUSE_CLIENT --query="DROP TABLE drop_detached";
-$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES";
From b5eee531a9cbd9b3df2c5c373d99680e50d6b8cb Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 8 Aug 2019 19:08:43 +0300
Subject: [PATCH 060/181] fix setting name

---
 dbms/src/Core/Settings.h                        | 2 +-
 dbms/src/Interpreters/InterpreterAlterQuery.cpp | 2 +-
 dbms/tests/integration/test_partition/test.py   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index fd11d645bd5..ffc11cef4a6 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -341,7 +341,7 @@ struct Settings : public SettingsCollection
    /** Obsolete settings that do nothing but are left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
    \
    M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
-    M(SettingBool, allow_drop_detached_part, false, "Allow ALTER TABLE ... DROP DETACHED PART ... queries")
+    M(SettingBool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries")

DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)

diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp
index 074fbb7d4c2..bc419f1ff84 100644
--- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp
@@ -56,7 +56,7 @@ BlockIO InterpreterAlterQuery::execute()
        else if (auto partition_command = PartitionCommand::parse(command_ast))
        {
            if (partition_command->type == PartitionCommand::DROP_DETACHED_PARTITION
-                && !context.getSettingsRef().allow_drop_detached_part)
+                && !context.getSettingsRef().allow_drop_detached)
                throw DB::Exception("Cannot execute query: DROP DETACHED PART is disabled "
                                    "(see allow_drop_detached setting)", ErrorCodes::SUPPORT_IS_DISABLED);
            partition_commands.emplace_back(std::move(*partition_command));
diff --git a/dbms/tests/integration/test_partition/test.py b/dbms/tests/integration/test_partition/test.py
index 59c48e5d9e9..3365343b6fb 100644
--- a/dbms/tests/integration/test_partition/test.py
+++ b/dbms/tests/integration/test_partition/test.py
@@ -212,7 +212,7 @@ def drop_detached_parts_table(started_cluster):

 def test_drop_detached_parts(drop_detached_parts_table):
-    s = {"allow_drop_detached_part": 1}
+    s = {"allow_drop_detached": 1}
    q("ALTER TABLE test.drop_detached DETACH PARTITION 0")
    q("ALTER TABLE test.drop_detached DETACH PARTITION 1")

From d1ebfaacd6df35b2a6b55f25f6f9319e00c1f5ac Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 8 Aug 2019 22:28:25 +0300
Subject: [PATCH 061/181] update docs

---
 docs/en/operations/system_tables.md |  3 ++-
 docs/en/query_language/alter.md     | 12 +++++++++++-
 docs/ru/operations/system_tables.md |  6 ++++++
 docs/ru/query_language/alter.md     | 11 ++++++++++-
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index e63a9115270..e5eac2f1f58 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -87,13 +87,14 @@ This table contains a single String column called 'name' – the name of a datab
Each database that the server knows about has a corresponding entry in the table.
This system table is used for implementing the `SHOW DATABASES` query.

-## system.detached_parts
+## system.detached_parts {#system_tables-detached_parts}

Contains information about detached parts of [MergeTree](table_engines/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty.
Such parts can be attached with the [ALTER TABLE ATTACH PARTITION|PART](../query_language/alter.md#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts).
+If the part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/alter.md#alter_drop-detached).
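A minimal SQL sketch of the workflow these two changes document, tying the renamed setting to the new table section (the table `test.t` and the part name below are illustrative, not taken from the patch):

```sql
-- Detach a partition: its parts move to the table's `detached` directory.
ALTER TABLE test.t DETACH PARTITION 201901;

-- Inspect what is detached and why (`reason` is empty for user-detached parts).
SELECT name, reason FROM system.detached_parts WHERE table = 't';

-- Dropping detached parts is gated by the setting renamed above.
SET allow_drop_detached = 1;
ALTER TABLE test.t DROP DETACHED PARTITION 201901;
```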
## system.dictionaries

diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index 6e8e712ff30..2d42c4cc354 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -210,6 +210,16 @@ Read about setting the partition expression in a section [How to specify the par

The query is replicated – it deletes data on all replicas.

+#### DROP DETACHED PARTITION|PART {#alter_drop-detached}
+
+```sql
+ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
+```
+
+Removes the specified part or all parts of the specified partition from `detached`.
+Read more about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+
+
 #### ATTACH PARTITION|PART {#alter_attach-partition}

``` sql
@@ -327,7 +337,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif

- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
-- In the [ALTER ATTACH PART](#alter_attach-partition) query, to specify the name of a part, use a value from the `name` column of the `system.parts` table. For example, `ALTER TABLE visits ATTACH PART 201901_1_1_0`.
+- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../operations/system_tables.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.

diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md
index eb452c8de4e..4345f83718b 100644
--- a/docs/ru/operations/system_tables.md
+++ b/docs/ru/operations/system_tables.md
@@ -47,6 +47,12 @@ default_expression String - выражение для значения по ум
Для каждой базы данных, о которой знает сервер, будет присутствовать соответствующая запись в таблице.
Эта системная таблица используется для реализации запроса `SHOW DATABASES`.

+## system.detached_parts {#system_tables-detached_parts}
+
+Содержит информацию об отсоединённых кусках таблиц семейства [MergeTree](table_engines/mergetree.md). Столбец `reason` содержит причину, по которой кусок был отсоединён. Для кусков, отсоединённых пользователем, `reason` содержит пустую строку.
+Такие куски могут быть присоединены с помощью [ALTER TABLE ATTACH PARTITION|PART](../query_language/alter.md#alter_attach-partition). Остальные столбцы описаны в [system.parts](#system_tables-parts).
+Если имя куска некорректно, значения некоторых столбцов могут быть `NULL`. Такие куски могут быть удалены с помощью [ALTER TABLE DROP DETACHED PART](../query_language/alter.md#alter_drop-detached).
+

## system.dictionaries

Содержит информацию о внешних словарях.

diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md
index 2367386172a..3e0030e948e 100644
--- a/docs/ru/query_language/alter.md
+++ b/docs/ru/query_language/alter.md
@@ -209,6 +209,15 @@ ALTER TABLE table_name DROP PARTITION partition_expr

Запрос реплицируется — данные будут удалены на всех репликах.

+#### DROP DETACHED PARTITION|PART {#alter_drop-detached}
+
+```sql
+ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
+```
+
+Удаляет из `detached` кусок или все куски, принадлежащие партиции.
+Подробнее о том, как корректно задать имя партиции, см. в разделе [Как задавать имя партиции в запросах ALTER](#alter-how-to-specify-part-expr).
+
 #### ATTACH PARTITION|PART {#alter_attach-partition}

```sql
@@ -328,7 +337,7 @@ ALTER TABLE users ATTACH PARTITION 201902;

- Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../operations/system_tables.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`.
- Произвольное выражение из столбцов исходной таблицы. Также поддерживаются константы и константные выражения. Например, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- Строковый идентификатор партиции. Идентификатор партиции используется для именования кусков партиции на файловой системе и в ZooKeeper. В запросах `ALTER` идентификатор партиции нужно указывать в секции `PARTITION ID`, в одинарных кавычках. Например, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
-- Для запросов [ATTACH PART](#alter_attach-partition): чтобы задать имя куска партиции, используйте значение из столбца `name` системной таблицы `system.parts`. Например, `ALTER TABLE visits ATTACH PART 201901_1_1_0`.
+- Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковый литерал со значением из столбца `name` системной таблицы [system.detached_parts](../operations/system_tables.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

Использование кавычек в имени партиций зависит от типа данных столбца, по которому задано партиционирование. Например, для столбца с типом `String` имя партиции необходимо указывать в кавычках (одинарных). Для типов `Date` и `Int*` кавычки указывать не нужно.
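Collecting the four ways of specifying a partition described in the section above into one runnable sketch (the `visits` table is the example the docs themselves use):

```sql
-- By the partition value from system.parts:
ALTER TABLE visits DETACH PARTITION 201901;
-- By a constant expression over the partitioning column:
ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'));
-- By the string partition ID, in single quotes:
ALTER TABLE visits DETACH PARTITION ID '201901';
-- By part name, as a string literal from system.detached_parts.name:
ALTER TABLE visits ATTACH PART '201901_1_1_0';
```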
From 67331881356ee5dd1fb158ce728e8ab42016c9f2 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Mon, 5 Aug 2019 07:09:09 +0000
Subject: [PATCH 062/181] Added gcc-9 to docker/builder container

---
 docker/builder/Dockerfile | 6 ++++--
 docker/builder/build.sh   | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile
index 03b4e242d6d..41a558f9eb8 100644
--- a/docker/builder/Dockerfile
+++ b/docker/builder/Dockerfile
@@ -1,6 +1,8 @@
 FROM ubuntu:18.04

 RUN apt-get update -y \
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:ubuntu-toolchain-r/test \
    && env DEBIAN_FRONTEND=noninteractive \
        apt-get install --yes --no-install-recommends \
            bash \
@@ -8,8 +10,8 @@ RUN apt-get update -y \
            cmake \
            curl \
            expect \
-            g++ \
-            gcc \
+            g++-9 \
+            gcc-9 \
            libclang-6.0-dev \
            libicu-dev \
            liblld-6.0-dev \
diff --git a/docker/builder/build.sh b/docker/builder/build.sh
index 6a5f1359bda..57999a4b483 100755
--- a/docker/builder/build.sh
+++ b/docker/builder/build.sh
@@ -3,7 +3,7 @@
 #ccache -s
 mkdir -p /server/build_docker
 cd /server/build_docker
-cmake -G Ninja /server -DENABLE_TESTS=1
+cmake -G Ninja /server -DENABLE_TESTS=1 -DCMAKE_C_COMPILER=`which gcc-9` -DCMAKE_CXX_COMPILER=`which g++-9`

 # Set the number of build jobs to half the number of virtual CPU cores (rounded up).
 # By default, ninja uses all virtual CPU cores, which leads to very high memory consumption without much improvement in build time.
From 0233f32f9b30e78d8f54c28ccdea16736a6293b1 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sun, 11 Aug 2019 12:28:15 +0300
Subject: [PATCH 063/181] Fixed AddressSanitizer error

---
 .../DataStreams/CheckConstraintsBlockOutputStream.cpp | 11 +++++------
 .../DataStreams/CheckConstraintsBlockOutputStream.h   |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
index 5adf344cf0b..4b4865f004f 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp
@@ -11,8 +11,9 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
 {
    for (size_t i = 0; i < expressions.size(); ++i)
    {
+        Block res = block;
        auto constraint_expr = expressions[i];
-        auto res_column_uint8 = executeOnBlock(block, constraint_expr);
+        auto res_column_uint8 = executeOnBlock(res, constraint_expr);
        if (!memoryIsByte(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize(), 0x1))
        {
            auto indices_wrong = findAllWrong(res_column_uint8->getRawDataBegin<1>(), res_column_uint8->byteSize());
@@ -48,13 +49,11 @@ void CheckConstraintsBlockOutputStream::writeSuffix()
 }

 const ColumnUInt8 *CheckConstraintsBlockOutputStream::executeOnBlock(
-    const Block & block,
+    Block & block,
    const ExpressionActionsPtr & constraint)
 {
-    Block res = block;
-
-    constraint->execute(res);
-    ColumnWithTypeAndName res_column = res.safeGetByPosition(res.columns() - 1);
+    constraint->execute(block);
+    ColumnWithTypeAndName res_column = block.safeGetByPosition(block.columns() - 1);
    return checkAndGetColumn(res_column.column.get());
 }

diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
index ac2e7e974a1..7ab6832fd28 100644
--- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
+++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.h
@@ -39,7 +39,7 @@ public:
    void writeSuffix() override;

 private:
-    const ColumnUInt8* executeOnBlock(const Block & block, const ExpressionActionsPtr & constraint);
+    const ColumnUInt8* executeOnBlock(Block & block, const ExpressionActionsPtr & constraint);
    std::vector findAllWrong(const void *data, size_t size);

    String table;
From 93a635d18a2119cc018cdbb5d73cfd12da6fbdc6 Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sun, 11 Aug 2019 12:30:01 +0300
Subject: [PATCH 064/181] Added clang-8 to docker builder

---
 docker/builder/Dockerfile | 4 ++++
 docker/builder/Makefile   | 2 +-
 docker/builder/build.sh   | 3 ++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile
index 41a558f9eb8..5978dcd08d0 100644
--- a/docker/builder/Dockerfile
+++ b/docker/builder/Dockerfile
@@ -28,6 +28,10 @@ RUN apt-get update -y \
            tzdata \
            gperf

+RUN apt install -y wget
+RUN printf "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main\ndeb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list \
+    && wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && apt update && apt-get install -y clang-8 lldb-8 lld-8
+
 COPY build.sh /

 CMD ["/bin/bash", "/build.sh"]
diff --git a/docker/builder/Makefile b/docker/builder/Makefile
index 779e944b723..a9a7cddf3f2 100644
--- a/docker/builder/Makefile
+++ b/docker/builder/Makefile
@@ -1,6 +1,6 @@
 build: image
	mkdir -p $(HOME)/.ccache
-	docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
+	docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder

 pull:
	docker pull yandex/clickhouse-builder
diff --git a/docker/builder/build.sh b/docker/builder/build.sh
index 57999a4b483..96468d8d820 100755
--- a/docker/builder/build.sh
+++ b/docker/builder/build.sh
@@ -3,7 +3,8 @@
 #ccache -s
 mkdir -p /server/build_docker
 cd /server/build_docker
-cmake -G Ninja /server -DENABLE_TESTS=1 -DCMAKE_C_COMPILER=`which gcc-9` -DCMAKE_CXX_COMPILER=`which g++-9`
+
+cmake -G Ninja /server -DCMAKE_C_COMPILER=`which clang-8` -DCMAKE_CXX_COMPILER=`which clang++-8` -DCMAKE_BUILD_TYPE=Debug

 # Set the number of build jobs to half the number of virtual CPU cores (rounded up).
 # By default, ninja uses all virtual CPU cores, which leads to very high memory consumption without much improvement in build time.
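The CheckConstraintsBlockOutputStream patched a few hunks above enforces `CONSTRAINT ... CHECK` clauses on INSERT; a minimal SQL sketch of the feature it backs (table and constraint names are illustrative, and this assumes the constraints support added earlier in this series):

```sql
CREATE TABLE constrained
(
    x UInt32,
    CONSTRAINT x_is_positive CHECK x > 0
) ENGINE = MergeTree ORDER BY x;

INSERT INTO constrained VALUES (1); -- passes the CHECK expression
INSERT INTO constrained VALUES (0); -- expected to be rejected as a constraint violation
```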
From 3b9e1f9bf727764e1a4d9787b6e58313f3d381bf Mon Sep 17 00:00:00 2001
From: Gleb Novikov
Date: Sun, 11 Aug 2019 13:39:17 +0300
Subject: [PATCH 065/181] Fixed getIdentifierName call in AlterCommand::parse

---
 dbms/src/Storages/AlterCommands.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index 48690df071a..13141683be8 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -190,7 +190,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_
    else if (command_ast->type == ASTAlterCommand::DROP_COLUMN)
    {
        command.type = AlterCommand::DROP_COLUMN;
-        command.column_name = *getIdentifierName(command_ast->column);
+        command.column_name = getIdentifierName(command_ast->column);
    }

    return command;
From 16bab882262e5075272b6c60dba49e2887e05933 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Mon, 12 Aug 2019 13:27:28 +0300
Subject: [PATCH 066/181] DOCAPI-7442: 7442

---
 docs/en/operations/system_tables.md                | 14 +++++++-------
 docs/en/query_language/functions/hash_functions.md |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 6f1ebebdff3..3abbd98961f 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -271,11 +271,11 @@ Columns:

- `rows` (`UInt64`) – The number of rows.

-- `bytes_on_disk` (`UInt64`) – The number of bytes when compressed.
+- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes.

-- `data_compressed_bytes` (`UInt64`) –
+- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.

-- `data_uncompressed_bytes` (`UInt64`) –
+- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.

- `marks_bytes` (`UInt64`) – The size of the file with marks.
@@ -301,7 +301,7 @@ Columns:

- `level` (`UInt32`) – Depth of the merge tree. If a merge was not performed, `level=0`.

-- `data_version` (`UInt64`) –
+- `data_version` (`UInt64`) – Block number that is used to determine which mutations should be applied to the data part (mutations with a version greater than `data_version`).

- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values.
@@ -317,11 +317,11 @@ Columns:

- `path` (`String`) – Absolute path to the folder with data part files.

-- `hash_of_all_files` (`String`) – Hash of compressed files.
+- `hash_of_all_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of compressed files.

-- `hash_of_uncompressed_files` (`String`) – Hash of uncompressed data.
+- `hash_of_uncompressed_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of uncompressed data.

-- `uncompressed_hash_of_compressed_files` (`String`) – Hash of the file with marks
+- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of the file with marks.

- `bytes` (`UInt64`) – Alias for `bytes_on_disk`.
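A quick way to see the columns documented above for a real table; a sketch, with an illustrative table filter:

```sql
SELECT
    name,
    active,
    rows,
    bytes_on_disk,
    data_compressed_bytes,
    data_uncompressed_bytes,
    data_version,
    hash_of_all_files
FROM system.parts
WHERE table = 'visits' AND active;
```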
diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md
index 296cca1e712..b384dead609 100644
--- a/docs/en/query_language/functions/hash_functions.md
+++ b/docs/en/query_language/functions/hash_functions.md
@@ -74,7 +74,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
└──────────────────────┴────────┘
```

-## sipHash128
+## sipHash128 {#hash_functions-siphash128}

Calculates SipHash from a string.
Accepts a String-type argument. Returns FixedString(16).
From 360b1217b9c6749f4a667230001c98c7d1db5036 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Mon, 12 Aug 2019 13:31:28 +0300
Subject: [PATCH 067/181] DOCAPI-7442: Fix.

---
 docs/en/operations/system_tables.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 3abbd98961f..2fd86b52c35 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -265,7 +265,7 @@ Columns:

- `name` (`String`) – Name of the data part.

-- `active` (`UInt8`) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging.
+- `active` (`UInt8`) – Flag that indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging.

- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192).
From 1b54a52488441b753c99fe6e17d19d458e90313c Mon Sep 17 00:00:00 2001
From: Alexandr Krasheninnikov
Date: Mon, 12 Aug 2019 17:10:29 +0300
Subject: [PATCH 068/181] Temp

---
 dbms/src/Functions/neighbour.cpp   | 217 +++++++++++++++++++++++++++++
 dbms/src/Functions/nextInBlock.cpp | 159 ---------------------
 2 files changed, 217 insertions(+), 159 deletions(-)
 create mode 100644 dbms/src/Functions/neighbour.cpp
 delete mode 100644 dbms/src/Functions/nextInBlock.cpp

diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp
new file mode 100644
index 00000000000..cf96282725a
--- /dev/null
+++ b/dbms/src/Functions/neighbour.cpp
@@ -0,0 +1,217 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+// Implements a function that returns the value of a column at a given offset from the current row.
+// Example:
+// | c1 |
+// | 10 |
+// | 20 |
+// SELECT c1, neighbour(c1, 1) as c2:
+// | c1 | c2 |
+// | 10 | 20 |
+// | 20 | 0  |
+class FunctionNeighbour : public IFunction
+{
+public:
+    static constexpr auto name = "neighbour";
+    static FunctionPtr create(const Context & context) { return std::make_shared(context); }
+
+    FunctionNeighbour(const Context & context_) : context(context_) {}
+
+    /// Get the name of the function.
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 0; }
+
+    bool isVariadic() const override { return true; }
+
+    bool isDeterministic() const override { return false; }
+
+    bool isDeterministicInScopeOfQuery() const override { return false; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        size_t number_of_arguments = arguments.size();
+
+        if (number_of_arguments < 2 || number_of_arguments > 3)
+            throw Exception(
+                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments)
+                    + ", should be from 2 to 3",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        // second argument must be an integer (the offset may be negative)
+        if (!isInteger(arguments[1]))
+            throw Exception(
+                "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
+                    + " - should be an integer",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        // check that default value column has supertype with first argument
+        if (number_of_arguments == 3)
+        {
+            DataTypes types = {arguments[0], arguments[2]};
+            try
+            {
+                return getLeastSupertype(types);
+            }
+            catch (const Exception &)
+            {
+                throw Exception(
+                    "Illegal types of arguments (" + types[0]->getName() + ", " + types[1]->getName()
+                        + ")" + " of function " + getName(),
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            }
+        }
+
+        return arguments[0];
+    }
+
+    static void insertDefaults(const MutableColumnPtr & target, size_t row_count, ColumnPtr & default_values_column, size_t offset)
+    {
+        if (row_count == 0) {
+            return;
+        }
+        if (default_values_column)
+        {
+            if (isColumnConst(*default_values_column))
+            {
+                Field constant_value = (*default_values_column)[0];
+                for(size_t row = 0; row < row_count;row++)
+                {
+                    target->insert(constant_value);
+                }
+            } else {
+                target->insertRangeFrom(*default_values_column, offset, row_count);
+            }
+        } else {
+            for(size_t row = 0; row <= row_count;row++) {
+                target->insertDefault();
+            }
+        }
+    }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    {
+        auto offset_structure = block.getByPosition(arguments[1]);
+        ColumnPtr & offset_column = offset_structure.column;
+
+        auto is_constant_offset = isColumnConst(*offset_structure.column);
+        ColumnPtr default_values_column = nullptr;
+        if (arguments.size() == 3)
+        {
+            default_values_column = block.getByPosition(arguments[2]).column;
+        }
+
+//        Field offset_field = (*block.getByPosition(arguments[1]).column)[0];
+//        auto raw_value = safeGet(offset_field);
+
+        ColumnWithTypeAndName &source_column_name_and_type = block.getByPosition(arguments[0]);
+        DataTypes types = {source_column_name_and_type.type};
+        if (default_values_column)
+        {
+            types.push_back(block.getByPosition(arguments[2]).type);
+        }
+        const DataTypePtr & result_type = getLeastSupertype(types);
+        auto source_column = source_column_name_and_type.column;
+
+        // adjust source and default values columns to resulting datatype
+        if (!source_column_name_and_type.type->equals(*result_type)) {
+            source_column = castColumn(source_column_name_and_type, result_type, context);
+        }
+
+        if (default_values_column && !block.getByPosition(arguments[2]).type->equals(*result_type)) {
+            default_values_column = castColumn(block.getByPosition(arguments[2]), result_type, context);
+        }
+
+        auto column = result_type->createColumn();
+        column->reserve(input_rows_count);
+
+        const DataTypePtr desired_type = std::make_shared();
+        if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) {
+            offset_column = castColumn(offset_structure, desired_type, context);
+        }
+
+        // with constant offset - insertRangeFrom
+        if (is_constant_offset)
+        {
+            Int64 offset_value = offset_column->getInt(0);
+
+            if (offset_value > 0)
+            {
+                // insert shifted value
+                column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value);
+                // insert defaults into the end
+                insertDefaults(column, input_rows_count - offset_value, default_values_column, offset_value);
+            } else if(offset_value < 0) {
+                // insert defaults up to offset_value
+                insertDefaults(column, input_rows_count - std::abs(offset_value), default_values_column, std::abs(offset_value));
+                // insert range, where possible
+                column->insertRangeFrom(*source_column, 0, input_rows_count - std::abs(offset_value));
+            } else {
+                // populate column with source values
+                column->insertRangeFrom(*source_column, 0, input_rows_count);
+            }
+        } else {
+            // with dynamic offset - handle row by row
+            for (size_t row = 0; row < input_rows_count; row++)
+            {
+                Int64 offset_value = offset_column->getInt(row);
+                if (offset_value == 0) {
+                    column->insertFrom(*source_column, row);
+                } else if (offset_value > 0) {
+                    size_t real_offset = row + offset_value;
+                    // rows are indexed from 0, so real_offset == input_rows_count is already out of range
+                    if (real_offset >= input_rows_count) {
+                        if (default_values_column) {
+                            column->insertFrom(*default_values_column, row);
+                        } else {
+                            column->insertDefault();
+                        }
+                    } else {
+                        column->insertFrom(*source_column, real_offset);
+                    }
+                } else {
+                    // out of range
+                    if ((size_t)std::abs(offset_value) > row)
+                    {
+                        if (default_values_column) {
+                            column->insertFrom(*default_values_column, row);
+                        } else {
+                            column->insertDefault();
+                        }
+                    } else {
+                        column->insertFrom(*source_column, row - std::abs(offset_value));
+                    }
+                }
+            }
+        }
+
+        block.getByPosition(result).column = std::move(column);
+    }
+private:
+    const Context & context;
+};
+
+void registerFunctionNextInBlock(FunctionFactory & factory)
+{
+    factory.registerFunction();
+}
+
+}
diff --git a/dbms/src/Functions/nextInBlock.cpp b/dbms/src/Functions/nextInBlock.cpp
deleted file mode 100644
index eeb33e28146..00000000000
--- a/dbms/src/Functions/nextInBlock.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ARGUMENT_OUT_OF_BOUND;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-
-// Implements function, giving value for column in next row
-// Example:
-// | c1 |
-// | 10 |
-// | 20 |
-// SELECT c1, nextInBlock(c1, 1) as c2:
-// | c1 | c2 |
-// | 10 | 20 |
-// | 20 | 0  |
-class FunctionNextInBlock : public IFunction
-{
-public:
-    static constexpr auto name = "nextInBlock";
-    static FunctionPtr create(const Context &) { return std::make_shared(); }
-
-    /// Get the name of the function.
-    String getName() const override { return name; }
-
-    size_t getNumberOfArguments() const override { return 0; }
-
-    bool isVariadic() const override { return true; }
-
-    bool isDeterministic() const override { return false; }
-
-    bool isDeterministicInScopeOfQuery() const override { return false; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        size_t number_of_arguments = arguments.size();
-
-        if (number_of_arguments < 1 || number_of_arguments > 3)
-            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments)
-                + ", should be from 1 to 3",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        // second argument must be a positive, constant column
-        if (number_of_arguments == 2 && !isUnsignedInteger(arguments[1]))
-            throw Exception(
-                "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
-                + " - should be positive integer",
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-        // check that default value has supertype with first argument
-        if (number_of_arguments == 3)
-        {
-            DataTypes types = {arguments[0], arguments[2]};
-            try
-            {
-                return getLeastSupertype(types);
-            }
-            catch (const Exception &)
-            {
-                throw Exception(
-                    "Illegal types of arguments (" + types[0]->getName() + ", " + types[1]->getName()
-                    + ")"
-                    " of function "
-                    + getName(),
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-            }
-        }
-
-        return arguments[0];
-    }
-
-    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
-    {
-        size_t offset_value = 1;
-
-        if (arguments.size() > 1)
-        {
-            auto offset_column = block.getByPosition(arguments[1]);
-            if (!isColumnConst(*offset_column.column))
-                throw Exception("Second argument of function " + getName() + " should be constant", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-            Field offset_field = (*block.getByPosition(arguments[1]).column)[0];
-            auto raw_value = safeGet(offset_field);
-
-            if (raw_value == 0)
-                throw Exception(
-                    "Second argument of function " + getName() + " should be positive integer, " + toString(raw_value) + " given",
-                    ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-
-            offset_value = raw_value;
-        }
-
-        auto has_column_for_missing = arguments.size() == 3;
-
-        DataTypes types = {block.getByPosition(arguments[0]).type};
-        if (has_column_for_missing)
-        {
-            types.push_back(block.getByPosition(arguments[2]).type);
-        }
-        const DataTypePtr & result_type = getLeastSupertype(types);
-
-        auto column = result_type->createColumn();
-        column->reserve(input_rows_count);
-
-        auto source_column = block.getByPosition(arguments[0]).column;
-
-        for (size_t i = offset_value; i < input_rows_count; i++)
-        {
-            column->insertFrom(*source_column, i);
-        }
-
-        if (has_column_for_missing)
-        {
-            auto default_values_column = block.getByPosition(arguments[2]).column;
-            size_t starting_pos = offset_value > input_rows_count ? 0 : input_rows_count - offset_value;
-            if (isColumnConst(*default_values_column))
-            {
-                Field constant_value = (*default_values_column)[0];
-                for (size_t i = starting_pos; i < input_rows_count; i++)
-                {
-                    column->insert(constant_value);
-                }
-            }
-            else
-            {
-                for (size_t i = starting_pos; i < input_rows_count; i++)
-                {
-                    column->insertFrom(*default_values_column, i);
-                }
-            }
-        }
-        else
-        {
-            for (size_t i = 0; i < std::min(offset_value, input_rows_count); i++)
-            {
-                column->insertDefault();
-            }
-        }
-
-        block.getByPosition(result).column = std::move(column);
-    }
-};
-
-void registerFunctionNextInBlock(FunctionFactory & factory)
-{
-    factory.registerFunction();
-}
-
-}
From 31fdc99efc3b668b18e8ea830dfc669f743d275d Mon Sep 17 00:00:00 2001
From: Alexandr Krasheninnikov
Date: Mon, 12 Aug 2019 18:44:28 +0300
Subject: [PATCH 069/181] In progress

---
 dbms/src/Functions/neighbour.cpp        | 10 ++++----
 .../0_stateless/00957_next_in_block.sql | 24 ++++++++-----------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp
index cf96282725a..3eff660c996 100644
--- a/dbms/src/Functions/neighbour.cpp
+++ b/dbms/src/Functions/neighbour.cpp
@@ -101,7 +101,7 @@ public:
                target->insertRangeFrom(*default_values_column, offset, row_count);
            }
        } else {
-            for(size_t row = 0; row <= row_count;row++) {
+            for(size_t row = 0; row < row_count;row++) {
                target->insertDefault();
            }
        }
@@ -156,9 +156,11 @@ public:
            if (offset_value > 0)
            {
                // insert shifted value
-                column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value);
-                // insert defaults into the end
-                insertDefaults(column, input_rows_count - offset_value, default_values_column, offset_value);
+                if ((size_t)std::abs(offset_value) <= input_rows_count) {
+                    column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value);
+                    // insert defaults into the end
+                    insertDefaults(column, input_rows_count - offset_value, default_values_column, offset_value);
+                }
            } else if(offset_value < 0) {
                // insert defaults up to offset_value
                insertDefaults(column, input_rows_count - std::abs(offset_value), default_values_column, std::abs(offset_value));
diff --git a/dbms/tests/queries/0_stateless/00957_next_in_block.sql b/dbms/tests/queries/0_stateless/00957_next_in_block.sql
index 7cbd932cf1a..1efda43339e 100644
--- a/dbms/tests/queries/0_stateless/00957_next_in_block.sql
+++ b/dbms/tests/queries/0_stateless/00957_next_in_block.sql
@@ -1,22 +1,18 @@
 -- no arguments
-select nextInBlock(); -- { serverError 42 }
+select neighbour(); -- { serverError 42 }
+-- single argument
+select neighbour(1); -- { serverError 42 }
 -- greater than 3 arguments
-select nextInBlock(1,2,3,4); -- { serverError 42 }
--- zero offset value
-select nextInBlock(dummy, 0); -- { serverError 69 }
--- negative offset value
-select nextInBlock(dummy, -1); -- { serverError 43 }
--- non-constant offset value
-select nextInBlock(dummy, dummy); -- { serverError 43 }
+select neighbour(1,2,3,4); -- { serverError 42 }
 -- bad default value
-select nextInBlock(dummy, 1, 'hello'); -- { serverError 43 }
+select neighbour(dummy, 1, 'hello'); -- { serverError 43 }
 -- single argument test
-select number, nextInBlock(number) from numbers(2);
+select number, neighbour(number,1) from numbers(2);
 -- filling by column's default value
-select number, nextInBlock(number, 2) from numbers(3);
+select number, neighbour(number, 2) from numbers(3);
 -- offset is greater that block - should fill everything with defaults
-select number, nextInBlock(number, 5) from numbers(2);
+select number, neighbour(number, 5) from numbers(2);
 -- substitution by constant for missing values
-select number, nextInBlock(number, 2, 1000) from numbers(5);
+select number, neighbour(number, 2, 1000) from numbers(5);
 -- substitution by expression
--- select number, nextInBlock(number, 2, number % 2) from numbers(5);
\ No newline at end of file
+-- select number, neighbour(number, 2, number % 2) from numbers(5);
\ No newline at end of file
From ea9cf3a62f42f0ed48efe7baac2be46bfaa7ae5e Mon Sep 17 00:00:00 2001
From: Alexandr Krasheninnikov
Date: Tue, 13 Aug 2019 16:11:24 +0300
Subject: [PATCH 070/181] Done

---
 dbms/src/Functions/neighbour.cpp              | 105 +++++++++---------
 .../0_stateless/00957_neighbour.reference     |  42 +++++++
 .../queries/0_stateless/00957_neighbour.sql   |  30 +++++
 .../0_stateless/00957_next_in_block.reference |  12 --
 .../0_stateless/00957_next_in_block.sql       |  18 ---
 create mode 100644 dbms/tests/queries/0_stateless/00957_neighbour.reference
 create mode 100644 dbms/tests/queries/0_stateless/00957_neighbour.sql
 delete mode 100644 dbms/tests/queries/0_stateless/00957_next_in_block.reference
 delete mode 100644 dbms/tests/queries/0_stateless/00957_next_in_block.sql

diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp
index 3eff660c996..79a1feec002 100644
--- a/dbms/src/Functions/neighbour.cpp
+++ b/dbms/src/Functions/neighbour.cpp
@@ -119,9 +119,6 @@ public:
            default_values_column = block.getByPosition(arguments[2]).column;
        }

-//        Field offset_field = (*block.getByPosition(arguments[1]).column)[0];
-//        auto raw_value = safeGet(offset_field);
-
        ColumnWithTypeAndName &source_column_name_and_type = block.getByPosition(arguments[0]);
        DataTypes types = {source_column_name_and_type.type};
        if (default_values_column)

        const DataTypePtr desired_type = std::make_shared();
        if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) {
            offset_column = castColumn(offset_structure, desired_type, context);
        }

+        if (isColumnConst(*source_column)) {
+            auto column = result_type->createColumnConst(input_rows_count, (*source_column)[0]);
+            block.getByPosition(result).column = std::move(column);
+        } else {
+            auto column = result_type->createColumn();
+            column->reserve(input_rows_count);
+            // with constant offset - insertRangeFrom
+            if (is_constant_offset)
            {
                Int64 offset_value = offset_column->getInt(0);

                if (offset_value > 0)
                {
                    // insert shifted value
                    if ((size_t)offset_value <= input_rows_count) {
                        column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value);
                    }
                    size_t row_count = (size_t)offset_value > input_rows_count ? input_rows_count : offset_value;
                    insertDefaults(column, row_count, default_values_column, input_rows_count - row_count);
                } else if (offset_value < 0) {
                    size_t row_count = (size_t)std::abs(offset_value) > input_rows_count ? input_rows_count : std::abs(offset_value);
                    // insert defaults up to offset_value
                    insertDefaults(column, row_count, default_values_column, 0);
                    column->insertRangeFrom(*source_column, 0, input_rows_count - row_count);
                } else {
                    // populate column with source values
                    column->insertRangeFrom(*source_column, 0, input_rows_count);
                }
            } else {
                // with dynamic offset - handle row by row
                for (size_t row = 0; row < input_rows_count; row++)
+ auto column = result_type->createColumn(); + column->reserve(input_rows_count); + // with constant offset - insertRangeFrom + if (is_constant_offset) { - Int64 offset_value = offset_column->getInt(row); - if (offset_value == 0) { - column->insertFrom(*source_column, row); - } else if (offset_value > 0) { - size_t real_offset = row + offset_value; - if (real_offset > input_rows_count) { - if (default_values_column) { - column->insertFrom(*default_values_column, row); - } else { - column->insertDefault(); - } - } else { - column->insertFrom(*column, real_offset); + Int64 offset_value = offset_column->getInt(0); + + if (offset_value > 0) + { + // insert shifted value + if ((size_t)offset_value <= input_rows_count) { + column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value); } + size_t row_count = (size_t)offset_value > input_rows_count ? input_rows_count : offset_value; + insertDefaults(column, row_count, default_values_column, input_rows_count - row_count); + } else if (offset_value < 0) { + size_t row_count = (size_t)std::abs(offset_value) > input_rows_count ? input_rows_count : std::abs(offset_value); + // insert defaults up to offset_value + insertDefaults(column, row_count, default_values_column, 0); + column->insertRangeFrom(*source_column, 0, input_rows_count - row_count); } else { - // out of range - if ((size_t)std::abs(offset_value) > row) - { - if (default_values_column) { - column->insertFrom(*default_values_column, row); + // populate column with source values + column->insertRangeFrom(*source_column, 0, input_rows_count); + } + } else { + // with dynamic offset - handle row by row + for (size_t row = 0; row < input_rows_count; row++) + { + Int64 offset_value = offset_column->getInt(row); + if (offset_value == 0) { + column->insertFrom(*source_column, row); + } else if (offset_value > 0) { + size_t real_offset = row + offset_value; + if (real_offset > input_rows_count) { + if (default_values_column) { + column->insertFrom(*default_values_column, row); + } else { + column->insertDefault(); + } } else { - column->insertDefault(); + column->insertFrom(*column, real_offset); } } else { - column->insertFrom(*column, row - std::abs(offset_value)); + // out of range + if ((size_t)std::abs(offset_value) > row) + { + if (default_values_column) { + column->insertFrom(*default_values_column, row); + } else { + column->insertDefault(); + } + } else { + column->insertFrom(*column, row - std::abs(offset_value)); + } } } } + block.getByPosition(result).column = std::move(column); } - - - block.getByPosition(result).column = std::move(column); } private: const Context & context; diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.reference b/dbms/tests/queries/0_stateless/00957_neighbour.reference new file mode 100644 index 00000000000..cd8c6310f22 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_neighbour.reference @@ -0,0 +1,42 @@ +Result with different type +0 1 +1 2 +2 -10 +Offset > block +0 0 +1 0 +2 0 +Abs(Offset) > block +0 0 +1 0 +2 0 +Positive offset +0 1 +1 2 +2 0 +Negative offset +0 1 +1 2 +2 0 +Positive offset with defaults +0 2 +1 3 +2 12 +3 13 +Negative offset with defaults +0 10 +1 11 +2 0 +3 1 +Positive offset with const defaults +0 1 +1 2 +2 1000 +Negative offset with const defaults +0 1000 +1 0 +2 1 +Constant column +0 1000 +1 1000 +2 1000 diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.sql b/dbms/tests/queries/0_stateless/00957_neighbour.sql new file mode 100644 index 00000000000..156672155a8 --- /dev/null +++ 
@@ -0,0 +1,30 @@
-- no arguments
select neighbour(); -- { serverError 42 }
-- single argument
select neighbour(1); -- { serverError 42 }
-- greater than 3 arguments
select neighbour(1,2,3,4); -- { serverError 42 }
-- bad default value
select neighbour(dummy, 1, 'hello'); -- { serverError 43 }
-- types without common supertype (UInt64 and Int8)
select number, neighbour(number, 1, -10) from numbers(3); -- { serverError 43 }
select 'Result with different type';
select toInt32(number) as n, neighbour(n, 1, -10) from numbers(3);
select 'Offset > block';
select number, neighbour(number, 10) from numbers(3);
select 'Abs(Offset) > block';
select number, neighbour(number, -10) from numbers(3);
select 'Positive offset';
select number, neighbour(number, 1) from numbers(3);
select 'Negative offset';
select number, neighbour(number, -1) from numbers(3);
select 'Positive offset with defaults';
select number, neighbour(number, 2, number + 10) from numbers(4);
select 'Negative offset with defaults';
select number, neighbour(number, -2, number + 10) from numbers(4);
select 'Positive offset with const defaults';
select number, neighbour(number, 1, 1000) from numbers(3);
select 'Negative offset with const defaults';
select number, neighbour(number, -1, 1000) from numbers(3);
select 'Constant column';
select number, neighbour(1000, 10) from numbers(3);
\ No newline at end of file
diff --git a/dbms/tests/queries/0_stateless/00957_next_in_block.reference b/dbms/tests/queries/0_stateless/00957_next_in_block.reference
deleted file mode 100644
index 860ce6dc1ba..00000000000
--- a/dbms/tests/queries/0_stateless/00957_next_in_block.reference
+++ /dev/null
@@ -1,12 +0,0 @@
-0 1
-1 0
-0 2
-1 0
-2 0
-0 0
-1 0
-0 2
-1 3
-2 4
-3 1000
-4 1000
diff --git a/dbms/tests/queries/0_stateless/00957_next_in_block.sql b/dbms/tests/queries/0_stateless/00957_next_in_block.sql
deleted file mode 100644
index 1efda43339e..00000000000
--- a/dbms/tests/queries/0_stateless/00957_next_in_block.sql
+++ /dev/null
@@ -1,18 +0,0 @@
--- no arguments
-select neighbour(); -- { serverError 42 }
--- single argument
-select neighbour(1); -- { serverError 42 }
--- greater than 3 arguments
-select neighbour(1,2,3,4); -- { serverError 42 }
--- bad default value
-select neighbour(dummy, 1, 'hello'); -- { serverError 43 }
--- single argument test
-select number, neighbour(number,1) from numbers(2);
--- filling by column's default value
-select number, neighbour(number, 2) from numbers(3);
--- offset is greater that block - should fill everything with defaults
-select number, neighbour(number, 5) from numbers(2);
--- substitution by constant for missing values
-select number, neighbour(number, 2, 1000) from numbers(5);
--- substitution by expression
--- select number, neighbour(number, 2, number % 2) from numbers(5);
\ No newline at end of file
diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md
index 57fa8acfee3..05efe0fceb4 100644
--- a/docs/en/query_language/functions/other_functions.md
+++ b/docs/en/query_language/functions/other_functions.md
@@ -311,6 +311,49 @@ Returns the ordinal number of the row in the data block. Different data blocks a

Returns the ordinal number of the row in the data block. This function only considers the affected data blocks.

+## neighbour(column, offset\[, default_value\])
+
+Returns the value of `column` at `offset` distance from the current row.
+This function is a partial implementation of the [window functions](https://en.wikipedia.org/wiki/SQL_window_function) LEAD() and LAG(). + +The result of the function depends on the affected data blocks and the order of data in the block. +If you make a subquery with ORDER BY and call the function from outside the subquery, you can get the expected result. + +If the `offset` value is outside the block bounds, a default value for `column` is returned. If `default_value` is given, it is used instead. +This function can be used to compute a year-over-year metric value: + +``` sql +WITH toDate('2018-01-01') AS start_date +SELECT + toStartOfMonth(start_date + (number * 32)) AS month, + toInt32(month) % 100 AS money, + neighbour(money, -12) AS prev_year, + round(prev_year / money, 2) AS year_over_year +FROM numbers(16) +``` + +``` +┌──────month─┬─money─┬─prev_year─┬─year_over_year─┐ +│ 2018-01-01 │ 32 │ 0 │ 0 │ +│ 2018-02-01 │ 63 │ 0 │ 0 │ +│ 2018-03-01 │ 91 │ 0 │ 0 │ +│ 2018-04-01 │ 22 │ 0 │ 0 │ +│ 2018-05-01 │ 52 │ 0 │ 0 │ +│ 2018-06-01 │ 83 │ 0 │ 0 │ +│ 2018-07-01 │ 13 │ 0 │ 0 │ +│ 2018-08-01 │ 44 │ 0 │ 0 │ +│ 2018-09-01 │ 75 │ 0 │ 0 │ +│ 2018-10-01 │ 5 │ 0 │ 0 │ +│ 2018-11-01 │ 36 │ 0 │ 0 │ +│ 2018-12-01 │ 66 │ 0 │ 0 │ +│ 2019-01-01 │ 97 │ 32 │ 0.33 │ +│ 2019-02-01 │ 28 │ 63 │ 2.25 │ +│ 2019-03-01 │ 56 │ 91 │ 1.62 │ +│ 2019-04-01 │ 87 │ 22 │ 0.25 │ +└────────────┴───────┴───────────┴────────────────┘ +``` + + ## runningDifference(x) {#other_functions-runningdifference} Calculates the difference between successive row values in the data block. diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index 1637c7bda93..6f5b6f1ff0d 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -288,6 +288,48 @@ SELECT ## rowNumberInAllBlocks() Возвращает порядковый номер строки в блоке данных. Функция учитывает только задействованные блоки данных. +## neighbour(column, offset\[, default_value\]) + +Функция позволяет получить доступ к значению в колонке `column`, находящемуся на смещении `offset` относительно текущей строки. +Является частичной реализацией [оконных функций](https://en.wikipedia.org/wiki/SQL_window_function) LEAD() и LAG(). + +Результат функции зависит от затронутых блоков данных и порядка данных в блоке. +Если сделать подзапрос с ORDER BY и вызывать функцию извне подзапроса, можно будет получить ожидаемый результат. + +Если значение `offset` выходит за пределы блока данных, то берётся значение по умолчанию для колонки `column`. Если передан параметр `default_value`, то значение берётся из него. 
+Например, эта функция может использоваться, чтобы оценить year-over-year значение показателя: + +``` sql +WITH toDate('2018-01-01') AS start_date +SELECT + toStartOfMonth(start_date + (number * 32)) AS month, + toInt32(month) % 100 AS money, + neighbour(money, -12) AS prev_year, + round(prev_year / money, 2) AS year_over_year +FROM numbers(16) +``` + +``` +┌──────month─┬─money─┬─prev_year─┬─year_over_year─┐ +│ 2018-01-01 │ 32 │ 0 │ 0 │ +│ 2018-02-01 │ 63 │ 0 │ 0 │ +│ 2018-03-01 │ 91 │ 0 │ 0 │ +│ 2018-04-01 │ 22 │ 0 │ 0 │ +│ 2018-05-01 │ 52 │ 0 │ 0 │ +│ 2018-06-01 │ 83 │ 0 │ 0 │ +│ 2018-07-01 │ 13 │ 0 │ 0 │ +│ 2018-08-01 │ 44 │ 0 │ 0 │ +│ 2018-09-01 │ 75 │ 0 │ 0 │ +│ 2018-10-01 │ 5 │ 0 │ 0 │ +│ 2018-11-01 │ 36 │ 0 │ 0 │ +│ 2018-12-01 │ 66 │ 0 │ 0 │ +│ 2019-01-01 │ 97 │ 32 │ 0.33 │ +│ 2019-02-01 │ 28 │ 63 │ 2.25 │ +│ 2019-03-01 │ 56 │ 91 │ 1.62 │ +│ 2019-04-01 │ 87 │ 22 │ 0.25 │ +└────────────┴───────┴───────────┴────────────────┘ +``` + ## runningDifference(x) Считает разницу между последовательными значениями строк в блоке данных. Возвращает 0 для первой строки и разницу с предыдущей строкой для каждой последующей строки. From 6bf3902ce5d814758381bf15689c4585346478f0 Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Tue, 13 Aug 2019 16:20:32 +0300 Subject: [PATCH 071/181] Format file --- dbms/src/Functions/neighbour.cpp | 88 ++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 26 deletions(-) diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp index 79a1feec002..2efca01a66f 100644 --- a/dbms/src/Functions/neighbour.cpp +++ b/dbms/src/Functions/neighbour.cpp @@ -1,10 +1,10 @@ #include +#include #include #include #include #include #include -#include namespace DB { @@ -85,7 +85,8 @@ public: static void insertDefaults(const MutableColumnPtr & target, size_t row_count, ColumnPtr & default_values_column, size_t offset) { - if (row_count == 0) { + if (row_count == 0) + { return; } if (default_values_column) { if (isColumnConst(*default_values_column)) { Field constant_value = (*default_values_column)[0]; - for(size_t row = 0; row < row_count;row++) + for (size_t row = 0; row < row_count; row++) { target->insert(constant_value); } - } else { + } + else + { target->insertRangeFrom(*default_values_column, offset, row_count); } - } else { - for(size_t row = 0; row < row_count;row++) { + } + else + { + for (size_t row = 0; row < row_count; row++) + { target->insertDefault(); } } @@ -119,7 +125,7 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto offset_structure = block.getByPosition(arguments[1]); + ColumnPtr & offset_column = offset_structure.column; auto is_constant_offset = isColumnConst(*offset_structure.column); ColumnPtr default_values_column = nullptr; default_values_column = block.getByPosition(arguments[2]).column; } - ColumnWithTypeAndName &source_column_name_and_type = block.getByPosition(arguments[0]); + ColumnWithTypeAndName & source_column_name_and_type = block.getByPosition(arguments[0]); DataTypes types = {source_column_name_and_type.type}; if (default_values_column) { auto source_column = source_column_name_and_type.column; // adjust source and default values columns to resulting datatype - if (!source_column_name_and_type.type->equals(*result_type)) { + if (!source_column_name_and_type.type->equals(*result_type)) + { source_column = castColumn(source_column_name_and_type, result_type, context); } - if (default_values_column && !block.getByPosition(arguments[2]).type->equals(*result_type)) { + if (default_values_column && !block.getByPosition(arguments[2]).type->equals(*result_type)) + { default_values_column = castColumn(block.getByPosition(arguments[2]), result_type, context); } const DataTypePtr desired_type = 
std::make_shared<DataTypeInt64>(); - if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) { + if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) + { offset_column = castColumn(offset_structure, desired_type, context); } - if (isColumnConst(*source_column)) { + if (isColumnConst(*source_column)) + { auto column = result_type->createColumnConst(input_rows_count, (*source_column)[0]); block.getByPosition(result).column = std::move(column); - } else { + } + else + { auto column = result_type->createColumn(); column->reserve(input_rows_count); // with constant offset - insertRangeFrom @@ -156,48 +168,71 @@ public: if (offset_value > 0) { // insert shifted value - if ((size_t)offset_value <= input_rows_count) { + if ((size_t)offset_value <= input_rows_count) + { column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value); } size_t row_count = (size_t)offset_value > input_rows_count ? input_rows_count : offset_value; insertDefaults(column, row_count, default_values_column, input_rows_count - row_count); - } else if (offset_value < 0) { + } + else if (offset_value < 0) + { size_t row_count = (size_t)std::abs(offset_value) > input_rows_count ? input_rows_count : std::abs(offset_value); // insert defaults up to offset_value insertDefaults(column, row_count, default_values_column, 0); column->insertRangeFrom(*source_column, 0, input_rows_count - row_count); - } else { + } + else + { // populate column with source values column->insertRangeFrom(*source_column, 0, input_rows_count); } - } else { + } + else + { // with dynamic offset - handle row by row for (size_t row = 0; row < input_rows_count; row++) { Int64 offset_value = offset_column->getInt(row); - if (offset_value == 0) { + if (offset_value == 0) + { column->insertFrom(*source_column, row); - } else if (offset_value > 0) { + } + else if (offset_value > 0) + { size_t real_offset = row + offset_value; - if (real_offset > input_rows_count) { - if (default_values_column) { + if (real_offset > input_rows_count) + { + if (default_values_column) + { column->insertFrom(*default_values_column, row); - } else { + } + else + { column->insertDefault(); } - } else { + } + else + { column->insertFrom(*column, real_offset); } - } else { + } + else + { // out of range if ((size_t)std::abs(offset_value) > row) { - if (default_values_column) { + if (default_values_column) + { column->insertFrom(*default_values_column, row); - } else { + } + else + { column->insertDefault(); } - } else { + } + else + { column->insertFrom(*column, row - std::abs(offset_value)); } } @@ -206,6 +241,7 @@ public: block.getByPosition(result).column = std::move(column); } } + private: const Context & context; }; From 986d56ba0c1d80b0fc879f22750e42a1f8b213f1 Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Wed, 14 Aug 2019 14:32:03 +0300 Subject: [PATCH 072/181] Fix casting style, work with Nullable --- dbms/src/Functions/neighbour.cpp | 30 ++++++++++++------- .../registerFunctionsMiscellaneous.cpp | 4 +-- .../queries/0_stateless/00957_neighbour.sql | 4 +++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp index 2efca01a66f..153307abd4b 100644 --- a/dbms/src/Functions/neighbour.cpp +++ b/dbms/src/Functions/neighbour.cpp @@ -54,13 +54,14 @@ public: + ", should be from 2 to 3", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - // second argument must be a positive integer + // second argument must be an integer if (!isInteger(arguments[1])) throw 
Exception( "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() - + " - should be positive integer", + + " - should be an integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + // check that default value column has supertype with first argument if (number_of_arguments == 3) { @@ -116,7 +117,13 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto offset_structure = block.getByPosition(arguments[1]); + ColumnPtr & offset_column = offset_structure.column; + if (isColumnNullable(*offset_column)) + throw Exception( + "Illegal type " + offset_structure.type->getName() + " of second argument of function " + getName() + + " - can not be Nullable", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto is_constant_offset = isColumnConst(*offset_structure.column); ColumnPtr default_values_column = nullptr; @@ -145,6 +152,7 @@ ... default_values_column = castColumn(block.getByPosition(arguments[2]), result_type, context); } + // since we are working with both signed and unsigned - we'll try to use Int64 for handling all of them const DataTypePtr desired_type = std::make_shared<DataTypeInt64>(); if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) { @@ -165,26 +173,26 @@ public: { Int64 offset_value = offset_column->getInt(0); + auto offset_value_casted = static_cast<size_t>(std::abs(offset_value)); + size_t default_value_count = std::min(offset_value_casted, input_rows_count); if (offset_value > 0) { // insert shifted value - if ((size_t)offset_value <= input_rows_count) + if (offset_value_casted <= input_rows_count) { - column->insertRangeFrom(*source_column, offset_value, input_rows_count - offset_value); + column->insertRangeFrom(*source_column, offset_value_casted, input_rows_count - offset_value_casted); } - size_t row_count = (size_t)offset_value > input_rows_count ? input_rows_count : offset_value; - insertDefaults(column, row_count, default_values_column, input_rows_count - row_count); + insertDefaults(column, default_value_count, default_values_column, input_rows_count - default_value_count); } else if (offset_value < 0) { - size_t row_count = (size_t)std::abs(offset_value) > input_rows_count ? 
input_rows_count : std::abs(offset_value); - // insert defaults up to offset_value - insertDefaults(column, row_count, default_values_column, 0); - column->insertRangeFrom(*source_column, 0, input_rows_count - row_count); + insertDefaults(column, default_value_count, default_values_column, 0); + column->insertRangeFrom(*source_column, 0, input_rows_count - default_value_count); } else { - // populate column with source values + // populate column with source values, when offset is equal to zero column->insertRangeFrom(*source_column, 0, input_rows_count); } } @@ -246,7 +254,7 @@ private: const Context & context; }; -void registerFunctionNextInBlock(FunctionFactory & factory) +void registerFunctionNeighbour(FunctionFactory & factory) { factory.registerFunction<FunctionNeighbour>(); } diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index 57ccfcd11c9..c96f5f05c7b 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -17,7 +17,7 @@ void registerFunctionBlockSize(FunctionFactory &); void registerFunctionBlockNumber(FunctionFactory &); void registerFunctionRowNumberInBlock(FunctionFactory &); void registerFunctionRowNumberInAllBlocks(FunctionFactory &); -void registerFunctionNextInBlock(FunctionFactory &); +void registerFunctionNeighbour(FunctionFactory &); void registerFunctionSleep(FunctionFactory &); void registerFunctionSleepEachRow(FunctionFactory &); void registerFunctionMaterialize(FunctionFactory &); @@ -68,7 +68,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionBlockNumber(factory); registerFunctionRowNumberInBlock(factory); registerFunctionRowNumberInAllBlocks(factory); - registerFunctionNextInBlock(factory); + registerFunctionNeighbour(factory); registerFunctionSleep(factory); registerFunctionSleepEachRow(factory); registerFunctionMaterialize(factory); diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.sql b/dbms/tests/queries/0_stateless/00957_neighbour.sql index 156672155a8..665936fd70f 100644 --- a/dbms/tests/queries/0_stateless/00957_neighbour.sql +++ b/dbms/tests/queries/0_stateless/00957_neighbour.sql @@ -8,6 +8,10 @@ select neighbour(1,2,3,4); -- { serverError 42 } select neighbour(dummy, 1, 'hello'); -- { serverError 43 } -- types without common supertype (UInt64 and Int8) select number, neighbour(number, 1, -10) from numbers(3); -- { serverError 43 } +-- nullable offset is not allowed +select number, if(number > 1, number, null) as offset, neighbour(number, offset) from numbers(3); -- { serverError 43 } +select 'Zero offset'; +select number, neighbour(number, 0) from numbers(3); select 'Result with different type'; select toInt32(number) as n, neighbour(n, 1, -10) from numbers(3); select 'Offset > block'; From 2126196c8946126cd32322f259571c241b7f857d Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Wed, 14 Aug 2019 15:09:51 +0300 Subject: [PATCH 073/181] Nullable correct handling --- dbms/src/Functions/neighbour.cpp | 21 ++++++++++--------- .../0_stateless/00957_neighbour.reference | 8 +++++++ .../queries/0_stateless/00957_neighbour.sql | 2 ++ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbour.cpp index 153307abd4b..06307884f22 100644 --- a/dbms/src/Functions/neighbour.cpp +++ b/dbms/src/Functions/neighbour.cpp @@ -44,6 +44,8 @@ public: bool isDeterministicInScopeOfQuery() const override { return false; } + bool 
useDefaultImplementationForNulls() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { size_t number_of_arguments = arguments.size(); @@ -57,8 +59,11 @@ public: // second argument must be an integer if (!isInteger(arguments[1])) throw Exception( - "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() - + " - should be an integer", + "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + " - should be an integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + else if (arguments[1]->isNullable()) + throw Exception( + "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + " - can not be Nullable", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -119,11 +124,6 @@ public: auto offset_structure = block.getByPosition(arguments[1]); ColumnPtr & offset_column = offset_structure.column; - if (isColumnNullable(*offset_column)) - throw Exception( - "Illegal type " + offset_structure.type->getName() + " of second argument of function " + getName() + - " - can not be Nullable", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto is_constant_offset = isColumnConst(*offset_structure.column); ColumnPtr default_values_column = nullptr; @@ -222,13 +222,14 @@ public: } else { - column->insertFrom(*column, real_offset); + column->insertFrom(*source_column, real_offset); } } else { // out of range - if ((size_t)std::abs(offset_value) > row) + auto offset_value_casted = static_cast<size_t>(std::abs(offset_value)); + if (offset_value_casted > row) { if (default_values_column) { column->insertFrom(*default_values_column, row); @@ -241,7 +242,7 @@ ... } else { - column->insertFrom(*column, row - std::abs(offset_value)); + column->insertFrom(*column, row - offset_value_casted); } } } diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.reference b/dbms/tests/queries/0_stateless/00957_neighbour.reference index cd8c6310f22..1983488cfc2 100644 --- a/dbms/tests/queries/0_stateless/00957_neighbour.reference +++ b/dbms/tests/queries/0_stateless/00957_neighbour.reference @@ -1,3 +1,11 @@ +Zero offset +0 0 +1 1 +2 2 +Nullable values +\N 0 \N +\N 1 2 +2 2 \N Result with different type 0 1 1 2 diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.sql b/dbms/tests/queries/0_stateless/00957_neighbour.sql index 665936fd70f..753ae8493a3 100644 --- a/dbms/tests/queries/0_stateless/00957_neighbour.sql +++ b/dbms/tests/queries/0_stateless/00957_neighbour.sql @@ -12,6 +12,8 @@ select number, neighbour(number, 1, -10) from numbers(3); -- { serverError 43 } select number, if(number > 1, number, null) as offset, neighbour(number, offset) from numbers(3); -- { serverError 43 } select 'Zero offset'; select number, neighbour(number, 0) from numbers(3); +select 'Nullable values'; +select if(number > 1, number, null) as value, number as offset, neighbour(value, offset) as neighbour from numbers(3); select 'Result with different type'; select toInt32(number) as n, neighbour(n, 1, -10) from numbers(3); From ab1c4139deaad29f00104580df09f2334bb4efaa Mon Sep 17 00:00:00 2001 From: Gleb Novikov Date: Wed, 14 Aug 2019 22:51:03 +0300 Subject: [PATCH 074/181] Added ReplicatedMergeTree support and test for constraints, also added VIOLATED_CONSTRAINT error --- dbms/programs/server/config.xml | 5 ++- dbms/src/Common/ErrorCodes.cpp | 1 + .../CheckConstraintsBlockOutputStream.cpp | 2 +- .../CheckConstraintsBlockOutputStream.h | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 6 +++ 
.../Storages/StorageReplicatedMergeTree.cpp | 4 ++ ...onstraints_replication_zookeeper.reference | 0 ...0988_constraints_replication_zookeeper.sql | 43 +++++++++++++++++++ 8 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00988_constraints_replication_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00988_constraints_replication_zookeeper.sql diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index c09913cbd87..188a98779e9 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -217,7 +217,10 @@ See https://clickhouse.yandex/docs/en/table_engines/replication/ --> -    <zookeeper incl="zookeeper-servers" optional="true" /> +    <zookeeper> +        <implementation>testkeeper</implementation> +    </zookeeper> + From e68775e3d0f3a8201d8f4d1cc319dcd99295099c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 21 Aug 2019 17:51:32 +0300 Subject: [PATCH 103/181] Fix TotalsHavingTransform. --- dbms/src/Processors/Transforms/TotalsHavingTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Processors/Transforms/TotalsHavingTransform.cpp b/dbms/src/Processors/Transforms/TotalsHavingTransform.cpp index cce091b1999..b6931f2c8e1 100644 --- a/dbms/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/dbms/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -161,6 +161,8 @@ void TotalsHavingTransform::transform(Chunk & chunk) if (const_filter_description.always_true) { addToTotals(chunk, nullptr); + auto num_rows = columns.front()->size(); + chunk.setColumns(std::move(columns), num_rows); return; } From 51f6d9751156a506807aba97962b7ca52bbf425e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 21 Aug 2019 18:20:42 +0300 Subject: [PATCH 104/181] Update formats.md --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c5dbbce674d..2f409706c61 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -146,7 +146,7 @@ SELECT * FROM t_null FORMAT TSKV x=1 y=\N ``` -When there is a large number of small columns, this format is ineffective, and there is generally no reason to use it. It is used in some departments of Yandex. +When there is a large number of small columns, this format is ineffective, and there is generally no reason to use it. Nevertheless, it is no worse than JSONEachRow in terms of efficiency. Both data output and parsing are supported in this format. For parsing, any order is supported for the values of different columns. It is acceptable for some values to be omitted – they are treated as equal to their default values. In this case, zeros and blank rows are used as default values. Complex values that could be specified in the table are not supported as defaults. From a1560448d632ea5dbef1e9bbe2b66c03285598f5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 21 Aug 2019 18:21:36 +0300 Subject: [PATCH 105/181] Update formats.md --- docs/ru/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 20c919665d5..130a32d63fa 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -145,7 +145,7 @@ SELECT * FROM t_null FORMAT TSKV x=1 y=\N ``` -При большом количестве маленьких столбцов, этот формат существенно неэффективен, и обычно нет причин его использовать. Он реализован, так как используется в некоторых отделах Яндекса. 
+При большом количестве маленьких столбцов, этот формат существенно неэффективен, и обычно нет причин его использовать. Впрочем, он не хуже формата JSONEachRow по производительности. Поддерживается как вывод, так и парсинг данных в этом формате. При парсинге, поддерживается расположение значений разных столбцов в произвольном порядке. Допустимо отсутствие некоторых значений - тогда они воспринимаются как равные значениям по умолчанию. В этом случае в качестве значений по умолчанию используются нули и пустые строки. Сложные значения, которые могут быть заданы в таблице не поддерживаются как значения по умолчанию. From 2e03ac09ecdc103ee0fb029aa6445c0003df595b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 21 Aug 2019 19:38:27 +0300 Subject: [PATCH 106/181] Fix MergingSortedTransform. --- dbms/src/Processors/Transforms/MergingSortedTransform.cpp | 3 ++- dbms/src/Processors/Transforms/MergingSortedTransform.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp index 8857ec876d7..e37eae82de1 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp @@ -1,6 +1,7 @@ #include #include #include +#include <DataStreams/materializeBlock.h> namespace DB { @@ -13,7 +14,7 @@ MergingSortedTransform::MergingSortedTransform( UInt64 limit_, bool quiet_, bool have_all_inputs_) - : IProcessor(InputPorts(num_inputs, header), {header}) + : IProcessor(InputPorts(num_inputs, header), {materializeBlock(header)}) , description(description_), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) , have_all_inputs(have_all_inputs_) , merged_data(header), source_chunks(num_inputs), cursors(num_inputs) diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.h b/dbms/src/Processors/Transforms/MergingSortedTransform.h index 5a1f417fdb6..223d5253e62 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.h +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.h @@ -93,7 +93,7 @@ protected: columns = chunk.mutateColumns(); if (limit_rows && num_rows > limit_rows) for (auto & column : columns) - column = (*column->cut(0, limit_rows)).mutate(); + column = (*column->cut(0, limit_rows)->convertToFullColumnIfConst()).mutate(); total_merged_rows += num_rows; merged_rows = num_rows; From 321233ddaa5934985686c99e290b66621369575d Mon Sep 17 00:00:00 2001 From: akonyaev Date: Wed, 21 Aug 2019 19:39:20 +0300 Subject: [PATCH 107/181] ADQM-34 fix hardcoded format name in ArrowColumnToCHColumn class --- .../src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 8 ++++---- dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h | 2 +- dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp | 2 +- .../Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 24a144b10b5..fc1277c4a88 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -268,7 +268,7 @@ namespace DB void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk &res, std::shared_ptr<arrow::Table> &table, arrow::Status &read_status, const Block &header, - int &row_group_current, const Context &context) + int &row_group_current, const Context &context, std::string format_name) { Columns columns_list; 
UInt64 num_rows = 0; @@ -277,7 +277,7 @@ namespace DB using NameToColumnPtr = std::unordered_map<String, std::shared_ptr<arrow::Column>>; if (!read_status.ok()) - throw Exception{"Error while reading ORC data: " + read_status.ToString(), + throw Exception{"Error while reading " + format_name + " data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; if (0 == table->num_rows()) @@ -333,7 +333,7 @@ namespace DB throw Exception { "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() - + "\" is not supported for conversion from a ORC data format", + + "\" is not supported for conversion from a " + format_name + " data format", ErrorCodes::CANNOT_CONVERT_TYPE}; } @@ -392,7 +392,7 @@ namespace DB default: throw Exception { - "Unsupported ORC type \"" + arrow_column->type()->name() + "\" of an input column \"" + "Unsupported " + format_name + " type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() + "\"", ErrorCodes::UNKNOWN_TYPE }; diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index cf4f021f8c0..b5f4732d107 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -39,7 +39,7 @@ namespace DB static void arrowTableToCHChunk(Chunk &res, std::shared_ptr<arrow::Table> &table, arrow::Status &read_status, const Block &header, - int &row_group_current, const Context &context); + int &row_group_current, const Context &context, std::string format_name); }; } #endif diff --git a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 4da6b10f3ab..2069c3f3cbf 100644 --- a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -57,7 +57,7 @@ namespace DB arrow::Status read_status = file_reader->Read(&table); - ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context); + ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context, "ORC"); return res; } diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 32a55c70e55..54bab6d7467 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -59,7 +59,7 @@ namespace DB std::shared_ptr<arrow::Table> table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, &table); - ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context); + ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context, "Parquet"); return res; } From 6d81087bdc4c777cbdb317a3c0e5d1885fb2264c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 21 Aug 2019 19:53:40 +0300 Subject: [PATCH 108/181] Remove _dummy column if it is not needed. 
--- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 27be35ad57f..2ccfc762087 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1550,7 +1550,11 @@ void InterpreterSelectQuery::executeFetchColumns( { /// Unify streams in case they have different headers. auto first_header = streams.at(0)->getHeader(); - for (size_t i = 1; i < streams.size(); ++i) + + if (first_header.columns() > 1 && first_header.has("_dummy")) + first_header.erase("_dummy"); + + for (size_t i = 0; i < streams.size(); ++i) { auto & stream = streams[i]; auto header = stream->getHeader(); From d573c4ec3e1263a590d19b5de751fb7e6e8fd1b1 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 21 Aug 2019 20:42:44 +0300 Subject: [PATCH 109/181] fix wrong check for disabling "read in order" optimization --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../00995_optimize_read_in_order_with_aggregation.reference | 1 + .../00995_optimize_read_in_order_with_aggregation.sql | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.reference create mode 100644 dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.sql diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 27be35ad57f..dbc0cf85824 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -884,7 +884,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS } SortingInfoPtr sorting_info; - if (settings.optimize_read_in_order && storage && query.orderBy() && !query.groupBy() && !query.final() && !query.join()) + if (settings.optimize_read_in_order && storage && query.orderBy() && !query_analyzer->hasAggregation() && !query.final() && !query.join()) { if (const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get())) sorting_info = optimizeReadInOrder(*merge_tree_data, query, context, syntax_analyzer_result); diff --git a/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.reference b/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.reference new file mode 100644 index 00000000000..d567f8a0b01 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.reference @@ -0,0 +1 @@ +4950 diff --git a/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.sql b/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.sql new file mode 100644 index 00000000000..93c907811a5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00995_optimize_read_in_order_with_aggregation.sql @@ -0,0 +1,6 @@ +SET optimize_read_in_order = 1; +DROP TABLE IF EXISTS order_with_aggr; +CREATE TABLE order_with_aggr(a Int) ENGINE = MergeTree ORDER BY a; + +INSERT INTO order_with_aggr SELECT * FROM numbers(100); +SELECT sum(a) as s FROM order_with_aggr ORDER BY s; From 93d380ddaad180df67ec150e03f2bded4c5a52d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Aug 2019 23:32:58 +0300 Subject: [PATCH 110/181] Fixed test --- dbms/src/Functions/array/arrayIntersect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dbms/src/Functions/array/arrayIntersect.cpp b/dbms/src/Functions/array/arrayIntersect.cpp index e6cc24a49ba..6aec4f94dc8 100644 --- a/dbms/src/Functions/array/arrayIntersect.cpp +++ b/dbms/src/Functions/array/arrayIntersect.cpp @@ -398,7 +398,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable all_nullable = false; } - auto & result_data = assert_cast<ColumnType &>(*result_data_ptr); + auto & result_data = static_cast<ColumnType &>(*result_data_ptr); auto result_offsets_ptr = ColumnArray::ColumnOffsets::create(rows); auto & result_offsets = assert_cast<ColumnArray::ColumnOffsets &>(*result_offsets_ptr); auto null_map_column = ColumnUInt8::create(); From e573b06373c7215b25b5e2859575c1a3ccfeebb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Aug 2019 23:33:38 +0300 Subject: [PATCH 111/181] Updated comment --- dbms/src/Functions/nullIf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/nullIf.cpp b/dbms/src/Functions/nullIf.cpp index 91fb311f444..507abc8d3e1 100644 --- a/dbms/src/Functions/nullIf.cpp +++ b/dbms/src/Functions/nullIf.cpp @@ -43,7 +43,7 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - /// nullIf(col1, col2) == if(col1 = col2, NULL, 1) + /// nullIf(col1, col2) == if(col1 = col2, NULL, col1) Block temp_block = block; From a7fa71aaf1a19ddf2715acd2048a861afffa9e06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Aug 2019 00:28:05 +0300 Subject: [PATCH 112/181] Fixed flappy test --- .../queries/0_stateless/00704_drop_truncate_memory_table.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh b/dbms/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh index 170259f0e24..1bbb69f4dd7 100755 --- a/dbms/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh +++ b/dbms/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -e +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh From 6bbf3bd6341910844f185a376e7db994dafa2fa0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Aug 2019 04:30:49 +0300 Subject: [PATCH 113/181] Added a metric for the number of files to send in Distributed tables --- dbms/src/Common/CurrentMetrics.cpp | 1 + dbms/src/Storages/Distributed/DirectoryMonitor.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index b8e30f3cccd..6bd99fb8f01 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -49,6 +49,7 @@ M(GlobalThreadActive, "Number of threads in global thread pool running a task.") \ M(LocalThread, "Number of threads in local thread pools. Should be similar to GlobalThreadActive.") \ M(LocalThreadActive, "Number of threads in local thread pools running a task.") \ + M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. 
Number of files for every shard is summed.") \ namespace CurrentMetrics diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp index 7eefc68f3a8..13c9cf3050a 100644 --- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp @@ -23,6 +23,7 @@ namespace CurrentMetrics { extern const Metric DistributedSend; + extern const Metric DistributedFilesToInsert; } namespace DB @@ -209,6 +210,8 @@ bool StorageDistributedDirectoryMonitor::processFiles() if (files.empty()) return false; + CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedFilesToInsert, CurrentMetrics::Value(files.size())}; + if (should_batch_inserts) { processFilesWithBatching(files); From ae7ae6d660361d25d12b0dc5f555ef924e1ffd9a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Aug 2019 05:05:46 +0300 Subject: [PATCH 114/181] Update CHANGELOG.md --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 607f650deeb..eceeb5db0ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,16 @@ * Fix build with external `libcxx` [#6010](https://github.com/yandex/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7)) * Fix shared build with `rdkafka` library [#6101](https://github.com/yandex/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7)) +## ClickHouse release 19.11.8.46, 2019-08-22 + +### Bug Fix +* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/yandex/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) +* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/yandex/ClickHouse/issues/6125) [#6550](https://github.com/yandex/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica become alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed issue with parsing CSV [#6426](https://github.com/yandex/ClickHouse/issues/6426) [#6559](https://github.com/yandex/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) +* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/yandex/ClickHouse/issues/6245). [#6513](https://github.com/yandex/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. 
[#6514](https://github.com/yandex/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) + ## ClickHouse release 19.11.7.40, 2019-08-14 ### Bug fix From 19cb429b06d3ca454621f45caa8ac86e9331bcb5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Aug 2019 05:08:12 +0300 Subject: [PATCH 115/181] Update CHANGELOG.md --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eceeb5db0ac..32eb446ac01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## ClickHouse release 19.13.3, 2019-08-22 + +### Bug Fix +* Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/yandex/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) +* Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/yandex/ClickHouse/issues/6125) [#6550](https://github.com/yandex/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica become alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed issue with parsing CSV [#6426](https://github.com/yandex/ClickHouse/issues/6426) [#6559](https://github.com/yandex/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) +* Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/yandex/ClickHouse/issues/6245). [#6513](https://github.com/yandex/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. 
[#6543](https://github.com/yandex/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) * Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/yandex/ClickHouse/issues/6125) [#6550](https://github.com/yandex/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue that if a stale replica become alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) * Fixed issue with parsing CSV [#6426](https://github.com/yandex/ClickHouse/issues/6426) [#6559](https://github.com/yandex/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) * Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/yandex/ClickHouse/issues/6245). [#6513](https://github.com/yandex/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happed due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/yandex/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) From ee89ee0218c86613db1c6856fda8fb3d1140b33b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Aug 2019 05:15:05 +0300 Subject: [PATCH 118/181] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74012668d1b..7d6714b6474 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,7 +46,7 @@ ### Bug Fix * Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [#6543](https://github.com/yandex/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin)) * Fix NPE when using IN clause with a subquery with a tuple. [#6125](https://github.com/yandex/ClickHouse/issues/6125) [#6550](https://github.com/yandex/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix)) -* Fixed an issue that if a stale replica become alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) +* Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [#6522](https://github.com/yandex/ClickHouse/issues/6522) [#6523](https://github.com/yandex/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix)) * Fixed issue with parsing CSV [#6426](https://github.com/yandex/ClickHouse/issues/6426) [#6559](https://github.com/yandex/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix)) * Fixed data race in system.parts table and ALTER query. This fixes [#6245](https://github.com/yandex/ClickHouse/issues/6245). 
[#6513](https://github.com/yandex/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happed due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [#6514](https://github.com/yandex/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov)) From 85d3ba099ff93e8edc696feaa93c579a6967ea86 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Aug 2019 05:31:49 +0300 Subject: [PATCH 119/181] Added a comment --- dbms/src/Common/SymbolIndex.cpp | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index 05268c4b6f1..7f494f0dd9b 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -10,6 +10,49 @@ //#include #include +/** + +ELF object can contain three different places with symbol names and addresses: + +1. Symbol table in section headers. It is used for static linking and usually left in executable. +It is not loaded in memory and they are not necessary for program to run. +It does not relate to debug info and present regardless to -g flag. +You can use strip to get rid of this symbol table. +If you have this symbol table in your binary, you can manually read it and get symbol names, even for symbols from anonymous namespaces. + +2. Hashes in program headers such as DT_HASH and DT_GNU_HASH. +It is necessary for dynamic object (.so libraries and any dynamically linked executable that depend on .so libraries) +because it is used for dynamic linking that happens in runtime and performed by dynamic loader. +Only exported symbols will be presented in that hash tables. Symbols from anonymous namespaces are not. +This part of executable binary is loaded in memory and accessible via 'dl_iterate_phdr', 'dladdr' and 'backtrace_symbols' functions from libc. +ClickHouse versions prior to 19.13 has used just these symbol names to symbolize stack traces +and stack traces may be incomplete due to lack of symbols with internal linkage. +But because ClickHouse is linked with most of the symbols exported (-rdynamic flag) it can still provide good enough stack traces. + +3. DWARF debug info. It contains the most detailed information about symbols and everything else. +It allows to get source file names and line numbers from addresses. Only available if you use -g option for compiler. +It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes) +it is splitted to separate binary and provided in clickhouse-common-static-dbg package. +This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse and is loaded automatically by tools like gdb, addr2line. +When you build ClickHouse by yourself, debug info is not splitted and present in a single huge binary. + +What ClickHouse is using to provide good stack traces? + +In versions prior to 19.13, only "program headers" (2) was used. + +In version 19.13, ClickHouse will read program headers (2) and cache them, +also it will read itself as ELF binary and extract symbol tables from section headers (1) +to also symbolize functions that are not exported for dynamic linking. 
+And finally, it will read DWARF info (3) if available to display file names and line numbers. + +What detail can you obtain depending on your binary? + +If you have debug info (you build ClickHouse by yourself or install clickhouse-common-static-dbg package), you will get source file names and line numbers. +Otherwise you will get only symbol names. If your binary contains symbol table in section headers (the default, unless stripped), you will get all symbol names. +Otherwise you will get only exported symbols from program headers. + +*/ + namespace DB { From 78af6d793d472f6f8903d5a6cff2a2eeb8c004a0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 22 Aug 2019 12:34:06 +0800 Subject: [PATCH 120/181] Fix segfault when decoding symbol table. --- dbms/src/Common/SymbolIndex.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index 7f494f0dd9b..5cb424ef48b 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -107,13 +107,14 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, size_t sym_cnt = 0; for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it) { - if (it->d_tag == DT_HASH) - { - const ElfW(Word) * hash = reinterpret_cast(correct_address(info->dlpi_addr, it->d_un.d_ptr)); - sym_cnt = hash[1]; - break; - } - else if (it->d_tag == DT_GNU_HASH) + // TODO: this branch leads to invalid address of the hash table. Need further investigation. + // if (it->d_tag == DT_HASH) + // { + // const ElfW(Word) * hash = reinterpret_cast(correct_address(info->dlpi_addr, it->d_un.d_ptr)); + // sym_cnt = hash[1]; + // break; + // } + if (it->d_tag == DT_GNU_HASH) { /// This code based on Musl-libc. From 8ac4ee3dca4d51380fd3e2235bb842ce18017509 Mon Sep 17 00:00:00 2001 From: sev7e0 Date: Thu, 22 Aug 2019 14:29:21 +0800 Subject: [PATCH 121/181] Fix data type (enum) table name error in docs --- docs/en/data_types/enum.md | 2 +- docs/ru/data_types/enum.md | 2 +- docs/zh/data_types/enum.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/data_types/enum.md b/docs/en/data_types/enum.md index 247ec070190..3fbe5b3708b 100644 --- a/docs/en/data_types/enum.md +++ b/docs/en/data_types/enum.md @@ -94,7 +94,7 @@ ENGINE = TinyLog it can store not only `'hello'` and `'world'`, but `NULL`, as well. ``` -INSERT INTO t_enum_null Values('hello'),('world'),(NULL) +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) ``` In RAM, an `Enum` column is stored in the same way as `Int8` or `Int16` of the corresponding numerical values. diff --git a/docs/ru/data_types/enum.md b/docs/ru/data_types/enum.md index 7ed0150e65f..9191dc5d2b0 100644 --- a/docs/ru/data_types/enum.md +++ b/docs/ru/data_types/enum.md @@ -90,7 +90,7 @@ ENGINE = TinyLog , то в ней можно будет хранить не только `'hello'` и `'world'`, но и `NULL`. ``` -INSERT INTO t_enum_null Values('hello'),('world'),(NULL) +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) ``` В оперативке столбец типа `Enum` представлен так же, как `Int8` или `Int16` соответствующими числовыми значениями. 
diff --git a/docs/zh/data_types/enum.md b/docs/zh/data_types/enum.md index ca8488b4345..41133b56d45 100644 --- a/docs/zh/data_types/enum.md +++ b/docs/zh/data_types/enum.md @@ -91,7 +91,7 @@ ENGINE = TinyLog 不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。 ``` -INSERT INTO t_enum_null Values('hello'),('world'),(NULL) +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) ``` 在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。 From 1683547b2d7ac65f22cda65dd8a0c769e8ec91e9 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Thu, 22 Aug 2019 10:24:04 +0300 Subject: [PATCH 122/181] ADQM-34 fix style --- dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 15d0ad861ba..8fb6ab5a359 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -27,7 +27,6 @@ namespace DB extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; extern const int THERE_IS_NO_COLUMN; } - const std::unordered_map> arrow_type_to_internal_type = { //{arrow::Type::DECIMAL, std::make_shared()}, {arrow::Type::UINT8, std::make_shared()}, @@ -63,7 +62,7 @@ namespace DB // 2. JSON -> String // Full list of types: contrib/arrow/cpp/src/arrow/type.h }; - + /// Inserts numeric data right into internal column data to reduce an overhead template > static void fillColumnWithNumericData(std::shared_ptr & arrow_column, MutableColumnPtr & internal_column) From c3aaf583a0df53bdbc62286c170df733993fe404 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 11:26:50 +0300 Subject: [PATCH 123/181] Fix RollupTransform. --- .../Processors/Transforms/RollupTransform.cpp | 33 ++++++++++++------- .../Processors/Transforms/RollupTransform.h | 10 +++--- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/dbms/src/Processors/Transforms/RollupTransform.cpp b/dbms/src/Processors/Transforms/RollupTransform.cpp index 9c8270ce091..1f5c140aafe 100644 --- a/dbms/src/Processors/Transforms/RollupTransform.cpp +++ b/dbms/src/Processors/Transforms/RollupTransform.cpp @@ -5,7 +5,7 @@ namespace DB { RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_) - : IInflatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), params_->getHeader()) , params(std::move(params_)) , keys(params->params.keys) { @@ -13,18 +13,33 @@ RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr par void RollupTransform::consume(Chunk chunk) { - consumed_chunk = std::move(chunk); - last_removed_key = keys.size(); + consumed_chunks.emplace_back(std::move(chunk)); } -bool RollupTransform::canGenerate() +Chunk RollupTransform::merge(Chunks && chunks, bool final) { - return consumed_chunk; + BlocksList rollup_blocks; + for (auto & chunk : chunks) + rollup_blocks.emplace_back(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + + auto rollup_block = params->aggregator.mergeBlocks(rollup_blocks, final); + auto num_rows = rollup_block.rows(); + return Chunk(rollup_block.getColumns(), num_rows); } Chunk RollupTransform::generate() { - auto gen_chunk = std::move(consumed_chunk); + if (!consumed_chunks.empty()) + { + if (consumed_chunks.size() > 1) + rollup_chunk = merge(std::move(consumed_chunks), false); + else + rollup_chunk = std::move(consumed_chunks.front()); + + 
consumed_chunks.clear(); + } + + auto gen_chunk = std::move(rollup_chunk); if (last_removed_key) { @@ -35,11 +50,7 @@ Chunk RollupTransform::generate() auto columns = gen_chunk.getColumns(); columns[key] = columns[key]->cloneEmpty()->cloneResized(num_rows); - BlocksList rollup_blocks = { getInputPort().getHeader().cloneWithColumns(columns) }; - auto rollup_block = params->aggregator.mergeBlocks(rollup_blocks, false); - - num_rows = rollup_block.rows(); - consumed_chunk = Chunk(rollup_block.getColumns(), num_rows); + rollup_chunk = merge({Chunk(std::move(columns), num_rows)}, false); } finalizeChunk(gen_chunk); diff --git a/dbms/src/Processors/Transforms/RollupTransform.h b/dbms/src/Processors/Transforms/RollupTransform.h index 754e0237357..fd435740a63 100644 --- a/dbms/src/Processors/Transforms/RollupTransform.h +++ b/dbms/src/Processors/Transforms/RollupTransform.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include namespace DB @@ -7,7 +7,7 @@ namespace DB /// Takes blocks after grouping, with non-finalized aggregate functions. /// Calculates subtotals and grand totals values for a set of columns. -class RollupTransform : public IInflatingTransform +class RollupTransform : public IAccumulatingTransform { public: RollupTransform(Block header, AggregatingTransformParamsPtr params); @@ -15,14 +15,16 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate() override; Chunk generate() override; private: AggregatingTransformParamsPtr params; ColumnNumbers keys; - Chunk consumed_chunk; + Chunks consumed_chunks; + Chunk rollup_chunk; size_t last_removed_key = 0; + + Chunk merge(Chunks && chunks, bool final); }; } From 705e2c7437a7ee937bd47e8966c18fa04071ce7c Mon Sep 17 00:00:00 2001 From: akonyaev Date: Thu, 22 Aug 2019 11:28:32 +0300 Subject: [PATCH 124/181] ADQM-34 fix parquet tests --- .../queries/0_stateless/00900_parquet_load.reference | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00900_parquet_load.reference b/dbms/tests/queries/0_stateless/00900_parquet_load.reference index 83d0e8efde9..4e3977e0e96 100644 --- a/dbms/tests/queries/0_stateless/00900_parquet_load.reference +++ b/dbms/tests/queries/0_stateless/00900_parquet_load.reference @@ -39,7 +39,7 @@ 23.0 24.0 === Try load data from datapage_v2.snappy.parquet -Code: 33. DB::Ex---tion: Error while reading parquet data: IOError: Arrow error: IOError: Corrupt snappy compressed data. +Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Arrow error: IOError: Corrupt snappy compressed data. === Try load data from fixed_length_decimal_1.parquet 1.0 @@ -171,19 +171,19 @@ Code: 33. DB::Ex---tion: Error while reading parquet data: IOError: Arrow error: Code: 8. DB::Ex---tion: Column "element" is not presented in input data === Try load data from nested_maps.snappy.parquet -Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported. +Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported. === Try load data from nonnullable.impala.parquet -Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported. +Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported. === Try load data from nullable.impala.parquet -Code: 33. 
DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported. +Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported. === Try load data from nulls.snappy.parquet Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data === Try load data from repeated_no_annotation.parquet -Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported. +Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported. === Try load data from userdata1.parquet 1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02 From 8228871821b60f0ab4af7f9aba6118ce26c129cb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 11:29:29 +0300 Subject: [PATCH 125/181] Fix RollupTransform. --- dbms/src/Processors/Transforms/RollupTransform.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Processors/Transforms/RollupTransform.cpp b/dbms/src/Processors/Transforms/RollupTransform.cpp index 1f5c140aafe..d60007b102e 100644 --- a/dbms/src/Processors/Transforms/RollupTransform.cpp +++ b/dbms/src/Processors/Transforms/RollupTransform.cpp @@ -50,7 +50,9 @@ Chunk RollupTransform::generate() auto columns = gen_chunk.getColumns(); columns[key] = columns[key]->cloneEmpty()->cloneResized(num_rows); - rollup_chunk = merge({Chunk(std::move(columns), num_rows)}, false); + Chunks chunks; + chunks.emplace_back(std::move(columns), num_rows); + rollup_chunk = merge(std::move(chunks), false); } finalizeChunk(gen_chunk); From 4f38b08481f572207e0263b701d15cbeb9f8f5bc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 11:53:34 +0300 Subject: [PATCH 126/181] Fix RollupTransform. --- dbms/src/Processors/Transforms/RollupTransform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Processors/Transforms/RollupTransform.cpp b/dbms/src/Processors/Transforms/RollupTransform.cpp index d60007b102e..fb51b5f6b45 100644 --- a/dbms/src/Processors/Transforms/RollupTransform.cpp +++ b/dbms/src/Processors/Transforms/RollupTransform.cpp @@ -37,6 +37,7 @@ Chunk RollupTransform::generate() rollup_chunk = std::move(consumed_chunks.front()); consumed_chunks.clear(); + last_removed_key = keys.size(); } auto gen_chunk = std::move(rollup_chunk); From 11f8ec76df2086c112522ab4668d8e61fb5285ab Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 12:28:23 +0300 Subject: [PATCH 127/181] Fix CubeTransform. 
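
CubeTransform (like RollupTransform in the previous commit) now accumulates
all input chunks, merges them once through the aggregator, and then emits one
chunk per subset of the key columns, selected by a countdown bitmask: a
cleared bit means the corresponding key column is replaced by defaults. A
minimal sketch of that subset walk, using hypothetical names and plain
standard types rather than the actual ClickHouse column classes:

    // For N keys the mask starts at 2^N - 1 and is decremented once per
    // generated chunk, so every subset of keys is produced exactly once.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<int> keys = {0, 1, 2};  // positions of key columns
        const uint64_t full = (uint64_t(1) << keys.size()) - 1;

        for (uint64_t mask = full; ; --mask)
        {
            std::printf("kept keys:");
            for (size_t i = 0; i < keys.size(); ++i)
                if (mask & (uint64_t(1) << (keys.size() - i - 1)))
                    std::printf(" %d", keys[i]);  // cleared bits get default values
            std::printf("\n");
            if (mask == 0)
                break;
        }
    }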
--- .../Processors/Transforms/CubeTransform.cpp | 53 ++++++++++++------- .../src/Processors/Transforms/CubeTransform.h | 10 ++-- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/dbms/src/Processors/Transforms/CubeTransform.cpp b/dbms/src/Processors/Transforms/CubeTransform.cpp index 5809a480d09..106bd0fef51 100644 --- a/dbms/src/Processors/Transforms/CubeTransform.cpp +++ b/dbms/src/Processors/Transforms/CubeTransform.cpp @@ -5,7 +5,7 @@ namespace DB { CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_) - : IInflatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), params_->getHeader()) , params(std::move(params_)) , keys(params->params.keys) { @@ -13,28 +13,45 @@ CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_ throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); } -void CubeTransform::consume(Chunk chunk) +Chunk CubeTransform::merge(Chunks && chunks, bool final) { - consumed_chunk = std::move(chunk); - auto num_rows = consumed_chunk.getNumRows(); - mask = (UInt64(1) << keys.size()) - 1; + BlocksList rollup_blocks; + for (auto & chunk : chunks) + rollup_blocks.emplace_back(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - current_columns = consumed_chunk.getColumns(); - current_zero_columns.clear(); - current_zero_columns.reserve(keys.size()); - - for (auto key : keys) - current_zero_columns.emplace_back(current_columns[key]->cloneEmpty()->cloneResized(num_rows)); + auto rollup_block = params->aggregator.mergeBlocks(rollup_blocks, final); + auto num_rows = rollup_block.rows(); + return Chunk(rollup_block.getColumns(), num_rows); } -bool CubeTransform::canGenerate() +void CubeTransform::consume(Chunk chunk) { - return consumed_chunk; + consumed_chunks.emplace_back(std::move(chunk)); } Chunk CubeTransform::generate() { - auto gen_chunk = std::move(consumed_chunk); + if (!consumed_chunks.empty()) + { + if (consumed_chunks.size() > 1) + cube_chunk = merge(std::move(consumed_chunks), false); + else + cube_chunk = std::move(consumed_chunks.front()); + + consumed_chunks.clear(); + + auto num_rows = cube_chunk.getNumRows(); + mask = (UInt64(1) << keys.size()) - 1; + + current_columns = cube_chunk.getColumns(); + current_zero_columns.clear(); + current_zero_columns.reserve(keys.size()); + + for (auto key : keys) + current_zero_columns.emplace_back(current_columns[key]->cloneEmpty()->cloneResized(num_rows)); + } + + auto gen_chunk = std::move(cube_chunk); if (mask) { @@ -47,11 +64,9 @@ Chunk CubeTransform::generate() if ((mask & (UInt64(1) << (size - i - 1))) == 0) columns[keys[i]] = current_zero_columns[i]; - BlocksList cube_blocks = { getInputPort().getHeader().cloneWithColumns(columns) }; - auto cube_block = params->aggregator.mergeBlocks(cube_blocks, false); - - auto num_rows = cube_block.rows(); - consumed_chunk = Chunk(cube_block.getColumns(), num_rows); + Chunks chunks; + chunks.emplace_back(std::move(columns), current_columns.front()->size()); + cube_chunk = merge(std::move(chunks), false); } finalizeChunk(gen_chunk); diff --git a/dbms/src/Processors/Transforms/CubeTransform.h b/dbms/src/Processors/Transforms/CubeTransform.h index 60259832e40..6d0e2338174 100644 --- a/dbms/src/Processors/Transforms/CubeTransform.h +++ b/dbms/src/Processors/Transforms/CubeTransform.h @@ -8,7 +8,7 @@ namespace DB /// Takes blocks after grouping, with non-finalized aggregate functions. 
/// Calculates all subsets of columns and aggregates over them. -class CubeTransform : public IInflatingTransform +class CubeTransform : public IAccumulatingTransform { public: CubeTransform(Block header, AggregatingTransformParamsPtr params); @@ -16,20 +16,20 @@ public: protected: void consume(Chunk chunk) override; - - bool canGenerate() override; - Chunk generate() override; private: AggregatingTransformParamsPtr params; ColumnNumbers keys; - Chunk consumed_chunk; + Chunks consumed_chunks; + Chunk cube_chunk; Columns current_columns; Columns current_zero_columns; UInt64 mask = 0; + + Chunk merge(Chunks && chunks, bool final); }; } From b93ffdd0387ea998be71bac361d6eda85a9c8c66 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 13:16:56 +0300 Subject: [PATCH 128/181] Fix MergingSortedTransform. --- dbms/src/Processors/Transforms/MergingSortedTransform.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.h b/dbms/src/Processors/Transforms/MergingSortedTransform.h index 223d5253e62..f1175c8d347 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.h +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.h @@ -165,6 +165,13 @@ private: void updateCursor(Chunk chunk, size_t source_num) { + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + auto & shared_chunk_ptr = source_chunks[source_num]; if (!shared_chunk_ptr) From 1f9087dbb22cf648a580aa7c286c45cc6a16e5c6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 13:46:23 +0300 Subject: [PATCH 129/181] Increase memory limit in 00284_external_aggregation. 
--- dbms/tests/queries/0_stateless/00284_external_aggregation.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql index 75d2c0b9bc2..03403b90b6c 100644 --- a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql @@ -1,5 +1,5 @@ SET max_bytes_before_external_group_by = 100000000; -SET max_memory_usage = 201000000; +SET max_memory_usage = 301000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); From fb534bd1ce0b290ba9380d47cbb04f75ccfdb1f0 Mon Sep 17 00:00:00 2001 From: dimarub2000 Date: Thu, 22 Aug 2019 14:03:01 +0300 Subject: [PATCH 130/181] Added symlink to clickhouse-client package --- debian/clickhouse-client.install | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/clickhouse-client.install b/debian/clickhouse-client.install index 7331f3eb5ed..5e730db669f 100644 --- a/debian/clickhouse-client.install +++ b/debian/clickhouse-client.install @@ -3,5 +3,6 @@ usr/bin/clickhouse-local usr/bin/clickhouse-compressor usr/bin/clickhouse-benchmark usr/bin/clickhouse-format +usr/bin/clickhouse-obfuscator etc/clickhouse-client/config.xml usr/bin/clickhouse-extract-from-config From 63411b4d4a568bdebec9a463c607e74404a8ae9d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 15:51:33 +0300 Subject: [PATCH 131/181] Try to fix 00093_union_race_conditions_4. --- dbms/tests/queries/0_stateless/00093_union_race_conditions_4.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00093_union_race_conditions_4.sh b/dbms/tests/queries/0_stateless/00093_union_race_conditions_4.sh index fcdbe4cbcdd..2d255a0c2f6 100755 --- a/dbms/tests/queries/0_stateless/00093_union_race_conditions_4.sh +++ b/dbms/tests/queries/0_stateless/00093_union_race_conditions_4.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -o errexit set -o pipefail -for i in {1..10}; do seq 1 10 | sed 's/.*/SELECT 1 % ((number + 500) % 1000) FROM system.numbers_mt LIMIT 1000;/' | $CLICKHOUSE_CLIENT -n --receive_timeout=1 --max_block_size=1 >/dev/null 2>&1 && echo 'Fail!' && break; echo -n '.'; done; echo +for i in {1..10}; do seq 1 10 | sed 's/.*/SELECT 1 % ((number + 500) % 1000) FROM numbers_mt(1000);/' | $CLICKHOUSE_CLIENT -n --receive_timeout=1 --max_block_size=1 >/dev/null 2>&1 && echo 'Fail!' && break; echo -n '.'; done; echo From b3f91a717cabca06b03aec29851d0776f45f06bc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 17:38:44 +0300 Subject: [PATCH 132/181] Use ThreadFromGlobalPool in PipelineExecutor instead of ThreadPool. 
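
The executor now owns its worker threads directly and joins them on both the
normal and the exceptional path (the SCOPE_EXIT block also calls finish() to
wake workers blocked on the task queue before joining). A minimal sketch of
the spawn/join-with-guard pattern, with std::thread standing in for
ThreadFromGlobalPool and an RAII struct standing in for SCOPE_EXIT; the names
are illustrative, not the real API:

    #include <thread>
    #include <vector>

    void executeImpl(size_t num_threads)
    {
        std::vector<std::thread> threads;
        threads.reserve(num_threads);
        bool finished_flag = false;

        // Runs at scope exit: if we are unwinding before the normal join
        // below has completed, join the workers (a real guard would first
        // have to wake any worker blocked on the task queue).
        struct JoinGuard
        {
            std::vector<std::thread> & threads;
            bool & finished;
            ~JoinGuard()
            {
                if (!finished)
                    for (auto & t : threads)
                        if (t.joinable())
                            t.join();
            }
        } guard{threads, finished_flag};

        for (size_t i = 0; i < num_threads; ++i)
            threads.emplace_back([] { /* worker loop */ });

        for (auto & t : threads)
            t.join();

        finished_flag = true;
    }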
--- .../Processors/Executors/PipelineExecutor.cpp | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/dbms/src/Processors/Executors/PipelineExecutor.cpp b/dbms/src/Processors/Executors/PipelineExecutor.cpp index 31cca4e1a48..6aad6f96b5c 100644 --- a/dbms/src/Processors/Executors/PipelineExecutor.cpp +++ b/dbms/src/Processors/Executors/PipelineExecutor.cpp @@ -590,32 +590,45 @@ void PipelineExecutor::executeImpl(size_t num_threads) for (size_t i = 0; i < num_threads; ++i) executor_contexts.emplace_back(std::make_unique()); + auto thread_group = CurrentThread::getGroup(); + + using ThreadsData = std::vector; + ThreadsData threads; + threads.reserve(num_threads); + + bool finished_flag = false; + + SCOPE_EXIT( + if (!finished_flag) + { + finish(); + + for (auto & thread : threads) + thread.join(); + } + ); + addChildlessProcessorsToStack(stack); - while (!stack.empty()) { - UInt64 proc = stack.top(); - stack.pop(); + std::lock_guard lock(task_queue_mutex); - if (prepareProcessor(proc, stack, stack, 0, false)) + while (!stack.empty()) { - auto cur_state = graph[proc].execution_state.get(); - task_queue.push(cur_state); + UInt64 proc = stack.top(); + stack.pop(); + + if (prepareProcessor(proc, stack, stack, 0, false)) + { + auto cur_state = graph[proc].execution_state.get(); + task_queue.push(cur_state); + } } } - ThreadPool pool(num_threads); - - SCOPE_EXIT( - finish(); - pool.wait() - ); - - auto thread_group = CurrentThread::getGroup(); - for (size_t i = 0; i < num_threads; ++i) { - pool.schedule([this, thread_group, thread_num = i, num_threads] + threads.emplace_back([this, thread_group, thread_num = i, num_threads] { /// ThreadStatus thread_status; @@ -631,7 +644,10 @@ void PipelineExecutor::executeImpl(size_t num_threads) }); } - pool.wait(); + for (auto & thread : threads) + thread.join(); + + finished_flag = true; } String PipelineExecutor::dumpPipeline() const From 6d78e3be94b56f5e844238af7a688f0125d7e274 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 21 Aug 2019 11:12:39 +0300 Subject: [PATCH 133/181] hasToken function implementation * Function to check if given token is present in a string; * Special case for hasToken to 'tokenbf_v1' index; * Test cases for hasToken() * Test case for hasToken() + 'tokenbf_v1' integration --- dbms/src/Common/StringSearcher.h | 79 +++++++++- dbms/src/Common/Volnitsky.h | 14 +- dbms/src/Functions/FunctionsStringSearch.cpp | 76 ++++++++++ .../MergeTree/MergeTreeIndexFullText.cpp | 13 ++ .../queries/0_stateless/00990_hasToken.python | 124 ++++++++++++++++ .../0_stateless/00990_hasToken.reference | 139 ++++++++++++++++++ .../queries/0_stateless/00990_hasToken.sh | 8 + .../00990_hasToken_and_tokenbf.reference | 3 + .../00990_hasToken_and_tokenbf.sql | 33 +++++ 9 files changed, 481 insertions(+), 8 deletions(-) create mode 100755 dbms/tests/queries/0_stateless/00990_hasToken.python create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken.reference create mode 100755 dbms/tests/queries/0_stateless/00990_hasToken.sh create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index 5e78ff23df1..fecf1a7ca81 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -23,6 +25,7 @@ namespace DB namespace ErrorCodes { extern 
const int UNSUPPORTED_PARAMETER;
+    extern const int BAD_ARGUMENTS;
 }


@@ -157,7 +160,7 @@ public:
 #endif
     }

-    ALWAYS_INLINE bool compare(const UInt8 * pos) const
+    ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const
     {
         static const Poco::UTF8Encoding utf8;

@@ -374,7 +377,7 @@ public:
 #endif
     }

-    ALWAYS_INLINE bool compare(const UInt8 * pos) const
+    ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const
     {
 #ifdef __SSE4_1__
         if (pageSafe(pos))
@@ -567,7 +570,7 @@ public:
 #endif
     }

-    ALWAYS_INLINE bool compare(const UInt8 * pos) const
+    ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const
     {
 #ifdef __SSE4_1__
         if (pageSafe(pos))
@@ -697,11 +700,81 @@ public:
     }
 };

+// Searches for a needle surrounded by token separators.
+// Separators are anything inside ASCII (0-128) that is not alphanumeric.
+// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings
+// should work just fine. But any Unicode whitespace is not considered a token separator.
+template
+class TokenSearcher
+{
+    StringSearcher searcher;
+    size_t needle_size;
+
+public:
+    TokenSearcher(const char * const needle_, const size_t needle_size_)
+        : searcher{needle_, needle_size_},
+        needle_size(needle_size_)
+    {
+        if (std::any_of(reinterpret_cast(needle_), reinterpret_cast(needle_) + needle_size_, isTokenSeparator))
+        {
+            throw Exception{"needle must not contain whitespace characters", ErrorCodes::BAD_ARGUMENTS};
+        }
+    }
+
+    ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const
+    {
+        // Use the underlying searcher only if pos is at the beginning of a token
+        // and pos + searcher.needle_size is the end of a token.
+        if (isToken(haystack, haystack_end, pos))
+            return searcher.compare(haystack, haystack_end, pos);
+
+        return false;
+    }
+
+    const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const
+    {
+        // Use searcher.search(), then verify that the returned value is a token;
+        // if it is not, skip it and re-run.
+        const UInt8 * pos = haystack;
+        while (pos < haystack_end)
+        {
+            pos = searcher.search(pos, haystack_end);
+            if (pos == haystack_end || isToken(haystack, haystack_end, pos))
+                return pos;
+
+            // Assuming that the needle does not contain any token separators.
+            pos += needle_size;
+        }
+        return haystack_end;
+    }
+
+    const UInt8 * search(const UInt8 * haystack, const size_t haystack_size) const
+    {
+        return search(haystack, haystack + haystack_size);
+    }
+
+    ALWAYS_INLINE bool isToken(const UInt8 * haystack, const UInt8 * const haystack_end, const UInt8 * p) const
+    {
+        return (p == haystack || isTokenSeparator(*(p - 1)))
+            && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size)));
+    }
+
+    ALWAYS_INLINE static bool isTokenSeparator(const UInt8 c)
+    {
+        if (isAlphaNumericASCII(c) || !isASCII(c))
+            return false;
+
+        return true;
+    }
+};
+
 using ASCIICaseSensitiveStringSearcher = StringSearcher;
 using ASCIICaseInsensitiveStringSearcher = StringSearcher;
 using UTF8CaseSensitiveStringSearcher = StringSearcher;
 using UTF8CaseInsensitiveStringSearcher = StringSearcher;
+using ASCIICaseSensitiveTokenSearcher = TokenSearcher;


 /** Uses functions from libc.
diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h
index 748cbe09138..c87bdd79dab 100644
--- a/dbms/src/Common/Volnitsky.h
+++ b/dbms/src/Common/Volnitsky.h
@@ -327,6 +327,8 @@ protected:
     FallbackSearcher fallback_searcher;

 public:
+    using Searcher = FallbackSearcher;
+
     /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
       * If you specify it small enough, the fallback algorithm will be used,
       * since it is considered that it's useless to waste time initializing the hash table.
@@ -373,7 +375,7 @@ public:
                 const auto res = pos - (hash[cell_num] - 1);

                 /// pointer in the code is always padded array so we can use pagesafe semantics
-                if (fallback_searcher.compare(res))
+                if (fallback_searcher.compare(haystack, haystack_end, res))
                     return res;
             }
         }
@@ -520,7 +522,7 @@ public:
             {
                 const auto res = pos - (hash[cell_num].off - 1);
                 const size_t ind = hash[cell_num].id;
-                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
+                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
                     return true;
             }
         }
@@ -552,7 +554,7 @@ public:
             {
                 const auto res = pos - (hash[cell_num].off - 1);
                 const size_t ind = hash[cell_num].id;
-                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
+                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
                     ans = std::min(ans, ind);
             }
         }
@@ -590,7 +592,7 @@ public:
             {
                 const auto res = pos - (hash[cell_num].off - 1);
                 const size_t ind = hash[cell_num].id;
-                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
+                if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
                     ans = std::min(ans, res - haystack);
             }
         }
@@ -625,7 +627,7 @@ public:
             {
                 const auto * res = pos - (hash[cell_num].off - 1);
                 const size_t ind = hash[cell_num].id;
-                if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
+                if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
                     ans[ind] = count_chars(haystack, res);
             }
         }
@@ -650,6 +652,8 @@ using VolnitskyUTF8 = VolnitskyBase; /// ignores non-ASCII bytes
 using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase;

+using VolnitskyToken = VolnitskyBase;
+
 using MultiVolnitsky = MultiVolnitskyBase;
 using MultiVolnitskyUTF8 = MultiVolnitskyBase;
 using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase;
diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp
index 726eb8738af..5d688232bd4 100644
--- a/dbms/src/Functions/FunctionsStringSearch.cpp
+++ b/dbms/src/Functions/FunctionsStringSearch.cpp
@@ -434,6 +434,74 @@ struct MultiSearchFirstIndexImpl
     }
 };

+/** Token search in a string: the needle must be surrounded by separator characters, such as whitespace or punctuation.
+  */
+template
+struct HasTokenImpl
+{
+    using ResultType = UInt8;
+
+    static void vector_constant(
+        const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray & res)
+    {
+        if (offsets.empty())
+            return;
+
+        const UInt8 * begin = data.data();
+        const UInt8 * pos = begin;
+        const UInt8 * end = pos + data.size();
+
+        /// The current index in the array of strings.
+ size_t i = 0; + + VolnitskyToken searcher(pattern.data(), pattern.size(), end - pos); + + /// We will search for the next occurrence in all rows at once. + while (pos < end && end != (pos = searcher.search(pos, end - pos))) + { + /// Let's determine which index it refers to. + while (begin + offsets[i] <= pos) + { + res[i] = negate_result; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + offsets[i]) + res[i] = !negate_result; + else + res[i] = negate_result; + + pos = begin + offsets[i]; + ++i; + } + + /// Tail, in which there can be no substring. + if (i < res.size()) + memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0])); + } + + static void constant_constant(const std::string & data, const std::string & pattern, UInt8 & res) + { + VolnitskyToken searcher(pattern.data(), pattern.size(), data.size()); + const auto found = searcher.search(data.c_str(), data.size()) != data.end().base(); + res = negate_result ^ found; + } + + template + static void vector_vector(Args &&...) + { + throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + } + + /// Search different needles in single haystack. + template + static void constant_vector(Args &&...) + { + throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + } +}; + struct NamePosition { @@ -516,6 +584,11 @@ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 static constexpr auto name = "multiSearchFirstPositionCaseInsensitiveUTF8"; }; +struct NameHasToken +{ + static constexpr auto name = "hasToken"; +}; + using FunctionPosition = FunctionsStringSearch, NamePosition>; using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; @@ -542,6 +615,7 @@ using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitiveUTF8>; +using FunctionHasToken = FunctionsStringSearch, NameHasToken>; void registerFunctionsStringSearch(FunctionFactory & factory) { @@ -570,6 +644,8 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 3625c6f1aa5..246ad6784b2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -168,6 +168,19 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map return true; } }, + { + "hasToken", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + const auto & str = value.get(); + stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + }, { "startsWith", [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.python b/dbms/tests/queries/0_stateless/00990_hasToken.python new file mode 100755 index 
00000000000..217d96dfe52
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00990_hasToken.python
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+import re
+
+HAYSTACKS = [
+    "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle",
+    "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay",
+    "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay",
+]
+
+NEEDLE = "needle"
+
+HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE)
+NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE)
+
+def replace_follow_case(replacement):
+    def func(match):
+        g = match.group()
+        if g.islower(): return replacement.lower()
+        if g.istitle(): return replacement.title()
+        if g.isupper(): return replacement.upper()
+        return replacement
+    return func
+
+def replace_separators(query, new_sep):
+    SEP_RE = re.compile('\\s+')
+    result = SEP_RE.sub(new_sep, query)
+    return result
+
+def enlarge_haystack(query, times, separator=''):
+    return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query)
+
+def small_needle(query):
+    return NEEDLE_RE.sub(replace_follow_case('n'), query)
+
+def remove_needle(query):
+    return NEEDLE_RE.sub('', query)
+
+def replace_needle(query, new_needle):
+    return NEEDLE_RE.sub(new_needle, query)
+
+# Transform the needle with str.lower, str.upper, str.title and such.
+def transform_needle(query, string_transformation_func):
+    def replace_with_transformation(match):
+        g = match.group()
+        return string_transformation_func(g)
+
+    return NEEDLE_RE.sub(replace_with_transformation, query)
+
+
+def create_cases(table_row_template, table_query_template, const_query_template):
+    const_queries = []
+    table_rows = []
+    table_queries = set()
+
+    def add_case(haystack, needle, match):
+        match = int(match)
+        const_queries.append(const_query_template.format(haystack=haystack, needle=needle, match=match))
+        table_queries.add(table_query_template.format(haystack=haystack, needle=needle, match=match))
+        table_rows.append(table_row_template.format(haystack=haystack, needle=needle, match=match))
+
+    # Negative cases
+    add_case(remove_needle(HAYSTACKS[0]), NEEDLE, False)
+    for haystack in HAYSTACKS:
+        add_case(transform_needle(haystack, str.title), NEEDLE, False)
+        sep = ''
+        h = replace_separators(haystack, sep)
+        add_case(h, NEEDLE, False)
+        add_case(small_needle(h), small_needle(NEEDLE), False)
+        add_case(enlarge_haystack(h, 10, sep), NEEDLE, False)
+
+    # Positive cases
+    for haystack in HAYSTACKS:
+        add_case(transform_needle(haystack, str.title), transform_needle(NEEDLE, str.title), True)
+        add_case(transform_needle(haystack, str.upper), transform_needle(NEEDLE, str.upper), True)
+
+        # Not checking all separators since some (like ' and \n) cause issues when coupled with
+        # re-based replacement and quoting in the query;
+        # others are rare in practice, and checking all separators would make this test too lengthy.
+ + # r'\\\\' turns into a single '\' in query + #separators = list(''' \t`~!@#$%^&*()-=+|]}[{";:/?.>,<''') + [r'\\\\'] + separators = list(''' \t;:?.,''') + [r'\\\\'] + for sep in separators: + h = replace_separators(haystack, sep) + add_case(h, NEEDLE, True) + add_case(small_needle(h), small_needle(NEEDLE), True) + add_case(enlarge_haystack(h, 200, sep), NEEDLE, True) + add_case(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True) + add_case(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True) + + return table_rows, table_queries, const_queries + +def main(): + + def query(x): + print x + + CONST_QUERY = """SELECT hasToken('{haystack}', '{needle}'), ' expecting ', {match};""" + #SELECT hasToken(haystack, '{needle}') FROM ht WHERE needle = '{needle}' AND match = {match};""" + TABLE_QUERY = """WITH '{needle}' as n SELECT haystack, needle, hasToken(haystack, n) as result FROM ht WHERE needle = n AND result != match;""" + TABLE_ROW = """('{haystack}', '{needle}', {match})""" + + rows, table_queries, const_queries = create_cases(TABLE_ROW, TABLE_QUERY, CONST_QUERY) + for q in const_queries: + query(q) + + query("""DROP TABLE IF EXISTS ht; + CREATE TABLE IF NOT EXISTS + ht +( + haystack String, + needle String, + match UInt8 +) +ENGINE MergeTree() +ORDER BY haystack; +INSERT INTO ht VALUES {values};""".format(values=", ".join(rows))) + for q in sorted(table_queries): + query(q) + +if __name__ == '__main__': + main() diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.reference b/dbms/tests/queries/0_stateless/00990_hasToken.reference new file mode 100644 index 00000000000..867c0c1c691 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken.reference @@ -0,0 +1,139 @@ +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 
expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
+1 expecting 1
diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.sh b/dbms/tests/queries/0_stateless/00990_hasToken.sh
new file mode 100755
index 00000000000..4ccb77b8ecc
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00990_hasToken.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. $CURDIR/../shell_config.sh
+
+# We should have correct env vars from shell_config.sh to run this test
+
+python $CURDIR/00990_hasToken.python | ${CLICKHOUSE_CLIENT} -nm
diff --git a/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference
new file mode 100644
index 00000000000..10e8f0d2c59
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference
@@ -0,0 +1,3 @@
+2007
+2007
+2007
diff --git a/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
new file mode 100644
index 00000000000..60e4d959417
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
@@ -0,0 +1,33 @@
+SET allow_experimental_data_skipping_indices = 1;
+
+DROP TABLE IF EXISTS bloom_filter;
+
+CREATE TABLE bloom_filter
+(
+    id UInt64,
+    s String,
+    INDEX tok_bf (s, lower(s)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1
+) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 8;
+
+insert into bloom_filter select number, 'yyy,uuu' from numbers(1024);
+insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8);
+insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024);
+insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024);
+
+set max_rows_to_read = 16;
+
+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc');
+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def');
+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz');
+
+-- invert result
+-- this does not work as expected, reading more rows than it should
+-- SELECT max(id) FROM bloom_filter WHERE NOT hasToken(s, 'yyy');
+
+-- accessing too many rows
+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'yyy'); -- { serverError 158 }
+
+-- this syntax is not supported by tokenbf
+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz') == 1; -- { serverError 158 }
+
+DROP TABLE bloom_filter;
\ No newline at end of file
From a50d6e713299f8b54c7d1b81cc9742bf4a6a211e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 22 Aug 2019 18:07:07 +0300
Subject: [PATCH 134/181] Update 00416_pocopatch_progress_in_http_headers.
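
With the processors pipeline every X-ClickHouse-Progress header now carries
total_rows_to_read, starting from an initial zero-progress line, so a client
can compute completion from any single header. A hedged sketch of pulling one
counter out of such a line, standard library only, with a hypothetical helper
name:

    #include <cstdio>
    #include <string>

    // Extract one numeric field, e.g. "read_rows", from a progress header
    // like {"read_rows":"7","read_bytes":"56","total_rows_to_read":"10"}.
    static unsigned long progressField(const std::string & header, const std::string & name)
    {
        const std::string key = "\"" + name + "\":\"";
        const auto pos = header.find(key);
        return pos == std::string::npos ? 0 : std::stoul(header.substr(pos + key.size()));
    }

    int main()
    {
        const std::string h = "{\"read_rows\":\"7\",\"read_bytes\":\"56\",\"total_rows_to_read\":\"10\"}";
        std::printf("%lu of %lu rows\n", progressField(h, "read_rows"), progressField(h, "total_rows_to_read"));
    }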
--- ...copatch_progress_in_http_headers.reference | 23 ++++++++++--------- ...0416_pocopatch_progress_in_http_headers.sh | 5 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference index e838f583cdf..a2c79e66928 100644 --- a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference +++ b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference @@ -3,17 +3,18 @@ < X-ClickHouse-Progress: {"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} < X-ClickHouse-Progress: {"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} 9 -< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"6","read_bytes":"48","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"7","read_bytes":"56","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"8","read_bytes":"64","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"9","read_bytes":"72","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} -< X-ClickHouse-Progress: {"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"6","read_bytes":"48","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"7","read_bytes":"56","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"8","read_bytes":"64","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"9","read_bytes":"72","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: 
{"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} +< X-ClickHouse-Progress: {"read_rows":"10","read_bytes":"80","written_rows":"0","written_bytes":"0","total_rows_to_read":"10"} 0 1 2 diff --git a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index d95798bc95c..c86154a8402 100755 --- a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -6,9 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' # This test will fail with external poco (progress not supported) -# "grep -v 11" in order to skip extra progress header for 11-th row (for processors pipeline) -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&experimental_use_processors=0" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' | grep -v 11 -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1&experimental_use_processors=0" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d # 'send_progress_in_http_headers' is false by default ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -q 'X-ClickHouse-Progress' && echo 'Fail' || true From b6c8a492445e99d1d4ca318c6cd580c6362fb634 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Aug 2019 20:08:10 +0300 Subject: [PATCH 135/181] Update 00284_external_aggregation --- dbms/tests/queries/0_stateless/00284_external_aggregation.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql index 03403b90b6c..cd9abec59a8 100644 --- a/dbms/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/dbms/tests/queries/0_stateless/00284_external_aggregation.sql @@ -1,5 +1,5 @@ SET max_bytes_before_external_group_by = 100000000; -SET max_memory_usage = 301000000; +SET max_memory_usage = 351000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); From acce56095fc78c7d4fbe6e80746c4241fe698d89 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Aug 2019 22:35:46 +0300 Subject: [PATCH 
136/181] Limit the number of background threads for mutations

---
 .../MergeTree/MergeTreeDataMergerMutator.cpp |  9 ++++++++-
 .../src/Storages/MergeTree/MergeTreeSettings.h |  1 +
 .../MergeTree/ReplicatedMergeTreeQueue.cpp | 18 +++++++++++-------
 dbms/src/Storages/StorageMergeTree.cpp |  7 ++-----
 .../Storages/StorageReplicatedMergeTree.cpp |  3 +--
 5 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 74193fa7156..5a9affaacd4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -157,7 +157,14 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz

 UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation()
 {
-    return static_cast(DiskSpaceMonitor::getUnreservedFreeSpace(data.full_path) / DISK_USAGE_COEFFICIENT_TO_RESERVE);
+    size_t total_threads_in_pool = pool.getNumberOfThreads();
+    size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);
+
+    /// Allow mutations only if there are enough free threads; otherwise leave them for regular merges.
+    if (total_threads_in_pool - busy_threads_in_pool >= data.settings.number_of_free_entries_in_pool_to_execute_mutation)
+        return static_cast(DiskSpaceMonitor::getUnreservedFreeSpace(data.full_path) / DISK_USAGE_COEFFICIENT_TO_RESERVE);
+
+    return 0;
 }

diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
index 36e82b96961..afd0772a937 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
@@ -33,6 +33,7 @@ struct MergeTreeSettings : public SettingsCollection
     M(SettingUInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \
     M(SettingUInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \
     M(SettingUInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.") \
+    M(SettingUInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there are fewer than the specified number of free entries in the pool, do not execute part mutations. This is to leave free threads for regular merges and to avoid \"Too many parts\".") \
     M(SettingSeconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.") \
     M(SettingSeconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.") \
     \
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index e6251502576..fd65f14fedb 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -956,15 +956,19 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
         return false;
     }

-    /** Execute merge only if there are enough free threads in background pool to do merges of that size.
-      * But if all threads are free (maximal size of merge is allowed) then execute any merge,
-      * (because it may be ordered by OPTIMIZE or early with differrent settings.
+    UInt64 max_source_parts_size = entry.type == LogEntry::MERGE_PARTS ? merger_mutator.getMaxSourcePartsSizeForMerge()
+                                                                       : merger_mutator.getMaxSourcePartSizeForMutation();
+    /** If there are enough free threads in background pool to do large merges (maximal size of merge is allowed),
+      * then ignore value returned by getMaxSourcePartsSizeForMerge() and execute merge of any size,
+      * because it may have been ordered by OPTIMIZE or assigned earlier with different settings.
+      * The max_bytes_to_merge_at_max_space_in_pool setting still works for regular merges,
+      * because the leader replica does not assign merges of greater size (except OPTIMIZE PARTITION and OPTIMIZE FINAL).
       */
-    UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge();
-    if (max_source_parts_size != data.settings.max_bytes_to_merge_at_max_space_in_pool
-        && sum_parts_size_in_bytes > max_source_parts_size)
+    bool ignore_max_size = (entry.type == LogEntry::MERGE_PARTS) && (max_source_parts_size == data.settings.max_bytes_to_merge_at_max_space_in_pool);
+
+    if (!ignore_max_size && sum_parts_size_in_bytes > max_source_parts_size)
     {
-        String reason = "Not executing log entry for part " + entry.new_part_name
+        String reason = "Not executing log entry " + entry.typeToString() + " for part " + entry.new_part_name
             + " because source parts size (" + formatReadableSizeWithBinarySuffix(sum_parts_size_in_bytes)
             + ") is greater than the current maximum (" + formatReadableSizeWithBinarySuffix(max_source_parts_size) + ").";
         LOG_DEBUG(log, reason);
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index d062bb197ca..779efe95a8c 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -624,8 +624,6 @@ bool StorageMergeTree::tryMutatePart()
     /// You must call destructor with unlocked `currently_merging_mutex`.
std::optional tagger; { - auto disk_space = DiskSpaceMonitor::getUnreservedFreeSpace(full_path); - std::lock_guard lock(currently_merging_mutex); if (current_mutations_by_version.empty()) @@ -641,8 +639,7 @@ bool StorageMergeTree::tryMutatePart() if (mutations_begin_it == mutations_end_it) continue; - auto estimated_needed_space = MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}); - if (estimated_needed_space > disk_space) + if (merger_mutator.getMaxSourcePartSizeForMutation() < part->bytes_on_disk) continue; for (auto it = mutations_begin_it; it != mutations_end_it; ++it) @@ -655,7 +652,7 @@ bool StorageMergeTree::tryMutatePart() future_part.part_info = new_part_info; future_part.name = part->getNewName(new_part_info); - tagger.emplace(future_part, estimated_needed_space, *this); + tagger.emplace(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this); break; } } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 028235d9eef..7a946400658 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2208,14 +2208,13 @@ void StorageReplicatedMergeTree::mergeSelectingTask() UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); FutureMergedMutatedPart future_merged_part; - - /// If there are many mutations in queue it may happen, that we cannot enqueue enough merges to merge all new parts if (max_source_parts_size_for_merge > 0 && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred)) { success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, deduplicate, force_ttl); } + /// If there are many mutations in queue it may happen, that we cannot enqueue enough merges to merge all new parts else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0 && merges_and_mutations_queued.second < settings.max_replicated_mutations_in_queue) { From 9c054419323fb8db91b586f6c19208507c7452c5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Aug 2019 22:54:42 +0300 Subject: [PATCH 137/181] better test --- .../configs/merge_tree_max_parts.xml | 6 +++ .../configs/merge_tree_queue.xml | 7 --- .../test_replicated_mutations/test.py | 50 ++++++++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) create mode 100644 dbms/tests/integration/test_replicated_mutations/configs/merge_tree_max_parts.xml delete mode 100644 dbms/tests/integration/test_replicated_mutations/configs/merge_tree_queue.xml diff --git a/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_max_parts.xml b/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_max_parts.xml new file mode 100644 index 00000000000..60047dcab2c --- /dev/null +++ b/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_max_parts.xml @@ -0,0 +1,6 @@ + + + 50 + 50 + + \ No newline at end of file diff --git a/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_queue.xml b/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_queue.xml deleted file mode 100644 index ccc63f2eaec..00000000000 --- a/dbms/tests/integration/test_replicated_mutations/configs/merge_tree_queue.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - 50 - 8 - 4 - - \ No newline at end of file diff --git a/dbms/tests/integration/test_replicated_mutations/test.py b/dbms/tests/integration/test_replicated_mutations/test.py 
index dd42a70e280..0347ba4782c 100644 --- a/dbms/tests/integration/test_replicated_mutations/test.py +++ b/dbms/tests/integration/test_replicated_mutations/test.py @@ -14,10 +14,12 @@ node1 = cluster.add_instance('node1', macros={'cluster': 'test1'}, with_zookeepe # Check, that limits on max part size for merges doesn`t affect mutations node2 = cluster.add_instance('node2', macros={'cluster': 'test1'}, main_configs=["configs/merge_tree.xml"], with_zookeeper=True) -node3 = cluster.add_instance('node3', macros={'cluster': 'test2'}, main_configs=["configs/merge_tree_queue.xml"], with_zookeeper=True) -node4 = cluster.add_instance('node4', macros={'cluster': 'test2'}, main_configs=["configs/merge_tree_queue.xml"], with_zookeeper=True) +node3 = cluster.add_instance('node3', macros={'cluster': 'test2'}, main_configs=["configs/merge_tree_max_parts.xml"], with_zookeeper=True) +node4 = cluster.add_instance('node4', macros={'cluster': 'test2'}, main_configs=["configs/merge_tree_max_parts.xml"], with_zookeeper=True) -all_nodes = [node1, node2, node3, node4] +node5 = cluster.add_instance('node5', macros={'cluster': 'test3'}, main_configs=["configs/merge_tree_max_parts.xml"]) + +all_nodes = [node1, node2, node3, node4, node5] @pytest.fixture(scope="module") def started_cluster(): @@ -27,9 +29,11 @@ def started_cluster(): for node in all_nodes: node.query("DROP TABLE IF EXISTS test_mutations") - for node in all_nodes: + for node in [node1, node2, node3, node4]: node.query("CREATE TABLE test_mutations(d Date, x UInt32, i UInt32) ENGINE ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/test_mutations', '{instance}') ORDER BY x PARTITION BY toYYYYMM(d)") + node5.query("CREATE TABLE test_mutations(d Date, x UInt32, i UInt32) ENGINE MergeTree() ORDER BY x PARTITION BY toYYYYMM(d)") + yield cluster finally: @@ -56,7 +60,7 @@ class Runner: self.exceptions = [] - def do_insert(self, thread_num): + def do_insert(self, thread_num, partitions_num): self.stop_ev.wait(random.random()) # Each thread inserts a small random number of rows with random year, month 01 and day determined @@ -74,7 +78,7 @@ class Runner: for x in xs: self.currently_inserting_xs[x] += 1 - year = 2000 + random.randint(0, 10) + year = 2000 + random.randint(0, partitions_num) date_str = '{year}-{month}-{day}'.format(year=year, month=month, day=day) payload = '' for x in xs: @@ -158,7 +162,7 @@ def test_mutations(started_cluster): threads = [] for thread_num in range(5): - threads.append(threading.Thread(target=runner.do_insert, args=(thread_num, ))) + threads.append(threading.Thread(target=runner.do_insert, args=(thread_num, 10))) for thread_num in (11, 12, 13): threads.append(threading.Thread(target=runner.do_delete, args=(thread_num,))) @@ -178,7 +182,9 @@ def test_mutations(started_cluster): all_done = wait_for_mutations(nodes, runner.total_mutations) - print node1.query("SELECT mutation_id, command, parts_to_do, is_done FROM system.mutations WHERE table = 'test_mutations' FORMAT TSVWithNames") + print "Total mutations: ", runner.total_mutations + for node in nodes: + print node.query("SELECT mutation_id, command, parts_to_do, is_done FROM system.mutations WHERE table = 'test_mutations' FORMAT TSVWithNames") assert all_done expected_sum = runner.total_inserted_xs - runner.total_deleted_xs @@ -188,24 +194,30 @@ def test_mutations(started_cluster): assert actual_sums[i] == expected_sum -def test_mutations_dont_prevent_merges(started_cluster): - nodes = [node3, node4] - for year in range(2000, 2008): +@pytest.mark.parametrize( + ('nodes', ), 
+ [ + ([node5, ], ), # MergeTree + ([node3, node4], ), # ReplicatedMergeTree + ] +) +def test_mutations_dont_prevent_merges(started_cluster, nodes): + for year in range(2000, 2016): rows = '' date_str = '{}-01-{}'.format(year, random.randint(1, 10)) for i in range(10): rows += '{} {} {}\n'.format(date_str, random.randint(1, 10), i) - node3.query("INSERT INTO test_mutations FORMAT TSV", rows) + nodes[0].query("INSERT INTO test_mutations FORMAT TSV", rows) - # will run mutations of 8 parts in parallel, mutations will sleep for about 20 seconds - node3.query("ALTER TABLE test_mutations UPDATE i = sleepEachRow(2) WHERE 1") + # will run mutations of 16 parts in parallel, mutations will sleep for about 20 seconds + nodes[0].query("ALTER TABLE test_mutations UPDATE i = sleepEachRow(2) WHERE 1") runner = Runner(nodes) threads = [] - for thread_num in range(10): - threads.append(threading.Thread(target=runner.do_insert, args=(thread_num, ))) + for thread_num in range(2): + threads.append(threading.Thread(target=runner.do_insert, args=(thread_num, 0))) - # will insert approx 4-5 new parts per 1 second into each partition + # will insert approx 8-10 new parts per 1 second into one partition for t in threads: t.start() @@ -215,5 +227,9 @@ def test_mutations_dont_prevent_merges(started_cluster): for t in threads: t.join() + for node in nodes: + print node.query("SELECT mutation_id, command, parts_to_do, is_done FROM system.mutations WHERE table = 'test_mutations' FORMAT TSVWithNames") + print node.query("SELECT partition, count(name), sum(active), sum(active*rows) FROM system.parts WHERE table ='test_mutations' GROUP BY partition FORMAT TSVWithNames") + assert all_done assert all([str(e).find("Too many parts") < 0 for e in runner.exceptions]) From e8bc2189840613f6cbe1caaca987c293adff1b16 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 00:35:16 +0300 Subject: [PATCH 138/181] Rewrite flappy test --- .../00600_replace_running_query.reference | 8 ++++--- .../00600_replace_running_query.sh | 22 ++++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 237dd6b5309..804267a1c11 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -1,5 +1,7 @@ 0 -1 0 -3 0 -2 0 +1 +1 +1 +finished 42 readonly SELECT 2, count() FROM system.numbers +1 44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 8e4677bb1d5..513f6d8440e 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -1,17 +1,18 @@ #!/usr/bin/env bash +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh

-set -e -o pipefail

 function wait_for_query_to_start()
 {
     while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done
 }

-$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(100000000) GROUP BY k)' 2>&1 > /dev/null &
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' 2>&1 > /dev/null &
 wait_for_query_to_start 'hello'

 # Replace it
@@ -20,15 +21,20 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d
 # Wait for it to be replaced
 wait

-${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 1, sleep(1)' &
+${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
 wait_for_query_to_start '42'
-( ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' ||: ) 2>&1 | grep -F 'is already running by user' > /dev/null
+
+# Trying to run another query with the same query_id
+${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is already running by user'
+
+# Trying to replace query of a different user
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user'
+
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC"
 wait

-${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' &
-sleep 0.1
-${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' &
+${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
 wait_for_query_to_start '42'
-( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F "can't be stopped" > /dev/null
+${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null
 ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44'
 wait
From dce975321599838a03ed22ab3a52a493a1f01f5d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 23 Aug 2019 00:44:40 +0300
Subject: [PATCH 139/181] Fix split build.
--- contrib/arrow-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 7b94acc9031..bc229deeced 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -219,7 +219,7 @@ endif() add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) add_dependencies(${ARROW_LIBRARY} protoc) target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} Threads::Threads) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${PROTOBUF_LIBRARIES} Threads::Threads) if (ARROW_WITH_LZ4) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) endif() From 859736d935275ef9d66e1f84c853f7f69f780394 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 01:41:30 +0300 Subject: [PATCH 140/181] Basic code quality of Live View --- .../PushingToViewsBlockOutputStream.cpp | 3 +- dbms/src/Storages/StorageLiveView.cpp | 217 ++++++++++++++++-- dbms/src/Storages/StorageLiveView.h | 184 +-------------- 3 files changed, 207 insertions(+), 197 deletions(-) diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 840d3479ab9..d16d68bf72b 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -106,8 +106,7 @@ void PushingToViewsBlockOutputStream::write(const Block & block) if (auto * live_view = dynamic_cast(storage.get())) { - BlockOutputStreamPtr output_ = std::make_shared(*live_view); - StorageLiveView::writeIntoLiveView(*live_view, block, context, output_); + StorageLiveView::writeIntoLiveView(*live_view, block, context); } else { diff --git a/dbms/src/Storages/StorageLiveView.cpp b/dbms/src/Storages/StorageLiveView.cpp index 6e42b9780e1..fdfa6a3a80d 100644 --- a/dbms/src/Storages/StorageLiveView.cpp +++ b/dbms/src/Storages/StorageLiveView.cpp @@ -9,6 +9,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + #include #include #include @@ -17,10 +18,17 @@ limitations under the License. 
*/ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include #include #include @@ -98,6 +106,174 @@ static void checkAllowedQueries(const ASTSelectQuery & query) } } + +class LiveViewBlockOutputStream : public IBlockOutputStream +{ +public: + explicit LiveViewBlockOutputStream(StorageLiveView & storage_) : storage(storage_) {} + + void writePrefix() override + { + new_blocks = std::make_shared(); + new_blocks_metadata = std::make_shared(); + new_hash = std::make_shared(); + } + + void writeSuffix() override + { + UInt128 key; + String key_str; + + new_hash->get128(key.low, key.high); + key_str = key.toHexString(); + + Poco::FastMutex::ScopedLock lock(storage.mutex); + + if (storage.getBlocksHashKey() != key_str) + { + new_blocks_metadata->hash = key_str; + new_blocks_metadata->version = storage.getBlocksVersion() + 1; + + for (auto & block : *new_blocks) + { + block.insert({DataTypeUInt64().createColumnConst( + block.rows(), new_blocks_metadata->version)->convertToFullColumnIfConst(), + std::make_shared(), + "_version"}); + } + + (*storage.blocks_ptr) = new_blocks; + (*storage.blocks_metadata_ptr) = new_blocks_metadata; + + storage.condition.broadcast(); + } + + new_blocks.reset(); + new_blocks_metadata.reset(); + new_hash.reset(); + } + + void write(const Block & block) override + { + new_blocks->push_back(block); + block.updateHash(*new_hash); + } + + Block getHeader() const override { return storage.getHeader(); } + +private: + using SipHashPtr = std::shared_ptr; + + BlocksPtr new_blocks; + BlocksMetadataPtr new_blocks_metadata; + SipHashPtr new_hash; + StorageLiveView & storage; +}; + + +void StorageLiveView::writeIntoLiveView( + StorageLiveView & live_view, + const Block & block, + const Context & context) +{ + BlockOutputStreamPtr output = std::make_shared(live_view); + + /// Check if live view has any readers if not + /// just reset blocks to empty and do nothing else + /// When first reader comes the blocks will be read. 
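+    /// (The lock is scoped: it is released again right after this check, so
+    /// a WATCH query may attach while the insert below is being processed.)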
+ { + Poco::FastMutex::ScopedLock lock(live_view.mutex); + if (!live_view.hasActiveUsers()) + { + live_view.reset(); + return; + } + } + + bool is_block_processed = false; + BlockInputStreams from; + BlocksPtrs mergeable_blocks; + BlocksPtr new_mergeable_blocks = std::make_shared(); + + { + Poco::FastMutex::ScopedLock lock(live_view.mutex); + + mergeable_blocks = live_view.getMergeableBlocks(); + if (!mergeable_blocks || mergeable_blocks->size() >= context.getGlobalContext().getSettingsRef().max_live_view_insert_blocks_before_refresh) + { + mergeable_blocks = std::make_shared>(); + BlocksPtr base_mergeable_blocks = std::make_shared(); + InterpreterSelectQuery interpreter(live_view.getInnerQuery(), context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names()); + auto view_mergeable_stream = std::make_shared( + interpreter.execute().in); + while (Block this_block = view_mergeable_stream->read()) + base_mergeable_blocks->push_back(this_block); + mergeable_blocks->push_back(base_mergeable_blocks); + live_view.setMergeableBlocks(mergeable_blocks); + + /// Create from streams + for (auto & blocks_ : *mergeable_blocks) + { + if (blocks_->empty()) + continue; + auto sample_block = blocks_->front().cloneEmpty(); + BlockInputStreamPtr stream = std::make_shared(std::make_shared(blocks_), sample_block); + from.push_back(std::move(stream)); + } + + is_block_processed = true; + } + } + + if (!is_block_processed) + { + auto parent_storage = context.getTable(live_view.getSelectDatabaseName(), live_view.getSelectTableName()); + BlockInputStreams streams = {std::make_shared(block)}; + auto proxy_storage = std::make_shared(parent_storage, std::move(streams), QueryProcessingStage::FetchColumns); + InterpreterSelectQuery select_block(live_view.getInnerQuery(), + context, proxy_storage, + QueryProcessingStage::WithMergeableState); + auto data_mergeable_stream = std::make_shared( + select_block.execute().in); + while (Block this_block = data_mergeable_stream->read()) + new_mergeable_blocks->push_back(this_block); + + if (new_mergeable_blocks->empty()) + return; + + { + Poco::FastMutex::ScopedLock lock(live_view.mutex); + + mergeable_blocks = live_view.getMergeableBlocks(); + mergeable_blocks->push_back(new_mergeable_blocks); + + /// Create from streams + for (auto & blocks_ : *mergeable_blocks) + { + if (blocks_->empty()) + continue; + auto sample_block = blocks_->front().cloneEmpty(); + BlockInputStreamPtr stream = std::make_shared(std::make_shared(blocks_), sample_block); + from.push_back(std::move(stream)); + } + } + } + + auto parent_storage = context.getTable(live_view.getSelectDatabaseName(), live_view.getSelectTableName()); + auto proxy_storage = std::make_shared(parent_storage, std::move(from), QueryProcessingStage::WithMergeableState); + InterpreterSelectQuery select(live_view.getInnerQuery(), context, proxy_storage, QueryProcessingStage::Complete); + BlockInputStreamPtr data = std::make_shared(select.execute().in); + + /// Squashing is needed here because the view query can generate a lot of blocks + /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY + /// and two-level aggregation is triggered). 
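+    /// SquashingBlockInputStream concatenates adjacent blocks until they
+    /// reach min_insert_block_size_rows / min_insert_block_size_bytes, so
+    /// watchers receive a few reasonably sized blocks instead of many tiny ones.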
+ data = std::make_shared( + data, context.getGlobalContext().getSettingsRef().min_insert_block_size_rows, context.getGlobalContext().getSettingsRef().min_insert_block_size_bytes); + + copyData(*data, *output); +} + + StorageLiveView::StorageLiveView( const String & table_name_, const String & database_name_, @@ -259,11 +435,10 @@ void StorageLiveView::noUsersThread(const UInt64 & timeout) { while (1) { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - if (!noUsersThreadWakeUp && !noUsersThreadCondition.tryWait(noUsersThreadMutex, - timeout * 1000)) + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + if (!no_users_thread_wakeup && !no_users_thread_condition.tryWait(no_users_thread_mutex, timeout * 1000)) { - noUsersThreadWakeUp = false; + no_users_thread_wakeup = false; if (shutdown_called) return; if (hasUsers()) @@ -301,7 +476,7 @@ void StorageLiveView::noUsersThread(const UInt64 & timeout) void StorageLiveView::startNoUsersThread(const UInt64 & timeout) { bool expected = false; - if (!startnousersthread_called.compare_exchange_strong(expected, true)) + if (!start_no_users_thread_called.compare_exchange_strong(expected, true)) return; if (is_dropped) @@ -312,20 +487,20 @@ void StorageLiveView::startNoUsersThread(const UInt64 & timeout) if (no_users_thread.joinable()) { { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - noUsersThreadWakeUp = true; - noUsersThreadCondition.signal(); + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.signal(); } no_users_thread.join(); } { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - noUsersThreadWakeUp = false; + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + no_users_thread_wakeup = false; } if (!is_dropped) no_users_thread = std::thread(&StorageLiveView::noUsersThread, this, timeout); } - startnousersthread_called = false; + start_no_users_thread_called = false; } void StorageLiveView::startup() @@ -341,13 +516,13 @@ void StorageLiveView::shutdown() if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - noUsersThreadWakeUp = true; - noUsersThreadCondition.signal(); + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.signal(); /// Must detach the no users thread /// as we can't join it as it will result /// in a deadlock - no_users_thread.detach(); + no_users_thread.detach(); /// TODO Not viable at all. } } @@ -423,9 +598,9 @@ BlockInputStreams StorageLiveView::watch( if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - noUsersThreadWakeUp = true; - noUsersThreadCondition.signal(); + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.signal(); } { @@ -448,9 +623,9 @@ BlockInputStreams StorageLiveView::watch( if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(noUsersThreadMutex); - noUsersThreadWakeUp = true; - noUsersThreadCondition.signal(); + Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.signal(); } { diff --git a/dbms/src/Storages/StorageLiveView.h b/dbms/src/Storages/StorageLiveView.h index 6bff9c2dc85..3b2398a421f 100644 --- a/dbms/src/Storages/StorageLiveView.h +++ b/dbms/src/Storages/StorageLiveView.h @@ -12,33 +12,25 @@ limitations under the License. 
*/ #pragma once #include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include #include + namespace DB { -class IAST; - struct BlocksMetadata { String hash; UInt64 version; }; +class IAST; using ASTPtr = std::shared_ptr; using BlocksMetadataPtr = std::shared_ptr; -using SipHashPtr = std::shared_ptr; class StorageLiveView : public ext::shared_ptr_helper, public IStorage { @@ -87,9 +79,9 @@ public: /// Background thread for temporary tables /// which drops this table if there are no users void startNoUsersThread(const UInt64 & timeout); - Poco::FastMutex noUsersThreadMutex; - bool noUsersThreadWakeUp{false}; - Poco::Condition noUsersThreadCondition; + Poco::FastMutex no_users_thread_mutex; + bool no_users_thread_wakeup{false}; + Poco::Condition no_users_thread_condition; /// Get blocks hash /// must be called with mutex locked String getBlocksHashKey() @@ -150,105 +142,10 @@ public: Block getHeader() const; - static void writeIntoLiveView(StorageLiveView & live_view, - const Block & block, - const Context & context, - BlockOutputStreamPtr & output) - { - /// Check if live view has any readers if not - /// just reset blocks to empty and do nothing else - /// When first reader comes the blocks will be read. - { - Poco::FastMutex::ScopedLock lock(live_view.mutex); - if (!live_view.hasActiveUsers()) - { - live_view.reset(); - return; - } - } - - bool is_block_processed = false; - BlockInputStreams from; - BlocksPtrs mergeable_blocks; - BlocksPtr new_mergeable_blocks = std::make_shared(); - - { - Poco::FastMutex::ScopedLock lock(live_view.mutex); - - mergeable_blocks = live_view.getMergeableBlocks(); - if (!mergeable_blocks || mergeable_blocks->size() >= context.getGlobalContext().getSettingsRef().max_live_view_insert_blocks_before_refresh) - { - mergeable_blocks = std::make_shared>(); - BlocksPtr base_mergeable_blocks = std::make_shared(); - InterpreterSelectQuery interpreter(live_view.getInnerQuery(), context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names()); - auto view_mergeable_stream = std::make_shared( - interpreter.execute().in); - while (Block this_block = view_mergeable_stream->read()) - base_mergeable_blocks->push_back(this_block); - mergeable_blocks->push_back(base_mergeable_blocks); - live_view.setMergeableBlocks(mergeable_blocks); - - /// Create from streams - for (auto & blocks_ : *mergeable_blocks) - { - if (blocks_->empty()) - continue; - auto sample_block = blocks_->front().cloneEmpty(); - BlockInputStreamPtr stream = std::make_shared(std::make_shared(blocks_), sample_block); - from.push_back(std::move(stream)); - } - - is_block_processed = true; - } - } - - if (!is_block_processed) - { - auto parent_storage = context.getTable(live_view.getSelectDatabaseName(), live_view.getSelectTableName()); - BlockInputStreams streams = {std::make_shared(block)}; - auto proxy_storage = std::make_shared(parent_storage, std::move(streams), QueryProcessingStage::FetchColumns); - InterpreterSelectQuery select_block(live_view.getInnerQuery(), - context, proxy_storage, - QueryProcessingStage::WithMergeableState); - auto data_mergeable_stream = std::make_shared( - select_block.execute().in); - while (Block this_block = data_mergeable_stream->read()) - new_mergeable_blocks->push_back(this_block); - - if (new_mergeable_blocks->empty()) - return; - - { - Poco::FastMutex::ScopedLock lock(live_view.mutex); - - mergeable_blocks = live_view.getMergeableBlocks(); - mergeable_blocks->push_back(new_mergeable_blocks); - - /// Create from 
streams - for (auto & blocks_ : *mergeable_blocks) - { - if (blocks_->empty()) - continue; - auto sample_block = blocks_->front().cloneEmpty(); - BlockInputStreamPtr stream = std::make_shared(std::make_shared(blocks_), sample_block); - from.push_back(std::move(stream)); - } - } - } - - auto parent_storage = context.getTable(live_view.getSelectDatabaseName(), live_view.getSelectTableName()); - auto proxy_storage = std::make_shared(parent_storage, std::move(from), QueryProcessingStage::WithMergeableState); - InterpreterSelectQuery select(live_view.getInnerQuery(), context, proxy_storage, QueryProcessingStage::Complete); - BlockInputStreamPtr data = std::make_shared(select.execute().in); - - /// Squashing is needed here because the view query can generate a lot of blocks - /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY - /// and two-level aggregation is triggered). - data = std::make_shared( - data, context.getGlobalContext().getSettingsRef().min_insert_block_size_rows, context.getGlobalContext().getSettingsRef().min_insert_block_size_bytes); - - copyData(*data, *output); - } + static void writeIntoLiveView( + StorageLiveView & live_view, + const Block & block, + const Context & context); private: String select_database_name; @@ -271,7 +168,7 @@ private: void noUsersThread(const UInt64 & timeout); std::thread no_users_thread; std::atomic shutdown_called{false}; - std::atomic startnousersthread_called{false}; + std::atomic start_no_users_thread_called{false}; UInt64 temporary_live_view_timeout; StorageLiveView( @@ -283,65 +180,4 @@ private: ); }; -class LiveViewBlockOutputStream : public IBlockOutputStream -{ -public: - explicit LiveViewBlockOutputStream(StorageLiveView & storage_) : storage(storage_) {} - - void writePrefix() override - { - new_blocks = std::make_shared(); - new_blocks_metadata = std::make_shared(); - new_hash = std::make_shared(); - } - - void writeSuffix() override - { - UInt128 key; - String key_str; - - new_hash->get128(key.low, key.high); - key_str = key.toHexString(); - - Poco::FastMutex::ScopedLock lock(storage.mutex); - - if (storage.getBlocksHashKey() != key_str) - { - new_blocks_metadata->hash = key_str; - new_blocks_metadata->version = storage.getBlocksVersion() + 1; - - for (auto & block : *new_blocks) - { - block.insert({DataTypeUInt64().createColumnConst( - block.rows(), new_blocks_metadata->version)->convertToFullColumnIfConst(), - std::make_shared(), - "_version"}); - } - - (*storage.blocks_ptr) = new_blocks; - (*storage.blocks_metadata_ptr) = new_blocks_metadata; - - storage.condition.broadcast(); - } - - new_blocks.reset(); - new_blocks_metadata.reset(); - new_hash.reset(); - } - - void write(const Block & block) override - { - new_blocks->push_back(block); - block.updateHash(*new_hash); - } - - Block getHeader() const override { return storage.getHeader(); } - -private: - BlocksPtr new_blocks; - BlocksMetadataPtr new_blocks_metadata; - SipHashPtr new_hash; - StorageLiveView & storage; -}; - } From c4712f1e6e2376232747e839850127238a008061 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 02:22:57 +0300 Subject: [PATCH 141/181] Make the code less bad --- dbms/CMakeLists.txt | 1 + .../PushingToViewsBlockOutputStream.cpp | 2 +- .../PushingToViewsBlockOutputStream.h | 1 - .../Interpreters/InterpreterAlterQuery.cpp | 4 +- .../LiveView}/LiveViewBlockInputStream.h | 38 ++---- .../LiveView/LiveViewBlockOutputStream.h | 74 +++++++++++ .../{ => LiveView}/LiveViewCommands.h | 4 +- 
.../LiveViewEventsBlockInputStream.h | 27 ++-- .../Storages/{ => LiveView}/ProxyStorage.h | 0 .../{ => LiveView}/StorageLiveView.cpp | 125 +++++------------- .../Storages/{ => LiveView}/StorageLiveView.h | 27 ++-- dbms/src/Storages/StorageFactory.cpp | 14 -- dbms/src/Storages/registerStorages.cpp | 2 - 13 files changed, 152 insertions(+), 167 deletions(-) rename dbms/src/{DataStreams => Storages/LiveView}/LiveViewBlockInputStream.h (81%) create mode 100644 dbms/src/Storages/LiveView/LiveViewBlockOutputStream.h rename dbms/src/Storages/{ => LiveView}/LiveViewCommands.h (97%) rename dbms/src/{DataStreams => Storages/LiveView}/LiveViewEventsBlockInputStream.h (91%) rename dbms/src/Storages/{ => LiveView}/ProxyStorage.h (100%) rename dbms/src/Storages/{ => LiveView}/StorageLiveView.cpp (85%) rename dbms/src/Storages/{ => LiveView}/StorageLiveView.h (94%) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index b589c398238..f011cc21103 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -114,6 +114,7 @@ add_headers_and_sources(dbms src/Columns) add_headers_and_sources(dbms src/Storages) add_headers_and_sources(dbms src/Storages/Distributed) add_headers_and_sources(dbms src/Storages/MergeTree) +add_headers_and_sources(dbms src/Storages/LiveView) add_headers_and_sources(dbms src/Client) add_headers_and_sources(dbms src/Formats) add_headers_and_sources(dbms src/Processors) diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index d16d68bf72b..807a9129a75 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index e3f96241b1d..c9b0538e470 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index a3e6824c3a5..61d5e011d37 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -8,9 +8,9 @@ #include #include #include -#include +#include +#include #include -#include #include diff --git a/dbms/src/DataStreams/LiveViewBlockInputStream.h b/dbms/src/Storages/LiveView/LiveViewBlockInputStream.h similarity index 81% rename from dbms/src/DataStreams/LiveViewBlockInputStream.h rename to dbms/src/Storages/LiveView/LiveViewBlockInputStream.h index b3756c9ff6d..345fceaf095 100644 --- a/dbms/src/DataStreams/LiveViewBlockInputStream.h +++ b/dbms/src/Storages/LiveView/LiveViewBlockInputStream.h @@ -1,23 +1,6 @@ -/* Copyright (c) 2018 BlackBerry Limited - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ #pragma once -#include - -#include -#include -#include #include -#include namespace DB @@ -61,8 +44,8 @@ public: if (isCancelled() || storage->is_dropped) return; IBlockInputStream::cancel(kill); - Poco::FastMutex::ScopedLock lock(storage->mutex); - storage->condition.broadcast(); + std::lock_guard lock(storage->mutex); + storage->condition.notify_all(); } Block getHeader() const override { return storage->getHeader(); } @@ -92,14 +75,14 @@ public: NonBlockingResult tryRead() { - return tryRead_(false); + return tryReadImpl(false); } protected: Block readImpl() override { /// try reading - return tryRead_(true).first; + return tryReadImpl(true).first; } /** tryRead method attempts to read a block in either blocking @@ -107,7 +90,7 @@ protected: * then method return empty block with flag set to false * to indicate that method would block to get the next block. */ - NonBlockingResult tryRead_(bool blocking) + NonBlockingResult tryReadImpl(bool blocking) { Block res; @@ -118,7 +101,7 @@ protected: /// If blocks were never assigned get blocks if (!blocks) { - Poco::FastMutex::ScopedLock lock(storage->mutex); + std::lock_guard lock(storage->mutex); if (!active) return { Block(), false }; blocks = (*blocks_ptr); @@ -135,7 +118,7 @@ protected: if (it == end) { { - Poco::FastMutex::ScopedLock lock(storage->mutex); + std::unique_lock lock(storage->mutex); if (!active) return { Block(), false }; /// If we are done iterating over our blocks @@ -162,7 +145,10 @@ protected: while (true) { UInt64 timestamp_usec = static_cast(timestamp.epochMicroseconds()); - bool signaled = storage->condition.tryWait(storage->mutex, std::max(static_cast(0), heartbeat_interval_usec - (timestamp_usec - last_event_timestamp_usec)) / 1000); + + /// Or spurious wakeup. + bool signaled = std::cv_status::no_timeout == storage->condition.wait_for(lock, + std::chrono::microseconds(std::max(UInt64(0), heartbeat_interval_usec - (timestamp_usec - last_event_timestamp_usec)))); if (isCancelled() || storage->is_dropped) { @@ -181,7 +167,7 @@ protected: } } } - return tryRead_(blocking); + return tryReadImpl(blocking); } res = *it; diff --git a/dbms/src/Storages/LiveView/LiveViewBlockOutputStream.h b/dbms/src/Storages/LiveView/LiveViewBlockOutputStream.h new file mode 100644 index 00000000000..548bcf1b86a --- /dev/null +++ b/dbms/src/Storages/LiveView/LiveViewBlockOutputStream.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class LiveViewBlockOutputStream : public IBlockOutputStream +{ +public: + explicit LiveViewBlockOutputStream(StorageLiveView & storage_) : storage(storage_) {} + + void writePrefix() override + { + new_blocks = std::make_shared(); + new_blocks_metadata = std::make_shared(); + new_hash = std::make_shared(); + } + + void writeSuffix() override + { + UInt128 key; + String key_str; + + new_hash->get128(key.low, key.high); + key_str = key.toHexString(); + + std::lock_guard lock(storage.mutex); + + if (storage.getBlocksHashKey() != key_str) + { + new_blocks_metadata->hash = key_str; + new_blocks_metadata->version = storage.getBlocksVersion() + 1; + + for (auto & block : *new_blocks) + { + block.insert({DataTypeUInt64().createColumnConst( + block.rows(), new_blocks_metadata->version)->convertToFullColumnIfConst(), + std::make_shared(), + "_version"}); + } + + (*storage.blocks_ptr) = new_blocks; + (*storage.blocks_metadata_ptr) = new_blocks_metadata; + + storage.condition.notify_all(); + } + + new_blocks.reset(); + new_blocks_metadata.reset(); + new_hash.reset(); + } + + void 
write(const Block & block) override + { + new_blocks->push_back(block); + block.updateHash(*new_hash); + } + + Block getHeader() const override { return storage.getHeader(); } + +private: + using SipHashPtr = std::shared_ptr; + + BlocksPtr new_blocks; + BlocksMetadataPtr new_blocks_metadata; + SipHashPtr new_hash; + StorageLiveView & storage; +}; + +} diff --git a/dbms/src/Storages/LiveViewCommands.h b/dbms/src/Storages/LiveView/LiveViewCommands.h similarity index 97% rename from dbms/src/Storages/LiveViewCommands.h rename to dbms/src/Storages/LiveView/LiveViewCommands.h index 35015a7e5aa..54048c28a5f 100644 --- a/dbms/src/Storages/LiveViewCommands.h +++ b/dbms/src/Storages/LiveView/LiveViewCommands.h @@ -12,9 +12,9 @@ limitations under the License. */ #pragma once -#include #include -#include +#include +#include namespace DB { diff --git a/dbms/src/DataStreams/LiveViewEventsBlockInputStream.h b/dbms/src/Storages/LiveView/LiveViewEventsBlockInputStream.h similarity index 91% rename from dbms/src/DataStreams/LiveViewEventsBlockInputStream.h rename to dbms/src/Storages/LiveView/LiveViewEventsBlockInputStream.h index 93fb6a76372..120d0098536 100644 --- a/dbms/src/DataStreams/LiveViewEventsBlockInputStream.h +++ b/dbms/src/Storages/LiveView/LiveViewEventsBlockInputStream.h @@ -9,11 +9,9 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + #pragma once -#include - -#include #include #include #include @@ -21,7 +19,7 @@ limitations under the License. */ #include #include #include -#include +#include namespace DB @@ -66,8 +64,8 @@ public: if (isCancelled() || storage->is_dropped) return; IBlockInputStream::cancel(kill); - Poco::FastMutex::ScopedLock lock(storage->mutex); - storage->condition.broadcast(); + std::lock_guard lock(storage->mutex); + storage->condition.notify_all(); } Block getHeader() const override @@ -103,7 +101,7 @@ public: NonBlockingResult tryRead() { - return tryRead_(false); + return tryReadImpl(false); } Block getEventBlock() @@ -120,7 +118,7 @@ protected: Block readImpl() override { /// try reading - return tryRead_(true).first; + return tryReadImpl(true).first; } /** tryRead method attempts to read a block in either blocking @@ -128,7 +126,7 @@ protected: * then method return empty block with flag set to false * to indicate that method would block to get the next block. */ - NonBlockingResult tryRead_(bool blocking) + NonBlockingResult tryReadImpl(bool blocking) { if (has_limit && num_updates == static_cast(limit)) { @@ -137,7 +135,7 @@ protected: /// If blocks were never assigned get blocks if (!blocks) { - Poco::FastMutex::ScopedLock lock(storage->mutex); + std::lock_guard lock(storage->mutex); if (!active) return { Block(), false }; blocks = (*blocks_ptr); @@ -155,7 +153,7 @@ protected: if (it == end) { { - Poco::FastMutex::ScopedLock lock(storage->mutex); + std::unique_lock lock(storage->mutex); if (!active) return { Block(), false }; /// If we are done iterating over our blocks @@ -183,7 +181,10 @@ protected: while (true) { UInt64 timestamp_usec = static_cast(timestamp.epochMicroseconds()); - bool signaled = storage->condition.tryWait(storage->mutex, std::max(static_cast(0), heartbeat_interval_usec - (timestamp_usec - last_event_timestamp_usec)) / 1000); + + /// Or spurious wakeup. 
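+                /// Wait out whatever remains of the heartbeat interval,
+                /// counted from the moment the last event was sent.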
+ bool signaled = std::cv_status::no_timeout == storage->condition.wait_for(lock, + std::chrono::microseconds(std::max(UInt64(0), heartbeat_interval_usec - (timestamp_usec - last_event_timestamp_usec)))); if (isCancelled() || storage->is_dropped) { @@ -202,7 +203,7 @@ protected: } } } - return tryRead_(blocking); + return tryReadImpl(blocking); } // move right to the end diff --git a/dbms/src/Storages/ProxyStorage.h b/dbms/src/Storages/LiveView/ProxyStorage.h similarity index 100% rename from dbms/src/Storages/ProxyStorage.h rename to dbms/src/Storages/LiveView/ProxyStorage.h diff --git a/dbms/src/Storages/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp similarity index 85% rename from dbms/src/Storages/StorageLiveView.cpp rename to dbms/src/Storages/LiveView/StorageLiveView.cpp index fdfa6a3a80d..98d48392e09 100644 --- a/dbms/src/Storages/StorageLiveView.cpp +++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp @@ -20,9 +20,6 @@ limitations under the License. */ #include #include #include -#include -#include -#include #include #include #include @@ -30,10 +27,15 @@ limitations under the License. */ #include #include #include +#include + +#include +#include +#include +#include +#include -#include #include -#include #include #include #include @@ -107,70 +109,6 @@ static void checkAllowedQueries(const ASTSelectQuery & query) } -class LiveViewBlockOutputStream : public IBlockOutputStream -{ -public: - explicit LiveViewBlockOutputStream(StorageLiveView & storage_) : storage(storage_) {} - - void writePrefix() override - { - new_blocks = std::make_shared(); - new_blocks_metadata = std::make_shared(); - new_hash = std::make_shared(); - } - - void writeSuffix() override - { - UInt128 key; - String key_str; - - new_hash->get128(key.low, key.high); - key_str = key.toHexString(); - - Poco::FastMutex::ScopedLock lock(storage.mutex); - - if (storage.getBlocksHashKey() != key_str) - { - new_blocks_metadata->hash = key_str; - new_blocks_metadata->version = storage.getBlocksVersion() + 1; - - for (auto & block : *new_blocks) - { - block.insert({DataTypeUInt64().createColumnConst( - block.rows(), new_blocks_metadata->version)->convertToFullColumnIfConst(), - std::make_shared(), - "_version"}); - } - - (*storage.blocks_ptr) = new_blocks; - (*storage.blocks_metadata_ptr) = new_blocks_metadata; - - storage.condition.broadcast(); - } - - new_blocks.reset(); - new_blocks_metadata.reset(); - new_hash.reset(); - } - - void write(const Block & block) override - { - new_blocks->push_back(block); - block.updateHash(*new_hash); - } - - Block getHeader() const override { return storage.getHeader(); } - -private: - using SipHashPtr = std::shared_ptr; - - BlocksPtr new_blocks; - BlocksMetadataPtr new_blocks_metadata; - SipHashPtr new_hash; - StorageLiveView & storage; -}; - - void StorageLiveView::writeIntoLiveView( StorageLiveView & live_view, const Block & block, @@ -182,7 +120,7 @@ void StorageLiveView::writeIntoLiveView( /// just reset blocks to empty and do nothing else /// When first reader comes the blocks will be read. 
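+    /// Every access to the shared block state below happens under live_view.mutex.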
{ - Poco::FastMutex::ScopedLock lock(live_view.mutex); + std::lock_guard lock(live_view.mutex); if (!live_view.hasActiveUsers()) { live_view.reset(); @@ -196,7 +134,7 @@ void StorageLiveView::writeIntoLiveView( BlocksPtr new_mergeable_blocks = std::make_shared(); { - Poco::FastMutex::ScopedLock lock(live_view.mutex); + std::lock_guard lock(live_view.mutex); mergeable_blocks = live_view.getMergeableBlocks(); if (!mergeable_blocks || mergeable_blocks->size() >= context.getGlobalContext().getSettingsRef().max_live_view_insert_blocks_before_refresh) @@ -242,7 +180,7 @@ void StorageLiveView::writeIntoLiveView( return; { - Poco::FastMutex::ScopedLock lock(live_view.mutex); + std::lock_guard lock(live_view.mutex); mergeable_blocks = live_view.getMergeableBlocks(); mergeable_blocks->push_back(new_mergeable_blocks); @@ -435,8 +373,8 @@ void StorageLiveView::noUsersThread(const UInt64 & timeout) { while (1) { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); - if (!no_users_thread_wakeup && !no_users_thread_condition.tryWait(no_users_thread_mutex, timeout * 1000)) + std::unique_lock lock(no_users_thread_mutex); + if (!no_users_thread_condition.wait_for(lock, std::chrono::seconds(timeout), [&] { return no_users_thread_wakeup; })) { no_users_thread_wakeup = false; if (shutdown_called) @@ -487,14 +425,14 @@ void StorageLiveView::startNoUsersThread(const UInt64 & timeout) if (no_users_thread.joinable()) { { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_mutex); no_users_thread_wakeup = true; - no_users_thread_condition.signal(); + no_users_thread_condition.notify_one(); } no_users_thread.join(); } { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_mutex); no_users_thread_wakeup = false; } if (!is_dropped) @@ -516,9 +454,9 @@ void StorageLiveView::shutdown() if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_mutex); no_users_thread_wakeup = true; - no_users_thread_condition.signal(); + no_users_thread_condition.notify_one(); /// Must detach the no users thread /// as we can't join it as it will result /// in a deadlock @@ -536,18 +474,19 @@ void StorageLiveView::drop() global_context.removeDependency( DatabaseAndTableName(select_database_name, select_table_name), DatabaseAndTableName(database_name, table_name)); - Poco::FastMutex::ScopedLock lock(mutex); + + std::lock_guard lock(mutex); is_dropped = true; - condition.broadcast(); + condition.notify_all(); } void StorageLiveView::refresh(const Context & context) { auto alter_lock = lockAlterIntention(context.getCurrentQueryId()); { - Poco::FastMutex::ScopedLock lock(mutex); + std::lock_guard lock(mutex); if (getNewBlocks()) - condition.broadcast(); + condition.notify_all(); } } @@ -562,11 +501,11 @@ BlockInputStreams StorageLiveView::read( /// add user to the blocks_ptr std::shared_ptr stream_blocks_ptr = blocks_ptr; { - Poco::FastMutex::ScopedLock lock(mutex); + std::lock_guard lock(mutex); if (!(*blocks_ptr)) { if (getNewBlocks()) - condition.broadcast(); + condition.notify_all(); } } return { std::make_shared(stream_blocks_ptr, getHeader()) }; @@ -598,17 +537,17 @@ BlockInputStreams StorageLiveView::watch( if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_mutex); no_users_thread_wakeup = true; - no_users_thread_condition.signal(); + no_users_thread_condition.notify_one(); } { - 
Poco::FastMutex::ScopedLock lock(mutex); + std::lock_guard lock(mutex); if (!(*blocks_ptr)) { if (getNewBlocks()) - condition.broadcast(); + condition.notify_all(); } } @@ -623,17 +562,17 @@ BlockInputStreams StorageLiveView::watch( if (no_users_thread.joinable()) { - Poco::FastMutex::ScopedLock lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_mutex); no_users_thread_wakeup = true; - no_users_thread_condition.signal(); + no_users_thread_condition.notify_one(); } { - Poco::FastMutex::ScopedLock lock(mutex); + std::lock_guard lock(mutex); if (!(*blocks_ptr)) { if (getNewBlocks()) - condition.broadcast(); + condition.notify_all(); } } diff --git a/dbms/src/Storages/StorageLiveView.h b/dbms/src/Storages/LiveView/StorageLiveView.h similarity index 94% rename from dbms/src/Storages/StorageLiveView.h rename to dbms/src/Storages/LiveView/StorageLiveView.h index 3b2398a421f..9930d8d6154 100644 --- a/dbms/src/Storages/StorageLiveView.h +++ b/dbms/src/Storages/LiveView/StorageLiveView.h @@ -11,12 +11,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include -#include #include -#include #include -#include + +#include +#include namespace DB @@ -35,6 +34,8 @@ using BlocksMetadataPtr = std::shared_ptr; class StorageLiveView : public ext::shared_ptr_helper, public IStorage { friend struct ext::shared_ptr_helper; +friend class LiveViewBlockInputStream; +friend class LiveViewEventsBlockInputStream; friend class LiveViewBlockOutputStream; public: @@ -55,12 +56,6 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - /// Mutex for the blocks and ready condition - Poco::FastMutex mutex; - /// New blocks ready condition to broadcast to readers - /// that new blocks are available - Poco::Condition condition; - bool isTemporary() { return is_temporary; } /// Check if we have any readers @@ -79,16 +74,16 @@ public: /// Background thread for temporary tables /// which drops this table if there are no users void startNoUsersThread(const UInt64 & timeout); - Poco::FastMutex no_users_thread_mutex; + std::mutex no_users_thread_mutex; bool no_users_thread_wakeup{false}; - Poco::Condition no_users_thread_condition; + std::condition_variable no_users_thread_condition; /// Get blocks hash /// must be called with mutex locked String getBlocksHashKey() { if (*blocks_metadata_ptr) return (*blocks_metadata_ptr)->hash; - return ""; + return {}; } /// Get blocks version /// must be called with mutex locked @@ -157,6 +152,12 @@ private: bool is_temporary {false}; mutable Block sample_block; + /// Mutex for the blocks and ready condition + std::mutex mutex; + /// New blocks ready condition to broadcast to readers + /// that new blocks are available + std::condition_variable condition; + /// Active users std::shared_ptr active_ptr; /// Current data blocks that store query result diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 7d92ce0ea2c..862f76bc3ce 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -68,14 +68,6 @@ StoragePtr StorageFactory::get( name = "LiveView"; } - else if (query.is_live_channel) - { - - if (query.storage) - throw Exception("Specifying ENGINE is not allowed for a LiveChannel", ErrorCodes::INCORRECT_QUERY); - - name = "LiveChannel"; - } else { /// Check for some special types, that are not allowed to be stored in tables. Example: NULL data type. 
@@ -137,12 +129,6 @@ StoragePtr StorageFactory::get( "Direct creation of tables with ENGINE LiveView is not supported, use CREATE LIVE VIEW statement", ErrorCodes::INCORRECT_QUERY); } - else if (name == "LiveChannel") - { - throw Exception( - "Direct creation of tables with ENGINE LiveChannel is not supported, use CREATE LIVE CHANNEL statement", - ErrorCodes::INCORRECT_QUERY); - } } } diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index 811a031c7a3..c21156ea44d 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -25,7 +25,6 @@ void registerStorageJoin(StorageFactory & factory); void registerStorageView(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); void registerStorageLiveView(StorageFactory & factory); -//void registerStorageLiveChannel(StorageFactory & factory); #if USE_HDFS void registerStorageHDFS(StorageFactory & factory); @@ -67,7 +66,6 @@ void registerStorages() registerStorageView(factory); registerStorageMaterializedView(factory); registerStorageLiveView(factory); - //registerStorageLiveChannel(factory); #if USE_HDFS registerStorageHDFS(factory); From 78c3be175ca2ca52b04e18c5e2c690242a6eeb44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 02:46:40 +0300 Subject: [PATCH 142/181] Added experimental setting for LIVE VIEWs --- dbms/src/Core/Settings.h | 2 ++ dbms/src/Interpreters/InterpreterWatchQuery.cpp | 4 ++++ dbms/src/Storages/LiveView/StorageLiveView.cpp | 5 ++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 22b0b5c8d03..6a22869c8dc 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -342,6 +342,8 @@ struct Settings : public SettingsCollection M(SettingUInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.") \ M(SettingBool, check_query_single_value_result, true, "Return check query result as single 1/0 value") \ \ + M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.") \ + \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. 
Will be removed after 2019-08-13") \
diff --git a/dbms/src/Interpreters/InterpreterWatchQuery.cpp b/dbms/src/Interpreters/InterpreterWatchQuery.cpp
index 3ba8e2eadaa..da09022e252 100644
--- a/dbms/src/Interpreters/InterpreterWatchQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterWatchQuery.cpp
@@ -25,6 +25,7 @@ namespace ErrorCodes
     extern const int UNKNOWN_STORAGE;
     extern const int UNKNOWN_TABLE;
     extern const int TOO_MANY_COLUMNS;
+    extern const int SUPPORT_IS_DISABLED;
 }

 BlockInputStreamPtr InterpreterWatchQuery::executeImpl()
@@ -34,6 +35,9 @@ BlockInputStreamPtr InterpreterWatchQuery::executeImpl()

 BlockIO InterpreterWatchQuery::execute()
 {
+    if (!context.getSettingsRef().allow_experimental_live_view)
+        throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", ErrorCodes::SUPPORT_IS_DISABLED);
+
     BlockIO res;
     const ASTWatchQuery & query = typeid_cast<const ASTWatchQuery &>(*query_ptr);
     String database;
diff --git a/dbms/src/Storages/LiveView/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp
index 98d48392e09..3c0d205fa3f 100644
--- a/dbms/src/Storages/LiveView/StorageLiveView.cpp
+++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp
@@ -19,7 +19,6 @@ limitations under the License. */
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -50,6 +49,7 @@ namespace ErrorCodes
     extern const int INCORRECT_QUERY;
     extern const int TABLE_WAS_NOT_DROPPED;
     extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW;
+    extern const int SUPPORT_IS_DISABLED;
 }

 static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
@@ -586,6 +586,9 @@ void registerStorageLiveView(StorageFactory & factory)
 {
     factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args)
     {
+        if (!args.local_context.getSettingsRef().allow_experimental_live_view)
+            throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", ErrorCodes::SUPPORT_IS_DISABLED);
+
         return StorageLiveView::create(args.table_name, args.database_name, args.local_context, args.query, args.columns);
     });
 }

From 96869d405fc132a7443cfbc13d1e583079503db8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 23 Aug 2019 02:48:52 +0300
Subject: [PATCH 143/181] Temporarily disable all LIVE VIEW tests because this feature has subtle bugs that manifest under TSan

---
 .../00960_live_view_watch_events_live.py | 42 ----
 ...0960_live_view_watch_events_live.reference | 0
 .../00961_temporary_live_view_watch.reference | 3 -
 .../00961_temporary_live_view_watch.sql | 18 --
 .../00962_temporary_live_view_watch_live.py | 42 ----
 ...2_temporary_live_view_watch_live.reference | 0
 ...y_live_view_watch_live_timeout.py.disabled | 49 -----
 ...ary_live_view_watch_live_timeout.reference | 0
 .../00964_live_view_watch_events_heartbeat.py | 44 ----
 ...live_view_watch_events_heartbeat.reference | 0
 .../00965_live_view_watch_heartbeat.py | 45 ----
 .../00965_live_view_watch_heartbeat.reference | 0
 .../00966_live_view_watch_events_http.py | 37 ----
 ...0966_live_view_watch_events_http.reference | 0
 .../0_stateless/00967_live_view_watch_http.py | 37 ----
 .../00967_live_view_watch_http.reference | 0
 ...t_format_jsoneachrowwithprogress.reference | 4 -
 ..._select_format_jsoneachrowwithprogress.sql | 12 -
 ...h_format_jsoneachrowwithprogress.reference | 6 -
 ...w_watch_format_jsoneachrowwithprogress.sql | 18 --
 ...0_live_view_watch_events_http_heartbeat.py | 43 ----
 ...view_watch_events_http_heartbeat.reference | 0
.../00971_live_view_watch_http_heartbeat.py | 43 ---- ...1_live_view_watch_http_heartbeat.reference | 0 .../00972_live_view_select_1.reference | 1 - .../0_stateless/00972_live_view_select_1.sql | 7 - .../00973_live_view_select.reference | 4 - .../0_stateless/00973_live_view_select.sql | 18 -- ...ive_view_select_with_aggregation.reference | 2 - ...0974_live_view_select_with_aggregation.sql | 16 -- .../00975_live_view_create.reference | 0 .../0_stateless/00975_live_view_create.sql | 7 - .../00976_live_view_select_version.reference | 3 - .../00976_live_view_select_version.sql | 12 - .../00977_live_view_watch_events.reference | 3 - .../00977_live_view_watch_events.sql | 18 -- .../00978_live_view_watch.reference | 3 - .../0_stateless/00978_live_view_watch.sql | 18 -- .../0_stateless/00979_live_view_watch_live.py | 48 ---- .../00979_live_view_watch_live.reference | 0 ...00980_create_temporary_live_view.reference | 3 - .../00980_create_temporary_live_view.sql | 15 -- .../00991_live_view_watch_event_live.python | 81 ------- ...00991_live_view_watch_event_live.reference | 7 - ...991_live_view_watch_event_live.sh.disabled | 6 - .../00991_live_view_watch_http.python | 63 ------ .../00991_live_view_watch_http.reference | 4 - .../00991_live_view_watch_http.sh.disabled | 6 - ...ry_live_view_watch_events_heartbeat.python | 83 ------- ...live_view_watch_events_heartbeat.reference | 0 ...ve_view_watch_events_heartbeat.sh.disabled | 6 - ...0991_temporary_live_view_watch_live.python | 81 ------- ...1_temporary_live_view_watch_live.reference | 7 - ...temporary_live_view_watch_live.sh.disabled | 6 - .../queries/0_stateless/helpers/client.py | 36 --- .../queries/0_stateless/helpers/httpclient.py | 14 -- .../queries/0_stateless/helpers/httpexpect.py | 73 ------- .../queries/0_stateless/helpers/uexpect.py | 206 ------------------ 58 files changed, 1300 deletions(-) delete mode 100755 dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.py delete mode 100644 dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.reference delete mode 100644 dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.reference delete mode 100644 dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.sql delete mode 100755 dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py delete mode 100644 dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference delete mode 100755 dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.py.disabled delete mode 100644 dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference delete mode 100755 dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py delete mode 100644 dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.reference delete mode 100755 dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py delete mode 100644 dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.reference delete mode 100755 dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.py delete mode 100644 dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.reference delete mode 100755 dbms/tests/queries/0_stateless/00967_live_view_watch_http.py delete mode 100644 dbms/tests/queries/0_stateless/00967_live_view_watch_http.reference delete mode 100644 dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference delete mode 100644 
dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql delete mode 100644 dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference delete mode 100644 dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql delete mode 100755 dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py delete mode 100644 dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.reference delete mode 100755 dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py delete mode 100644 dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.reference delete mode 100644 dbms/tests/queries/0_stateless/00972_live_view_select_1.reference delete mode 100644 dbms/tests/queries/0_stateless/00972_live_view_select_1.sql delete mode 100644 dbms/tests/queries/0_stateless/00973_live_view_select.reference delete mode 100644 dbms/tests/queries/0_stateless/00973_live_view_select.sql delete mode 100644 dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference delete mode 100644 dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql delete mode 100644 dbms/tests/queries/0_stateless/00975_live_view_create.reference delete mode 100644 dbms/tests/queries/0_stateless/00975_live_view_create.sql delete mode 100644 dbms/tests/queries/0_stateless/00976_live_view_select_version.reference delete mode 100644 dbms/tests/queries/0_stateless/00976_live_view_select_version.sql delete mode 100644 dbms/tests/queries/0_stateless/00977_live_view_watch_events.reference delete mode 100644 dbms/tests/queries/0_stateless/00977_live_view_watch_events.sql delete mode 100644 dbms/tests/queries/0_stateless/00978_live_view_watch.reference delete mode 100644 dbms/tests/queries/0_stateless/00978_live_view_watch.sql delete mode 100755 dbms/tests/queries/0_stateless/00979_live_view_watch_live.py delete mode 100644 dbms/tests/queries/0_stateless/00979_live_view_watch_live.reference delete mode 100644 dbms/tests/queries/0_stateless/00980_create_temporary_live_view.reference delete mode 100644 dbms/tests/queries/0_stateless/00980_create_temporary_live_view.sql delete mode 100644 dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.python delete mode 100644 dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.reference delete mode 100755 dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.sh.disabled delete mode 100755 dbms/tests/queries/0_stateless/00991_live_view_watch_http.python delete mode 100644 dbms/tests/queries/0_stateless/00991_live_view_watch_http.reference delete mode 100755 dbms/tests/queries/0_stateless/00991_live_view_watch_http.sh.disabled delete mode 100644 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python delete mode 100644 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference delete mode 100755 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.sh.disabled delete mode 100644 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python delete mode 100644 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference delete mode 100755 dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.sh.disabled delete mode 100644 dbms/tests/queries/0_stateless/helpers/client.py delete mode 100644 dbms/tests/queries/0_stateless/helpers/httpclient.py delete mode 100644 
dbms/tests/queries/0_stateless/helpers/httpexpect.py delete mode 100644 dbms/tests/queries/0_stateless/helpers/uexpect.py diff --git a/dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.py b/dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.py deleted file mode 100755 index b7fc3f4e3a6..00000000000 --- a/dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv EVENTS') - client1.expect('1.*' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect('2.*' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (4),(5),(6)') - client1.expect('3.*' + end_of_block) - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.reference b/dbms/tests/queries/0_stateless/00960_live_view_watch_events_live.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.reference b/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.reference deleted file mode 100644 index 6fbbedf1b21..00000000000 --- a/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.reference +++ /dev/null @@ -1,3 +0,0 @@ -0 1 -6 2 -21 3 diff --git a/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.sql b/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.sql deleted file mode 100644 index c3e2ab8d102..00000000000 --- a/dbms/tests/queries/0_stateless/00961_temporary_live_view_watch.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -WATCH test.lv LIMIT 0; - -INSERT INTO test.mt VALUES (1),(2),(3); - -WATCH test.lv LIMIT 0; - -INSERT INTO test.mt VALUES (4),(5),(6); - -WATCH test.lv LIMIT 0; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py b/dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py deleted file mode 100755 index f27b1213c70..00000000000 --- a/dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, 
os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send('DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv') - client1.expect(r'0.*1' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(r'6.*2' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (4),(5),(6)') - client1.expect(r'21.*3' + end_of_block) - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference b/dbms/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.py.disabled b/dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.py.disabled deleted file mode 100755 index df627c84e49..00000000000 --- a/dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.py.disabled +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send('DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('SET temporary_live_view_timeout=1') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv') - client1.expect(r'0.*1' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client2.expect(prompt) - client1.expect(r'6.*2' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (4),(5),(6)') - client2.expect(prompt) - client1.expect(r'21.*3' + end_of_block) - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('SELECT sleep(1)') - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect('Table test.lv doesn\'t exist') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference 
b/dbms/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py b/dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py deleted file mode 100755 index 5664c0e6c6d..00000000000 --- a/dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('SET live_view_heartbeat_interval=1') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv EVENTS') - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect('2.*' + end_of_block) - client1.expect('Progress: 2.00 rows.*\)') - # wait for heartbeat - client1.expect('Progress: 2.00 rows.*\)') - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.reference b/dbms/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py b/dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py deleted file mode 100755 index 03e22175dff..00000000000 --- a/dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('SET live_view_heartbeat_interval=1') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv') - client1.expect(r'0.*1' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(r'6.*2' + end_of_block) - client1.expect('Progress: 2.00 rows.*\)') - # wait for 
heartbeat - client1.expect('Progress: 2.00 rows.*\)') - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.reference b/dbms/tests/queries/0_stateless/00965_live_view_watch_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.py b/dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.py deleted file mode 100755 index bb9d6152200..00000000000 --- a/dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1: - client1.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - - - with http_client({'method':'GET', 'url': '/?query=WATCH%20test.lv%20EVENTS'}, name='client2>', log=log) as client2: - client2.expect('.*1\n') - client1.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(prompt) - client2.expect('.*2\n') - - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.reference b/dbms/tests/queries/0_stateless/00966_live_view_watch_events_http.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00967_live_view_watch_http.py b/dbms/tests/queries/0_stateless/00967_live_view_watch_http.py deleted file mode 100755 index d3439431eb3..00000000000 --- a/dbms/tests/queries/0_stateless/00967_live_view_watch_http.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1: - client1.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - - - with http_client({'method':'GET', 'url':'/?query=WATCH%20test.lv'}, name='client2>', log=log) as client2: - client2.expect('.*0\t1\n') - client1.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(prompt) - client2.expect('.*6\t2\n') - - 
client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00967_live_view_watch_http.reference b/dbms/tests/queries/0_stateless/00967_live_view_watch_http.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference b/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference deleted file mode 100644 index 5ae423d90d1..00000000000 --- a/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference +++ /dev/null @@ -1,4 +0,0 @@ -{"row":{"a":1}} -{"row":{"a":2}} -{"row":{"a":3}} -{"progress":{"read_rows":"3","read_bytes":"36","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} diff --git a/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql b/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql deleted file mode 100644 index 8c6f4197d54..00000000000 --- a/dbms/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT * FROM test.mt; - -INSERT INTO test.mt VALUES (1),(2),(3); - -SELECT * FROM test.lv FORMAT JSONEachRowWithProgress; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference b/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference deleted file mode 100644 index 287a1ced92d..00000000000 --- a/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference +++ /dev/null @@ -1,6 +0,0 @@ -{"row":{"sum(a)":"0","_version":"1"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} -{"row":{"sum(a)":"6","_version":"2"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} -{"row":{"sum(a)":"21","_version":"3"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}} diff --git a/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql b/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql deleted file mode 100644 index 725a4ad00ed..00000000000 --- a/dbms/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -WATCH test.lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -INSERT INTO test.mt VALUES (1),(2),(3); - -WATCH test.lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -INSERT INTO test.mt VALUES (4),(5),(6); - -WATCH test.lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py b/dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py deleted file mode 100755 index 63628c4a76f..00000000000 --- 
a/dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1: - client1.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - - - with http_client({'method':'GET', 'url': '/?live_view_heartbeat_interval=1&query=WATCH%20test.lv%20EVENTS%20FORMAT%20JSONEachRowWithProgress'}, name='client2>', log=log) as client2: - client2.expect('{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}}\n', escape=True) - client2.expect('{"row":{"version":"1"}', escape=True) - client2.expect('{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}}', escape=True) - # heartbeat is provided by progress message - client2.expect('{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}}', escape=True) - - client1.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(prompt) - - client2.expect('{"row":{"version":"2"}}\n', escape=True) - - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.reference b/dbms/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py b/dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py deleted file mode 100755 index 7bdb47b7caa..00000000000 --- a/dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1: - client1.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - - with http_client({'method':'GET', 'url':'/?live_view_heartbeat_interval=1&query=WATCH%20test.lv%20FORMAT%20JSONEachRowWithProgress'}, name='client2>', log=log) as client2: - client2.expect('"progress".*',) - client2.expect('{"row":{"sum(a)":"0","_version":"1"}}\n', escape=True) - client2.expect('"progress".*\n') - # heartbeat is provided by progress message - 
client2.expect('"progress".*\n') - - client1.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(prompt) - - client2.expect('"progress".*"read_rows":"2".*\n') - client2.expect('{"row":{"sum(a)":"6","_version":"2"}}\n', escape=True) - - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.reference b/dbms/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00972_live_view_select_1.reference b/dbms/tests/queries/0_stateless/00972_live_view_select_1.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/dbms/tests/queries/0_stateless/00972_live_view_select_1.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/dbms/tests/queries/0_stateless/00972_live_view_select_1.sql b/dbms/tests/queries/0_stateless/00972_live_view_select_1.sql deleted file mode 100644 index 661080b577b..00000000000 --- a/dbms/tests/queries/0_stateless/00972_live_view_select_1.sql +++ /dev/null @@ -1,7 +0,0 @@ -DROP TABLE IF EXISTS test.lv; - -CREATE LIVE VIEW test.lv AS SELECT 1; - -SELECT * FROM test.lv; - -DROP TABLE test.lv; diff --git a/dbms/tests/queries/0_stateless/00973_live_view_select.reference b/dbms/tests/queries/0_stateless/00973_live_view_select.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- a/dbms/tests/queries/0_stateless/00973_live_view_select.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/dbms/tests/queries/0_stateless/00973_live_view_select.sql b/dbms/tests/queries/0_stateless/00973_live_view_select.sql deleted file mode 100644 index ff4a45ffcc1..00000000000 --- a/dbms/tests/queries/0_stateless/00973_live_view_select.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -INSERT INTO test.mt VALUES (1),(2),(3); - -SELECT *,_version FROM test.lv; -SELECT *,_version FROM test.lv; - -INSERT INTO test.mt VALUES (1),(2),(3); - -SELECT *,_version FROM test.lv; -SELECT *,_version FROM test.lv; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference b/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference deleted file mode 100644 index 6d50f0e9c3a..00000000000 --- a/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference +++ /dev/null @@ -1,2 +0,0 @@ -6 -21 diff --git a/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql b/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql deleted file mode 100644 index 3c11f855c9d..00000000000 --- a/dbms/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql +++ /dev/null @@ -1,16 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT * FROM test.mt; - -INSERT INTO test.mt VALUES (1),(2),(3); - -SELECT sum(a) FROM test.lv; - -INSERT INTO test.mt VALUES (4),(5),(6); - -SELECT sum(a) FROM test.lv; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00975_live_view_create.reference 
b/dbms/tests/queries/0_stateless/00975_live_view_create.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00975_live_view_create.sql b/dbms/tests/queries/0_stateless/00975_live_view_create.sql deleted file mode 100644 index 1c929b15b00..00000000000 --- a/dbms/tests/queries/0_stateless/00975_live_view_create.sql +++ /dev/null @@ -1,7 +0,0 @@ -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT * FROM test.mt; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00976_live_view_select_version.reference b/dbms/tests/queries/0_stateless/00976_live_view_select_version.reference deleted file mode 100644 index 453bd800469..00000000000 --- a/dbms/tests/queries/0_stateless/00976_live_view_select_version.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 1 -2 1 -3 1 diff --git a/dbms/tests/queries/0_stateless/00976_live_view_select_version.sql b/dbms/tests/queries/0_stateless/00976_live_view_select_version.sql deleted file mode 100644 index 5f3ab1f7546..00000000000 --- a/dbms/tests/queries/0_stateless/00976_live_view_select_version.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT * FROM test.mt; - -INSERT INTO test.mt VALUES (1),(2),(3); - -SELECT *,_version FROM test.lv; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00977_live_view_watch_events.reference b/dbms/tests/queries/0_stateless/00977_live_view_watch_events.reference deleted file mode 100644 index 01e79c32a8c..00000000000 --- a/dbms/tests/queries/0_stateless/00977_live_view_watch_events.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 -2 -3 diff --git a/dbms/tests/queries/0_stateless/00977_live_view_watch_events.sql b/dbms/tests/queries/0_stateless/00977_live_view_watch_events.sql deleted file mode 100644 index a3b84e8d4c1..00000000000 --- a/dbms/tests/queries/0_stateless/00977_live_view_watch_events.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -WATCH test.lv EVENTS LIMIT 0; - -INSERT INTO test.mt VALUES (1),(2),(3); - -WATCH test.lv EVENTS LIMIT 0; - -INSERT INTO test.mt VALUES (4),(5),(6); - -WATCH test.lv EVENTS LIMIT 0; - -DROP TABLE test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00978_live_view_watch.reference b/dbms/tests/queries/0_stateless/00978_live_view_watch.reference deleted file mode 100644 index 6fbbedf1b21..00000000000 --- a/dbms/tests/queries/0_stateless/00978_live_view_watch.reference +++ /dev/null @@ -1,3 +0,0 @@ -0 1 -6 2 -21 3 diff --git a/dbms/tests/queries/0_stateless/00978_live_view_watch.sql b/dbms/tests/queries/0_stateless/00978_live_view_watch.sql deleted file mode 100644 index abe4a6c32ae..00000000000 --- a/dbms/tests/queries/0_stateless/00978_live_view_watch.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -WATCH test.lv LIMIT 0; - -INSERT INTO test.mt VALUES (1),(2),(3); - -WATCH test.lv LIMIT 0; - -INSERT INTO test.mt VALUES (4),(5),(6); - -WATCH test.lv LIMIT 0; - -DROP TABLE 
test.lv; -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00979_live_view_watch_live.py b/dbms/tests/queries/0_stateless/00979_live_view_watch_live.py deleted file mode 100755 index 948e4c93662..00000000000 --- a/dbms/tests/queries/0_stateless/00979_live_view_watch_live.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -#log=sys.stdout - -with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send('DROP TABLE IF EXISTS test.lv') - client1.expect(prompt) - client1.send(' DROP TABLE IF EXISTS test.mt') - client1.expect(prompt) - client1.send('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()') - client1.expect(prompt) - client1.send('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt') - client1.expect(prompt) - client1.send('WATCH test.lv') - client1.expect(r'0.*1' + end_of_block) - client2.send('INSERT INTO test.mt VALUES (1),(2),(3)') - client1.expect(r'6.*2' + end_of_block) - client2.expect(prompt) - client2.send('INSERT INTO test.mt VALUES (4),(5),(6)') - client1.expect(r'21.*3' + end_of_block) - client2.expect(prompt) - for i in range(1,129): - client2.send('INSERT INTO test.mt VALUES (1)') - client1.expect(r'%d.*%d' % (21+i, 3+i) + end_of_block) - client2.expect(prompt) - # send Ctrl-C - client1.send('\x03', eol='') - match = client1.expect('(%s)|([#\$] )' % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send('DROP TABLE test.lv') - client1.expect(prompt) - client1.send('DROP TABLE test.mt') - client1.expect(prompt) diff --git a/dbms/tests/queries/0_stateless/00979_live_view_watch_live.reference b/dbms/tests/queries/0_stateless/00979_live_view_watch_live.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.reference b/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.reference deleted file mode 100644 index 7f9fcbb2e9c..00000000000 --- a/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.reference +++ /dev/null @@ -1,3 +0,0 @@ -temporary_live_view_timeout 5 -live_view_heartbeat_interval 15 -0 diff --git a/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.sql b/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.sql deleted file mode 100644 index 8cd6ee06ace..00000000000 --- a/dbms/tests/queries/0_stateless/00980_create_temporary_live_view.sql +++ /dev/null @@ -1,15 +0,0 @@ -DROP TABLE IF EXISTS test.lv; -DROP TABLE IF EXISTS test.mt; - -SELECT name, value from system.settings WHERE name = 'temporary_live_view_timeout'; -SELECT name, value from system.settings WHERE name = 'live_view_heartbeat_interval'; - -SET temporary_live_view_timeout=1; -CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple(); -CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt; - -SHOW TABLES LIKE 'lv'; -SELECT sleep(2); -SHOW TABLES LIKE 'lv'; - -DROP TABLE test.mt; diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.python b/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.python deleted file mode 100644 index 782671cdfaf..00000000000 --- 
a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.python +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -import subprocess -import threading -import Queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - print(line) - assert (line == '0\t1') - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - print(line) - assert (line == '6\t2') - - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - line = q.get() - print(line) - assert (line == '21\t3') - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. - send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - print(line) - assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.reference b/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.reference deleted file mode 100644 index 1e94cdade41..00000000000 --- a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.reference +++ /dev/null @@ -1,7 +0,0 @@ -0 1 -0 1 -6 2 -6 2 -21 3 -21 3 -None diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.sh.disabled b/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.sh.disabled deleted file mode 100755 index 10e4e98b2e3..00000000000 --- a/dbms/tests/queries/0_stateless/00991_live_view_watch_event_live.sh.disabled +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -python $CURDIR/00991_live_view_watch_event_live.python diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.python b/dbms/tests/queries/0_stateless/00991_live_view_watch_http.python deleted file mode 100755 index 938547ca0cb..00000000000 --- a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.python +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -import subprocess -import threading -import Queue as queue -import os -import sys - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_http_query(query): - cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10']) - cmd += ['-sSN', CLICKHOUSE_URL, '-d', query] - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def read_lines_and_push_to_queue(pipe, queue): - for line in iter(pipe.readline, ''): - line = line.strip() - print(line) - sys.stdout.flush() - queue.put(line) - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - pipe = send_http_query('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q)) - thread.start() - - line = q.get() - print(line) - assert (line == '0\t1') - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - print(line) - assert (line == '6\t2') - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.reference b/dbms/tests/queries/0_stateless/00991_live_view_watch_http.reference deleted file mode 100644 index 489457d751b..00000000000 --- a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.reference +++ /dev/null @@ -1,4 +0,0 @@ -0 1 -0 1 -6 2 -6 2 diff --git a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.sh.disabled b/dbms/tests/queries/0_stateless/00991_live_view_watch_http.sh.disabled deleted file mode 100755 index 88cce77f595..00000000000 --- a/dbms/tests/queries/0_stateless/00991_live_view_watch_http.sh.disabled +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -python $CURDIR/00991_live_view_watch_http.python diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python deleted file mode 100644 index 70063adc6e3..00000000000 --- a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python - -import subprocess -import threading -import Queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query, '--live_view_heartbeat_interval=1', '--progress'] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - # print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - # print(line) - assert (line.endswith('0\t1')) - assert ('Progress: 0.00 rows' in line) - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - assert (line.endswith('6\t2')) - assert ('Progress: 1.00 rows' in line) - - # send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - # line = q.get() - # print(line) - # assert (line.endswith('6\t2')) - # assert ('Progress: 1.00 rows' in line) - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. - send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - # print(line) - # assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.sh.disabled b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.sh.disabled deleted file mode 100755 index f7aa13d52b3..00000000000 --- a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.sh.disabled +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -python $CURDIR/00991_temporary_live_view_watch_events_heartbeat.python diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python deleted file mode 100644 index d290018a02c..00000000000 --- a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -import subprocess -import threading -import Queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE TEMPORARY LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - print(line) - assert (line == '0\t1') - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - print(line) - assert (line == '6\t2') - - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - line = q.get() - print(line) - assert (line == '21\t3') - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. - send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - print(line) - assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference deleted file mode 100644 index 1e94cdade41..00000000000 --- a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference +++ /dev/null @@ -1,7 +0,0 @@ -0 1 -0 1 -6 2 -6 2 -21 3 -21 3 -None diff --git a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.sh.disabled b/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.sh.disabled deleted file mode 100755 index 4d01d1c3a8e..00000000000 --- a/dbms/tests/queries/0_stateless/00991_temporary_live_view_watch_live.sh.disabled +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -python $CURDIR/00991_temporary_live_view_watch_live.python diff --git a/dbms/tests/queries/0_stateless/helpers/client.py b/dbms/tests/queries/0_stateless/helpers/client.py deleted file mode 100644 index f3938d3bf63..00000000000 --- a/dbms/tests/queries/0_stateless/helpers/client.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import sys -import time - -CURDIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(CURDIR)) - -import uexpect - -prompt = ':\) ' -end_of_block = r'.*\r\n.*\r\n' - -class client(object): - def __init__(self, command=None, name='', log=None): - self.client = uexpect.spawn(['/bin/bash','--noediting']) - if command is None: - command = os.environ.get('CLICKHOUSE_BINARY', 'clickhouse') + '-client' - self.client.command = command - self.client.eol('\r') - self.client.logger(log, prefix=name) - self.client.timeout(20) - self.client.expect('[#\$] ', timeout=2) - self.client.send(command) - - def __enter__(self): - return self.client.__enter__() - - def __exit__(self, type, value, traceback): - self.client.reader['kill_event'].set() - # send Ctrl-C - self.client.send('\x03', eol='') - time.sleep(0.3) - self.client.send('quit', eol='\r') - self.client.send('\x03', eol='') - return self.client.__exit__(type, value, traceback) diff --git a/dbms/tests/queries/0_stateless/helpers/httpclient.py b/dbms/tests/queries/0_stateless/helpers/httpclient.py deleted file mode 100644 index a42fad2cbc3..00000000000 --- a/dbms/tests/queries/0_stateless/helpers/httpclient.py +++ /dev/null @@ -1,14 +0,0 @@ -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(CURDIR)) - -import httpexpect - -def client(request, name='', log=None): - client = httpexpect.spawn({'host':'localhost','port':8123}, request) - client.logger(log, prefix=name) - client.timeout(20) - return client diff --git a/dbms/tests/queries/0_stateless/helpers/httpexpect.py b/dbms/tests/queries/0_stateless/helpers/httpexpect.py deleted file mode 100644 index e440dafce4e..00000000000 --- a/dbms/tests/queries/0_stateless/helpers/httpexpect.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2019 Vitaliy Zakaznikov -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys -import httplib - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, CURDIR) - -import uexpect - -from threading import Thread, Event -from Queue import Queue, Empty - -class IO(uexpect.IO): - def __init__(self, connection, response, queue, reader): - self.connection = connection - self.response = response - super(IO, self).__init__(None, None, queue, reader) - - def write(self, data): - raise NotImplementedError - - def close(self, force=True): - self.reader['kill_event'].set() - self.connection.close() - if self._logger: - self._logger.write('\n') - self._logger.flush() - - -def reader(response, queue, kill_event): - while True: - try: - if kill_event.is_set(): - break - data = response.read(1) - queue.put(data) - except Exception, e: - if kill_event.is_set(): - break - raise - -def spawn(connection, request): - connection = httplib.HTTPConnection(**connection) - connection.request(**request) - response = connection.getresponse() - - queue = Queue() - reader_kill_event = Event() - thread = Thread(target=reader, args=(response, queue, reader_kill_event)) - thread.daemon = True - thread.start() - - return IO(connection, response, queue, reader={'thread':thread, 'kill_event':reader_kill_event}) - -if __name__ == '__main__': - with http({'host':'localhost','port':8123},{'method':'GET', 'url':'?query=SELECT%201'}) as client: - client.logger(sys.stdout) - client.timeout(2) - print client.response.status, client.response.reason - client.expect('1\n') diff --git a/dbms/tests/queries/0_stateless/helpers/uexpect.py b/dbms/tests/queries/0_stateless/helpers/uexpect.py deleted file mode 100644 index f71b32a53e1..00000000000 --- a/dbms/tests/queries/0_stateless/helpers/uexpect.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2019 Vitaliy Zakaznikov -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os
-import pty
-import time
-import sys
-import re
-
-from threading import Thread, Event
-from subprocess import Popen
-from Queue import Queue, Empty
-
-class TimeoutError(Exception):
-    def __init__(self, timeout):
-        self.timeout = timeout
-
-    def __str__(self):
-        return 'Timeout %.3fs' % float(self.timeout)
-
-class ExpectTimeoutError(Exception):
-    def __init__(self, pattern, timeout, buffer):
-        self.pattern = pattern
-        self.timeout = timeout
-        self.buffer = buffer
-
-    def __str__(self):
-        s = 'Timeout %.3fs ' % float(self.timeout)
-        if self.pattern:
-            s += 'for %s ' % repr(self.pattern.pattern)
-        if self.buffer:
-            s += 'buffer %s ' % repr(self.buffer[:])
-            s += 'or \'%s\'' % ','.join(['%x' % ord(c) for c in self.buffer[:]])
-        return s
-
-class IO(object):
-    class EOF(object):
-        pass
-
-    class Timeout(object):
-        pass
-
-    EOF = EOF
-    TIMEOUT = Timeout
-
-    class Logger(object):
-        def __init__(self, logger, prefix=''):
-            self._logger = logger
-            self._prefix = prefix
-
-        def write(self, data):
-            self._logger.write(('\n' + data).replace('\n','\n' + self._prefix))
-
-        def flush(self):
-            self._logger.flush()
-
-    def __init__(self, process, master, queue, reader):
-        self.process = process
-        self.master = master
-        self.queue = queue
-        self.buffer = None
-        self.before = None
-        self.after = None
-        self.match = None
-        self.pattern = None
-        self.reader = reader
-        self._timeout = None
-        self._logger = None
-        self._eol = ''
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self.close()
-
-    def logger(self, logger=None, prefix=''):
-        if logger:
-            self._logger = self.Logger(logger, prefix=prefix)
-        return self._logger
-
-    def timeout(self, timeout=None):
-        if timeout:
-            self._timeout = timeout
-        return self._timeout
-
-    def eol(self, eol=None):
-        if eol:
-            self._eol = eol
-        return self._eol
-
-    def close(self, force=True):
-        self.reader['kill_event'].set()
-        os.system('pkill -TERM -P %d' % self.process.pid)
-        if force:
-            self.process.kill()
-        else:
-            self.process.terminate()
-        os.close(self.master)
-        if self._logger:
-            self._logger.write('\n')
-            self._logger.flush()
-
-    def send(self, data, eol=None):
-        if eol is None:
-            eol = self._eol
-        return self.write(data + eol)
-
-    def write(self, data):
-        return os.write(self.master, data)
-
-    def expect(self, pattern, timeout=None, escape=False):
-        self.match = None
-        self.before = None
-        self.after = None
-        if escape:
-            pattern = re.escape(pattern)
-        pattern = re.compile(pattern)
-        if timeout is None:
-            timeout = self._timeout
-        timeleft = timeout
-        while True:
-            start_time = time.time()
-            if self.buffer is not None:
-                self.match = pattern.search(self.buffer, 0)
-                if self.match is not None:
-                    self.after = self.buffer[self.match.start():self.match.end()]
-                    self.before = self.buffer[:self.match.start()]
-                    self.buffer = self.buffer[self.match.end():]
-                    break
-            if timeleft < 0:
-                break
-            try:
-                data = self.read(timeout=timeleft, raise_exception=True)
-            except TimeoutError:
-                if self._logger:
-                    self._logger.write((self.buffer or '') + '\n')
-                    self._logger.flush()
-                exception = ExpectTimeoutError(pattern, timeout, self.buffer)
-                self.buffer = None
-                raise exception
-            timeleft -= (time.time() - start_time)
-            if data:
-                self.buffer = (self.buffer + data) if self.buffer else data
-        if self._logger:
-            self._logger.write((self.before or '') + (self.after or ''))
-            self._logger.flush()
-        if self.match is None:
-            exception = ExpectTimeoutError(pattern, timeout, self.buffer)
-            self.buffer = None
-            raise exception
-        return self.match
-
-    def read(self, timeout=0, raise_exception=False):
-        data = ''
-        timeleft = timeout
-        try:
-            while timeleft >= 0 :
-                start_time = time.time()
-                data += self.queue.get(timeout=timeleft)
-                if data:
-                    break
-                timeleft -= (time.time() - start_time)
-        except Empty:
-            if data:
-                return data
-            if raise_exception:
-                raise TimeoutError(timeout)
-            pass
-        if not data and raise_exception:
-            raise TimeoutError(timeout)
-
-        return data
-
-def spawn(command):
-    master, slave = pty.openpty()
-    process = Popen(command, preexec_fn=os.setsid, stdout=slave, stdin=slave, stderr=slave, bufsize=1)
-    os.close(slave)
-
-    queue = Queue()
-    reader_kill_event = Event()
-    thread = Thread(target=reader, args=(process, master, queue, reader_kill_event))
-    thread.daemon = True
-    thread.start()
-
-    return IO(process, master, queue, reader={'thread':thread, 'kill_event':reader_kill_event})
-
-def reader(process, out, queue, kill_event):
-    while True:
-        try:
-            data = os.read(out, 65536)
-            queue.put(data)
-        except:
-            if kill_event.is_set():
-                break
-            raise

From cff8ec43f992ca2daff2141ea15cd964c7401168 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 23 Aug 2019 04:31:04 +0300
Subject: [PATCH 144/181] Rename neighbour -> neighbor

---
 dbms/src/Functions/{neighbour.cpp => neighbor.cpp}  | 14 +++++++-------
 .../Functions/registerFunctionsMiscellaneous.cpp    |  4 ++--
 .../en/query_language/functions/other_functions.md  |  4 ++--
 .../ru/query_language/functions/other_functions.md  |  4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)
 rename dbms/src/Functions/{neighbour.cpp => neighbor.cpp} (96%)

diff --git a/dbms/src/Functions/neighbour.cpp b/dbms/src/Functions/neighbor.cpp
similarity index 96%
rename from dbms/src/Functions/neighbour.cpp
rename to dbms/src/Functions/neighbor.cpp
index 02cd4df4996..6ac2c966016 100644
--- a/dbms/src/Functions/neighbour.cpp
+++ b/dbms/src/Functions/neighbor.cpp
@@ -21,17 +21,17 @@ namespace ErrorCodes
 // | c1 |
 // | 10 |
 // | 20 |
-// SELECT c1, neighbour(c1, 1) as c2:
+// SELECT c1, neighbor(c1, 1) as c2:
 // | c1 | c2 |
 // | 10 | 20 |
 // | 20 | 0 |
-class FunctionNeighbour : public IFunction
+class FunctionNeighbor : public IFunction
 {
 public:
-    static constexpr auto name = "neighbour";
-    static FunctionPtr create(const Context & context) { return std::make_shared<FunctionNeighbour>(context); }
+    static constexpr auto name = "neighbor";
+    static FunctionPtr create(const Context & context) { return std::make_shared<FunctionNeighbor>(context); }
 
-    FunctionNeighbour(const Context & context_) : context(context_) {}
+    FunctionNeighbor(const Context & context_) : context(context_) {}
 
     /// Get the name of the function.
     String getName() const override { return name; }
@@ -255,9 +255,9 @@ private:
     const Context & context;
 };
 
-void registerFunctionNeighbour(FunctionFactory & factory)
+void registerFunctionNeighbor(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionNeighbour>();
+    factory.registerFunction<FunctionNeighbor>();
 }
 
 }
diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
index e3d4714638e..1d95844ce9b 100644
--- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
+++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
@@ -18,7 +18,7 @@ void registerFunctionBlockSize(FunctionFactory &);
 void registerFunctionBlockNumber(FunctionFactory &);
 void registerFunctionRowNumberInBlock(FunctionFactory &);
 void registerFunctionRowNumberInAllBlocks(FunctionFactory &);
-void registerFunctionNeighbour(FunctionFactory &);
+void registerFunctionNeighbor(FunctionFactory &);
 void registerFunctionSleep(FunctionFactory &);
 void registerFunctionSleepEachRow(FunctionFactory &);
 void registerFunctionMaterialize(FunctionFactory &);
@@ -70,7 +70,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionBlockNumber(factory);
     registerFunctionRowNumberInBlock(factory);
     registerFunctionRowNumberInAllBlocks(factory);
-    registerFunctionNeighbour(factory);
+    registerFunctionNeighbor(factory);
     registerFunctionSleep(factory);
     registerFunctionSleepEachRow(factory);
     registerFunctionMaterialize(factory);
diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md
index 36c40fc4b02..349397059af 100644
--- a/docs/en/query_language/functions/other_functions.md
+++ b/docs/en/query_language/functions/other_functions.md
@@ -314,7 +314,7 @@ Returns the ordinal number of the row in the data block. Different data blocks a
 
 Returns the ordinal number of the row in the data block. This function only considers the affected data blocks.
 
-## neighbour(column, offset\[, default_value\])
+## neighbor(column, offset\[, default_value\])
 
 Returns value for `column`, in `offset` distance from current row.
 This function is a partial implementation of [window functions](https://en.wikipedia.org/wiki/SQL_window_function) LEAD() and LAG().
@@ -330,7 +330,7 @@ WITH toDate('2018-01-01') AS start_date
 SELECT
     toStartOfMonth(start_date + (number * 32)) AS month,
     toInt32(month) % 100 AS money,
-    neighbour(money, -12) AS prev_year,
+    neighbor(money, -12) AS prev_year,
     round(prev_year / money, 2) AS year_over_year
 FROM numbers(16)
 ```
diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md
index 41cd7c8c63b..fdc46a0d4ee 100644
--- a/docs/ru/query_language/functions/other_functions.md
+++ b/docs/ru/query_language/functions/other_functions.md
@@ -291,7 +291,7 @@ SELECT
 ## rowNumberInAllBlocks()
 Возвращает порядковый номер строки в блоке данных. Функция учитывает только задействованные блоки данных.
 
-## neighbour(column, offset\[, default_value\])
+## neighbor(column, offset\[, default_value\])
 Функция позволяет получить доступ к значению в колонке `column`, находящемуся на смещении `offset` относительно текущей строки.
 Является частичной реализацией [оконных функций](https://en.wikipedia.org/wiki/SQL_window_function) LEAD() и LAG().
 
@@ -307,7 +307,7 @@ WITH toDate('2018-01-01') AS start_date SELECT toStartOfMonth(start_date + (number * 32)) AS month, toInt32(month) % 100 AS money, - neighbour(money, -12) AS prev_year, + neighbor(money, -12) AS prev_year, round(prev_year / money, 2) AS year_over_year FROM numbers(16) ``` From e3bd572fc75f20d83f421e00981524e6bcd1b8ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 05:00:35 +0300 Subject: [PATCH 145/181] Removed unused settings --- dbms/src/Core/Settings.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 1c2cab8b860..3cc5d291708 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -350,8 +350,6 @@ struct Settings : public SettingsCollection \ M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.") \ M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.") \ - M(SettingSeconds, temporary_live_channel_timeout, DEFAULT_TEMPORARY_LIVE_CHANNEL_TIMEOUT_SEC, "Timeout after which temporary live channel is deleted.") \ - M(SettingMilliseconds, alter_channel_wait_ms, DEFAULT_ALTER_LIVE_CHANNEL_WAIT_MS, "The wait time for alter channel request.") \ M(SettingUInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.") \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) From 99f4c9c8130a742d532294ce5edf2ac6cfd86a8c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 05:01:24 +0300 Subject: [PATCH 146/181] Moved settings that were in a wrong place --- dbms/src/Core/Settings.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 3cc5d291708..ff3c8fb5890 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -343,14 +343,13 @@ struct Settings : public SettingsCollection M(SettingBool, check_query_single_value_result, true, "Return check query result as single 1/0 value") \ \ M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.") \ + M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.") \ + M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.") \ + M(SettingUInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.") \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. 
Will be removed after 2019-08-13") \ - \ - M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.") \ - M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.") \ - M(SettingUInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.") \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) From 1222973cb3fa6e267c6bb7d29c44fef2beafe173 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 05:02:14 +0300 Subject: [PATCH 147/181] Function "neighbor": merging #5925 --- dbms/src/Functions/neighbor.cpp | 50 +++++++++++++++------------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/dbms/src/Functions/neighbor.cpp b/dbms/src/Functions/neighbor.cpp index 6ac2c966016..02d28028d3b 100644 --- a/dbms/src/Functions/neighbor.cpp +++ b/dbms/src/Functions/neighbor.cpp @@ -66,7 +66,6 @@ public: "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + " - can not be Nullable", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - // check that default value column has supertype with first argument if (number_of_arguments == 3) { @@ -99,10 +98,10 @@ public: { if (isColumnConst(*default_values_column)) { - Field constant_value = (*default_values_column)[0]; - for (size_t row = 0; row < row_count; row++) + const IColumn & constant_content = assert_cast(*default_values_column).getDataColumn(); + for (size_t row = 0; row < row_count; ++row) { - target->insert(constant_value); + target->insertFrom(constant_content, 0); } } else @@ -112,7 +111,7 @@ public: } else { - for (size_t row = 0; row < row_count; row++) + for (size_t row = 0; row < row_count; ++row) { target->insertDefault(); } @@ -121,37 +120,33 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - auto offset_structure = block.getByPosition(arguments[1]); + const ColumnWithTypeAndName & source_column_name_and_type = block.getByPosition(arguments[0]); + const DataTypePtr & result_type = block.getByPosition(result).type; - ColumnPtr & offset_column = offset_structure.column; + ColumnPtr source_column = source_column_name_and_type.column; - auto is_constant_offset = isColumnConst(*offset_structure.column); - ColumnPtr default_values_column = nullptr; - if (arguments.size() == 3) - { - default_values_column = block.getByPosition(arguments[2]).column; - } - - ColumnWithTypeAndName & source_column_name_and_type = block.getByPosition(arguments[0]); - DataTypes types = {source_column_name_and_type.type}; - if (default_values_column) - { - types.push_back(block.getByPosition(arguments[2]).type); - } - const DataTypePtr & result_type = getLeastSupertype(types); - auto source_column = source_column_name_and_type.column; - - // adjust source and default values columns to resulting datatype + // adjust source and default values columns to resulting data type if (!source_column_name_and_type.type->equals(*result_type)) { source_column = castColumn(source_column_name_and_type, result_type, context); } - if (default_values_column && !block.getByPosition(arguments[2]).type->equals(*result_type)) + ColumnPtr default_values_column; + + /// Has argument with default value: neighbor(source, offset, default) + if (arguments.size() == 3) { - default_values_column 
= castColumn(block.getByPosition(arguments[2]), result_type, context); + default_values_column = block.getByPosition(arguments[2]).column; + + if (!block.getByPosition(arguments[2]).type->equals(*result_type)) + default_values_column = castColumn(block.getByPosition(arguments[2]), result_type, context); } + const auto & offset_structure = block.getByPosition(arguments[1]); + ColumnPtr offset_column = offset_structure.column; + + auto is_constant_offset = isColumnConst(*offset_structure.column); + // since we are working with both signed and unsigned - we'll try to use Int64 for handling all of them const DataTypePtr desired_type = std::make_shared(); if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) @@ -161,6 +156,7 @@ public: if (isColumnConst(*source_column)) { + /// NOTE Inconsistency when default_values are specified. auto column = result_type->createColumnConst(input_rows_count, (*source_column)[0]); block.getByPosition(result).column = std::move(column); } @@ -199,7 +195,7 @@ public: else { // with dynamic offset - handle row by row - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { Int64 offset_value = offset_column->getInt(row); if (offset_value == 0) From ff9e92eab9ace56801f032e1bf3990217ee456d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 05:53:09 +0300 Subject: [PATCH 148/181] Renamed function in test --- .../0_stateless/00957_neighbor.reference | 69 +++++++++++++++++++ .../queries/0_stateless/00957_neighbor.sql | 42 +++++++++++ 2 files changed, 111 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00957_neighbor.reference create mode 100644 dbms/tests/queries/0_stateless/00957_neighbor.sql diff --git a/dbms/tests/queries/0_stateless/00957_neighbor.reference b/dbms/tests/queries/0_stateless/00957_neighbor.reference new file mode 100644 index 00000000000..5a523d2c575 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_neighbor.reference @@ -0,0 +1,69 @@ +Zero offset +0 0 +1 1 +2 2 +Nullable values +\N 0 \N +\N 1 2 +2 2 \N +Result with different type +0 1 +1 2 +2 -10 +Offset > block +0 0 +1 0 +2 0 +Abs(Offset) > block +0 0 +1 0 +2 0 +Positive offset +0 1 +1 2 +2 0 +Negative offset +0 1 +1 2 +2 0 +Positive offset with defaults +0 2 +1 3 +2 12 +3 13 +Negative offset with defaults +0 10 +1 11 +2 0 +3 1 +Positive offset with const defaults +0 1 +1 2 +2 1000 +Negative offset with const defaults +0 1000 +1 0 +2 1 +Dynamic column and offset, out of bounds +0 0 0 +1 2 3 +2 4 20 +3 6 30 +Dynamic column and offset, negative +0 0 0 +1 -2 10 +2 -4 20 +3 -6 30 +4 -8 40 +5 -10 50 +Dynamic column and offset, without defaults +0 4 4 +1 2 3 +2 0 2 +3 -2 1 +4 -4 0 +5 -6 0 +Constant column +0 1000 +1 1000 +2 1000 diff --git a/dbms/tests/queries/0_stateless/00957_neighbor.sql b/dbms/tests/queries/0_stateless/00957_neighbor.sql new file mode 100644 index 00000000000..2f701d4d5c7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_neighbor.sql @@ -0,0 +1,42 @@ +-- no arguments +select neighbor(); -- { serverError 42 } +-- single argument +select neighbor(1); -- { serverError 42 } +-- greater than 3 arguments +select neighbor(1,2,3,4); -- { serverError 42 } +-- bad default value +select neighbor(dummy, 1, 'hello'); -- { serverError 43 } +-- types without common supertype (UInt64 and Int8) +select number, neighbor(number, 1, -10) from numbers(3); -- { serverError 43 } +-- nullable offset is not allowed +select number, if(number > 1, number, null) as offset, neighbor(number, 
offset) from numbers(3); -- { serverError 43 } +select 'Zero offset'; +select number, neighbor(number, 0) from numbers(3); +select 'Nullable values'; +select if(number > 1, number, null) as value, number as offset, neighbor(value, offset) as neighbor from numbers(3); +select 'Result with different type'; +select toInt32(number) as n, neighbor(n, 1, -10) from numbers(3); +select 'Offset > block'; +select number, neighbor(number, 10) from numbers(3); +select 'Abs(Offset) > block'; +select number, neighbor(number, -10) from numbers(3); +select 'Positive offset'; +select number, neighbor(number, 1) from numbers(3); +select 'Negative offset'; +select number, neighbor(number, 1) from numbers(3); +select 'Positive offset with defaults'; +select number, neighbor(number, 2, number + 10) from numbers(4); +select 'Negative offset with defaults'; +select number, neighbor(number, -2, number + 10) from numbers(4); +select 'Positive offset with const defaults'; +select number, neighbor(number, 1, 1000) from numbers(3); +select 'Negative offset with const defaults'; +select number, neighbor(number, -1, 1000) from numbers(3); +select 'Dynamic column and offset, out of bounds'; +select number, number * 2 as offset, neighbor(number, offset, number * 10) from numbers(4); +select 'Dynamic column and offset, negative'; +select number, -number * 2 as offset, neighbor(number, offset, number * 10) from numbers(6); +select 'Dynamic column and offset, without defaults'; +select number, -(number - 2) * 2 as offset, neighbor(number, offset) from numbers(6); +select 'Constant column'; +select number, neighbor(1000, 10) from numbers(3); \ No newline at end of file From 341e2e4587a18065c2da1ca888c73389f48ce36c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 06:00:06 +0300 Subject: [PATCH 149/181] Step 1: make it correct. 
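
Before this step, the implementation special-cased constant source columns and simply
replicated the constant, ignoring both the offset and the default value (the
"/// NOTE Inconsistency when default_values are specified" from the previous commit).
This step removes all special cases and evaluates row by row, so constant columns
follow the same rules as ordinary ones. A minimal SQL sketch of the behavioral change,
with the expected output taken from the 00957_neighbor.reference update in this patch:

```sql
-- 'Constant column' case from 00957_neighbor.sql
SELECT number, neighbor(1000, 10) FROM numbers(3);
-- Before: 1000 on every row (the constant was copied as-is).
-- After:  0 on every row, since row + 10 is out of range and no default is given.
```

The expected error code for an incompatible default value type also changes from 43
to 386, because getLeastSupertype now throws directly instead of having its exception
wrapped into ILLEGAL_TYPE_OF_ARGUMENT.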
--- dbms/src/Functions/neighbor.cpp | 197 ++++-------------- .../0_stateless/00957_neighbor.reference | 6 +- .../queries/0_stateless/00957_neighbor.sql | 4 +- .../0_stateless/00957_neighbour.reference | 69 ------ .../queries/0_stateless/00957_neighbour.sql | 42 ---- 5 files changed, 42 insertions(+), 276 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00957_neighbour.reference delete mode 100644 dbms/tests/queries/0_stateless/00957_neighbour.sql diff --git a/dbms/src/Functions/neighbor.cpp b/dbms/src/Functions/neighbor.cpp index 02d28028d3b..1c640ffb76c 100644 --- a/dbms/src/Functions/neighbor.cpp +++ b/dbms/src/Functions/neighbor.cpp @@ -46,6 +46,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { size_t number_of_arguments = arguments.size(); @@ -68,183 +70,58 @@ public: // check that default value column has supertype with first argument if (number_of_arguments == 3) - { - DataTypes types = {arguments[0], arguments[2]}; - try - { - return getLeastSupertype(types); - } - catch (const Exception &) - { - throw Exception( - "Illegal types of arguments (" + types[0]->getName() + ", " + types[1]->getName() - + ")" - " of function " - + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } + return getLeastSupertype({arguments[0], arguments[2]}); return arguments[0]; } - static void insertDefaults(const MutableColumnPtr & target, size_t row_count, ColumnPtr & default_values_column, size_t offset) - { - if (row_count == 0) - { - return; - } - if (default_values_column) - { - if (isColumnConst(*default_values_column)) - { - const IColumn & constant_content = assert_cast(*default_values_column).getDataColumn(); - for (size_t row = 0; row < row_count; ++row) - { - target->insertFrom(constant_content, 0); - } - } - else - { - target->insertRangeFrom(*default_values_column, offset, row_count); - } - } - else - { - for (size_t row = 0; row < row_count; ++row) - { - target->insertDefault(); - } - } - } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - const ColumnWithTypeAndName & source_column_name_and_type = block.getByPosition(arguments[0]); const DataTypePtr & result_type = block.getByPosition(result).type; - ColumnPtr source_column = source_column_name_and_type.column; + const ColumnWithTypeAndName & source_elem = block.getByPosition(arguments[0]); + const ColumnWithTypeAndName & offset_elem = block.getByPosition(arguments[1]); + bool has_defaults = arguments.size() == 3; - // adjust source and default values columns to resulting data type - if (!source_column_name_and_type.type->equals(*result_type)) + ColumnPtr source_column_casted = castColumn(source_elem, result_type, context); + ColumnPtr offset_column = offset_elem.column; + + ColumnPtr default_column_casted; + if (has_defaults) { - source_column = castColumn(source_column_name_and_type, result_type, context); + const ColumnWithTypeAndName & default_elem = block.getByPosition(arguments[2]); + default_column_casted = castColumn(default_elem, result_type, context); } - ColumnPtr default_values_column; + bool source_is_constant = isColumnConst(*source_column_casted); + bool offset_is_constant = isColumnConst(*offset_column); - /// Has argument with default value: neighbor(source, offset, default) - if (arguments.size() == 3) + bool default_is_constant = false; + if 
(has_defaults) + default_is_constant = isColumnConst(*default_column_casted); + + if (source_is_constant) + source_column_casted = assert_cast(*source_column_casted).getDataColumnPtr(); + if (offset_is_constant) + offset_column = assert_cast(*offset_column).getDataColumnPtr(); + if (default_is_constant) + default_column_casted = assert_cast(*default_column_casted).getDataColumnPtr(); + + auto column = result_type->createColumn(); + + for (size_t row = 0; row < input_rows_count; ++row) { - default_values_column = block.getByPosition(arguments[2]).column; + Int64 src_idx = row + offset_column->getInt(offset_is_constant ? 0 : row); - if (!block.getByPosition(arguments[2]).type->equals(*result_type)) - default_values_column = castColumn(block.getByPosition(arguments[2]), result_type, context); - } - - const auto & offset_structure = block.getByPosition(arguments[1]); - ColumnPtr offset_column = offset_structure.column; - - auto is_constant_offset = isColumnConst(*offset_structure.column); - - // since we are working with both signed and unsigned - we'll try to use Int64 for handling all of them - const DataTypePtr desired_type = std::make_shared(); - if (!block.getByPosition(arguments[1]).type->equals(*desired_type)) - { - offset_column = castColumn(offset_structure, desired_type, context); - } - - if (isColumnConst(*source_column)) - { - /// NOTE Inconsistency when default_values are specified. - auto column = result_type->createColumnConst(input_rows_count, (*source_column)[0]); - block.getByPosition(result).column = std::move(column); - } - else - { - auto column = result_type->createColumn(); - column->reserve(input_rows_count); - // with constant offset - insertRangeFrom - if (is_constant_offset) - { - Int64 offset_value = offset_column->getInt(0); - - auto offset_value_casted = static_cast(std::abs(offset_value)); - size_t default_value_count = std::min(offset_value_casted, input_rows_count); - if (offset_value > 0) - { - // insert shifted value - if (offset_value_casted <= input_rows_count) - { - column->insertRangeFrom(*source_column, offset_value_casted, input_rows_count - offset_value_casted); - } - insertDefaults(column, default_value_count, default_values_column, input_rows_count - default_value_count); - } - else if (offset_value < 0) - { - // insert defaults up to offset_value - insertDefaults(column, default_value_count, default_values_column, 0); - column->insertRangeFrom(*source_column, 0, input_rows_count - default_value_count); - } - else - { - // populate column with source values, when offset is equal to zero - column->insertRangeFrom(*source_column, 0, input_rows_count); - } - } + if (src_idx >= 0 && src_idx < Int64(input_rows_count)) + column->insertFrom(*source_column_casted, source_is_constant ? 0 : src_idx); + else if (has_defaults) + column->insertFrom(*default_column_casted, default_is_constant ? 
0 : row); else - { - // with dynamic offset - handle row by row - for (size_t row = 0; row < input_rows_count; ++row) - { - Int64 offset_value = offset_column->getInt(row); - if (offset_value == 0) - { - column->insertFrom(*source_column, row); - } - else if (offset_value > 0) - { - size_t real_offset = row + offset_value; - if (real_offset > input_rows_count) - { - if (default_values_column) - { - column->insertFrom(*default_values_column, row); - } - else - { - column->insertDefault(); - } - } - else - { - column->insertFrom(*source_column, real_offset); - } - } - else - { - // out of range - auto offset_value_casted = static_cast(std::abs(offset_value)); - if (offset_value_casted > row) - { - if (default_values_column) - { - column->insertFrom(*default_values_column, row); - } - else - { - column->insertDefault(); - } - } - else - { - column->insertFrom(*source_column, row - offset_value_casted); - } - } - } - } - block.getByPosition(result).column = std::move(column); + column->insertDefault(); } + + block.getByPosition(result).column = std::move(column); } private: diff --git a/dbms/tests/queries/0_stateless/00957_neighbor.reference b/dbms/tests/queries/0_stateless/00957_neighbor.reference index 5a523d2c575..d25d727da5d 100644 --- a/dbms/tests/queries/0_stateless/00957_neighbor.reference +++ b/dbms/tests/queries/0_stateless/00957_neighbor.reference @@ -64,6 +64,6 @@ Dynamic column and offset, without defaults 4 -4 0 5 -6 0 Constant column -0 1000 -1 1000 -2 1000 +0 0 +1 0 +2 0 diff --git a/dbms/tests/queries/0_stateless/00957_neighbor.sql b/dbms/tests/queries/0_stateless/00957_neighbor.sql index 2f701d4d5c7..c4b801c80cb 100644 --- a/dbms/tests/queries/0_stateless/00957_neighbor.sql +++ b/dbms/tests/queries/0_stateless/00957_neighbor.sql @@ -5,9 +5,9 @@ select neighbor(1); -- { serverError 42 } -- greater than 3 arguments select neighbor(1,2,3,4); -- { serverError 42 } -- bad default value -select neighbor(dummy, 1, 'hello'); -- { serverError 43 } +select neighbor(dummy, 1, 'hello'); -- { serverError 386 } -- types without common supertype (UInt64 and Int8) -select number, neighbor(number, 1, -10) from numbers(3); -- { serverError 43 } +select number, neighbor(number, 1, -10) from numbers(3); -- { serverError 386 } -- nullable offset is not allowed select number, if(number > 1, number, null) as offset, neighbor(number, offset) from numbers(3); -- { serverError 43 } select 'Zero offset'; diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.reference b/dbms/tests/queries/0_stateless/00957_neighbour.reference deleted file mode 100644 index 5a523d2c575..00000000000 --- a/dbms/tests/queries/0_stateless/00957_neighbour.reference +++ /dev/null @@ -1,69 +0,0 @@ -Zero offset -0 0 -1 1 -2 2 -Nullable values -\N 0 \N -\N 1 2 -2 2 \N -Result with different type -0 1 -1 2 -2 -10 -Offset > block -0 0 -1 0 -2 0 -Abs(Offset) > block -0 0 -1 0 -2 0 -Positive offset -0 1 -1 2 -2 0 -Negative offset -0 1 -1 2 -2 0 -Positive offset with defaults -0 2 -1 3 -2 12 -3 13 -Negative offset with defaults -0 10 -1 11 -2 0 -3 1 -Positive offset with const defaults -0 1 -1 2 -2 1000 -Negative offset with const defaults -0 1000 -1 0 -2 1 -Dynamic column and offset, out of bounds -0 0 0 -1 2 3 -2 4 20 -3 6 30 -Dynamic column and offset, negative -0 0 0 -1 -2 10 -2 -4 20 -3 -6 30 -4 -8 40 -5 -10 50 -Dynamic column and offset, without defaults -0 4 4 -1 2 3 -2 0 2 -3 -2 1 -4 -4 0 -5 -6 0 -Constant column -0 1000 -1 1000 -2 1000 diff --git a/dbms/tests/queries/0_stateless/00957_neighbour.sql 
b/dbms/tests/queries/0_stateless/00957_neighbour.sql deleted file mode 100644 index b60cc3123b6..00000000000 --- a/dbms/tests/queries/0_stateless/00957_neighbour.sql +++ /dev/null @@ -1,42 +0,0 @@ --- no arguments -select neighbour(); -- { serverError 42 } --- single argument -select neighbour(1); -- { serverError 42 } --- greater than 3 arguments -select neighbour(1,2,3,4); -- { serverError 42 } --- bad default value -select neighbour(dummy, 1, 'hello'); -- { serverError 43 } --- types without common supertype (UInt64 and Int8) -select number, neighbour(number, 1, -10) from numbers(3); -- { serverError 43 } --- nullable offset is not allowed -select number, if(number > 1, number, null) as offset, neighbour(number, offset) from numbers(3); -- { serverError 43 } -select 'Zero offset'; -select number, neighbour(number, 0) from numbers(3); -select 'Nullable values'; -select if(number > 1, number, null) as value, number as offset, neighbour(value, offset) as neighbour from numbers(3); -select 'Result with different type'; -select toInt32(number) as n, neighbour(n, 1, -10) from numbers(3); -select 'Offset > block'; -select number, neighbour(number, 10) from numbers(3); -select 'Abs(Offset) > block'; -select number, neighbour(number, -10) from numbers(3); -select 'Positive offset'; -select number, neighbour(number, 1) from numbers(3); -select 'Negative offset'; -select number, neighbour(number, 1) from numbers(3); -select 'Positive offset with defaults'; -select number, neighbour(number, 2, number + 10) from numbers(4); -select 'Negative offset with defaults'; -select number, neighbour(number, -2, number + 10) from numbers(4); -select 'Positive offset with const defaults'; -select number, neighbour(number, 1, 1000) from numbers(3); -select 'Negative offset with const defaults'; -select number, neighbour(number, -1, 1000) from numbers(3); -select 'Dynamic column and offset, out of bounds'; -select number, number * 2 as offset, neighbour(number, offset, number * 10) from numbers(4); -select 'Dynamic column and offset, negative'; -select number, -number * 2 as offset, neighbour(number, offset, number * 10) from numbers(6); -select 'Dynamic column and offset, without defaults'; -select number, -(number - 2) * 2 as offset, neighbour(number, offset) from numbers(6); -select 'Constant column'; -select number, neighbour(1000, 10) from numbers(3); \ No newline at end of file From 6685365ab8c5b74f9650492c88a012596eb1b0c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 07:25:32 +0300 Subject: [PATCH 150/181] Added optimized case --- dbms/src/Functions/neighbor.cpp | 78 ++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/neighbor.cpp b/dbms/src/Functions/neighbor.cpp index 1c640ffb76c..994e6311678 100644 --- a/dbms/src/Functions/neighbor.cpp +++ b/dbms/src/Functions/neighbor.cpp @@ -107,21 +107,79 @@ public: if (default_is_constant) default_column_casted = assert_cast(*default_column_casted).getDataColumnPtr(); - auto column = result_type->createColumn(); - - for (size_t row = 0; row < input_rows_count; ++row) + if (offset_is_constant) { - Int64 src_idx = row + offset_column->getInt(offset_is_constant ? 0 : row); + /// Optimization for the case when we can copy many values at once. - if (src_idx >= 0 && src_idx < Int64(input_rows_count)) - column->insertFrom(*source_column_casted, source_is_constant ? 0 : src_idx); - else if (has_defaults) - column->insertFrom(*default_column_casted, default_is_constant ? 
0 : row); + Int64 offset = offset_column->getInt(0); + + auto result_column = result_type->createColumn(); + + auto insert_range_from = [&](bool is_const, const ColumnPtr & src, Int64 begin, Int64 size) + { + /// Saturation of bounds. + if (begin < 0) + { + size += begin; + begin = 0; + } + if (size <= 0) + return; + if (size > Int64(input_rows_count)) + size = input_rows_count; + + if (!src) + { + for (Int64 i = 0; i < size; ++i) + result_column->insertDefault(); + } + else if (is_const) + { + for (Int64 i = 0; i < size; ++i) + result_column->insertFrom(*src, 0); + } + else + { + result_column->insertRangeFrom(*src, begin, size); + } + }; + + if (offset == 0) + { + /// Degenerate case, just copy source column as is. + block.getByPosition(result).column = source_column_casted; /// TODO + } + else if (offset > 0) + { + insert_range_from(source_is_constant, source_column_casted, offset, Int64(input_rows_count) - offset); + insert_range_from(default_is_constant, default_column_casted, Int64(input_rows_count) - offset, offset); + block.getByPosition(result).column = std::move(result_column); + } else - column->insertDefault(); + { + insert_range_from(default_is_constant, default_column_casted, 0, -offset); + insert_range_from(source_is_constant, source_column_casted, 0, Int64(input_rows_count) + offset); + block.getByPosition(result).column = std::move(result_column); + } } + else + { + auto result_column = result_type->createColumn(); - block.getByPosition(result).column = std::move(column); + for (size_t row = 0; row < input_rows_count; ++row) + { + Int64 src_idx = row + offset_column->getInt(offset_is_constant ? 0 : row); + + if (src_idx >= 0 && src_idx < Int64(input_rows_count)) + result_column->insertFrom(*source_column_casted, source_is_constant ? 0 : src_idx); + else if (has_defaults) + result_column->insertFrom(*default_column_casted, default_is_constant ? 0 : row); + else + result_column->insertDefault(); + } + + block.getByPosition(result).column = std::move(result_column); + } } private: From 20b9af29f555f6e125c811e95cc19c39ca7857d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 07:34:59 +0300 Subject: [PATCH 151/181] More tests --- dbms/src/Functions/neighbor.cpp | 2 +- .../0_stateless/00996_neigbor.reference | 270 ++++++++++++++++++ .../queries/0_stateless/00996_neigbor.sql | 42 +++ 3 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00996_neigbor.reference create mode 100644 dbms/tests/queries/0_stateless/00996_neigbor.sql diff --git a/dbms/src/Functions/neighbor.cpp b/dbms/src/Functions/neighbor.cpp index 994e6311678..078f704c771 100644 --- a/dbms/src/Functions/neighbor.cpp +++ b/dbms/src/Functions/neighbor.cpp @@ -147,7 +147,7 @@ public: if (offset == 0) { /// Degenerate case, just copy source column as is. - block.getByPosition(result).column = source_column_casted; /// TODO + block.getByPosition(result).column = source_is_constant ? 
ColumnConst::create(source_column_casted, input_rows_count) : source_column_casted; } else if (offset > 0) { diff --git a/dbms/tests/queries/0_stateless/00996_neigbor.reference b/dbms/tests/queries/0_stateless/00996_neigbor.reference new file mode 100644 index 00000000000..ebdb8c9e684 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00996_neigbor.reference @@ -0,0 +1,270 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +0 5 +1 6 +2 7 +3 8 +4 9 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 0 +6 1 +7 2 +8 3 +9 4 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 5 +1 6 +2 7 +3 8 +4 9 +5 Hello +6 Hello +7 Hello +8 Hello +9 Hello +0 World +1 World +2 World +3 World +4 World +5 0 +6 1 +7 2 +8 3 +9 4 +0 5 +1 6 +2 7 +3 8 +4 9 +5 Hello 5 +6 Hello 6 +7 Hello 7 +8 Hello 8 +9 Hello 9 +0 World 0 +1 World 1 +2 World 2 +3 World 3 +4 World 4 +5 0 +6 1 +7 2 +8 3 +9 4 +0 ClickHouse +1 ClickHouse +2 ClickHouse +3 ClickHouse +4 ClickHouse +5 ClickHouse +6 ClickHouse +7 ClickHouse +8 ClickHouse +9 ClickHouse +0 ClickHouse +1 ClickHouse +2 ClickHouse +3 ClickHouse +4 ClickHouse +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 ClickHouse +6 ClickHouse +7 ClickHouse +8 ClickHouse +9 ClickHouse +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 ClickHouse +1 ClickHouse +2 ClickHouse +3 ClickHouse +4 ClickHouse +5 Hello +6 Hello +7 Hello +8 Hello +9 Hello +0 World +1 World +2 World +3 World +4 World +5 ClickHouse +6 ClickHouse +7 ClickHouse +8 ClickHouse +9 ClickHouse +0 ClickHouse +1 ClickHouse +2 ClickHouse +3 ClickHouse +4 ClickHouse +5 Hello 5 +6 Hello 6 +7 Hello 7 +8 Hello 8 +9 Hello 9 +0 World 0 +1 World 1 +2 World 2 +3 World 3 +4 World 4 +5 ClickHouse +6 ClickHouse +7 ClickHouse +8 ClickHouse +9 ClickHouse +0 0 +1 2 +2 4 +3 6 +4 8 +5 +6 +7 +8 +9 +0 0 +1 1 +2 3 +3 4 +4 6 +5 7 +6 9 +7 +8 +9 +0 Hello +1 Hello +2 Hello +3 Hello +4 Hello +5 +6 +7 +8 +9 +0 +1 +2 +3 Hello +4 Hello +5 Hello +6 Hello +7 Hello +8 Hello +9 Hello +0 World +1 World +2 World +3 Hello +4 Hello +5 Hello +6 Hello +7 Hello +8 Hello +9 Hello diff --git a/dbms/tests/queries/0_stateless/00996_neigbor.sql b/dbms/tests/queries/0_stateless/00996_neigbor.sql new file mode 100644 index 00000000000..25c20b1b896 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00996_neigbor.sql @@ -0,0 +1,42 @@ +SELECT number, neighbor(toString(number), 0) FROM numbers(10); + +SELECT number, neighbor(toString(number), 5) FROM numbers(10); +SELECT number, neighbor(toString(number), -5) FROM numbers(10); + +SELECT number, neighbor(toString(number), 10) FROM numbers(10); +SELECT number, neighbor(toString(number), -10) FROM numbers(10); + +SELECT number, neighbor(toString(number), 15) FROM numbers(10); +SELECT number, neighbor(toString(number), -15) FROM numbers(10); + +SELECT number, neighbor(toString(number), 5, 'Hello') FROM numbers(10); +SELECT number, neighbor(toString(number), -5, 'World') FROM numbers(10); + +SELECT number, neighbor(toString(number), 5, concat('Hello ', toString(number))) FROM numbers(10); +SELECT number, neighbor(toString(number), -5, concat('World ', toString(number))) FROM numbers(10); + + +SELECT number, neighbor('ClickHouse', 0) FROM numbers(10); + +SELECT number, neighbor('ClickHouse', 5) FROM numbers(10); +SELECT number, neighbor('ClickHouse', -5) FROM numbers(10); + +SELECT number, neighbor('ClickHouse', 10) FROM numbers(10); +SELECT number, neighbor('ClickHouse', -10) FROM numbers(10); + +SELECT number, 
neighbor('ClickHouse', 15) FROM numbers(10); +SELECT number, neighbor('ClickHouse', -15) FROM numbers(10); + +SELECT number, neighbor('ClickHouse', 5, 'Hello') FROM numbers(10); +SELECT number, neighbor('ClickHouse', -5, 'World') FROM numbers(10); + +SELECT number, neighbor('ClickHouse', 5, concat('Hello ', toString(number))) FROM numbers(10); +SELECT number, neighbor('ClickHouse', -5, concat('World ', toString(number))) FROM numbers(10); + + +SELECT number, neighbor(toString(number), number) FROM numbers(10); +SELECT number, neighbor(toString(number), intDiv(number, 2)) FROM numbers(10); + +SELECT number, neighbor('Hello', number) FROM numbers(10); +SELECT number, neighbor('Hello', -3) FROM numbers(10); +SELECT number, neighbor('Hello', -3, 'World') FROM numbers(10); From 3d8613f8dfe1750e7f7c5be0b71921db2b2cea2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 07:36:01 +0300 Subject: [PATCH 152/181] More tests --- .../{00996_neigbor.reference => 00996_neighbor.reference} | 0 .../queries/0_stateless/{00996_neigbor.sql => 00996_neighbor.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00996_neigbor.reference => 00996_neighbor.reference} (100%) rename dbms/tests/queries/0_stateless/{00996_neigbor.sql => 00996_neighbor.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00996_neigbor.reference b/dbms/tests/queries/0_stateless/00996_neighbor.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00996_neigbor.reference rename to dbms/tests/queries/0_stateless/00996_neighbor.reference diff --git a/dbms/tests/queries/0_stateless/00996_neigbor.sql b/dbms/tests/queries/0_stateless/00996_neighbor.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00996_neigbor.sql rename to dbms/tests/queries/0_stateless/00996_neighbor.sql From 84b0f709aa0c1b26d15e4de0abea45c865fb1934 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 08:08:06 +0300 Subject: [PATCH 153/181] Removed useless code --- dbms/src/Core/Defines.h | 2 - dbms/src/Parsers/ASTAlterQuery.cpp | 43 ------------------ dbms/src/Parsers/ASTAlterQuery.h | 19 -------- dbms/src/Parsers/ASTCreateQuery.cpp | 3 -- dbms/src/Parsers/ASTCreateQuery.h | 1 - dbms/src/Parsers/ParserAlterQuery.cpp | 63 ++------------------------ dbms/src/Parsers/ParserAlterQuery.h | 13 +----- dbms/src/Parsers/ParserCreateQuery.cpp | 47 ++++++------------- dbms/src/Parsers/ParserCreateQuery.h | 2 +- 9 files changed, 20 insertions(+), 173 deletions(-) diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 0c72c926006..a172cf6e243 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -33,8 +33,6 @@ #define DEFAULT_MERGE_BLOCK_SIZE 8192 #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 -#define DEFAULT_TEMPORARY_LIVE_CHANNEL_TIMEOUT_SEC 15 -#define DEFAULT_ALTER_LIVE_CHANNEL_WAIT_MS 10000 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC 15 #define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE 1024 diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 54ba460d75a..6cfba1d6a79 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -45,11 +45,6 @@ ASTPtr ASTAlterCommand::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } - if (values) - { - res->values = values->clone(); - res->children.push_back(res->values); - } return res; } @@ -226,42 +221,6 @@ void 
ASTAlterCommand::formatImpl( { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : ""); } - else if (type == ASTAlterCommand::LIVE_CHANNEL_ADD) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_CHANNEL_DROP) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_CHANNEL_MODIFY) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_CHANNEL_SUSPEND) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "SUSPEND " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_CHANNEL_RESUME) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RESUME " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } - else if (type == ASTAlterCommand::LIVE_CHANNEL_REFRESH) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "REFRESH " << (settings.hilite ? hilite_none : ""); - - values->formatImpl(settings, state, frame); - } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } @@ -316,8 +275,6 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState if (is_live_view) settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : ""); - else if (is_live_channel) - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE CHANNEL " << (settings.hilite ? hilite_none : ""); else settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : ""); diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 5b04fcdffb9..2563abfac6e 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -17,13 +17,6 @@ namespace DB * COMMENT_COLUMN col_name 'comment', * ALTER LIVE VIEW [db.]name_type * REFRESH - * ALTER CHANNEL [db.]name_type - * ADD live_view,... - * DROP live_view,... - * SUSPEND live_view,... - * RESUME live_view,... - * REFRESH live_view,... - * MODIFY live_view,... 
*/ class ASTAlterCommand : public IAST @@ -59,13 +52,6 @@ public: NO_TYPE, LIVE_VIEW_REFRESH, - - LIVE_CHANNEL_ADD, - LIVE_CHANNEL_DROP, - LIVE_CHANNEL_SUSPEND, - LIVE_CHANNEL_RESUME, - LIVE_CHANNEL_REFRESH, - LIVE_CHANNEL_MODIFY }; Type type = NO_TYPE; @@ -121,10 +107,6 @@ public: /// For MODIFY TTL query ASTPtr ttl; - /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here - */ - ASTPtr values; - bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART and DROP DETACHED PART @@ -182,7 +164,6 @@ class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCl { public: bool is_live_view{false}; /// true for ALTER LIVE VIEW - bool is_live_channel{false}; /// true for ALTER LIVE CHANNEL ASTAlterCommandList * command_list = nullptr; diff --git a/dbms/src/Parsers/ASTCreateQuery.cpp b/dbms/src/Parsers/ASTCreateQuery.cpp index 87e9301329d..f22460d8716 100644 --- a/dbms/src/Parsers/ASTCreateQuery.cpp +++ b/dbms/src/Parsers/ASTCreateQuery.cpp @@ -220,9 +220,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat what = "MATERIALIZED VIEW"; if (is_live_view) what = "LIVE VIEW"; - if (is_live_channel) - what = "LIVE CHANNEL"; - settings.ostr << (settings.hilite ? hilite_keyword : "") diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 3893fa2c82a..5ca0c067a3c 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -57,7 +57,6 @@ public: bool is_view{false}; bool is_materialized_view{false}; bool is_live_view{false}; - bool is_live_channel{false}; bool is_populate{false}; bool replace_view{false}; /// CREATE OR REPLACE VIEW ASTColumns * columns_list = nullptr; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 41b046e2877..21a4fd586c6 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -89,53 +89,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else return false; } - else if (is_live_channel) - { - if (s_add.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_ADD; - } - else if (s_drop.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_DROP; - } - else if (s_suspend.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_SUSPEND; - } - else if (s_resume.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_RESUME; - } - else if (s_refresh.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_REFRESH; - } - else if (s_modify.ignore(pos, expected)) - { - if (!values_p.parse(pos, command->values, expected)) - return false; - - command->type = ASTAlterCommand::LIVE_CHANNEL_MODIFY; - } - else - return false; - } else { if (s_add_column.ignore(pos, expected)) @@ -466,7 +419,7 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe node = command_list; ParserToken s_comma(TokenType::Comma); - ParserAlterCommand p_command(is_live_view, is_live_channel); + ParserAlterCommand 
p_command(is_live_view); do { @@ -516,20 +469,13 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_alter_table("ALTER TABLE"); ParserKeyword s_alter_live_view("ALTER LIVE VIEW"); - ParserKeyword s_alter_live_channel("ALTER LIVE CHANNEL"); bool is_live_view = false; - bool is_live_channel = false; if (!s_alter_table.ignore(pos, expected)) { if (!s_alter_live_view.ignore(pos, expected)) - { - if (!s_alter_live_channel.ignore(pos, expected)) - return false; - else - is_live_channel = true; - } + return false; else is_live_view = true; } @@ -537,9 +483,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (is_live_view) query->is_live_view = true; - if (is_live_channel) - query->is_live_channel = true; - if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) return false; @@ -551,7 +494,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } query->cluster = cluster_str; - ParserAlterCommandList p_command_list(is_live_view, is_live_channel); + ParserAlterCommandList p_command_list(is_live_view); ASTPtr command_list; if (!p_command_list.parse(pos, command_list, expected)) return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 13f5681a9da..450e64ee9f4 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -21,13 +21,6 @@ namespace DB * [UPDATE col_name = expr, ... WHERE ...] * ALTER LIVE VIEW [db.name] * [REFRESH] - * ALTER LIVE CHANNEL [db.name] [ON CLUSTER cluster] - * [ADD live_view, ...] - * [DROP live_view, ...] - * [SUSPEND live_view, ...] - * [RESUME live_view, ...] - * [REFRESH live_view, ...] - * [MODIFY live_view, ...] */ class ParserAlterQuery : public IParserBase @@ -46,9 +39,8 @@ protected: public: bool is_live_view; - bool is_live_channel; - ParserAlterCommandList(bool is_live_view_ = false, bool is_live_channel_ = false) : is_live_view(is_live_view_), is_live_channel(is_live_channel_) {} + ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {} }; @@ -60,9 +52,8 @@ protected: public: bool is_live_view; - bool is_live_channel; - ParserAlterCommand(bool is_live_view_ = false, bool is_live_channel_ = false) : is_live_view(is_live_view_), is_live_channel(is_live_channel_) {} + ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {} }; diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index d2cbd920d0f..acbf2fb89ba 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -365,7 +365,6 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with("WITH"); ParserKeyword s_materialized("MATERIALIZED"); ParserKeyword s_live("LIVE"); - ParserKeyword s_channel("CHANNEL"); ParserKeyword s_populate("POPULATE"); ParserKeyword s_or_replace("OR REPLACE"); ParserToken s_dot(TokenType::Dot); @@ -396,7 +395,6 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool is_view = false; bool is_materialized_view = false; bool is_live_view = false; - bool is_live_channel = false; bool is_populate = false; bool is_temporary = false; bool replace_view = false; @@ -494,9 +492,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_live.ignore(pos, expected)) { - if (s_channel.ignore(pos, expected)) - is_live_channel = true; - else if 
(s_view.ignore(pos, expected)) + if (s_view.ignore(pos, expected)) is_live_view = true; else return false; @@ -520,50 +516,36 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - if (!is_live_channel) + // TO [db.]table + if (ParserKeyword{"TO"}.ignore(pos, expected)) { - // TO [db.]table - if (ParserKeyword{"TO"}.ignore(pos, expected)) + if (!name_p.parse(pos, to_table, expected)) + return false; + + if (s_dot.ignore(pos, expected)) { + to_database = to_table; if (!name_p.parse(pos, to_table, expected)) return false; - - if (s_dot.ignore(pos, expected)) - { - to_database = to_table; - if (!name_p.parse(pos, to_table, expected)) - return false; - } } } /// Optional - a list of columns can be specified. It must fully comply with SELECT. if (s_lparen.ignore(pos, expected)) { - if (!columns_or_indices_p.parse(pos, columns_list, expected)) + if (!table_properties_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) return false; } - if (is_live_channel) - { - if (s_with.ignore(pos, expected)) - { - if (!names_p.parse(pos, tables, expected)) - return false; - } - } - else - { - /// AS SELECT ... - if (!s_as.ignore(pos, expected)) - return false; + /// AS SELECT ... + if (!s_as.ignore(pos, expected)) + return false; - if (!select_p.parse(pos, select, expected)) - return false; - } + if (!select_p.parse(pos, select, expected)) + return false; } else if (is_temporary) return false; @@ -673,7 +655,6 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->is_view = is_view; query->is_materialized_view = is_materialized_view; query->is_live_view = is_live_view; - query->is_live_channel = is_live_channel; query->is_populate = is_populate; query->temporary = is_temporary; query->replace_view = replace_view; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 1199c712585..a4f4da8907e 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -321,7 +321,7 @@ protected: * CREATE|ATTACH DATABASE db [ENGINE = engine] * * Or: - * CREATE[OR REPLACE]|ATTACH [[MATERIALIZED] VIEW] | [[TEMPORARY] LIVE [CHANNEL] | [VIEW]] [IF NOT EXISTS] [db.]name [TO [db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... + * CREATE[OR REPLACE]|ATTACH [[MATERIALIZED] VIEW] | [VIEW]] [IF NOT EXISTS] [db.]name [TO [db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... 
*/ class ParserCreateQuery : public IParserBase { From dae2aa61387a619cf5db94d8a046d5c3353fde37 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 08:11:11 +0300 Subject: [PATCH 154/181] Removed useless code --- dbms/src/Parsers/ASTAlterQuery.cpp | 5 +++++ dbms/src/Parsers/ASTAlterQuery.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 6cfba1d6a79..e8fa3630442 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -45,6 +45,11 @@ ASTPtr ASTAlterCommand::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (values) + { + res->values = values->clone(); + res->children.push_back(res->values); + } return res; } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 2563abfac6e..a4962087a9b 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -107,6 +107,10 @@ public: /// For MODIFY TTL query ASTPtr ttl; + /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here + */ + ASTPtr values; + bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART and DROP DETACHED PART From 1cc8bf7fe040afe05978bec43fab525bd85e68d7 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 23 Aug 2019 12:11:43 +0300 Subject: [PATCH 155/181] DOCAPI-7783: RU Translation (#6623) --- docs/ru/query_language/misc.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/ru/query_language/misc.md b/docs/ru/query_language/misc.md index 93f548bf73c..e366abf2f3d 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/query_language/misc.md @@ -193,18 +193,21 @@ RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка). -## SET +## SET {#query-set} ```sql SET param = value ``` -Позволяет установить настройку `param` в значение `value`. Также можно одним запросом установить все настройки из заданного профиля настроек. Для этого укажите 'profile' в качестве имени настройки. Подробнее смотрите в разделе "Настройки". -Настройка устанавливается на сессию, или на сервер (глобально), если указано `GLOBAL`. -При установке глобальных настроек, эти настройки не применяются к уже запущенной сессии, включая текущую сессию. Она будет использована только для новых сессий. +Устанавливает значение `value` для настройки `param` в текущей сессии. [Конфигурационные параметры сервера](../operations/server_settings/index.md) нельзя изменить подобным образом. -При перезапуске сервера теряются настройки, установленные с помощью `SET`. -Установить настройки, которые переживут перезапуск сервера, можно только с помощью конфигурационного файла сервера. +Можно одним запросом установить все настройки из заданного профиля настроек. + +```sql +SET profile = 'profile-name-from-the-settings-file' +``` + +Подробности смотрите в разделе [Настройки](../operations/settings/settings.md). 
## SHOW CREATE TABLE From baf121c864550cdcafdcd70b9b2ab36afc7e9c53 Mon Sep 17 00:00:00 2001 From: Winter Zhang Date: Fri, 23 Aug 2019 18:14:19 +0800 Subject: [PATCH 156/181] Translate database engine documentation(zh) (#6625) --- docs/zh/database_engines/index.md | 12 +- docs/zh/database_engines/mysql.md | 125 +++++++++++++++++- docs/zh/operations/table_engines/mergetree.md | 2 +- 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md index bbdb762a4ad..f8ae05e2520 120000 --- a/docs/zh/database_engines/index.md +++ b/docs/zh/database_engines/index.md @@ -1 +1,11 @@ -../../en/database_engines/index.md \ No newline at end of file +# 数据库引擎 + +您使用的所有表都是由数据库引擎所提供的 + +默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). + +除此之外,您还可以选择使用以下的数据库引擎: + +- [MySQL](mysql.md) + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/) diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md index 51ac4126e2d..38dfcb5ef64 120000 --- a/docs/zh/database_engines/mysql.md +++ b/docs/zh/database_engines/mysql.md @@ -1 +1,124 @@ -../../en/database_engines/mysql.md \ No newline at end of file +# MySQL + +MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 + +`MySQL`数据库引擎会将对其的查询转换为MySQL语法并发送到MySQL服务器中,因此您可以执行诸如`SHOW TABLES`或`SHOW CREATE TABLE`之类的操作。 + +但您无法对其执行以下操作: + +- `ATTACH`/`DETACH` +- `DROP` +- `RENAME` +- `CREATE TABLE` +- `ALTER` + + +## CREATE DATABASE + +``` sql +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] +ENGINE = MySQL('host:port', 'database', 'user', 'password') +``` + +**MySQL数据库引擎参数** + +- `host:port` — 链接的MySQL地址。 +- `database` — 链接的MySQL数据库。 +- `user` — 链接的MySQL用户。 +- `password` — 链接的MySQL用户密码。 + + +## 支持的类型对应 + +MySQL | ClickHouse +------|------------ +UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) +TINYINT | [Int8](../data_types/int_uint.md) +UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) +SMALLINT | [Int16](../data_types/int_uint.md) +UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) +INT, MEDIUMINT | [Int32](../data_types/int_uint.md) +UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) +BIGINT | [Int64](../data_types/int_uint.md) +FLOAT | [Float32](../data_types/float.md) +DOUBLE | [Float64](../data_types/float.md) +DATE | [Date](../data_types/date.md) +DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) +BINARY | [FixedString](../data_types/fixedstring.md) + +其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 + +同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 + + +## 使用示例 + +在MySQL中创建表: + +``` +mysql> USE test; +Database changed + +mysql> CREATE TABLE `mysql_table` ( + -> `int_id` INT NOT NULL AUTO_INCREMENT, + -> `float` FLOAT NOT NULL, + -> PRIMARY KEY (`int_id`)); +Query OK, 0 rows affected (0,09 sec) + +mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2); +Query OK, 1 row affected (0,00 sec) + +mysql> select * from mysql_table; ++--------+-------+ +| int_id | value | ++--------+-------+ +| 1 | 2 | ++--------+-------+ +1 row in set (0,00 sec) +``` + +在ClickHouse中创建MySQL类型的数据库,同时与MySQL服务器交换数据: + +```sql +CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') +``` +```sql +SHOW DATABASES +``` +```text +┌─name─────┐ +│ default │ +│ mysql_db │ +│ system │ +└──────────┘ +``` +```sql +SHOW TABLES FROM mysql_db +``` +```text +┌─name─────────┐ +│ 
mysql_table │ +└──────────────┘ +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +└────────┴───────┘ +``` +```sql +INSERT INTO mysql_db.mysql_table VALUES (3,4) +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +│ 3 │ 4 │ +└────────┴───────┘ +``` + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/mysql/) diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md index 5ddf837708a..5e330164c5a 100644 --- a/docs/zh/operations/table_engines/mergetree.md +++ b/docs/zh/operations/table_engines/mergetree.md @@ -48,7 +48,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`。 `MergeTree` 引擎不需要其他参数。 - `PARTITION BY` — [分区键](custom_partitioning_key.md) 。 From d001c7e554ccedfba05b271a91687e918ab4e6d3 Mon Sep 17 00:00:00 2001 From: Alex Ryndin Date: Fri, 23 Aug 2019 17:27:07 +0300 Subject: [PATCH 157/181] Fix typo (#6631) --- docs/en/operations/settings/query_complexity.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 77699c868b4..c00f2132ebd 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -79,7 +79,7 @@ Enables or disables execution of `GROUP BY` clauses in external memory. See [GRO Possible values: -- Maximum volume or RAM (in bytes) that can be used by the single [GROUP BY](../../query_language/select.md#select-group-by-clause) operation. +- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../query_language/select.md#select-group-by-clause) operation. - 0 — `GROUP BY` in external memory disabled. Default value: 0. From f757de462584ec8cad5d6cba4c73f3fe2b480324 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 17:56:38 +0300 Subject: [PATCH 158/181] Revert "Translate database engine documentation(zh) (#6625)" This reverts commit baf121c864550cdcafdcd70b9b2ab36afc7e9c53. --- docs/zh/database_engines/index.md | 12 +- docs/zh/database_engines/mysql.md | 125 +----------------- docs/zh/operations/table_engines/mergetree.md | 2 +- 3 files changed, 3 insertions(+), 136 deletions(-) diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md index f8ae05e2520..bbdb762a4ad 120000 --- a/docs/zh/database_engines/index.md +++ b/docs/zh/database_engines/index.md @@ -1,11 +1 @@ -# 数据库引擎 - -您使用的所有表都是由数据库引擎所提供的 - -默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). 
- -除此之外,您还可以选择使用以下的数据库引擎: - -- [MySQL](mysql.md) - -[来源文章](https://clickhouse.yandex/docs/en/database_engines/) +../../en/database_engines/index.md \ No newline at end of file diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md index 38dfcb5ef64..51ac4126e2d 120000 --- a/docs/zh/database_engines/mysql.md +++ b/docs/zh/database_engines/mysql.md @@ -1,124 +1 @@ -# MySQL - -MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 - -`MySQL`数据库引擎会将对其的查询转换为MySQL语法并发送到MySQL服务器中,因此您可以执行诸如`SHOW TABLES`或`SHOW CREATE TABLE`之类的操作。 - -但您无法对其执行以下操作: - -- `ATTACH`/`DETACH` -- `DROP` -- `RENAME` -- `CREATE TABLE` -- `ALTER` - - -## CREATE DATABASE - -``` sql -CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') -``` - -**MySQL数据库引擎参数** - -- `host:port` — 链接的MySQL地址。 -- `database` — 链接的MySQL数据库。 -- `user` — 链接的MySQL用户。 -- `password` — 链接的MySQL用户密码。 - - -## 支持的类型对应 - -MySQL | ClickHouse -------|------------ -UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) -TINYINT | [Int8](../data_types/int_uint.md) -UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) -SMALLINT | [Int16](../data_types/int_uint.md) -UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) -INT, MEDIUMINT | [Int32](../data_types/int_uint.md) -UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) -BIGINT | [Int64](../data_types/int_uint.md) -FLOAT | [Float32](../data_types/float.md) -DOUBLE | [Float64](../data_types/float.md) -DATE | [Date](../data_types/date.md) -DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) -BINARY | [FixedString](../data_types/fixedstring.md) - -其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 - -同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 - - -## 使用示例 - -在MySQL中创建表: - -``` -mysql> USE test; -Database changed - -mysql> CREATE TABLE `mysql_table` ( - -> `int_id` INT NOT NULL AUTO_INCREMENT, - -> `float` FLOAT NOT NULL, - -> PRIMARY KEY (`int_id`)); -Query OK, 0 rows affected (0,09 sec) - -mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2); -Query OK, 1 row affected (0,00 sec) - -mysql> select * from mysql_table; -+--------+-------+ -| int_id | value | -+--------+-------+ -| 1 | 2 | -+--------+-------+ -1 row in set (0,00 sec) -``` - -在ClickHouse中创建MySQL类型的数据库,同时与MySQL服务器交换数据: - -```sql -CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') -``` -```sql -SHOW DATABASES -``` -```text -┌─name─────┐ -│ default │ -│ mysql_db │ -│ system │ -└──────────┘ -``` -```sql -SHOW TABLES FROM mysql_db -``` -```text -┌─name─────────┐ -│ mysql_table │ -└──────────────┘ -``` -```sql -SELECT * FROM mysql_db.mysql_table -``` -```text -┌─int_id─┬─value─┐ -│ 1 │ 2 │ -└────────┴───────┘ -``` -```sql -INSERT INTO mysql_db.mysql_table VALUES (3,4) -``` -```sql -SELECT * FROM mysql_db.mysql_table -``` -```text -┌─int_id─┬─value─┐ -│ 1 │ 2 │ -│ 3 │ 4 │ -└────────┴───────┘ -``` - -[来源文章](https://clickhouse.yandex/docs/en/database_engines/mysql/) +../../en/database_engines/mysql.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md index 5e330164c5a..5ddf837708a 100644 --- a/docs/zh/operations/table_engines/mergetree.md +++ b/docs/zh/operations/table_engines/mergetree.md @@ -48,7 +48,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`。 `MergeTree` 引擎不需要其他参数。 +- 
`ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - `PARTITION BY` — [分区键](custom_partitioning_key.md) 。 From abdd70fcc4b02297618c8ed29751e6bbff917fa1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:01:36 +0300 Subject: [PATCH 159/181] Fixed "splitted" build --- contrib/arrow-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index bc229deeced..843ff9cd8af 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -219,7 +219,7 @@ endif() add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) add_dependencies(${ARROW_LIBRARY} protoc) target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${PROTOBUF_LIBRARIES} Threads::Threads) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY} Threads::Threads) if (ARROW_WITH_LZ4) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) endif() From 75e124f3909d1a6820938986f9d7f82b05acb309 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:05:27 +0300 Subject: [PATCH 160/181] Removed misleading flag from CMake --- CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ac4d67f6ae..f84a181a39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,11 +264,10 @@ if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") endif () -# TODO: only make this extra-checks in CI builds, since a lot of contrib libs won't link - -# CI works around this problem by explicitly adding GLIBC_COMPATIBILITY flag. -if (NOT SANITIZE AND YANDEX_OFFICIAL_BUILD) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") +# Make this extra-checks for correct library dependencies. 
+if (NOT SANITIZE) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") endif () include (cmake/find_unwind.cmake) From 5fd649e663e126813b074f2c819baca8a6eedc7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:10:33 +0300 Subject: [PATCH 161/181] Check for broken symlinks #6625 --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index fed4b6b8670..ef3bf6cfad4 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -25,3 +25,6 @@ find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | grep -vP 'Compiler|build' | xargs grep $@ -P '}\s*//+\s*namespace\s*' + +# Broken symlinks +find -L $ROOT_PATH -type l | grep -v contrib && echo "^ Broken symlinks found" From 016b1ee2f77646806d95aa28b562ab7c8e0fd41e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:42:45 +0300 Subject: [PATCH 162/181] Increase timeout for "stack overflow" test because it may take a long time in debug build --- dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh | 2 ++ dbms/tests/queries/shell_config.sh | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh b/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh index 64fae3fb0f9..14f2a8e31fb 100755 --- a/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh +++ b/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +CLICKHOUSE_CURL_TIMEOUT=30 + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh diff --git a/dbms/tests/queries/shell_config.sh b/dbms/tests/queries/shell_config.sh index d4ab11be927..b3058a6cdbe 100644 --- a/dbms/tests/queries/shell_config.sh +++ b/dbms/tests/queries/shell_config.sh @@ -46,7 +46,8 @@ export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:="9009"} export CLICKHOUSE_URL_INTERSERVER=${CLICKHOUSE_URL_INTERSERVER:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_INTERSERVER}/"} export CLICKHOUSE_CURL_COMMAND=${CLICKHOUSE_CURL_COMMAND:="curl"} -export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} --max-time 10"} +export CLICKHOUSE_CURL_TIMEOUT=${CLICKHOUSE_CURL_TIMEOUT:="10"} +export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} --max-time ${CLICKHOUSE_CURL_TIMEOUT}"} export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} From 50b927a9d74ed6de672ec9cc58e230c545c6ce43 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 19:08:27 +0300 Subject: [PATCH 163/181] Update StringSearcher.h --- dbms/src/Common/StringSearcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index fecf1a7ca81..3cb6e56ab78 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -717,7 +717,7 @@ public: { if (std::any_of(reinterpret_cast(needle_), reinterpret_cast(needle_) + needle_size_, isTokenSeparator)) { - throw Exception{"needle must not contain whitespace characters", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"Needle must not contain whitespace or separator characters", ErrorCodes::BAD_ARGUMENTS}; } } From c781e1c6a7640ddf29bb0af52608fa97a1793736 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 19:09:24 +0300 Subject: [PATCH 164/181] Update StringSearcher.h --- dbms/src/Common/StringSearcher.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index 3cb6e56ab78..25287db11f5 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -160,7 +160,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { static const Poco::UTF8Encoding utf8; @@ -377,7 +377,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -570,7 +570,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -722,7 +722,7 @@ public: } - ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const { // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. 
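         // For example (illustrative): with needle "abc", a match inside "xx abc xx"
         // is accepted, while the same bytes inside "xxabcxx" are rejected because
         // they are only a fragment of a larger token.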
if (isToken(haystack, haystack_end, pos)) From b42f85e16bd683b1f03fe4a0833370c742e1445b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 21:30:04 +0300 Subject: [PATCH 165/181] Added a check for double whitespaces --- dbms/src/AggregateFunctions/QuantileExact.h | 2 +- dbms/src/DataTypes/DataTypeEnum.cpp | 2 +- dbms/src/Functions/GeoUtils.cpp | 2 +- .../Formats/Impl/ProtobufRowInputFormat.cpp | 2 +- .../MergeTree/MergeTreeRangeReader.cpp | 2 +- .../Storages/MergeTree/MergeTreeRangeReader.h | 2 +- .../Storages/System/StorageSystemNumbers.cpp | 2 +- utils/check-style/check-style | 3 ++ utils/check-style/double-whitespaces.pl | 33 +++++++++++++++++++ 9 files changed, 43 insertions(+), 7 deletions(-) create mode 100755 utils/check-style/double-whitespaces.pl diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index 5a1343b1399..4a2aa574ae9 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -176,7 +176,7 @@ struct QuantileExactExclusive : public QuantileExact } }; -/// QuantileExactInclusive is equivalent to Excel PERCENTILE and PERCENTILE.INC, R-7, SciPy-(1,1) +/// QuantileExactInclusive is equivalent to Excel PERCENTILE and PERCENTILE.INC, R-7, SciPy-(1,1) template struct QuantileExactInclusive : public QuantileExact { diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index cffc29feaf8..add7052195a 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -234,7 +234,7 @@ void DataTypeEnum::deserializeBinaryBulk( } template -void DataTypeEnum::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +void DataTypeEnum::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index 5134343dae0..847d934c6b4 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -18,7 +18,7 @@ const UInt8 geohash_base32_decode_lookup_table[256] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 12, 13, 14, 15, 16, 0xFF, 17, 18, 0xFF, 19, 20, 0xFF, diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 25fecc5c642..1cd9d329c9d 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -12,7 +12,7 @@ namespace DB ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_) : IRowInputFormat(header_, in_, params_) - , data_types(header_.getDataTypes()) + , data_types(header_.getDataTypes()) , reader(in, 
ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames()) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 2aae847217e..932721eb028 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -108,7 +108,7 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) MergeTreeRangeReader::Stream::Stream( - size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader_) + size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader_) : current_mark(from_mark), offset_after_current_mark(0) , last_mark(to_mark) , merge_tree_reader(merge_tree_reader_) diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 9552373901c..0eae69ee17e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -41,7 +41,7 @@ public: { public: DelayedStream() = default; - DelayedStream(size_t from_mark, MergeTreeReader * merge_tree_reader); + DelayedStream(size_t from_mark, MergeTreeReader * merge_tree_reader); /// Read @num_rows rows from @from_mark starting from @offset row /// Returns the number of rows added to block. diff --git a/dbms/src/Storages/System/StorageSystemNumbers.cpp b/dbms/src/Storages/System/StorageSystemNumbers.cpp index 2afe2a7c018..2f155e22a11 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.cpp +++ b/dbms/src/Storages/System/StorageSystemNumbers.cpp @@ -146,7 +146,7 @@ BlockInputStreams StorageSystemNumbers::read( res[i] = std::make_shared(max_block_size, offset + i * max_block_size, num_streams * max_block_size); if (limit) /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - res[i] = std::make_shared(res[i], *limit * (i + 1) / num_streams - *limit * i / num_streams, 0, false, true); + res[i] = std::make_shared(res[i], *limit * (i + 1) / num_streams - *limit * i / num_streams, 0, false, true); } return res; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index ef3bf6cfad4..deed481f043 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -28,3 +28,6 @@ find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | # Broken symlinks find -L $ROOT_PATH -type l | grep -v contrib && echo "^ Broken symlinks found" + +# Double whitespaces +find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | while read i; do $ROOT_PATH/utils/check-style/double-whitespaces.pl < $i || echo -e "^ File $i contains double whitespaces\n"; done diff --git a/utils/check-style/double-whitespaces.pl b/utils/check-style/double-whitespaces.pl new file mode 100755 index 00000000000..47b03cb74ab --- /dev/null +++ b/utils/check-style/double-whitespaces.pl @@ -0,0 +1,33 @@ +#!/usr/bin/perl + +use strict; + +# Find double whitespace such as "a, b, c" that looks very ugly and annoying. +# But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines. 
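+# For example (illustrative): a line like "f(a,  b);" is reported, while
+#     { 10,  -1,   2,  30 },
+#     { 100, -10,  20, 300 },
+# is skipped: it matches the number-table pattern below, and the extra spaces
+# line up with non-space characters on the neighbouring line.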
+ +my @array; + +while (<>) +{ + push @array, $_; +} + +my $ret = 0; + +for (my $i = 1; $i < $#array; ++$i) +{ + if ($array[$i] =~ ',( {2,3})[^ /]') + { + # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl + + if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment + && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) + && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 } + { + print(($i + 1) . ":" . $array[$i]); + $ret = 1; + } + } +} + +exit $ret; From a21b43913fd97d0a97232764ad2f338da93a9561 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 23 Aug 2019 21:40:42 +0300 Subject: [PATCH 166/181] fix crash in OptimizedRegularExpression --- dbms/src/Common/OptimizedRegularExpression.cpp | 5 +++-- .../0_stateless/00997_extract_all_crash_6627.reference | 1 + .../queries/0_stateless/00997_extract_all_crash_6627.sql | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference create mode 100644 dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql diff --git a/dbms/src/Common/OptimizedRegularExpression.cpp b/dbms/src/Common/OptimizedRegularExpression.cpp index c87d87fc2df..3a224709447 100644 --- a/dbms/src/Common/OptimizedRegularExpression.cpp +++ b/dbms/src/Common/OptimizedRegularExpression.cpp @@ -1,4 +1,5 @@ #include +#include #include #define MIN_LENGTH_FOR_STRSTR 3 @@ -413,9 +414,9 @@ unsigned OptimizedRegularExpressionImpl::match(const char * subject return 0; } - StringPieceType pieces[MAX_SUBPATTERNS]; + DB::PODArrayWithStackMemory pieces(limit); - if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit)) + if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces.data(), pieces.size())) return 0; else { diff --git a/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference new file mode 100644 index 00000000000..acb53e80e6d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference @@ -0,0 +1 @@ +['9'] diff --git a/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql new file mode 100644 index 00000000000..06de4ec8afb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql @@ -0,0 +1 @@ +SELECT extractAll('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 YaBrowser/19.7.2.455 Yowser/2.5 Safari/537.36', '[Y][a-zA-Z]{8}/[1-9]([1-9]+)?(((.?)([0-9]+)?){0,4})?'); From 7dffa0fe9fbca08d6f25bc83b53fdee1ffeadbf5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 23 Aug 2019 22:19:36 +0300 Subject: [PATCH 167/181] added wait for mutation to indices tests --- dbms/tests/queries/0_stateless/00942_mutate_index.sh | 4 ++-- dbms/tests/queries/0_stateless/00943_materialize_index.sh | 6 ++++++ .../queries/0_stateless/00944_clear_index_in_partition.sh | 3 ++- .../00975_indices_mutation_replicated_zookeeper.sh | 7 +++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00942_mutate_index.sh b/dbms/tests/queries/0_stateless/00942_mutate_index.sh index c6dd1dfb836..467eb9ab671 100755 --- a/dbms/tests/queries/0_stateless/00942_mutate_index.sh +++ 
b/dbms/tests/queries/0_stateless/00942_mutate_index.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -35,8 +36,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 1;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 5;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx UPDATE i64 = 5 WHERE i64 = 1;" - -sleep 0.1 +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 1;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 5;" diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index f51b66993aa..ab2fd1e5355 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -38,22 +39,27 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" +wait_for_mutation "minmax_idx" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 2;" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" +wait_for_mutation "minmax_idx" "mutation_5.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 9047bbb3a72..7d68bac8c83 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. 
$CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -42,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -sleep 0.5 +wait_for_mutation "minmax_idx" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 613226a3fb7..1bcb4f17edd 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.indices_mutaions1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.indices_mutaions2;" @@ -47,13 +48,15 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;" -sleep 1 +wait_for_mutation "indices_mutaions1" "mutation_1.txt" "test" +wait_for_mutation "indices_mutaions2" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 MATERIALIZE INDEX idx IN PARTITION 1;" -sleep 1 +wait_for_mutation "indices_mutaions1" "mutation_2.txt" "test" +wait_for_mutation "indices_mutaions2" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" From ba2d17c12a68ede2bb47e34fdfbdd6ea0ec33a8e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 23 Aug 2019 22:36:17 +0300 Subject: [PATCH 168/181] fix --- .../0_stateless/00943_materialize_index.sh | 17 ++++++++--------- .../00944_clear_index_in_partition.sh | 2 +- ...975_indices_mutation_replicated_zookeeper.sh | 3 +-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index ab2fd1e5355..feab59b368e 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -39,27 +39,26 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_1.txt" "test" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" -$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 
2 FORMAT JSON" | grep "rows_read" - -$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" +$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" wait_for_mutation "minmax_idx" "mutation_3.txt" "test" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 2;" -wait_for_mutation "minmax_idx" "mutation_4.txt" "test" +sleep 0.5 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" -wait_for_mutation "minmax_idx" "mutation_5.txt" "test" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 7d68bac8c83..5a7bdd8e3ae 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -43,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_1.txt" "test" +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 1bcb4f17edd..5e6159475f8 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh @@ -48,8 +48,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;" -wait_for_mutation "indices_mutaions1" "mutation_1.txt" "test" -wait_for_mutation "indices_mutaions2" "mutation_1.txt" "test" +sleep 0.5 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE 
i64 = 2 FORMAT JSON" | grep "rows_read"

From d38e9ee229cede07348b2531c3949a9f11b9abe3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 23 Aug 2019 23:32:31 +0300
Subject: [PATCH 169/181] Fixed "trim" functions (in progress)

---
 dbms/src/Functions/trim.cpp                  |  64 ++----
 libs/libcommon/include/common/find_symbols.h | 198 +++++++++++++------
 2 files changed, 157 insertions(+), 105 deletions(-)

diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp
index f2e2543cc90..81916604d63 100644
--- a/dbms/src/Functions/trim.cpp
+++ b/dbms/src/Functions/trim.cpp
@@ -1,10 +1,8 @@
 #include 
 #include 
 #include 
+#include <common/find_symbols.h>
 
-#ifdef __SSE4_2__
-#include <nmmintrin.h>
-#endif
 
 namespace DB
 {
@@ -60,7 +58,7 @@ public:
             execute(reinterpret_cast<const UInt8 *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
 
             res_data.resize(res_data.size() + length + 1);
-            memcpy(&res_data[res_offset], start, length);
+            memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length);
 
             res_offset += length + 1;
             res_data[res_offset - 1] = '\0';
@@ -77,59 +75,27 @@ public:
 private:
     static void execute(const UInt8 * data, size_t size, const UInt8 *& res_data, size_t & res_size)
     {
-        size_t chars_to_trim_left = 0;
-        size_t chars_to_trim_right = 0;
-        char whitespace = ' ';
-#ifdef __SSE4_2__
-        const auto bytes_sse = sizeof(__m128i);
-        const auto size_sse = size - (size % bytes_sse);
-        const auto whitespace_mask = _mm_set1_epi8(whitespace);
-        constexpr auto base_sse_mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY;
-        auto mask = bytes_sse;
-#endif
+        const char * char_data = reinterpret_cast<const char *>(data);
+        const char * char_end = char_data + size;
 
         if constexpr (mode::trim_left)
         {
-#ifdef __SSE4_2__
-            /// skip whitespace from left in blocks of up to 16 characters
-
-            /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate
-            enum { left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT };
-            while (mask == bytes_sse && chars_to_trim_left < size_sse)
-            {
-                const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data + chars_to_trim_left));
-                mask = _mm_cmpistri(whitespace_mask, chars, left_sse_mode);
-                chars_to_trim_left += mask;
-            }
-#endif
-            /// skip remaining whitespace from left, character by character
-            while (chars_to_trim_left < size && data[chars_to_trim_left] == whitespace)
-                ++chars_to_trim_left;
+            const char * found = find_first_not_symbols<' '>(char_data, char_end);
+            size_t num_chars = found - char_data;
+            char_data += num_chars;
        }
 
         if constexpr (mode::trim_right)
         {
-            const auto trim_right_size = size - chars_to_trim_left;
-#ifdef __SSE4_2__
-            /// try to skip whitespace from right in blocks of up to 16 characters
-
-            /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate
-            enum { right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT };
-            const auto trim_right_size_sse = trim_right_size - (trim_right_size % bytes_sse);
-            while (mask == bytes_sse && chars_to_trim_right < trim_right_size_sse)
-            {
-                const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data + size - chars_to_trim_right - bytes_sse));
-                mask = _mm_cmpistri(whitespace_mask, chars, right_sse_mode);
-                chars_to_trim_right += mask;
-            }
-#endif
-            /// skip remaining whitespace from right, character by character
-            while (chars_to_trim_right < trim_right_size && data[size - chars_to_trim_right - 1] == whitespace)
-                ++chars_to_trim_right;
+            const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end);
+            if (found)
+                char_end = found + 1;
+
else + char_end = char_data; } - res_data = data + chars_to_trim_left; - res_size = size - chars_to_trim_left - chars_to_trim_right; + res_data = reinterpret_cast(char_data); + res_size = char_end - char_data; } }; diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h index 68b49397683..920a7df04c5 100644 --- a/libs/libcommon/include/common/find_symbols.h +++ b/libs/libcommon/include/common/find_symbols.h @@ -65,115 +65,153 @@ inline __m128i mm_is_in(__m128i bytes) } #endif - -template -inline const char * find_first_symbols_sse2(const char * begin, const char * end) +template +bool maybe_negate(bool x) { + if constexpr (positive) + return x; + else + return !x; +} + +template +uint16_t maybe_negate(uint16_t x) +{ + if constexpr (positive) + return x; + else + return ~x; +} + +enum class ReturnMode +{ + End, + Nullptr, +}; + + +template +inline const char * find_first_symbols_sse2(const char * const begin, const char * const end) +{ + const char * pos = begin; + #if defined(__SSE2__) - for (; begin + 15 < end; begin += 16) + for (; pos + 15 < end; pos += 16) { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); __m128i eq = mm_is_in(bytes); - uint16_t bit_mask = _mm_movemask_epi8(eq); + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); if (bit_mask) - return begin + __builtin_ctz(bit_mask); + return pos + __builtin_ctz(bit_mask); } #endif - for (; begin < end; ++begin) - if (is_in(*begin)) - return begin; - return end; + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos))) + return pos; + + return return_mode == ReturnMode::End ? end : nullptr; } -template -inline const char * find_last_symbols_or_null_sse2(const char * begin, const char * end) +template +inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) { + const char * pos = end; + #if defined(__SSE2__) - for (; end - 16 >= begin; end -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. + for (; pos - 16 >= begin; pos -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(end - 16)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos - 16)); __m128i eq = mm_is_in(bytes); - uint16_t bit_mask = _mm_movemask_epi8(eq); + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); if (bit_mask) - return end - 1 - (__builtin_clz(bit_mask) - 16); /// because __builtin_clz works with mask as uint32. + return pos - 1 - (__builtin_clz(bit_mask) - 16); /// because __builtin_clz works with mask as uint32. } #endif - --end; - for (; end >= begin; --end) - if (is_in(*end)) - return end; + --pos; + for (; pos >= begin; --pos) + if (maybe_negate(is_in(*pos))) + return pos; - return nullptr; + return return_mode == ReturnMode::End ? 
end : nullptr; } -template -inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) +inline const char * find_first_symbols_sse42_impl(const char * const begin, const char * const end) { + const char * pos = begin; + #if defined(__SSE4_2__) #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); - for (; begin + 15 < end; begin += 16) + for (; pos + 15 < end; pos += 16) { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); - if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) - return begin + _mm_cmpestri(set, num_chars, bytes, 16, MODE); + if constexpr (positive) + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE); + } + else + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY); + } } #undef MODE #endif - for (; begin < end; ++begin) - if ( (num_chars >= 1 && *begin == c01) - || (num_chars >= 2 && *begin == c02) - || (num_chars >= 3 && *begin == c03) - || (num_chars >= 4 && *begin == c04) - || (num_chars >= 5 && *begin == c05) - || (num_chars >= 6 && *begin == c06) - || (num_chars >= 7 && *begin == c07) - || (num_chars >= 8 && *begin == c08) - || (num_chars >= 9 && *begin == c09) - || (num_chars >= 10 && *begin == c10) - || (num_chars >= 11 && *begin == c11) - || (num_chars >= 12 && *begin == c12) - || (num_chars >= 13 && *begin == c13) - || (num_chars >= 14 && *begin == c14) - || (num_chars >= 15 && *begin == c15) - || (num_chars >= 16 && *begin == c16)) - return begin; - return end; + for (; pos < end; ++pos) + if ( (num_chars >= 1 && maybe_negate(*pos == c01)) + || (num_chars >= 2 && maybe_negate(*pos == c02)) + || (num_chars >= 3 && maybe_negate(*pos == c03)) + || (num_chars >= 4 && maybe_negate(*pos == c04)) + || (num_chars >= 5 && maybe_negate(*pos == c05)) + || (num_chars >= 6 && maybe_negate(*pos == c06)) + || (num_chars >= 7 && maybe_negate(*pos == c07)) + || (num_chars >= 8 && maybe_negate(*pos == c08)) + || (num_chars >= 9 && maybe_negate(*pos == c09)) + || (num_chars >= 10 && maybe_negate(*pos == c10)) + || (num_chars >= 11 && maybe_negate(*pos == c11)) + || (num_chars >= 12 && maybe_negate(*pos == c12)) + || (num_chars >= 13 && maybe_negate(*pos == c13)) + || (num_chars >= 15 && maybe_negate(*pos == c15)) + || (num_chars >= 16 && maybe_negate(*pos == c16))) + return pos; + return return_mode == ReturnMode::End ? end : nullptr; } -template +template inline const char * find_first_symbols_sse42(const char * begin, const char * end) { - return find_first_symbols_sse42_impl(begin, end); + return find_first_symbols_sse42_impl(begin, end); } /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do. 
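+/// Usage sketch (illustrative): trimming ASCII spaces with the new "not" variants,
+/// as done by the trim functions above:
+///     const char * first = find_first_not_symbols<' '>(begin, end);         /// first non-space, or end
+///     const char * last = find_last_not_symbols_or_null<' '>(begin, end);   /// last non-space, or nullptr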
-template +template inline const char * find_first_symbols_dispatch(const char * begin, const char * end) { #if defined(__SSE4_2__) if (sizeof...(symbols) >= 5) - return find_first_symbols_sse42(begin, end); + return find_first_symbols_sse42(begin, end); else #endif - return find_first_symbols_sse2(begin, end); + return find_first_symbols_sse2(begin, end); } } @@ -182,7 +220,7 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char * template inline const char * find_first_symbols(const char * begin, const char * end) { - return detail::find_first_symbols_dispatch(begin, end); + return detail::find_first_symbols_dispatch(begin, end); } /// Returning non const result for non const arguments. @@ -190,18 +228,66 @@ inline const char * find_first_symbols(const char * begin, const char * end) template inline char * find_first_symbols(char * begin, char * end) { - return const_cast(detail::find_first_symbols_dispatch(begin, end)); + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_not_symbols(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_not_symbols(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_not_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_not_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); } template inline const char * find_last_symbols_or_null(const char * begin, const char * end) { - return detail::find_last_symbols_or_null_sse2(begin, end); + return detail::find_last_symbols_sse2(begin, end); } template inline char * find_last_symbols_or_null(char * begin, char * end) { - return const_cast(detail::find_last_symbols_or_null_sse2(begin, end)); + return const_cast(detail::find_last_symbols_sse2(begin, end)); +} + +template +inline const char * find_last_not_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_last_symbols_sse2(begin, end); +} + +template +inline char * find_last_not_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_last_symbols_sse2(begin, end)); } From cdd6dca51771520b70df52550b3ae427b4fc82f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 00:10:26 +0300 Subject: [PATCH 170/181] Remove Compiler --- dbms/programs/CMakeLists.txt | 28 +- dbms/programs/clang/CMakeLists.txt | 38 -- .../clang/Compiler-5.0.0/CMakeLists.txt | 53 -- .../programs/clang/Compiler-5.0.0/LICENSE.TXT | 63 -- .../clang/Compiler-5.0.0/cc1_main.cpp | 242 -------- .../clang/Compiler-5.0.0/cc1as_main.cpp | 540 ----------------- dbms/programs/clang/Compiler-5.0.0/driver.cpp | 519 ---------------- dbms/programs/clang/Compiler-5.0.0/lld.cpp | 23 - dbms/programs/clang/Compiler-5.0.1 | 1 - dbms/programs/clang/Compiler-5.0.2 | 1 - .../clang/Compiler-6.0.0/CMakeLists.txt | 54 -- .../programs/clang/Compiler-6.0.0/LICENSE.TXT | 63 -- .../clang/Compiler-6.0.0/cc1_main.cpp | 
242 -------- .../clang/Compiler-6.0.0/cc1as_main.cpp | 540 ----------------- dbms/programs/clang/Compiler-6.0.0/driver.cpp | 520 ---------------- dbms/programs/clang/Compiler-6.0.0/lld.cpp | 23 - dbms/programs/clang/Compiler-6.0.0svn | 1 - dbms/programs/clang/Compiler-6.0.1 | 1 - .../clang/Compiler-7.0.0/CMakeLists.txt | 49 -- .../clang/Compiler-7.0.0/cc1_main.cpp | 239 -------- .../clang/Compiler-7.0.0/cc1as_main.cpp | 572 ------------------ .../Compiler-7.0.0/cc1gen_reproducer_main.cpp | 196 ------ dbms/programs/clang/Compiler-7.0.0/driver.cpp | 514 ---------------- dbms/programs/clang/Compiler-7.0.0/lld.cpp | 150 ----- .../Compiler-7.0.0bundled/CMakeLists.txt | 49 -- .../clang/Compiler-7.0.0bundled/cc1_main.cpp | 243 -------- .../Compiler-7.0.0bundled/cc1as_main.cpp | 555 ----------------- .../clang/Compiler-7.0.0bundled/driver.cpp | 512 ---------------- .../clang/Compiler-7.0.0bundled/lld.cpp | 10 - dbms/programs/clang/Compiler-7.0.0svn | 1 - dbms/programs/clang/Compiler-7.0.1 | 1 - dbms/programs/clang/clickhouse-clang.cpp | 2 - dbms/programs/clang/clickhouse-lld.cpp | 2 - dbms/programs/clang/copy_headers.sh | 100 --- dbms/programs/main.cpp | 16 - dbms/src/Core/Settings.h | 2 +- dbms/src/Interpreters/Aggregator.cpp | 254 +------- dbms/src/Interpreters/Aggregator.h | 58 +- dbms/src/Interpreters/CMakeLists.txt | 67 -- dbms/src/Interpreters/Compiler.cpp | 326 ---------- dbms/src/Interpreters/Compiler.h | 88 --- dbms/src/Interpreters/Context.cpp | 13 - .../Interpreters/InterpreterSelectQuery.cpp | 4 - dbms/src/Interpreters/SpecializedAggregator.h | 215 ------- dbms/src/Interpreters/config_compile.h.in | 26 - dbms/src/Interpreters/tests/CMakeLists.txt | 3 - dbms/src/Interpreters/tests/compiler_test.cpp | 57 -- .../00281_compile_sizeof_packed.reference | 2 - .../00281_compile_sizeof_packed.sql | 2 - .../00568_compile_catch_throw.reference | 2 - .../0_stateless/00568_compile_catch_throw.sh | 14 - 51 files changed, 8 insertions(+), 7288 deletions(-) delete mode 100644 dbms/programs/clang/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT delete mode 100644 dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-5.0.1 delete mode 120000 dbms/programs/clang/Compiler-5.0.2 delete mode 100644 dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT delete mode 100644 dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-6.0.0svn delete mode 120000 dbms/programs/clang/Compiler-6.0.1 delete mode 100644 dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/lld.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt delete mode 
100644 dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-7.0.0svn delete mode 120000 dbms/programs/clang/Compiler-7.0.1 delete mode 100644 dbms/programs/clang/clickhouse-clang.cpp delete mode 100644 dbms/programs/clang/clickhouse-lld.cpp delete mode 100755 dbms/programs/clang/copy_headers.sh delete mode 100644 dbms/src/Interpreters/Compiler.cpp delete mode 100644 dbms/src/Interpreters/Compiler.h delete mode 100644 dbms/src/Interpreters/SpecializedAggregator.h delete mode 100644 dbms/src/Interpreters/config_compile.h.in delete mode 100644 dbms/src/Interpreters/tests/compiler_test.cpp delete mode 100644 dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference delete mode 100644 dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql delete mode 100644 dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference delete mode 100755 dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 03eba470949..0dcd4d7ab91 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -81,7 +81,6 @@ add_subdirectory (extract-from-config) add_subdirectory (compressor) add_subdirectory (copier) add_subdirectory (format) -add_subdirectory (clang) add_subdirectory (obfuscator) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) @@ -89,9 +88,9 @@ if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_COMPILER_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_COMPILER_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_COMPILER_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} 
${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) + target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() @@ -104,10 +103,6 @@ if (CLICKHOUSE_SPLIT_BINARY) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) endif () - if (USE_EMBEDDED_COMPILER) - list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) - endif () - set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS}) @@ -115,10 +110,6 @@ if (CLICKHOUSE_SPLIT_BINARY) install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse) else () - if (USE_EMBEDDED_COMPILER) - # before add_executable ! - link_directories (${LLVM_LIBRARY_DIRS}) - endif () add_executable (clickhouse main.cpp) target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils) target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) @@ -154,9 +145,6 @@ else () if (ENABLE_CLICKHOUSE_OBFUSCATOR) clickhouse_target_link_split_lib(clickhouse obfuscator) endif () - if (USE_EMBEDDED_COMPILER) - target_link_libraries(clickhouse PRIVATE clickhouse-compiler-lib) - endif () set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SERVER) @@ -213,18 +201,8 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) endif() - # install always because depian package want this files: - add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse) - add_custom_target (clickhouse-lld ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-lld DEPENDS clickhouse) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-clang clickhouse-lld) - install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-clang - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-lld - DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) endif () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt deleted file mode 100644 index 82f520614f4..00000000000 --- a/dbms/programs/clang/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -if (USE_EMBEDDED_COMPILER) - add_subdirectory ("Compiler-${LLVM_VERSION}") -endif () - -if (CLICKHOUSE_SPLIT_BINARY) - if (USE_EMBEDDED_COMPILER) - link_directories (${LLVM_LIBRARY_DIRS}) - add_executable (clickhouse-clang clickhouse-clang.cpp) - target_link_libraries (clickhouse-clang PRIVATE clickhouse-compiler-lib) - add_executable (clickhouse-lld clickhouse-lld.cpp) - target_link_libraries (clickhouse-lld PRIVATE clickhouse-compiler-lib) - install (TARGETS clickhouse-clang clickhouse-lld RUNTIME DESTINATION 
${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - endif () -endif () - -set (TMP_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/${INTERNAL_COMPILER_HEADERS_RELATIVE}") -# Make and install empty dir for debian package if compiler disabled -add_custom_target (make-headers-directory ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${TMP_HEADERS_DIR}) -install (DIRECTORY ${TMP_HEADERS_DIR} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/${INTERNAL_COMPILER_HEADERS_DIR} COMPONENT clickhouse) -# TODO: fix on macos copy_headers.sh: sed --posix - -if (USE_EMBEDDED_COMPILER) - set (COPY_HEADERS_COMPILER "${CMAKE_CURRENT_BINARY_DIR}/../${INTERNAL_COMPILER_EXECUTABLE}") - set (COPY_HEADERS_DEPENDS clickhouse-clang) -elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}) - set (COPY_HEADERS_COMPILER "${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}") -endif () - -if (COPY_HEADERS_COMPILER) - add_custom_target (copy-headers [ -f ${TMP_HEADERS_DIR}/dbms/src/Interpreters/SpecializedAggregator.h ] || env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} CMAKE_CXX_COMPILER_VERSION=${CMAKE_CXX_COMPILER_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES copy_headers.sh) - - if (USE_INTERNAL_LLVM_LIBRARY) - set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers") - set (CLANG_HEADERS_DEST "${TMP_HEADERS_DIR}/usr/local/lib/clang/${LLVM_VERSION}/include") # original: ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include - add_custom_target (copy-headers-clang ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) - add_dependencies (copy-headers copy-headers-clang) - endif () -endif () diff --git a/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt deleted file mode 100644 index 83e38cea257..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver clangFrontend clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic - -lldCOFF -lldDriver -lldELF -#lldMinGW -lldMachO -lldReaderWriter -lldYAML -#lldCommon -lldCore -lldConfig - -${REQUIRED_LLVM_LIBRARIES} - -LLVMSupport - -#Polly -#PollyISL -#PollyPPCG - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT b/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT deleted file mode 100644 index b452ca2efd8..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT +++ /dev/null @@ -1,63 +0,0 @@ -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2007-2016 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. -This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. 
- -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- - - diff --git a/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp deleted file mode 100644 index f6eabaf3387..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp +++ /dev/null @@ -1,242 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. 
-static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. 
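
The stack-limit logic deleted above is a general POSIX idiom: raise the soft RLIMIT_STACK toward a target, capped by the hard limit. A minimal standalone sketch, assuming only <sys/resource.h> (kWantedStack is an illustrative stand-in for kSufficientStack):

#include <cstdio>
#include <sys/resource.h>

// Try to raise the soft stack limit to `wanted` bytes, capped by the hard
// limit. Returns true if the soft limit ends up at least `wanted`.
static bool raiseStackLimit(rlim_t wanted) {
    struct rlimit rlim;
    if (getrlimit(RLIMIT_STACK, &rlim) != 0)
        return false;
    if (rlim.rlim_cur == RLIM_INFINITY || rlim.rlim_cur >= wanted)
        return true;  // Already sufficient.
    rlim.rlim_cur = (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= wanted)
                        ? wanted
                        : rlim.rlim_max;  // The hard limit caps what we may ask for.
    if (setrlimit(RLIMIT_STACK, &rlim) != 0)
        return false;
    return rlim.rlim_cur >= wanted;
}

int main() {
    const rlim_t kWantedStack = 8u << 20;  // 8 MiB, matching kSufficientStack.
    std::printf("stack limit %s\n",
                raiseStackLimit(kWantedStack) ? "raised" : "not raised");
}
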
- auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp deleted file mode 100644 index 2fc2b508ef2..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp +++ /dev/null @@ -1,540 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
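
The option accessors used here (getLastArgValue, hasArg) implement the usual driver rule that the last occurrence of an option wins. A hedged plain-C++ sketch of that rule, with no LLVM types (getLastValue is an illustrative helper, not the library API):

#include <string>
#include <vector>

// Return the value of the last "--opt=value" occurrence, or `def` if absent.
static std::string getLastValue(const std::vector<std::string> &args,
                                const std::string &opt, std::string def) {
    const std::string prefix = opt + "=";
    for (auto it = args.rbegin(); it != args.rend(); ++it)
        if (it->compare(0, prefix.size(), prefix) == 0)
            return it->substr(prefix.size());
    return def;
}
// getLastValue({"--triple=a", "--triple=b"}, "--triple", "") == "b"
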
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
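
The StringSwitch above is a fluent string-to-enum map with a default; in plain C++ the -compress-debug-sections= mapping reduces to this (a sketch, with the enum shortened for illustration):

#include <string>

enum class DebugCompression { None, Z, GNU };

static DebugCompression parseCompression(const std::string &v) {
    if (v == "zlib")     return DebugCompression::Z;
    if (v == "zlib-gnu") return DebugCompression::GNU;
    return DebugCompression::None;  // "none" and anything unrecognized.
}
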
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - std::unique_ptr FDOS = getOutputStream(Opts, Diags, IsBinary); - if (!FDOS) - return true; - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, CodeModel::Default, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
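
The feature string assembled above is a plain comma join of the "+feature"/"-feature" list. As a standalone sketch (joinFeatures is an illustrative name):

#include <string>
#include <vector>

static std::string joinFeatures(const std::vector<std::string> &features) {
    std::string fs;
    for (size_t i = 0; i < features.size(); ++i) {
        if (i)
            fs += ",";
        fs += features[i];
    }
    return fs;  // {"+sse4.2", "-avx"} -> "+sse4.2,-avx"
}
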
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - MCCodeEmitter *CE = nullptr; - MCAsmBackend *MAB = nullptr; - if (Opts.ShowEncoding) { - CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple, Opts.CPU, Options); - } - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, CE, MAB, Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple, - Opts.CPU, Options); - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, *MAB, *Out, CE, *STI, Opts.RelaxAll, - Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed && Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. 
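
The -defsym loop above splits each name=value pair on '=' and parses the value with auto-detected radix (getAsInteger(0, ...)); the driver has already validated the value, which is why the parse result is deliberately unchecked. A standalone sketch of the same split:

#include <cstdint>
#include <cstdlib>
#include <string>
#include <utility>

// Split "sym=123" into {"sym", 123}. Assumes the value was pre-validated.
static std::pair<std::string, int64_t> parseDefsym(const std::string &def) {
    std::string::size_type eq = def.find('=');
    if (eq == std::string::npos)
        return {def, 0};  // No '='; the driver rejects this case earlier.
    std::string sym = def.substr(0, eq);
    // Base 0 auto-detects decimal, hex (0x...), and octal, like getAsInteger(0, ...).
    int64_t value = std::strtoll(def.c_str() + eq + 1, nullptr, 0);
    return {sym, value};
}
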
- AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-5.0.0/driver.cpp b/dbms/programs/clang/Compiler-5.0.0/driver.cpp deleted file mode 100644 index 5aec2759f9e..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/driver.cpp +++ /dev/null @@ -1,519 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. -// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. 
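
Handing -mllvm options to cl::ParseCommandLineOptions above requires a classic argc/argv shim: a synthetic program name in slot 0 and a trailing nullptr. A minimal sketch of that conversion (plain C++; makeArgv is an illustrative name):

#include <memory>
#include <string>
#include <vector>

// Build a null-terminated argv from owned strings; `prog` and `storage`
// must outlive the returned array, since it borrows their c_str() pointers.
static std::unique_ptr<const char *[]>
makeArgv(const std::string &prog, const std::vector<std::string> &storage) {
    auto argv = std::make_unique<const char *[]>(storage.size() + 2);
    argv[0] = prog.c_str();
    for (size_t i = 0; i < storage.size(); ++i)
        argv[i + 1] = storage[i].c_str();
    argv[storage.size() + 1] = nullptr;
    return argv;
}
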
- if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
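
GetStableCStr above leans on the fact that std::set is node-based: elements never move, so the c_str() pointers stay valid for the lifetime of the set, and equal strings intern to a single stable pointer. The idiom in isolation:

#include <cassert>
#include <cstring>
#include <set>
#include <string>

static const char *intern(std::set<std::string> &pool, const std::string &s) {
    // insert() returns the existing node when `s` is already present, so
    // equal strings always yield the same pointer.
    return pool.insert(s).first->c_str();
}

int main() {
    std::set<std::string> pool;
    const char *a = intern(pool, "-O2");
    const char *b = intern(pool, "-O2");
    assert(a == b && std::strcmp(a, "-O2") == 0);
}
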
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); - -static void insertTargetAndModeArgs(StringRef Target, StringRef Mode, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - if (!Mode.empty()) { - // Add the mode flag to the arguments. 
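
The override string is tokenized by the raw-pointer loop above; the same split-on-spaces walk as a self-contained sketch (splitEdits is an illustrative name):

#include <cstring>
#include <string>
#include <vector>

static std::vector<std::string> splitEdits(const char *s) {
    std::vector<std::string> edits;
    while (*s) {
        const char *end = std::strchr(s, ' ');
        if (!end)
            end = s + std::strlen(s);
        if (end != s)
            edits.emplace_back(s, end);  // Skip empty tokens from space runs.
        s = (*end != '\0') ? end + 1 : end;
    }
    return edits;
}
// splitEdits("# O0 +-g") -> {"#", "O0", "+-g"}

Per the edit language documented above, CCC_OVERRIDE_OPTIONS="# O0 +-g" would silence the change log, rewrite any -O flag to -O0, and append -g.
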
- auto it = ArgVector.begin(); - if (it != ArgVector.end()) - ++it; - ArgVector.insert(it, GetStableCStr(SavedStrings, Mode)); - } - - if (!Target.empty()) { - auto it = ArgVector.begin(); - if (it != ArgVector.end()) - ++it; - const char *arr[] = {"-target", GetStableCStr(SavedStrings, Target)}; - ArgVector.insert(it, std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
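
getCLEnvVarOptions above rewrites the first '#' of each token to '='; presumably this mirrors the MSVC convention for the CL environment variable, where '#' stands in for '='. On owned strings the rewrite is simply:

#include <string>
#include <vector>

static void fixNumberSigns(std::vector<std::string> &opts) {
    for (std::string &opt : opts) {
        std::string::size_type pos = opt.find('#');
        if (pos != std::string::npos)
            opt[pos] = '=';  // Only the first '#' is rewritten, as above.
    }
}
// {"/D", "FOO#1"} -> {"/D", "FOO=1"}
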
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, char **argv_) { - llvm::sys::PrintStackTraceOnErrorSignal(argv_[0]); - llvm::PrettyStackTraceProgram X(argc_, argv_); - llvm::llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - SmallVector argv; - llvm::SpecificBumpPtrAllocator ArgAllocator; - std::error_code EC = llvm::sys::Process::GetArgumentVector( - argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator); - if (EC) { - llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n'; - return 1; - } - - llvm::InitializeAllTargets(); - std::string ProgName = argv[0]; - std::pair TargetAndMode = - ToolChain::getTargetAndModeFromProgramName(ProgName); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (TargetAndMode.second == "--driver-mode=cl" || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - - insertTargetAndModeArgs(TargetAndMode.first, TargetAndMode.second, argv, - SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. - // On Windows, abort will return an exit code of 3. 
In these cases,
-      // generate additional diagnostic information if possible.
-      bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
-#ifdef LLVM_ON_WIN32
-      DiagnoseCrash |= CommandRes == 3;
-#endif
-      if (DiagnoseCrash) {
-        TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
-        break;
-      }
-    }
-  }
-
-  Diags.getClient()->finish();
-
-  // If any timers were active but haven't been destroyed yet, print their
-  // results now.  This happens in -disable-free mode.
-  llvm::TimerGroup::printAll(llvm::errs());
-
-#ifdef LLVM_ON_WIN32
-  // Exit status should not be negative on Win32, unless abnormal termination.
-  // Once abnormal termination was caught, negative status should not be
-  // propagated.
-  if (Res < 0)
-    Res = 1;
-#endif
-
-  // If we have multiple failing commands, we return the result of the first
-  // failing command.
-  return Res;
-}
diff --git a/dbms/programs/clang/Compiler-5.0.0/lld.cpp b/dbms/programs/clang/Compiler-5.0.0/lld.cpp
deleted file mode 100644
index 5af29868864..00000000000
--- a/dbms/programs/clang/Compiler-5.0.0/lld.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "lld/Driver/Driver.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
-
-using namespace lld;
-using namespace llvm;
-using namespace llvm::sys;
-
-int mainEntryClickHouseLLD(int Argc, char **Argv)
-{
-    // Standard set up, so program fails gracefully.
-    sys::PrintStackTraceOnErrorSignal(Argv[0]);
-    PrettyStackTraceProgram StackPrinter(Argc, Argv);
-    llvm_shutdown_obj Shutdown;
-
-    std::vector<const char *> Args(Argv, Argv + Argc);
-    return !elf::link(Args, true);
-}
diff --git a/dbms/programs/clang/Compiler-5.0.1 b/dbms/programs/clang/Compiler-5.0.1
deleted file mode 120000
index 7c8af57399f..00000000000
--- a/dbms/programs/clang/Compiler-5.0.1
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-5.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-5.0.2 b/dbms/programs/clang/Compiler-5.0.2
deleted file mode 120000
index 7c8af57399f..00000000000
--- a/dbms/programs/clang/Compiler-5.0.2
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-5.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt
deleted file mode 100644
index 4a046674afc..00000000000
--- a/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-
-add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG)
-
-link_directories(${LLVM_LIBRARY_DIRS})
-
-add_library(clickhouse-compiler-lib
-    driver.cpp
-    cc1_main.cpp
-    cc1as_main.cpp
-    lld.cpp)
-
-target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0)
-
-string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # can't compile with -fno-rtti
-
-llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
-
-message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}")
-
-target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS})
-
-# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory.
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver -clangFrontend -clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic - -lldCOFF -lldDriver -lldELF -lldMinGW -lldMachO -lldReaderWriter -lldYAML -lldCommon -lldCore -#lldWasm - -${REQUIRED_LLVM_LIBRARIES} - -#Polly -#PollyISL -#PollyPPCG - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT b/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT deleted file mode 100644 index b452ca2efd8..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT +++ /dev/null @@ -1,63 +0,0 @@ -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2007-2016 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. -This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. 
- -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- - - diff --git a/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp deleted file mode 100644 index f6eabaf3387..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp +++ /dev/null @@ -1,242 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. 
-static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. 
- auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp deleted file mode 100644 index caf8409054a..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp +++ /dev/null @@ -1,540 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
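AssemblerInvocation above packs its boolean options into one-bit bitfields (`unsigned RelaxAll : 1;` and so on), which keeps the option struct compact at the cost of the fields not being addressable. A minimal sketch of the same layout trick, with hypothetical field names; the original struct picks back up with RelocationModel just below:

    #include <cstdio>

    struct Flags {
        // Each flag occupies a single bit within one unsigned word.
        unsigned RelaxAll : 1;
        unsigned NoExecStack : 1;
        unsigned FatalWarnings : 1;
    };

    int main() {
        Flags F{};              // value-initialization zeroes every field
        F.RelaxAll = 1;
        std::printf("%u %zu\n", (unsigned)F.RelaxAll,
                    sizeof(Flags));   // typically "1 4"
    }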
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
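Both option translations above lean on llvm::StringSwitch, a builder that compares a string against each Case in order and yields the first match, else the Default. A plain C++ stand-in for the -filetype mapping (FileKind and parseFileType are illustrative names, not from the original):

    #include <cstring>
    #include <cstdio>

    enum FileKind { FK_Asm, FK_Null, FK_Obj, FK_Invalid };

    // Equivalent of StringSwitch<unsigned>(Name).Case("asm", FT_Asm)...
    static FileKind parseFileType(const char *Name) {
        if (std::strcmp(Name, "asm") == 0)  return FK_Asm;
        if (std::strcmp(Name, "null") == 0) return FK_Null;
        if (std::strcmp(Name, "obj") == 0)  return FK_Obj;
        return FK_Invalid;  // caller reports err_drv_invalid_value
    }

    int main() { std::printf("%d\n", parseFileType("obj")); } // 2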
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - std::unique_ptr FDOS = getOutputStream(Opts, Diags, IsBinary); - if (!FDOS) - return true; - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
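The loop above that builds FS is an ordinary comma-join: target features such as "+sse4.2" or "-avx" are concatenated into the single string the subtarget constructor expects. The same join, written standalone:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> Features = {"+sse4.2", "-avx"};
        std::string FS;
        for (size_t I = 0; I != Features.size(); ++I)
            FS += (I ? "," : "") + Features[I];
        std::printf("%s\n", FS.c_str()); // +sse4.2,-avx
    }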
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - MCCodeEmitter *CE = nullptr; - MCAsmBackend *MAB = nullptr; - if (Opts.ShowEncoding) { - CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options); - } - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, CE, MAB, Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options); - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::unique_ptr(MAB), *Out, std::unique_ptr(CE), *STI, - Opts.RelaxAll, Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value = 0; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed && Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. 
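The -defsym handling shown above splits each "SYM=VALUE" argument at the first '=' and trusts the driver to have validated the integer. A self-contained version of that split; parseDefsym is a hypothetical helper:

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Split "SYM=VALUE" and parse VALUE with base auto-detection (0),
    // as Val.getAsInteger(0, Value) does above.
    static bool parseDefsym(const std::string &S, std::string &Sym,
                            long long &Value) {
        size_t Eq = S.find('=');
        if (Eq == std::string::npos)
            return false;
        Sym = S.substr(0, Eq);
        Value = std::strtoll(S.c_str() + Eq + 1, nullptr, 0);
        return true;
    }

    int main() {
        std::string Sym; long long V = 0;
        if (parseDefsym("FOO=0x2a", Sym, V))
            std::printf("%s = %lld\n", Sym.c_str(), V); // FOO = 42
    }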
- AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, - /*ShowAllAliases=*/false); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-6.0.0/driver.cpp b/dbms/programs/clang/Compiler-6.0.0/driver.cpp deleted file mode 100644 index 30511b8253a..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/driver.cpp +++ /dev/null @@ -1,520 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. 
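The -mllvm block above shows the standard way to feed collected strings back into a parser that wants argc/argv: allocate one extra slot, put a fake program name in slot 0, keep a trailing null terminator, and make sure the backing strings outlive the call. A sketch of the same construction without the LLVM helpers ("-debug-pass=Structure" is just an example option):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> LLVMArgs = {"-debug-pass=Structure"};
        // Build a null-terminated argv; the strings must stay alive.
        std::vector<const char *> Argv;
        Argv.push_back("clang (LLVM option parsing)");
        for (const std::string &A : LLVMArgs)
            Argv.push_back(A.c_str());
        Argv.push_back(nullptr);
        std::printf("argc = %zu\n", Argv.size() - 1); // 2
    }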
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. - if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
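The edit language documented above composes left to right. For example, setting CCC_OVERRIDE_OPTIONS to `# O1 x-Werror +-v` silences the change log (leading '#'), removes any existing -O flag and appends -O1 ('O1'), deletes every literal -Werror ('x-Werror'), and finally appends -v ('+-v') to the command line.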
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); - -static void insertTargetAndModeArgs(const ParsedClangName &NameParts, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - // Put target and mode arguments at the start of argument list so that - // arguments specified in command line could override them. Avoid putting - // them at index 0, as an option like '-cc1' must remain the first. - auto InsertionPoint = ArgVector.begin(); - if (InsertionPoint != ArgVector.end()) - ++InsertionPoint; - - if (NameParts.DriverMode) { - // Add the mode flag to the arguments. 
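GetStableCStr above is a tiny string interner: inserting into a std::set<std::string> returns a node whose c_str() stays valid for the set's lifetime, because std::set never relocates its nodes. That pointer stability is what lets the driver stash const char* values into argv vectors. The idiom in isolation (intern is a hypothetical name):

    #include <cstdio>
    #include <set>
    #include <string>

    static const char *intern(std::set<std::string> &Saved,
                              const std::string &S) {
        // insert() yields an iterator to the (possibly pre-existing)
        // element; set nodes are stable, so the pointer stays valid.
        return Saved.insert(S).first->c_str();
    }

    int main() {
        std::set<std::string> Saved;
        const char *A = intern(Saved, "-target");
        const char *B = intern(Saved, "-target");
        std::printf("%s %d\n", A, A == B); // -target 1
    }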
- ArgVector.insert(InsertionPoint, - GetStableCStr(SavedStrings, NameParts.DriverMode)); - } - - if (NameParts.TargetIsValid) { - const char *arr[] = {"-target", GetStableCStr(SavedStrings, - NameParts.TargetPrefix)}; - ArgVector.insert(InsertionPoint, std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
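getCLEnvVarOptions above mirrors cl.exe's convention for the CL and _CL_ environment variables, where '#' may stand in for '=' inside an option; only the first '#' per option is rewritten. A one-shot illustration of that transform:

    #include <cstdio>
    #include <cstring>

    int main() {
        char Opt[] = "/DFOO#1";        // as it appears in the CL env var
        if (char *Hash = std::strchr(Opt, '#'))
            *Hash = '=';               // first '#' becomes '='
        std::puts(Opt);                // prints /DFOO=1
    }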
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, char **argv_) { - llvm::sys::PrintStackTraceOnErrorSignal(argv_[0]); - llvm::PrettyStackTraceProgram X(argc_, argv_); - llvm::llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - SmallVector argv; - llvm::SpecificBumpPtrAllocator ArgAllocator; - std::error_code EC = llvm::sys::Process::GetArgumentVector( - argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator); - if (EC) { - llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n'; - return 1; - } - - llvm::InitializeAllTargets(); - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
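ExecuteCC1Tool above is keyed by whatever follows the literal "-cc1" prefix; the call just below passes argv[1] + 4, so "-cc1" dispatches with Tool == "" (cc1_main) and "-cc1as" with Tool == "as" (cc1as_main). The pointer arithmetic in isolation:

    #include <cstdio>
    #include <cstring>

    int main() {
        const char *Arg = "-cc1as";
        if (std::strncmp(Arg, "-cc1", 4) == 0)
            std::printf("tool suffix: '%s'\n", Arg + 4); // 'as'
    }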
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - TheDriver.setTargetAndMode(TargetAndMode); - - insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. 
- // On Windows, abort will return an exit code of 3. In these cases, - // generate additional diagnostic information if possible. - bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; -#ifdef LLVM_ON_WIN32 - DiagnoseCrash |= CommandRes == 3; -#endif - if (DiagnoseCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); - break; - } - } - } - - Diags.getClient()->finish(); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - -#ifdef LLVM_ON_WIN32 - // Exit status should not be negative on Win32, unless abnormal termination. - // Once abnormal termiation was caught, negative status should not be - // propagated. - if (Res < 0) - Res = 1; -#endif - - // If we have multiple failing commands, we return the result of the first - // failing command. - return Res; -} diff --git a/dbms/programs/clang/Compiler-6.0.0/lld.cpp b/dbms/programs/clang/Compiler-6.0.0/lld.cpp deleted file mode 100644 index 696ff84dfe6..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/lld.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "lld/Common/Driver.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Signals.h" - -using namespace lld; -using namespace llvm; -using namespace llvm::sys; - -int mainEntryClickHouseLLD(int Argc, char **Argv) -{ - // Standard set up, so program fails gracefully. - sys::PrintStackTraceOnErrorSignal(Argv[0]); - PrettyStackTraceProgram StackPrinter(Argc, Argv); - llvm_shutdown_obj Shutdown; - - std::vector Args(Argv, Argv + Argc); - return !elf::link(Args, true); -} diff --git a/dbms/programs/clang/Compiler-6.0.0svn b/dbms/programs/clang/Compiler-6.0.0svn deleted file mode 120000 index 7eba9cc37d0..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0svn +++ /dev/null @@ -1 +0,0 @@ -Compiler-6.0.0 \ No newline at end of file diff --git a/dbms/programs/clang/Compiler-6.0.1 b/dbms/programs/clang/Compiler-6.0.1 deleted file mode 120000 index 7eba9cc37d0..00000000000 --- a/dbms/programs/clang/Compiler-6.0.1 +++ /dev/null @@ -1 +0,0 @@ -Compiler-6.0.0 \ No newline at end of file diff --git a/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt deleted file mode 100644 index a042c821ec4..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1gen_reproducer_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
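Two exit-status conventions surface in the files deleted above. First, mainEntryClickHouseLLD relies on lld's driver contract: elf::link returns true on success, so negating it yields the usual 0-on-success process status, just as `return !Success;` does in cc1_main. Second, the status 70 that the driver's crash handling checks for corresponds to EX_SOFTWARE ("internal software error") from BSD's <sysexits.h>, which is why the cc1 fatal-error handler below exits with 70 when it wants crash diagnostics generated.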
- -target_link_libraries(clickhouse-compiler-lib PRIVATE -clangBasic clangCodeGen clangDriver -clangFrontend -clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic -clangCrossTU clangIndex - -lldCOFF -lldDriver -lldELF -lldMinGW -lldMachO -lldReaderWriter -lldYAML -lldCommon -lldCore - -${REQUIRED_LLVM_LIBRARIES} - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp deleted file mode 100644 index 214bfa72476..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp +++ /dev/null @@ -1,239 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Basic/Stack.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 
70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace() { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = DesiredStackSize - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && - rlim.rlim_cur < rlim_t(DesiredStackSize)) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || - rlim.rlim_max >= rlim_t(DesiredStackSize)) - rlim.rlim_cur = DesiredStackSize; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != DesiredStackSize) - return; - } - - // We should now have a stack of size at least DesiredStackSize. Ensure - // that we can actually use that much, if necessary. 
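The long fscanf format above skips its way to two fields of /proc/self/stat: field 29 (kstkesp, the task's saved stack pointer) and field 51 (env_end, the end of the environment block), per proc(5); their distance approximates current stack usage. On hardened kernels kstkesp may read as 0, in which case the surrounding code effectively falls back to the 512K default. A field-indexing rewrite of the same probe (Linux-only, assumptions as stated):

    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>

    int main() {
        std::ifstream StatFile("/proc/self/stat");
        std::string Line;
        std::getline(StatFile, Line);
        // comm (field 2) may contain spaces, so resync after its ')'.
        std::istringstream Rest(Line.substr(Line.rfind(')') + 2));
        std::string Tok;
        unsigned long StackPtr = 0, EnvEnd = 0;
        for (int Field = 3; Rest >> Tok; ++Field) {
            if (Field == 29) StackPtr = std::stoul(Tok);
            if (Field == 51) { EnvEnd = std::stoul(Tok); break; }
        }
        std::cout << "approx. stack usage: "
                  << (StackPtr < EnvEnd ? EnvEnd - StackPtr
                                        : StackPtr - EnvEnd)
                  << " bytes\n";
    }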
- ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. - auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp deleted file mode 100644 index d93b1f5cb1d..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp +++ /dev/null @@ -1,572 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - std::map DebugPrefixMap; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - std::string SplitDwarfFile; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. 
- }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. - std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - auto ArgString = A->getAsString(Args); - std::string Nearest; - if (OptTbl->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) - Diags.Report(diag::err_drv_unknown_argument) << ArgString; - else - Diags.Report(diag::err_drv_unknown_argument_with_suggestion) - << ArgString << Nearest; - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
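Compared with the 6.0.0 copy deleted earlier, this AssemblerInvocation grows two fields: DebugPrefixMap, parsed just below from -fdebug-prefix-map=OLD=NEW entries, and SplitDwarfFile for split-DWARF (.dwo) output; the unknown-argument loop above also gained near-miss spelling suggestions via OptTable::findNearest. A minimal sketch of what one prefix-map entry does to a source path (remapPath is a hypothetical helper; the real rewriting happens inside MC):

    #include <iostream>
    #include <map>
    #include <string>

    // Rewrite the first matching prefix of Path, as a prefix map does.
    static std::string remapPath(const std::map<std::string, std::string> &Map,
                                 std::string Path) {
        for (const auto &KV : Map)
            if (Path.compare(0, KV.first.size(), KV.first) == 0)
                return KV.second + Path.substr(KV.first.size());
        return Path;
    }

    int main() {
        std::map<std::string, std::string> Map{{"/build/src", "."}};
        std::cout << remapPath(Map, "/build/src/main.cpp") << '\n'; // ./main.cpp
    }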
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) - Opts.DebugPrefixMap.insert(StringRef(Arg).split('=')); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(StringRef Path, DiagnosticsEngine &Diags, bool Binary) { - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Path != "-") - sys::RemoveFileOnSignal(Path); - - std::error_code EC; - auto Out = llvm::make_unique( - Path, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - std::unique_ptr FDOS = - getOutputStream(Opts.OutputPath, Diags, IsBinary); - if (!FDOS) - return true; - std::unique_ptr DwoOS; - if (!Opts.SplitDwarfFile.empty()) - DwoOS = getOutputStream(Opts.SplitDwarfFile, Diags, IsBinary); - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.DebugPrefixMap.empty()) - for (const auto &KV : Opts.DebugPrefixMap) - Ctx.addDebugPrefixMapEntry(KV.first, KV.second); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
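-  // The branches below pick one of three MCStreamer flavors based on the
-  // -filetype option parsed earlier:
-  //   asm  -> createAsmStreamer       (textual .s output)
-  //   null -> createNullStreamer      (parse only, for timing runs)
-  //   obj  -> createMCObjectStreamer  (relocatable object file)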
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - - std::unique_ptr CE; - if (Opts.ShowEncoding) - CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB), - Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - std::unique_ptr CE( - TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - std::unique_ptr OW = - DwoOS ? MAB->createDwoObjectWriter(*Out, *DwoOS) - : MAB->createObjectWriter(*Out); - - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI, - Opts.RelaxAll, Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - // Assembly to object compilation should leverage assembly info. - Str->setUseAssemblerInfoForParsing(true); - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value = 1; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed) { - if (Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - if (!Opts.SplitDwarfFile.empty() && Opts.SplitDwarfFile != "-") - sys::fs::remove(Opts.SplitDwarfFile); - } - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. 
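-  // The printer below writes to stderr and is handed to the
-  // DiagnosticsEngine; with the prefix set here, emitted diagnostics read
-  // roughly like (illustrative):
-  //   clang -cc1as: error: unknown target triple '...'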
- IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. - AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, - /*ShowAllAliases=*/false); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp deleted file mode 100644 index a4c034d8d35..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===-- cc1gen_reproducer_main.cpp - Clang reproducer generator ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1gen-reproducer functionality, which -// generates reproducers for invocations for clang-based tools. 
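-// A "reproducer" here is the crash-report bundle the driver can emit for a
-// failing job: the temporary input files plus the invocation, together with
-// the metainfo blob assembled by generateReproducerMetaInfo below.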
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/VirtualFileSystem.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" - -using namespace clang; - -namespace { - -struct UnsavedFileHash { - std::string Name; - std::string MD5; -}; - -struct ClangInvocationInfo { - std::string Toolchain; - std::string LibclangOperation; - std::string LibclangOptions; - std::vector Arguments; - std::vector InvocationArguments; - std::vector UnsavedFileHashes; - bool Dump = false; -}; - -} // end anonymous namespace - -LLVM_YAML_IS_SEQUENCE_VECTOR(UnsavedFileHash) - -namespace llvm { -namespace yaml { - -template <> struct MappingTraits { - static void mapping(IO &IO, UnsavedFileHash &Info) { - IO.mapRequired("name", Info.Name); - IO.mapRequired("md5", Info.MD5); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, ClangInvocationInfo &Info) { - IO.mapRequired("toolchain", Info.Toolchain); - IO.mapOptional("libclang.operation", Info.LibclangOperation); - IO.mapOptional("libclang.opts", Info.LibclangOptions); - IO.mapRequired("args", Info.Arguments); - IO.mapOptional("invocation-args", Info.InvocationArguments); - IO.mapOptional("unsaved_file_hashes", Info.UnsavedFileHashes); - } -}; - -} // end namespace yaml -} // end namespace llvm - -static std::string generateReproducerMetaInfo(const ClangInvocationInfo &Info) { - std::string Result; - llvm::raw_string_ostream OS(Result); - OS << '{'; - bool NeedComma = false; - auto EmitKey = [&](StringRef Key) { - if (NeedComma) - OS << ", "; - NeedComma = true; - OS << '"' << Key << "\": "; - }; - auto EmitStringKey = [&](StringRef Key, StringRef Value) { - if (Value.empty()) - return; - EmitKey(Key); - OS << '"' << Value << '"'; - }; - EmitStringKey("libclang.operation", Info.LibclangOperation); - EmitStringKey("libclang.opts", Info.LibclangOptions); - if (!Info.InvocationArguments.empty()) { - EmitKey("invocation-args"); - OS << '['; - for (const auto &Arg : llvm::enumerate(Info.InvocationArguments)) { - if (Arg.index()) - OS << ','; - OS << '"' << Arg.value() << '"'; - } - OS << ']'; - } - OS << '}'; - // FIXME: Compare unsaved file hashes and report mismatch in the reproducer. - if (Info.Dump) - llvm::outs() << "REPRODUCER METAINFO: " << OS.str() << "\n"; - return std::move(OS.str()); -} - -/// Generates a reproducer for a set of arguments from a specific invocation. 
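-/// The flow: build a Compilation for Argv with a throw-away Driver, take
-/// the first Command from its job list, and ask the Driver for a
-/// CompilationDiagnosticReport that carries the YAML-derived metainfo.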
-static llvm::Optional -generateReproducerForInvocationArguments(ArrayRef Argv, - const ClangInvocationInfo &Info) { - using namespace driver; - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(Argv[0]); - - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions; - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new IgnoringDiagConsumer()); - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - Driver TheDriver(Argv[0], llvm::sys::getDefaultTargetTriple(), Diags); - TheDriver.setTargetAndMode(TargetAndMode); - - std::unique_ptr C(TheDriver.BuildCompilation(Argv)); - if (C && !C->containsError()) { - for (const auto &J : C->getJobs()) { - if (const Command *Cmd = dyn_cast(&J)) { - Driver::CompilationDiagnosticReport Report; - TheDriver.generateCompilationDiagnostics( - *C, *Cmd, generateReproducerMetaInfo(Info), &Report); - return Report; - } - } - } - - return None; -} - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes); - -static void printReproducerInformation( - llvm::raw_ostream &OS, const ClangInvocationInfo &Info, - const driver::Driver::CompilationDiagnosticReport &Report) { - OS << "REPRODUCER:\n"; - OS << "{\n"; - OS << R"("files":[)"; - for (const auto &File : llvm::enumerate(Report.TemporaryFiles)) { - if (File.index()) - OS << ','; - OS << '"' << File.value() << '"'; - } - OS << "]\n}\n"; -} - -int cc1gen_reproducer_main(ArrayRef Argv, const char *Argv0, - void *MainAddr) { - if (Argv.size() < 1) { - llvm::errs() << "error: missing invocation file\n"; - return 1; - } - // Parse the invocation descriptor. - StringRef Input = Argv[0]; - llvm::ErrorOr> Buffer = - llvm::MemoryBuffer::getFile(Input); - if (!Buffer) { - llvm::errs() << "error: failed to read " << Input << ": " - << Buffer.getError().message() << "\n"; - return 1; - } - llvm::yaml::Input YAML(Buffer.get()->getBuffer()); - ClangInvocationInfo InvocationInfo; - YAML >> InvocationInfo; - if (Argv.size() > 1 && Argv[1] == StringRef("-v")) - InvocationInfo.Dump = true; - - // Create an invocation that will produce the reproducer. - std::vector DriverArgs; - for (const auto &Arg : InvocationInfo.Arguments) - DriverArgs.push_back(Arg.c_str()); - std::string Path = GetExecutablePath(Argv0, /*CanonicalPrefixes=*/true); - DriverArgs[0] = Path.c_str(); - llvm::Optional Report = - generateReproducerForInvocationArguments(DriverArgs, InvocationInfo); - - // Emit the information about the reproduce files to stdout. - int Result = 1; - if (Report) { - printReproducerInformation(llvm::outs(), InvocationInfo, *Report); - Result = 0; - } - - // Remove the input file. - llvm::sys::fs::remove(Input); - return Result; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/driver.cpp b/dbms/programs/clang/Compiler-7.0.0/driver.cpp deleted file mode 100644 index 79d71b08ba7..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/driver.cpp +++ /dev/null @@ -1,514 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. 
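-// In this bundled copy the entry point is mainEntryClickHouseClang rather
-// than main, so that the driver can be linked straight into the clickhouse
-// binary alongside the lld entry point.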
-// -//===----------------------------------------------------------------------===// - -#include "clang/Driver/Driver.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. - if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
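-  // Illustrative example: applied in sequence, the edits "x-Werror", "+-g"
-  // and "Os" would drop every literal -Werror, append -g, then strip any
-  // -O / -O<sz0-9> flag and append -Os.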
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1gen_reproducer_main(ArrayRef Argv, - const char *Argv0, void *MainAddr); - -static void insertTargetAndModeArgs(const ParsedClangName &NameParts, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - // Put target and mode arguments at the start of argument list so that - // arguments specified in command line could override them. Avoid putting - // them at index 0, as an option like '-cc1' must remain the first. - int InsertionPoint = 0; - if (ArgVector.size() > 0) - ++InsertionPoint; - - if (NameParts.DriverMode) { - // Add the mode flag to the arguments. 
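-    // DriverMode is already a complete flag, e.g. "--driver-mode=g++" when
-    // the binary was invoked as clang++ (illustrative).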
- ArgVector.insert(ArgVector.begin() + InsertionPoint, - GetStableCStr(SavedStrings, NameParts.DriverMode)); - } - - if (NameParts.TargetIsValid) { - const char *arr[] = {"-target", GetStableCStr(SavedStrings, - NameParts.TargetPrefix)}; - ArgVector.insert(ArgVector.begin() + InsertionPoint, - std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
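-  // Symlinks are deliberately left unresolved, so an installation that
-  // reaches clang through a symlink resolves the install dir relative to
-  // the symlink rather than the real binary.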
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "gen-reproducer") - return cc1gen_reproducer_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'. " - << "Valid tools include '-cc1' and '-cc1as'.\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, /* const */ char **argv_) { - llvm::InitLLVM X(argc_, argv_); - SmallVector argv(argv_, argv_ + argc_); - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - llvm::InitializeAllTargets(); - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
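-    // The sentinels are the nullptr entries the CL-mode tokenizer inserts
-    // to mark response-file line ends (used for the /LINK argument); the
-    // -cc1 tools do not expect them in argv.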
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - TheDriver.setTargetAndMode(TargetAndMode); - - insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. 
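-      // (Status 70 matches the convention used by cc1_main when it exits on
-      // a fatal backend error; on BSD systems it is EX_SOFTWARE.)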
- // On Windows, abort will return an exit code of 3. In these cases, - // generate additional diagnostic information if possible. - bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; -#ifdef _WIN32 - DiagnoseCrash |= CommandRes == 3; -#endif - if (DiagnoseCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); - break; - } - } - } - - Diags.getClient()->finish(); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - -#ifdef _WIN32 - // Exit status should not be negative on Win32, unless abnormal termination. - // Once abnormal termiation was caught, negative status should not be - // propagated. - if (Res < 0) - Res = 1; -#endif - - // If we have multiple failing commands, we return the result of the first - // failing command. - return Res; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/lld.cpp b/dbms/programs/clang/Compiler-7.0.0/lld.cpp deleted file mode 100644 index 8e118b6e24b..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/lld.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===- tools/lld/lld.cpp - Linker Driver Dispatcher -----------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the main function of the lld executable. The main -// function is a thin wrapper which dispatches to the platform specific -// driver. -// -// lld is a single executable that contains four different linkers for ELF, -// COFF, WebAssembly and Mach-O. The main function dispatches according to -// argv[0] (i.e. command name). The most common name for each target is shown -// below: -// -// - ld.lld: ELF (Unix) -// - ld64: Mach-O (macOS) -// - lld-link: COFF (Windows) -// - ld-wasm: WebAssembly -// -// lld can be invoked as "lld" along with "-flavor" option. This is for -// backward compatibility and not recommended. -// -//===----------------------------------------------------------------------===// - -#include "lld/Common/Driver.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Path.h" -#include -using namespace lld; -using namespace llvm; -using namespace llvm::sys; - -/* - -enum Flavor { - Invalid, - Gnu, // -flavor gnu - WinLink, // -flavor link - Darwin, // -flavor darwin - Wasm, // -flavor wasm -}; - -LLVM_ATTRIBUTE_NORETURN static void die(const Twine &S) { - errs() << S << "\n"; - exit(1); -} - -static Flavor getFlavor(StringRef S) { - return StringSwitch(S) - .CasesLower("ld", "ld.lld", "gnu", Gnu) - .CasesLower("wasm", "ld-wasm", Wasm) - .CaseLower("link", WinLink) - .CasesLower("ld64", "ld64.lld", "darwin", Darwin) - .Default(Invalid); -} - -static bool isPETarget(const std::vector &V) { - for (auto It = V.begin(); It + 1 != V.end(); ++It) { - if (StringRef(*It) != "-m") - continue; - StringRef S = *(It + 1); - return S == "i386pe" || S == "i386pep" || S == "thumb2pe" || S == "arm64pe"; - } - return false; -} - -static Flavor parseProgname(StringRef Progname) { -#if __APPLE__ - // Use Darwin driver for "ld" on Darwin. - if (Progname == "ld") - return Darwin; -#endif - -#if LLVM_ON_UNIX - // Use GNU driver for "ld" on other Unix-like system. 
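-  // (Past these special cases, getFlavor is applied to every dash-separated
-  // piece of the program name, so e.g. "ld64.lld" selects Darwin and
-  // "lld-link" selects WinLink.)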
- if (Progname == "ld") - return Gnu; -#endif - - // Progname may be something like "lld-gnu". Parse it. - SmallVector V; - Progname.split(V, "-"); - for (StringRef S : V) - if (Flavor F = getFlavor(S)) - return F; - return Invalid; -} - -static Flavor parseFlavor(std::vector &V) { - // Parse -flavor option. - if (V.size() > 1 && V[1] == StringRef("-flavor")) { - if (V.size() <= 2) - die("missing arg value for '-flavor'"); - Flavor F = getFlavor(V[2]); - if (F == Invalid) - die("Unknown flavor: " + StringRef(V[2])); - V.erase(V.begin() + 1, V.begin() + 3); - return F; - } - - // Deduct the flavor from argv[0]. - StringRef Arg0 = path::filename(V[0]); - if (Arg0.endswith_lower(".exe")) - Arg0 = Arg0.drop_back(4); - return parseProgname(Arg0); -} -*/ - -// If this function returns true, lld calls _exit() so that it quickly -// exits without invoking destructors of globally allocated objects. -// -// We don't want to do that if we are running tests though, because -// doing that breaks leak sanitizer. So, lit sets this environment variable, -// and we use it to detect whether we are running tests or not. -static bool canExitEarly() { return StringRef(getenv("LLD_IN_TEST")) != "1"; } - -/// Universal linker main(). This linker emulates the gnu, darwin, or -/// windows linker based on the argv[0] or -flavor option. -int mainEntryClickHouseLLD(int Argc, /* const */ char **Argv) { - InitLLVM X(Argc, Argv); - - std::vector Args(Argv, Argv + Argc); -/* - switch (parseFlavor(Args)) { - case Gnu: - if (isPETarget(Args)) - return !mingw::link(Args); -*/ - return !elf::link(Args, canExitEarly()); -/* - case WinLink: - return !coff::link(Args, canExitEarly()); - case Darwin: - return !mach_o::link(Args, canExitEarly()); - case Wasm: - return !wasm::link(Args, canExitEarly()); - default: - die("lld is a generic driver.\n" - "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-lld" - " (WebAssembly) instead"); - } -*/ -} diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt b/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt deleted file mode 100644 index a5f8314b862..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
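-# The clang/lld archives are listed ahead of ${REQUIRED_LLVM_LIBRARIES}; with
-# single-pass static linking their LLVM symbols then resolve from the LLVM
-# libraries that follow them on the link line.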
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver -clangFrontend -clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic -clangCrossTU clangIndex - -lldCOFF -lldDriver -lldELF -lldMinGW -lldMachO -lldReaderWriter -lldYAML -lldCommon -lldCore - -${REQUIRED_LLVM_LIBRARIES} - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp deleted file mode 100644 index 3686475dd42..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp +++ /dev/null @@ -1,243 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 
70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. -static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. 
Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. - auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp deleted file mode 100644 index ce23422077f..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp +++ /dev/null @@ -1,555 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
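-  /// Accepted values, as dispatched in ExecuteAssembler below: "static",
-  /// "pic" and "dynamic-no-pic"; the parser defaults to "pic".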
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - auto ArgString = A->getAsString(Args); - std::string Nearest; - if (OptTbl->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) - Diags.Report(diag::err_drv_unknown_argument) << ArgString; - else - Diags.Report(diag::err_drv_unknown_argument_with_suggestion) - << ArgString << Nearest; - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
-  std::string Error;
-  const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error);
-  if (!TheTarget)
-    return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple;
-
-  ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
-      MemoryBuffer::getFileOrSTDIN(Opts.InputFile);
-
-  if (std::error_code EC = Buffer.getError()) {
-    Error = EC.message();
-    return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile;
-  }
-
-  SourceMgr SrcMgr;
-
-  // Tell SrcMgr about this buffer, which is what the parser will pick up.
-  SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc());
-
-  // Record the location of the include directories so that the lexer can find
-  // it later.
-  SrcMgr.setIncludeDirs(Opts.IncludePaths);
-
-  std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(Opts.Triple));
-  assert(MRI && "Unable to create target register info!");
-
-  std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple));
-  assert(MAI && "Unable to create target asm info!");
-
-  // Ensure MCAsmInfo initialization occurs before any use, otherwise sections
-  // may be created with a combination of default and explicit settings.
-  MAI->setCompressDebugSections(Opts.CompressDebugSections);
-
-  MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations);
-
-  bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj;
-  std::unique_ptr<raw_fd_ostream> FDOS = getOutputStream(Opts, Diags, IsBinary);
-  if (!FDOS)
-    return true;
-
-  // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
-  // MCObjectFileInfo needs a MCContext reference in order to initialize itself.
-  std::unique_ptr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
-
-  MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
-
-  bool PIC = false;
-  if (Opts.RelocationModel == "static") {
-    PIC = false;
-  } else if (Opts.RelocationModel == "pic") {
-    PIC = true;
-  } else {
-    assert(Opts.RelocationModel == "dynamic-no-pic" &&
-           "Invalid PIC model!");
-    PIC = false;
-  }
-
-  MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx);
-  if (Opts.SaveTemporaryLabels)
-    Ctx.setAllowTemporaryLabels(false);
-  if (Opts.GenDwarfForAssembly)
-    Ctx.setGenDwarfForAssembly(true);
-  if (!Opts.DwarfDebugFlags.empty())
-    Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags));
-  if (!Opts.DwarfDebugProducer.empty())
-    Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer));
-  if (!Opts.DebugCompilationDir.empty())
-    Ctx.setCompilationDir(Opts.DebugCompilationDir);
-  if (!Opts.MainFileName.empty())
-    Ctx.setMainFileName(StringRef(Opts.MainFileName));
-  Ctx.setDwarfVersion(Opts.DwarfVersion);
-
-  // Build up the feature string from the target feature list.
-  std::string FS;
-  if (!Opts.Features.empty()) {
-    FS = Opts.Features[0];
-    for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i)
-      FS += "," + Opts.Features[i];
-  }
-
-  std::unique_ptr<MCStreamer> Str;
-
-  std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
-  std::unique_ptr<MCSubtargetInfo> STI(
-      TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS));
-
-  raw_pwrite_stream *Out = FDOS.get();
-  std::unique_ptr<buffer_ostream> BOS;
-
-  // FIXME: There is a bit of code duplication with addPassesToEmitFile.
-  if (Opts.OutputType == AssemblerInvocation::FT_Asm) {
-    MCInstPrinter *IP = TheTarget->createMCInstPrinter(
-        llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI);
-
-    std::unique_ptr<MCCodeEmitter> CE;
-    if (Opts.ShowEncoding)
-      CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
-    MCTargetOptions MCOptions;
-    std::unique_ptr<MCAsmBackend> MAB(
-        TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
-
-    auto FOut = llvm::make_unique<formatted_raw_ostream>(*Out);
-    Str.reset(TheTarget->createAsmStreamer(
-        Ctx, std::move(FOut), /*asmverbose*/ true,
-        /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB),
-        Opts.ShowInst));
-  } else if (Opts.OutputType == AssemblerInvocation::FT_Null) {
-    Str.reset(createNullStreamer(Ctx));
-  } else {
-    assert(Opts.OutputType == AssemblerInvocation::FT_Obj &&
-           "Invalid file type!");
-    if (!FDOS->supportsSeeking()) {
-      BOS = make_unique<buffer_ostream>(*FDOS);
-      Out = BOS.get();
-    }
-
-    std::unique_ptr<MCCodeEmitter> CE(
-        TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
-    MCTargetOptions MCOptions;
-    std::unique_ptr<MCAsmBackend> MAB(
-        TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
-
-    Triple T(Opts.Triple);
-    Str.reset(TheTarget->createMCObjectStreamer(
-        T, Ctx, std::move(MAB), *Out, std::move(CE), *STI, Opts.RelaxAll,
-        Opts.IncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ true));
-    Str.get()->InitSections(Opts.NoExecStack);
-  }
-
-  // Assembly to object compilation should leverage assembly info.
-  Str->setUseAssemblerInfoForParsing(true);
-
-  bool Failed = false;
-
-  std::unique_ptr<MCAsmParser> Parser(
-      createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI));
-
-  // FIXME: init MCTargetOptions from sanitizer flags here.
-  MCTargetOptions Options;
-  std::unique_ptr<MCTargetAsmParser> TAP(
-      TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options));
-  if (!TAP)
-    Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple;
-
-  // Set values for symbols, if any.
-  for (auto &S : Opts.SymbolDefs) {
-    auto Pair = StringRef(S).split('=');
-    auto Sym = Pair.first;
-    auto Val = Pair.second;
-    int64_t Value;
-    // We have already error checked this in the driver.
-    Val.getAsInteger(0, Value);
-    Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value);
-  }
-
-  if (!Failed) {
-    Parser->setTargetParser(*TAP.get());
-    Failed = Parser->Run(Opts.NoInitialTextSection);
-  }
-
-  // Close Streamer first.
-  // It might have a reference to the output stream.
-  Str.reset();
-  // Close the output stream early.
-  BOS.reset();
-  FDOS.reset();
-
-  // Delete output file if there were errors.
-  if (Failed && Opts.OutputPath != "-")
-    sys::fs::remove(Opts.OutputPath);
-
-  return Failed;
-}
-
-static void LLVMErrorHandler(void *UserData, const std::string &Message,
-                             bool /*GenCrashDiag*/) {
-  DiagnosticsEngine &Diags = *static_cast<DiagnosticsEngine *>(UserData);
-
-  Diags.Report(diag::err_fe_error_backend) << Message;
-
-  // We cannot recover from llvm errors.
-  exit(1);
-}
-
-int cc1as_main(ArrayRef<const char *> Argv, const char */*Argv0*/, void */*MainAddr*/) {
-  // Initialize targets and assembly printers/parsers.
-  InitializeAllTargetInfos();
-  InitializeAllTargetMCs();
-  InitializeAllAsmParsers();
-
-  // Construct our diagnostic client.
-  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
-  TextDiagnosticPrinter *DiagClient
-    = new TextDiagnosticPrinter(errs(), &*DiagOpts);
-  DiagClient->setPrefix("clang -cc1as");
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
-
-  // Set an error handler, so that any LLVM backend diagnostics go through our
-  // error handler.
-  ScopedFatalErrorHandler FatalErrorHandler
-    (LLVMErrorHandler, static_cast<void *>(&Diags));
-
-  // Parse the arguments.
-  AssemblerInvocation Asm;
-  if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags))
-    return 1;
-
-  if (Asm.ShowHelp) {
-    std::unique_ptr<OptTable> Opts(driver::createDriverOptTable());
-    Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler",
-                    /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0,
-                    /*ShowAllAliases=*/false);
-    return 0;
-  }
-
-  // Honor -version.
-  //
-  // FIXME: Use a better -version message?
-  if (Asm.ShowVersion) {
-    llvm::cl::PrintVersionMessage();
-    return 0;
-  }
-
-  // Honor -mllvm.
-  //
-  // FIXME: Remove this, one day.
-  if (!Asm.LLVMArgs.empty()) {
-    unsigned NumArgs = Asm.LLVMArgs.size();
-    auto Args = llvm::make_unique<const char*[]>(NumArgs + 2);
-    Args[0] = "clang (LLVM option parsing)";
-    for (unsigned i = 0; i != NumArgs; ++i)
-      Args[i + 1] = Asm.LLVMArgs[i].c_str();
-    Args[NumArgs + 1] = nullptr;
-    llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get());
-  }
-
-  // Execute the invocation, unless there were parsing errors.
-  bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags);
-
-  // If any timers were active but haven't been destroyed yet, print their
-  // results now.
-  TimerGroup::printAll(errs());
-
-  return !!Failed;
-}
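For orientation: the deleted cc1as_main boils down to a small MC-layer pipeline. The following is a minimal sketch, not code from this patch, assuming the same LLVM 7-era APIs the deleted file calls and that targets were already registered via InitializeAllTargetInfos/InitializeAllTargetMCs/InitializeAllAsmParsers; the helper name assembleToNull is hypothetical.

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCObjectFileInfo.h"
    #include "llvm/MC/MCParser/MCAsmParser.h"
    #include "llvm/MC/MCParser/MCTargetAsmParser.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/MC/MCTargetOptions.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/TargetRegistry.h"

    using namespace llvm;

    // Parses `Asm` for `TripleName` and discards the output (null streamer).
    // Returns true on failure, mirroring the convention of ExecuteAssembler.
    static bool assembleToNull(const std::string & TripleName, StringRef Asm)
    {
        std::string Error;
        const Target * T = TargetRegistry::lookupTarget(TripleName, Error);
        if (!T)
            return true;

        SourceMgr SrcMgr;
        SrcMgr.AddNewSourceBuffer(MemoryBuffer::getMemBufferCopy(Asm), SMLoc());

        std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TripleName));
        std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TripleName));
        std::unique_ptr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
        MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
        MOFI->InitMCObjectFileInfo(Triple(TripleName), /*PIC=*/true, Ctx);

        std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
        std::unique_ptr<MCSubtargetInfo> STI(
            T->createMCSubtargetInfo(TripleName, /*CPU=*/"", /*Features=*/""));

        // A null streamer swallows the parsed directives and instructions.
        std::unique_ptr<MCStreamer> Str(createNullStreamer(Ctx));
        std::unique_ptr<MCAsmParser> Parser(
            createMCAsmParser(SrcMgr, Ctx, *Str, *MAI));
        MCTargetOptions Options;
        std::unique_ptr<MCTargetAsmParser> TAP(
            T->createMCAsmParser(*STI, *Parser, *MCII, Options));
        if (!TAP)
            return true;
        Parser->setTargetParser(*TAP);
        return Parser->Run(/*NoInitialTextSection=*/false);
    }

The real cc1as additionally selects between asm, object and null streamers and wires up DWARF options, as shown in the deleted ExecuteAssembler above.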
diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp
deleted file mode 100644
index 9a061b9d137..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
-//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the entry point to the clang driver; it is a thin wrapper
-// for functionality in the Driver clang library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/Driver/Driver.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Driver/Compilation.h"
-#include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Options.h"
-#include "clang/Driver/ToolChain.h"
-#include "clang/Frontend/ChainedDiagnosticConsumer.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/SerializedDiagnosticPrinter.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Frontend/Utils.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Option/ArgList.h"
-#include "llvm/Option/OptTable.h"
-#include "llvm/Option/Option.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <memory>
-#include <set>
-#include <system_error>
-using namespace clang;
-using namespace clang::driver;
-using namespace llvm::opt;
-
-std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) {
-  if (!CanonicalPrefixes) {
-    SmallString<128> ExecutablePath(Argv0);
-    // Do a PATH lookup if Argv0 isn't a valid path.
-    if (!llvm::sys::fs::exists(ExecutablePath))
-      if (llvm::ErrorOr<std::string> P =
-              llvm::sys::findProgramByName(ExecutablePath))
-        ExecutablePath = *P;
-    return ExecutablePath.str();
-  }
-
-  // This just needs to be some symbol in the binary; C++ doesn't
-  // allow taking the address of ::main however.
-  void *P = (void*) (intptr_t) GetExecutablePath;
-  return llvm::sys::fs::getMainExecutable(Argv0, P);
-}
-
-static const char *GetStableCStr(std::set<std::string> &SavedStrings,
-                                 StringRef S) {
-  return SavedStrings.insert(S).first->c_str();
-}
-
-/// ApplyQAOverride - Apply a list of edits to the input argument lists.
-///
-/// The input string is a space separate list of edits to perform,
-/// they are applied in order to the input argument lists. Edits
-/// should be one of the following forms:
-///
-///  '#': Silence information about the changes to the command line arguments.
-///
-///  '^': Add FOO as a new argument at the beginning of the command line.
-///
-///  '+': Add FOO as a new argument at the end of the command line.
-///
-///  's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command
-///  line.
-///
-///  'xOPTION': Removes all instances of the literal argument OPTION.
-///
-///  'XOPTION': Removes all instances of the literal argument OPTION,
-///  and the following argument.
-///
-///  'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox'
-///  at the end of the command line.
-///
-/// \param OS - The stream to write edit information to.
-/// \param Args - The vector of command line arguments.
-/// \param Edit - The override command to perform.
-/// \param SavedStrings - Set to use for storing string representations.
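To make the edit mini-language documented above concrete, here is a reduced, self-contained illustration (not part of the patch) of how two of the forms transform an argument list; apply_edit is a hypothetical stand-in that models only the '+' (append) and 'x' (delete) edits.

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Reduced model of two edit forms: "+FOO" appends FOO, "xOPT" deletes OPT.
    static void apply_edit(std::vector<std::string> & args, const std::string & edit)
    {
        if (edit.empty())
            return;
        if (edit[0] == '+')
            args.push_back(edit.substr(1));
        else if (edit[0] == 'x')
            args.erase(std::remove(args.begin(), args.end(), edit.substr(1)), args.end());
    }

    int main()
    {
        std::vector<std::string> args{"clang", "-O2", "-Werror", "file.cpp"};
        apply_edit(args, "x-Werror");                  // drop -Werror
        apply_edit(args, "+-fno-omit-frame-pointer");  // append a flag
        for (const auto & a : args)
            std::cout << a << '\n';                    // clang -O2 file.cpp -fno-omit-frame-pointer
    }

The real implementation below additionally handles '^', regex substitution, paired deletion and the O-level rewrite, and interns strings in SavedStrings so the const char * pointers stay valid.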
-static void ApplyOneQAOverride(raw_ostream &OS,
-                               SmallVectorImpl<const char*> &Args,
-                               StringRef Edit,
-                               std::set<std::string> &SavedStrings) {
-  // This does not need to be efficient.
-
-  if (Edit[0] == '^') {
-    const char *Str =
-      GetStableCStr(SavedStrings, Edit.substr(1));
-    OS << "### Adding argument " << Str << " at beginning\n";
-    Args.insert(Args.begin() + 1, Str);
-  } else if (Edit[0] == '+') {
-    const char *Str =
-      GetStableCStr(SavedStrings, Edit.substr(1));
-    OS << "### Adding argument " << Str << " at end\n";
-    Args.push_back(Str);
-  } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") &&
-             Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) {
-    StringRef MatchPattern = Edit.substr(2).split('/').first;
-    StringRef ReplPattern = Edit.substr(2).split('/').second;
-    ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1);
-
-    for (unsigned i = 1, e = Args.size(); i != e; ++i) {
-      // Ignore end-of-line response file markers
-      if (Args[i] == nullptr)
-        continue;
-      std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]);
-
-      if (Repl != Args[i]) {
-        OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n";
-        Args[i] = GetStableCStr(SavedStrings, Repl);
-      }
-    }
-  } else if (Edit[0] == 'x' || Edit[0] == 'X') {
-    auto Option = Edit.substr(1);
-    for (unsigned i = 1; i < Args.size();) {
-      if (Option == Args[i]) {
-        OS << "### Deleting argument " << Args[i] << '\n';
-        Args.erase(Args.begin() + i);
-        if (Edit[0] == 'X') {
-          if (i < Args.size()) {
-            OS << "### Deleting argument " << Args[i] << '\n';
-            Args.erase(Args.begin() + i);
-          } else
-            OS << "### Invalid X edit, end of command line!\n";
-        }
-      } else
-        ++i;
-    }
-  } else if (Edit[0] == 'O') {
-    for (unsigned i = 1; i < Args.size();) {
-      const char *A = Args[i];
-      // Ignore end-of-line response file markers
-      if (A == nullptr)
-        continue;
-      if (A[0] == '-' && A[1] == 'O' &&
-          (A[2] == '\0' ||
-           (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' ||
-                             ('0' <= A[2] && A[2] <= '9'))))) {
-        OS << "### Deleting argument " << Args[i] << '\n';
-        Args.erase(Args.begin() + i);
-      } else
-        ++i;
-    }
-    OS << "### Adding argument " << Edit << " at end\n";
-    Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str()));
-  } else {
-    OS << "### Unrecognized edit: " << Edit << "\n";
-  }
-}
-
-/// ApplyQAOverride - Apply a comma separate list of edits to the
-/// input argument lists. See ApplyOneQAOverride.
-static void ApplyQAOverride(SmallVectorImpl<const char*> &Args,
-                            const char *OverrideStr,
-                            std::set<std::string> &SavedStrings) {
-  raw_ostream *OS = &llvm::errs();
-
-  if (OverrideStr[0] == '#') {
-    ++OverrideStr;
-    OS = &llvm::nulls();
-  }
-
-  *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n";
-
-  // This does not need to be efficient.
-
-  const char *S = OverrideStr;
-  while (*S) {
-    const char *End = ::strchr(S, ' ');
-    if (!End)
-      End = S + strlen(S);
-    if (End != S)
-      ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings);
-    S = End;
-    if (*S != '\0')
-      ++S;
-  }
-}
-
-extern int cc1_main(ArrayRef<const char *> Argv, const char *Argv0,
-                    void *MainAddr);
-extern int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0,
-                      void *MainAddr);
-extern int cc1gen_reproducer_main(ArrayRef<const char *> Argv,
-                                  const char *Argv0, void *MainAddr);
-
-static void insertTargetAndModeArgs(const ParsedClangName &NameParts,
-                                    SmallVectorImpl<const char *> &ArgVector,
-                                    std::set<std::string> &SavedStrings) {
-  // Put target and mode arguments at the start of argument list so that
-  // arguments specified in command line could override them. Avoid putting
-  // them at index 0, as an option like '-cc1' must remain the first.
-  int InsertionPoint = 0;
-  if (ArgVector.size() > 0)
-    ++InsertionPoint;
-
-  if (NameParts.DriverMode) {
-    // Add the mode flag to the arguments.
-    ArgVector.insert(ArgVector.begin() + InsertionPoint,
-                     GetStableCStr(SavedStrings, NameParts.DriverMode));
-  }
-
-  if (NameParts.TargetIsValid) {
-    const char *arr[] = {"-target", GetStableCStr(SavedStrings,
-                                                  NameParts.TargetPrefix)};
-    ArgVector.insert(ArgVector.begin() + InsertionPoint,
-                     std::begin(arr), std::end(arr));
-  }
-}
-
-static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver,
-                               SmallVectorImpl<const char *> &Opts) {
-  llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts);
-  // The first instance of '#' should be replaced with '=' in each option.
-  for (const char *Opt : Opts)
-    if (char *NumberSignPtr = const_cast<char *>(::strchr(Opt, '#')))
-      *NumberSignPtr = '=';
-}
-
-static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) {
-  // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE.
-  TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS");
-  if (TheDriver.CCPrintOptions)
-    TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE");
-
-  // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE.
-  TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS");
-  if (TheDriver.CCPrintHeaders)
-    TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE");
-
-  // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE.
-  TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS");
-  if (TheDriver.CCLogDiagnostics)
-    TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE");
-}
-
-static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient,
-                                   const std::string &Path) {
-  // If the clang binary happens to be named cl.exe for compatibility reasons,
-  // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC.
-  StringRef ExeBasename(llvm::sys::path::filename(Path));
-  if (ExeBasename.equals_lower("cl.exe"))
-    ExeBasename = "clang-cl.exe";
-  DiagClient->setPrefix(ExeBasename);
-}
-
-// This lets us create the DiagnosticsEngine with a properly-filled-out
-// DiagnosticOptions instance.
-static DiagnosticOptions *
-CreateAndPopulateDiagOpts(ArrayRef<const char *> argv) {
-  auto *DiagOpts = new DiagnosticOptions;
-  std::unique_ptr<OptTable> Opts(createDriverOptTable());
-  unsigned MissingArgIndex, MissingArgCount;
-  InputArgList Args =
-      Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount);
-  // We ignore MissingArgCount and the return value of ParseDiagnosticArgs.
-  // Any errors that would be diagnosed here will also be diagnosed later,
-  // when the DiagnosticsEngine actually exists.
-  (void)ParseDiagnosticArgs(*DiagOpts, Args);
-  return DiagOpts;
-}
-
-static void SetInstallDir(SmallVectorImpl<const char *> &argv,
-                          Driver &TheDriver, bool CanonicalPrefixes) {
-  // Attempt to find the original path used to invoke the driver, to determine
-  // the installed path. We do this manually, because we want to support that
-  // path being a symlink.
-  SmallString<128> InstalledPath(argv[0]);
-
-  // Do a PATH lookup, if there are no directory components.
-  if (llvm::sys::path::filename(InstalledPath) == InstalledPath)
-    if (llvm::ErrorOr<std::string> Tmp = llvm::sys::findProgramByName(
-            llvm::sys::path::filename(InstalledPath.str())))
-      InstalledPath = *Tmp;
-
-  // FIXME: We don't actually canonicalize this, we just make it absolute.
-  if (CanonicalPrefixes)
-    llvm::sys::fs::make_absolute(InstalledPath);
-
-  StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath));
-  if (llvm::sys::fs::exists(InstalledPathParent))
-    TheDriver.setInstalledDir(InstalledPathParent);
-}
-
-static int ExecuteCC1Tool(ArrayRef<const char *> argv, StringRef Tool) {
-  void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath;
-  if (Tool == "")
-    return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP);
-  if (Tool == "as")
-    return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP);
-
-  // Reject unknown tools.
-  llvm::errs() << "error: unknown integrated tool '" << Tool << "'. "
-               << "Valid tools include '-cc1' and '-cc1as'.\n";
-  return 1;
-}
-
-int mainEntryClickHouseClang(int argc_, char **argv_) {
-  llvm::InitLLVM X(argc_, argv_);
-  SmallVector<const char *, 256> argv(argv_, argv_ + argc_);
-
-  if (llvm::sys::Process::FixupStandardFileDescriptors())
-    return 1;
-
-  llvm::InitializeAllTargets();
-  auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]);
-
-  llvm::BumpPtrAllocator A;
-  llvm::StringSaver Saver(A);
-
-  // Parse response files using the GNU syntax, unless we're in CL mode. There
-  // are two ways to put clang in CL compatibility mode: argv[0] is either
-  // clang-cl or cl, or --driver-mode=cl is on the command line. The normal
-  // command line parsing can't happen until after response file parsing, so we
-  // have to manually search for a --driver-mode=cl argument the hard way.
-  // Finally, our -cc1 tools don't care which tokenization mode we use because
-  // response files written by clang will tokenize the same way in either mode.
-  bool ClangCLMode = false;
-  if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") ||
-      std::find_if(argv.begin(), argv.end(), [](const char *F) {
-        return F && strcmp(F, "--driver-mode=cl") == 0;
-      }) != argv.end()) {
-    ClangCLMode = true;
-  }
-  enum { Default, POSIX, Windows } RSPQuoting = Default;
-  for (const char *F : argv) {
-    if (strcmp(F, "--rsp-quoting=posix") == 0)
-      RSPQuoting = POSIX;
-    else if (strcmp(F, "--rsp-quoting=windows") == 0)
-      RSPQuoting = Windows;
-  }
-
-  // Determines whether we want nullptr markers in argv to indicate response
-  // files end-of-lines. We only use this for the /LINK driver argument with
-  // clang-cl.exe on Windows.
-  bool MarkEOLs = ClangCLMode;
-
-  llvm::cl::TokenizerCallback Tokenizer;
-  if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode))
-    Tokenizer = &llvm::cl::TokenizeWindowsCommandLine;
-  else
-    Tokenizer = &llvm::cl::TokenizeGNUCommandLine;
-
-  if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1"))
-    MarkEOLs = false;
-  llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs);
-
-  // Handle -cc1 integrated tools, even if -cc1 was expanded from a response
-  // file.
-  auto FirstArg = std::find_if(argv.begin() + 1, argv.end(),
-                               [](const char *A) { return A != nullptr; });
-  if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) {
-    // If -cc1 came from a response file, remove the EOL sentinels.
-    if (MarkEOLs) {
-      auto newEnd = std::remove(argv.begin(), argv.end(), nullptr);
-      argv.resize(newEnd - argv.begin());
-    }
-    return ExecuteCC1Tool(argv, argv[1] + 4);
-  }
-
-  bool CanonicalPrefixes = true;
-  for (int i = 1, size = argv.size(); i < size; ++i) {
-    // Skip end-of-line response file markers
-    if (argv[i] == nullptr)
-      continue;
-    if (StringRef(argv[i]) == "-no-canonical-prefixes") {
-      CanonicalPrefixes = false;
-      break;
-    }
-  }
-
-  // Handle CL and _CL_ which permits additional command line options to be
-  // prepended or appended.
-  if (ClangCLMode) {
-    // Arguments in "CL" are prepended.
-    llvm::Optional<std::string> OptCL = llvm::sys::Process::GetEnv("CL");
-    if (OptCL.hasValue()) {
-      SmallVector<const char *, 8> PrependedOpts;
-      getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts);
-
-      // Insert right after the program name to prepend to the argument list.
-      argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end());
-    }
-    // Arguments in "_CL_" are appended.
-    llvm::Optional<std::string> Opt_CL_ = llvm::sys::Process::GetEnv("_CL_");
-    if (Opt_CL_.hasValue()) {
-      SmallVector<const char *, 8> AppendedOpts;
-      getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts);
-
-      // Insert at the end of the argument list to append.
-      argv.append(AppendedOpts.begin(), AppendedOpts.end());
-    }
-  }
-
-  std::set<std::string> SavedStrings;
-  // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the
-  // scenes.
-  if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) {
-    // FIXME: Driver shouldn't take extra initial argument.
-    ApplyQAOverride(argv, OverrideStr, SavedStrings);
-  }
-
-  std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes);
-
-  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts =
-      CreateAndPopulateDiagOpts(argv);
-
-  TextDiagnosticPrinter *DiagClient
-    = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts);
-  FixupDiagPrefixExeName(DiagClient, Path);
-
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
-
-  if (!DiagOpts->DiagnosticSerializationFile.empty()) {
-    auto SerializedConsumer =
-        clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile,
-                                        &*DiagOpts, /*MergeChildRecords=*/true);
-    Diags.setClient(new ChainedDiagnosticConsumer(
-        Diags.takeClient(), std::move(SerializedConsumer)));
-  }
-
-  ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false);
-
-  Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags);
-  SetInstallDir(argv, TheDriver, CanonicalPrefixes);
-  TheDriver.setTargetAndMode(TargetAndMode);
-
-  insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings);
-
-  SetBackdoorDriverOutputsFromEnvVars(TheDriver);
-
-  std::unique_ptr<Compilation> C(TheDriver.BuildCompilation(argv));
-  int Res = 1;
-  if (C && !C->containsError()) {
-    SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
-    Res = TheDriver.ExecuteCompilation(*C, FailingCommands);
-
-    // Force a crash to test the diagnostics.
-    if (TheDriver.GenReproducer) {
-      Diags.Report(diag::err_drv_force_crash)
-        << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH");
-
-      // Pretend that every command failed.
-      FailingCommands.clear();
-      for (const auto &J : C->getJobs())
-        if (const Command *C = dyn_cast<Command>(&J))
-          FailingCommands.push_back(std::make_pair(-1, C));
-    }
-
-    for (const auto &P : FailingCommands) {
-      int CommandRes = P.first;
-      const Command *FailingCommand = P.second;
-      if (!Res)
-        Res = CommandRes;
-
-      // If result status is < 0, then the driver command signalled an error.
-      // If result status is 70, then the driver command reported a fatal error.
-      // On Windows, abort will return an exit code of 3.  In these cases,
-      // generate additional diagnostic information if possible.
-      bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
-#ifdef _WIN32
-      DiagnoseCrash |= CommandRes == 3;
-#endif
-      if (DiagnoseCrash) {
-        TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
-        break;
-      }
-    }
-  }
-
-  Diags.getClient()->finish();
-
-  // If any timers were active but haven't been destroyed yet, print their
-  // results now.  This happens in -disable-free mode.
-  llvm::TimerGroup::printAll(llvm::errs());
-
-#ifdef _WIN32
-  // Exit status should not be negative on Win32, unless abnormal termination.
-  // Once abnormal termination was caught, negative status should not be
-  // propagated.
-  if (Res < 0)
-    Res = 1;
-#endif
-
-  // If we have multiple failing commands, we return the result of the first
-  // failing command.
-  return Res;
-}
diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp
deleted file mode 100644
index 203e50d42a9..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "lld/Common/Driver.h"
-#include "llvm/Support/InitLLVM.h"
-#include <vector>
-
-int mainEntryClickHouseLLD(int argc, char ** argv)
-{
-    llvm::InitLLVM X(argc, argv);
-    std::vector<const char *> args(argv, argv + argc);
-    return !lld::elf::link(args, false);
-}
diff --git a/dbms/programs/clang/Compiler-7.0.0svn b/dbms/programs/clang/Compiler-7.0.0svn
deleted file mode 120000
index eeeb5bbc2c0..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0svn
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-7.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-7.0.1 b/dbms/programs/clang/Compiler-7.0.1
deleted file mode 120000
index eeeb5bbc2c0..00000000000
--- a/dbms/programs/clang/Compiler-7.0.1
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-7.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/clickhouse-clang.cpp b/dbms/programs/clang/clickhouse-clang.cpp
deleted file mode 100644
index 261ae18b6d3..00000000000
--- a/dbms/programs/clang/clickhouse-clang.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-int mainEntryClickHouseClang(int argc, char ** argv);
-int main(int argc_, char ** argv_) { return mainEntryClickHouseClang(argc_, argv_); }
diff --git a/dbms/programs/clang/clickhouse-lld.cpp b/dbms/programs/clang/clickhouse-lld.cpp
deleted file mode 100644
index baa6182d66d..00000000000
--- a/dbms/programs/clang/clickhouse-lld.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-int mainEntryClickHouseLLD(int argc, char ** argv);
-int main(int argc_, char ** argv_) { return mainEntryClickHouseLLD(argc_, argv_); }
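These deleted two-line wrappers existed so that standalone clickhouse-clang / clickhouse-lld binaries could share the entry points of the multicall binary; dbms/programs/main.cpp (see its hunk further below) selects an entry point by name from its clickhouse_applications table. A minimal, self-contained sketch of that dispatch pattern follows; the stub entry point stands in for the real table and is hypothetical.

    #include <string>
    #include <utility>

    using MainFunc = int (*)(int, char **);

    // Stand-in entry point; the real table maps names like "clang" and "lld"
    // to mainEntryClickHouseClang / mainEntryClickHouseLLD.
    static int mainEntryStub(int, char **) { return 0; }

    static std::pair<const char *, MainFunc> clickhouse_applications[] =
    {
        {"stub", mainEntryStub},
    };

    int main(int argc, char ** argv)
    {
        // Dispatch on the basename of argv[0], so a symlink selects the tool.
        std::string name = argv[0];
        auto pos = name.rfind('/');
        if (pos != std::string::npos)
            name.erase(0, pos + 1);

        for (auto & app : clickhouse_applications)
            if (name == app.first)
                return app.second(argc, argv);

        return mainEntryStub(argc, argv);  // Fallback: default application.
    }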
diff --git a/dbms/programs/clang/copy_headers.sh b/dbms/programs/clang/copy_headers.sh
deleted file mode 100755
index 45a58855c91..00000000000
--- a/dbms/programs/clang/copy_headers.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-#set -x
-#echo "Args: $*"; env | sort
-
-# This script collects all header files needed to compile a given translation unit
-# and copies them, preserving their paths, into the DST directory.
-# This can later be used to compile the translation unit on another server,
-# using exactly the same set of header files.
-#
-# Requires clang, preferably the most recent one (trunk).
-#
-# Used when building packages.
-# The header files are written into the clickhouse-common package, into the directory /usr/share/clickhouse/headers.
-#
-# If you want to install them yourself, without building a package,
-# so that clickhouse-server finds them where it expects, run:
-#
-# sudo ./copy_headers.sh . /usr/share/clickhouse/headers/
-
-SOURCE_PATH=${1:-../../..}
-DST=${2:-$SOURCE_PATH/../headers}
-BUILD_PATH=${BUILD_PATH=${3:-$SOURCE_PATH/build}}
-
-PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH"
-
-if [[ -z $CLANG ]]; then
-    CLANG="clang"
-fi
-
-START_HEADERS=$(echo \
-    $BUILD_PATH/dbms/src/Common/config_version.h \
-    $SOURCE_PATH/dbms/src/Interpreters/SpecializedAggregator.h \
-    $SOURCE_PATH/dbms/src/AggregateFunctions/AggregateFunction*.h)
-
-for header in $START_HEADERS; do
-    START_HEADERS_INCLUDE+="-include $header "
-done
-
-
-GCC_ROOT=`$CLANG -v 2>&1 | grep "Selected GCC installation"| sed -n -e 's/^.*: //p'`
-
-# TODO: Does not work on macos?
-GCC_ROOT=${GCC_ROOT:=/usr/lib/clang/${CMAKE_CXX_COMPILER_VERSION}}
-
-# The -mcx16 option is passed so that more header files get picked up (with a margin).
-# The latter options are the same that are added while building packages.
-for src_file in $(echo | $CLANG -M -xc++ -std=c++1z -Wall -Werror -msse2 -msse4 -mcx16 -mpopcnt -O3 -g -fPIC -fstack-protector -D_FORTIFY_SOURCE=2 \
-    -I $GCC_ROOT/include \
-    -I $GCC_ROOT/include-fixed \
-    $(cat "$BUILD_PATH/include_directories.txt") \
-    $START_HEADERS_INCLUDE \
-    - |
-    tr -d '\\' |
-    sed -E -e 's/^-\.o://');
-do
-    dst_file=$src_file;
-    [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!")
-    [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!")
-    dst_file=$(echo $dst_file | sed -E -e 's/build\///')  # for simplicity reasons, will put generated headers near the rest.
-    mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')";
-    cp "$src_file" "$DST/$dst_file";
-done
-
-
-# Copy more header files with intrinsics, since the servers where the header
-# files will be installed will use the -march=native option.
- -for src_file in $(ls -1 $($CLANG -v -xc++ - <<<'' 2>&1 | grep '^ /' | grep 'include' | grep -E '/lib/clang/|/include/clang/')/*.h | grep -vE 'arm|altivec|Intrin'); -do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; -done - -if [ -d "$SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail" ]; then - # Even more platform-specific headers - for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail/*); - do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; - done -fi - -if [ -d "$SOURCE_PATH/contrib/boost/boost/smart_ptr/detail" ]; then - for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/boost/smart_ptr/detail/*); - do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; - done -fi diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 57821d854e9..3fbbcee0f15 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -56,11 +56,6 @@ int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif -#if USE_EMBEDDED_COMPILER - int mainEntryClickHouseClang(int argc, char ** argv); - int mainEntryClickHouseLLD(int argc, char ** argv); -#endif - namespace { @@ -100,12 +95,6 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) {"obfuscator", mainEntryClickHouseObfuscator}, #endif - -#if USE_EMBEDDED_COMPILER - {"clang", mainEntryClickHouseClang}, - {"clang++", mainEntryClickHouseClang}, - {"lld", mainEntryClickHouseLLD}, -#endif }; @@ -152,11 +141,6 @@ int main(int argc_, char ** argv_) /// will work only after additional call of this function. updatePHDRCache(); -#if USE_EMBEDDED_COMPILER - if (argc_ >= 2 && 0 == strcmp(argv_[1], "-cc1")) - return mainEntryClickHouseClang(argc_, argv_); -#endif - #if USE_TCMALLOC /** Without this option, tcmalloc returns memory to OS too frequently for medium-sized memory allocations * (like IO buffers, column vectors, hash tables, etc.), diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index b7db44700bb..3adae6d9e93 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -85,7 +85,6 @@ struct Settings : public SettingsCollection M(SettingTotalsMode, totals_mode, TotalsMode::AFTER_HAVING_EXCLUSIVE, "How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = ‘any’ are present.") \ M(SettingFloat, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.") \ \ - M(SettingBool, compile, false, "Whether query compilation is enabled.") \ M(SettingBool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). 
Enabling this may increase merge times and memory consumption.") \
     M(SettingBool, compile_expressions, false, "Compile some scalar functions and operators to native code.") \
     M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.") \
@@ -351,6 +350,7 @@ struct Settings : public SettingsCollection
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
     M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
+    M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \
 
 DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
 
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 33fbb903497..373b47f7315 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -25,10 +25,6 @@
 #include
 #include
-#if __has_include(<Interpreters/config_compile.h>)
-#include <Interpreters/config_compile.h>
-#endif
-
 namespace ProfileEvents
 {
 
@@ -47,7 +43,6 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int CANNOT_COMPILE_CODE;
     extern const int TOO_MANY_ROWS;
     extern const int EMPTY_DATA_PASSED;
     extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS;
@@ -195,200 +190,6 @@ Aggregator::Aggregator(const Params & params_)
 }
 
 
-void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
-{
-    std::lock_guard lock(mutex);
-
-    if (compiled_if_possible)
-        return;
-
-    compiled_if_possible = true;
-
-#if !defined(INTERNAL_COMPILER_HEADERS)
-    throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE);
-#else
-    std::string method_typename_single_level;
-    std::string method_typename_two_level;
-
-    if (false) {}
-#define M(NAME) \
-    else if (type == AggregatedDataVariants::Type::NAME) \
-    { \
-        method_typename_single_level = "decltype(AggregatedDataVariants::" #NAME ")::element_type"; \
-        method_typename_two_level = "decltype(AggregatedDataVariants::" #NAME "_two_level)::element_type"; \
-    }
-
-    APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
-#undef M
-
-#define M(NAME) \
-    else if (type == AggregatedDataVariants::Type::NAME) \
-        method_typename_single_level = "decltype(AggregatedDataVariants::" #NAME ")::element_type";
-
-    APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M)
-#undef M
-    else if (type == AggregatedDataVariants::Type::without_key) {}
-    else
-        throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
-
-    auto compiler_headers = Poco::Util::Application::instance().config().getString("compiler_headers", INTERNAL_COMPILER_HEADERS);
-
-    /// List of types of aggregate functions.
-    std::stringstream aggregate_functions_typenames_str;
-    std::stringstream aggregate_functions_headers_args;
-    for (size_t i = 0; i < params.aggregates_size; ++i)
-    {
-        IAggregateFunction & func = *aggregate_functions[i];
-
-        int status = 0;
-        std::string type_name = demangle(typeid(func).name(), status);
-
-        if (status)
-            throw Exception("Cannot compile code: cannot demangle name " + String(typeid(func).name())
-                + ", status: " + toString(status), ErrorCodes::CANNOT_COMPILE_CODE);
-
-        aggregate_functions_typenames_str << ((i != 0) ?
", " : "") << type_name; - - std::string header_path = func.getHeaderFilePath(); - auto pos = header_path.find("/AggregateFunctions/"); - - if (pos == std::string::npos) - throw Exception("Cannot compile code: unusual path of header file for aggregate function: " + header_path, - ErrorCodes::CANNOT_COMPILE_CODE); - - aggregate_functions_headers_args << "-include '" << compiler_headers << "/dbms/src"; - aggregate_functions_headers_args.write(&header_path[pos], header_path.size() - pos); - aggregate_functions_headers_args << "' "; - } - - aggregate_functions_headers_args << "-include '" << compiler_headers << "/dbms/src/Interpreters/SpecializedAggregator.h'"; - - std::string aggregate_functions_typenames = aggregate_functions_typenames_str.str(); - - std::stringstream key_str; - key_str << "Aggregate: "; - if (!method_typename_single_level.empty()) - key_str << method_typename_single_level + ", "; - key_str << aggregate_functions_typenames; - std::string key = key_str.str(); - - auto get_code = [method_typename_single_level, method_typename_two_level, aggregate_functions_typenames] - { - /// A short piece of code, which is an explicit instantiation of the template. - std::stringstream code; - code << /// No explicit inclusion of the header file. It is included using the -include compiler option. - "namespace DB\n" - "{\n" - "\n"; - - /// There can be up to two instantiations for the template - for normal and two_level options. - auto append_code_for_specialization = - [&code, &aggregate_functions_typenames] (const std::string & method_typename, const std::string & suffix) - { - code << - "template void Aggregator::executeSpecialized<\n" - " " << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n" - " " << method_typename << " &, Arena *, size_t, ColumnRawPtrs &,\n" - " AggregateColumns &, bool, AggregateDataPtr) const;\n" - "\n" - "static void wrapper" << suffix << "(\n" - " const Aggregator & aggregator,\n" - " " << method_typename << " & method,\n" - " Arena * arena,\n" - " size_t rows,\n" - " ColumnRawPtrs & key_columns,\n" - " Aggregator::AggregateColumns & aggregate_columns,\n" - " bool no_more_keys,\n" - " AggregateDataPtr overflow_row)\n" - "{\n" - " aggregator.executeSpecialized<\n" - " " << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n" - " method, arena, rows, key_columns, aggregate_columns, no_more_keys, overflow_row);\n" - "}\n" - "\n" - "void * getPtr" << suffix << "() __attribute__((__visibility__(\"default\")));\n" - "void * getPtr" << suffix << "()\n" /// Without this wrapper, it's not clear how to get the desired symbol from the compiled library. - "{\n" - " return reinterpret_cast(&wrapper" << suffix << ");\n" - "}\n"; - }; - - if (!method_typename_single_level.empty()) - append_code_for_specialization(method_typename_single_level, ""); - else - { - /// For `without_key` method. 
-
-        if (!method_typename_single_level.empty())
-            append_code_for_specialization(method_typename_single_level, "");
-        else
-        {
-            /// For `without_key` method.
-            code <<
-                "template void Aggregator::executeSpecializedWithoutKey<\n"
-                "    " << "TypeList<" << aggregate_functions_typenames << ">>(\n"
-                "    AggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *) const;\n"
-                "\n"
-                "static void wrapper(\n"
-                "    const Aggregator & aggregator,\n"
-                "    AggregatedDataWithoutKey & method,\n"
-                "    size_t rows,\n"
-                "    Aggregator::AggregateColumns & aggregate_columns,\n"
-                "    Arena * arena)\n"
-                "{\n"
-                "    aggregator.executeSpecializedWithoutKey<\n"
-                "        TypeList<" << aggregate_functions_typenames << ">>(\n"
-                "        method, rows, aggregate_columns, arena);\n"
-                "}\n"
-                "\n"
-                "void * getPtr() __attribute__((__visibility__(\"default\")));\n"
-                "void * getPtr()\n"
-                "{\n"
-                "    return reinterpret_cast<void *>(&wrapper);\n"
-                "}\n";
-        }
-
-        if (!method_typename_two_level.empty())
-            append_code_for_specialization(method_typename_two_level, "TwoLevel");
-        else
-        {
-            /// The stub.
-            code <<
-                "void * getPtrTwoLevel() __attribute__((__visibility__(\"default\")));\n"
-                "void * getPtrTwoLevel()\n"
-                "{\n"
-                "    return nullptr;\n"
-                "}\n";
-        }
-
-        code <<
-            "}\n";
-
-        return code.str();
-    };
-
-    auto compiled_data_owned_by_callback = compiled_data;
-    auto on_ready = [compiled_data_owned_by_callback] (SharedLibraryPtr & lib)
-    {
-        if (compiled_data_owned_by_callback.unique())    /// Aggregator is already destroyed.
-            return;
-
-        compiled_data_owned_by_callback->compiled_aggregator = lib;
-        compiled_data_owned_by_callback->compiled_method_ptr = lib->get<void * (*) ()>("_ZN2DB6getPtrEv")();
-        compiled_data_owned_by_callback->compiled_two_level_method_ptr = lib->get<void * (*) ()>("_ZN2DB14getPtrTwoLevelEv")();
-    };
-
-    /** If the library has already been compiled, a non-zero SharedLibraryPtr is returned.
-      * If the library was not compiled, then the counter is incremented, and nullptr is returned.
-      * If the counter has reached the value min_count_to_compile, then the compilation starts asynchronously (in a separate thread)
-      * at the end of which `on_ready` callback is called.
-      */
-    aggregate_functions_headers_args << " -Wno-unused-function";
-    SharedLibraryPtr lib = params.compiler->getOrCount(key, params.min_count_to_compile,
-        aggregate_functions_headers_args.str(),
-        get_code, on_ready);
-
-    /// If the result is already ready.
-    if (lib)
-        on_ready(lib);
-#endif
-}
-
-
 AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
 {
     /// If no keys. All aggregating to single row.
@@ -720,9 +521,6 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
         result.keys_size = params.keys_size;
         result.key_sizes = key_sizes;
         LOG_TRACE(log, "Aggregation method: " << result.getMethodName());
-
-        if (params.compiler)
-            compileIfPossible(result.type);
     }
 
     if (isCancelled())
@@ -794,67 +592,21 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
     /// For the case when there are no keys (all aggregate into one row).
     if (result.type == AggregatedDataVariants::Type::without_key)
     {
-        /// If there is a dynamically compiled code.
- if (compiled_data->compiled_method_ptr) - { - reinterpret_cast< - void (*)(const Aggregator &, AggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *)> - (compiled_data->compiled_method_ptr)(*this, result.without_key, rows, aggregate_columns, result.aggregates_pool); - } - else - executeWithoutKeyImpl(result.without_key, rows, aggregate_functions_instructions.data(), result.aggregates_pool); + executeWithoutKeyImpl(result.without_key, rows, aggregate_functions_instructions.data(), result.aggregates_pool); } else { /// This is where data is written that does not fit in `max_rows_to_group_by` with `group_by_overflow_mode = any`. AggregateDataPtr overflow_row_ptr = params.overflow_row ? result.without_key : nullptr; - bool is_two_level = result.isTwoLevel(); - - /// Compiled code, for the normal structure. - if (!is_two_level && compiled_data->compiled_method_ptr) - { - #define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - reinterpret_cast(compiled_data->compiled_method_ptr) \ - (*this, *result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \ - no_more_keys, overflow_row_ptr); - - if (false) {} - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - /// Compiled code, for a two-level structure. - else if (is_two_level && compiled_data->compiled_two_level_method_ptr) - { - #define M(NAME) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - reinterpret_cast(compiled_data->compiled_two_level_method_ptr) \ - (*this, *result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \ - no_more_keys, overflow_row_ptr); - - if (false) {} - APPLY_FOR_VARIANTS_TWO_LEVEL(M) - #undef M - } - /// When there is no dynamically compiled code. - else - { #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ executeImpl(*result.NAME, result.aggregates_pool, rows, key_columns, aggregate_functions_instructions.data(), \ no_more_keys, overflow_row_ptr); - if (false) {} - APPLY_FOR_AGGREGATED_VARIANTS(M) + if (false) {} + APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M - } } size_t result_size = result.sizeWithoutOverflowRow(); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 41fd957345e..b48663ff689 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -778,10 +777,6 @@ public: const size_t max_rows_to_group_by; const OverflowMode group_by_overflow_mode; - /// For dynamic compilation. - Compiler * compiler; - const UInt32 min_count_to_compile; - /// Two-level aggregation settings (used for a large number of keys). /** With how many keys or the size of the aggregation state in bytes, * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds. 
@@ -805,7 +800,6 @@ public:
             const Block & src_header_,
             const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
             bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
-            Compiler * compiler_, UInt32 min_count_to_compile_,
             size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
             size_t max_bytes_before_external_group_by_,
             bool empty_result_for_aggregation_by_empty_set_,
@@ -813,7 +807,6 @@
             : src_header(src_header_),
             keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
             overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
-            compiler(compiler_), min_count_to_compile(min_count_to_compile_),
             group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
             max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
             empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
@@ -824,7 +817,7 @@
         /// Only parameters that matter during merge.
         Params(const Block & intermediate_header_,
             const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
-            : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, nullptr, 0, 0, 0, 0, false, "", max_threads_)
+            : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, "", max_threads_)
         {
             intermediate_header = intermediate_header_;
         }
@@ -956,26 +949,6 @@ protected:
 
     Logger * log = &Logger::get("Aggregator");
 
-    /** Dynamically compiled library for aggregation, if any.
-      * The meaning of dynamic compilation is to specialize code
-      * for a specific list of aggregate functions.
-      * This allows you to expand the loop to create and update states of aggregate functions,
-      * and also use inline-code instead of virtual calls.
-      */
-    struct CompiledData
-    {
-        SharedLibraryPtr compiled_aggregator;
-
-        /// Obtained with dlsym. It is still necessary to make reinterpret_cast to the function pointer.
-        void * compiled_method_ptr = nullptr;
-        void * compiled_two_level_method_ptr = nullptr;
-    };
-    /// shared_ptr - to pass into a callback, that can survive Aggregator.
-    std::shared_ptr<CompiledData> compiled_data { new CompiledData };
-
-    bool compiled_if_possible = false;
-    void compileIfPossible(AggregatedDataVariants::Type type);
-
     /// Returns true if you can abort the current task.
     CancellationHook isCancelled;
@@ -1037,35 +1010,6 @@
         Method & method,
         IBlockOutputStream & out);
 
-public:
-    /// Templates that are instantiated by dynamic code compilation - see SpecializedAggregator.h
-
-    template <typename Method, typename AggregateFunctionsList>
-    void executeSpecialized(
-        Method & method,
-        Arena * aggregates_pool,
-        size_t rows,
-        ColumnRawPtrs & key_columns,
-        AggregateColumns & aggregate_columns,
-        bool no_more_keys,
-        AggregateDataPtr overflow_row) const;
-
-    template <bool no_more_keys, typename Method, typename AggregateFunctionsList>
-    void executeSpecializedCase(
-        Method & method,
-        typename Method::State & state,
-        Arena * aggregates_pool,
-        size_t rows,
-        AggregateColumns & aggregate_columns,
-        AggregateDataPtr overflow_row) const;
-
-    template <typename AggregateFunctionsList>
-    void executeSpecializedWithoutKey(
-        AggregatedDataWithoutKey & res,
-        size_t rows,
-        AggregateColumns & aggregate_columns,
-        Arena * arena) const;
-
 protected:
     /// Merge NULL key data from hash table `src` into `dst`.
template diff --git a/dbms/src/Interpreters/CMakeLists.txt b/dbms/src/Interpreters/CMakeLists.txt index 75771a07027..65172356645 100644 --- a/dbms/src/Interpreters/CMakeLists.txt +++ b/dbms/src/Interpreters/CMakeLists.txt @@ -1,70 +1,3 @@ - -if (OS_FREEBSD) - set (PATH_SHARE "/usr/local/share" CACHE STRING "") -else () - set (PATH_SHARE "/usr/share" CACHE STRING "") -endif () - -set (INTERNAL_COMPILER_BIN_ROOT "${CMAKE_INSTALL_FULL_BINDIR}/" CACHE STRING "") -set (INTERNAL_COMPILER_EXECUTABLE "clickhouse-clang" CACHE STRING "") -set (INTERNAL_LINKER_EXECUTABLE "clickhouse-lld" CACHE STRING "") - -# Disabling leak reporting for these tools -if (SANITIZE STREQUAL "address") - # Note that this doesn't work for setuid and setcap binaries - set(INTERNAL_COMPILER_ENV "env ASAN_OPTIONS=detect_leaks=0" CACHE STRING "") -else () - set(INTERNAL_COMPILER_ENV "" CACHE STRING "") -endif () - -set (INTERNAL_COMPILER_NO_WARNING OFF CACHE INTERNAL "") -set (INTERNAL_COMPILER_HEADERS_DIR "headers" CACHE STRING "") -set (INTERNAL_COMPILER_HEADERS_RELATIVE "${INTERNAL_COMPILER_HEADERS_DIR}/${VERSION_STRING}" CACHE STRING "") -set (INTERNAL_COMPILER_HEADERS "${PATH_SHARE}/clickhouse/${INTERNAL_COMPILER_HEADERS_RELATIVE}" CACHE STRING "") - -if(OS_FREEBSD) - set(INTERNAL_COMPILER_HEADERS_ROOT "" CACHE STRING "") -else() - set(INTERNAL_COMPILER_HEADERS_ROOT "${INTERNAL_COMPILER_HEADERS}" CACHE STRING "") - set(INTERNAL_COMPILER_CUSTOM_ROOT ON CACHE INTERNAL "") -endif() - -if(NOT INTERNAL_COMPILER_FLAGS) - set(INTERNAL_COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UC}} ${CXX_FLAGS_INTERNAL_COMPILER} -x c++ -march=native -fPIC -fvisibility=hidden -fno-implement-inlines -Wno-unused-command-line-argument -Bprefix=${PATH_SHARE}/clickhouse" CACHE STRING "") - if(INTERNAL_COMPILER_CUSTOM_ROOT) - set(INTERNAL_COMPILER_FLAGS "${INTERNAL_COMPILER_FLAGS} -nostdinc -nostdinc++") - if(INTERNAL_COMPILER_HEADERS_ROOT) - set(INTERNAL_COMPILER_FLAGS "${INTERNAL_COMPILER_FLAGS} -isysroot=${INTERNAL_COMPILER_HEADERS_ROOT}") - endif() - endif() -endif() -# TODO: use libs from package: -nodefaultlibs -lm -lc -lgcc_s -lgcc -lc++ -lc++abi - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -string(REPLACE "-no-pie" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -if (INTERNAL_COMPILER_NO_WARNING) - string (REPLACE "-Wall" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) - string (REPLACE "-Wextra" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) - string (REPLACE "-Werror" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -endif () - -list(GET Poco_INCLUDE_DIRS 0 Poco_Foundation_INCLUDE_DIR) -list(GET Poco_INCLUDE_DIRS 1 Poco_Util_INCLUDE_DIR) - -if (NOT DOUBLE_CONVERSION_INCLUDE_DIR) - get_target_property(DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_LIBRARIES} INTERFACE_INCLUDE_DIRECTORIES) -endif () - -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_INCLUDE_DIR}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Poco_Foundation_INCLUDE_DIR ${Poco_Foundation_INCLUDE_DIR}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Poco_Util_INCLUDE_DIR ${Poco_Util_INCLUDE_DIR}) - -message (STATUS "Using internal=${USE_INTERNAL_LLVM_LIBRARY} compiler=${USE_EMBEDDED_COMPILER}: headers=${INTERNAL_COMPILER_HEADERS} root=${INTERNAL_COMPILER_HEADERS_ROOT}: ${INTERNAL_COMPILER_ENV} 
${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE} ${INTERNAL_COMPILER_FLAGS}; ${INTERNAL_LINKER_EXECUTABLE}")
-
-set (CONFIG_COMPILE ${ClickHouse_BINARY_DIR}/dbms/src/Interpreters/config_compile.h)
-configure_file (${ClickHouse_SOURCE_DIR}/dbms/src/Interpreters/config_compile.h.in ${CONFIG_COMPILE})
-
 if (ENABLE_TESTS)
     add_subdirectory (tests)
 endif ()
diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp
deleted file mode 100644
index 3b420b6acce..00000000000
--- a/dbms/src/Interpreters/Compiler.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#if __has_include(<Interpreters/config_compile.h>)
-#include <Interpreters/config_compile.h>
-#endif
-
-namespace ProfileEvents
-{
-    extern const Event CompileAttempt;
-    extern const Event CompileSuccess;
-}
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int CANNOT_DLOPEN;
-    extern const int CANNOT_COMPILE_CODE;
-}
-
-Compiler::Compiler(const std::string & path_, size_t threads)
-    : path(path_), pool(threads)
-{
-    Poco::File(path).createDirectory();
-
-    Poco::DirectoryIterator dir_end;
-    for (Poco::DirectoryIterator dir_it(path); dir_end != dir_it; ++dir_it)
-    {
-        const std::string & name = dir_it.name();
-        if (endsWith(name, ".so"))
-        {
-            files.insert(name.substr(0, name.size() - 3));
-        }
-    }
-
-    LOG_INFO(log, "Having " << files.size() << " compiled files from previous start.");
-}
-
-Compiler::~Compiler()
-{
-    LOG_DEBUG(log, "Waiting for threads to finish.");
-    pool.wait();
-}
-
-
-static Compiler::HashedKey getHash(const std::string & key)
-{
-    SipHash hash;
-
-    auto revision = ClickHouseRevision::get();
-    hash.update(revision);
-    hash.update(key.data(), key.size());
-
-    Compiler::HashedKey res;
-    hash.get128(res.low, res.high);
-    return res;
-}
-
-
-/// Without .so extension.
-static std::string hashedKeyToFileName(Compiler::HashedKey hashed_key)
-{
-    WriteBufferFromOwnString out;
-    out << hashed_key.low << '_' << hashed_key.high;
-    return out.str();
-}
-
-
-SharedLibraryPtr Compiler::getOrCount(
-    const std::string & key,
-    UInt32 min_count_to_compile,
-    const std::string & additional_compiler_flags,
-    CodeGenerator get_code,
-    ReadyCallback on_ready)
-{
-    HashedKey hashed_key = getHash(key);
-
-    std::lock_guard lock(mutex);
-
-    UInt32 count = ++counts[hashed_key];
-
-    /// Is there a ready open library? Or, if the library is in the process of compiling, there will be nullptr.
-    Libraries::iterator libraries_it = libraries.find(hashed_key);
-    if (libraries.end() != libraries_it)
-    {
-        if (!libraries_it->second)
-            LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is already compiling or compilation was failed.");
-
-        /// TODO In this case, after the compilation is finished, the callback will not be called.
-
-        return libraries_it->second;
-    }
-
-    /// Is there a file with the library left over from the previous launch?
-    std::string file_name = hashedKeyToFileName(hashed_key);
-    Files::iterator files_it = files.find(file_name);
-    if (files.end() != files_it)
-    {
-        std::string so_file_path = path + '/' + file_name + ".so";
-        LOG_INFO(log, "Loading existing library " << so_file_path);
-
-        SharedLibraryPtr lib;
-
-        try
-        {
-            lib = std::make_shared<SharedLibrary>(so_file_path);
-        }
-        catch (const Exception & e)
-        {
-            if (e.code() != ErrorCodes::CANNOT_DLOPEN)
-                throw;
-
-            /// Found broken .so file (or file cannot be dlopened by whatever reason).
-            /// This could happen when filesystem is corrupted after server restart.
-            /// We remove the file - it will be recompiled on next attempt.
-
-            tryLogCurrentException(log);
-
-            files.erase(files_it);
-            Poco::File(so_file_path).remove();
-            return nullptr;
-        }
-
-        libraries[hashed_key] = lib;
-        return lib;
-    }
-
-    /// Has min_count_to_compile been reached?
-    if (count >= min_count_to_compile)
-    {
-        /// The min_count_to_compile value of zero indicates the need for synchronous compilation.
-
-        /// Indicates that the library is in the process of compiling.
-        libraries[hashed_key] = nullptr;
-
-        LOG_INFO(log, "Compiling code " << file_name << ", key: " << key);
-
-        if (min_count_to_compile == 0)
-        {
-            {
-                ext::unlock_guard unlock(mutex);
-                compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
-            }
-
-            return libraries[hashed_key];
-        }
-        else
-        {
-            bool res = pool.trySchedule([=]
-            {
-                try
-                {
-                    compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
-                }
-                catch (...)
-                {
-                    tryLogCurrentException("Compiler");
-                }
-            });
-
-            if (!res)
-                LOG_INFO(log, "All threads are busy.");
-        }
-    }
-
-    return nullptr;
-}
-
-
-/// This will guarantee that code will compile only when version of headers match version of running server.
-static void addCodeToAssertHeadersMatch(WriteBuffer & out)
-{
-    out <<
-        "#include <Common/config_version.h>\n"
-        "#if VERSION_REVISION != " << ClickHouseRevision::get() << "\n"
-        "#define STRING2(x) #x\n"
-        "#define STRING(x) STRING2(x)\n"
-        "#pragma message \"ClickHouse headers revision = \" STRING(VERSION_REVISION) \n"
-        "#error \"ClickHouse headers revision doesn't match runtime revision of the server (" << ClickHouseRevision::get() << ").\"\n"
-        "#endif\n\n";
-}
-
-
-void Compiler::compile(
-    HashedKey hashed_key,
-    std::string file_name,
-    const std::string & additional_compiler_flags,
-    CodeGenerator get_code,
-    ReadyCallback on_ready)
-{
-    ProfileEvents::increment(ProfileEvents::CompileAttempt);
-
-#if !defined(INTERNAL_COMPILER_EXECUTABLE)
-    throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE);
-#else
-    std::string prefix = path + "/" + file_name;
-    std::string cpp_file_path = prefix + ".cpp";
-    std::string so_file_path = prefix + ".so";
-    std::string so_tmp_file_path = prefix + ".so.tmp";
-
-    {
-        WriteBufferFromFile out(cpp_file_path);
-
-        addCodeToAssertHeadersMatch(out);
-        out << get_code();
-    }
-
-    std::stringstream command;
-
-    auto compiler_executable_root = Poco::Util::Application::instance().config().getString("compiler_executable_root", INTERNAL_COMPILER_BIN_ROOT);
-    auto compiler_headers = Poco::Util::Application::instance().config().getString("compiler_headers", INTERNAL_COMPILER_HEADERS);
-    auto compiler_headers_root = Poco::Util::Application::instance().config().getString("compiler_headers_root", INTERNAL_COMPILER_HEADERS_ROOT);
-    LOG_DEBUG(log, "Using internal compiler: compiler_executable_root=" << compiler_executable_root << "; compiler_headers_root=" << compiler_headers_root << "; compiler_headers=" << compiler_headers);
-
-    /// Slightly inconvenient.
-    command <<
-        "("
-        INTERNAL_COMPILER_ENV
-        " " << compiler_executable_root << INTERNAL_COMPILER_EXECUTABLE
-        " " INTERNAL_COMPILER_FLAGS
-        /// It is hard to correctly call a ld program manually, because it is easy to skip critical flags, which might lead to
-        /// unhandled exceptions. Therefore pass path to llvm's lld directly to clang.
-        " -fuse-ld=" << compiler_executable_root << INTERNAL_LINKER_EXECUTABLE
-        " -fdiagnostics-color=never"
-
-        /// Do not use libgcc and startup files.
The library will work nevertheless and we avoid extra dependency. - " -nodefaultlibs -nostartfiles" - - #if INTERNAL_COMPILER_CUSTOM_ROOT - /// To get correct order merge this results carefully: - /// echo | clang -x c++ -E -Wp,-v - - /// echo | g++ -x c++ -E -Wp,-v - - - " -isystem " << compiler_headers_root << "/usr/include/c++/*" - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE "/c++/*" - #endif - " -isystem " << compiler_headers_root << "/usr/include/c++/*/backward" - " -isystem " << compiler_headers_root << "/usr/include/clang/*/include" /// if compiler is clang (from package) - " -isystem " << compiler_headers_root << "/usr/local/lib/clang/*/include" /// if clang installed manually - " -isystem " << compiler_headers_root << "/usr/lib/clang/*/include" /// if clang build from submodules - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include-fixed" - " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include" - #endif - " -isystem " << compiler_headers_root << "/usr/local/include" /// if something installed manually - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE - #endif - " -isystem " << compiler_headers_root << "/usr/include" - #endif - " -I " << compiler_headers << "/dbms/src/" - " -isystem " << compiler_headers << "/contrib/cityhash102/include/" - " -isystem " << compiler_headers << "/contrib/libpcg-random/include/" - #if USE_MIMALLOC - " -isystem " << compiler_headers << "/contrib/mimalloc/include/" - #endif - " -isystem " << compiler_headers << INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR - " -isystem " << compiler_headers << INTERNAL_Poco_Foundation_INCLUDE_DIR - " -isystem " << compiler_headers << INTERNAL_Boost_INCLUDE_DIRS - " -I " << compiler_headers << "/libs/libcommon/include/" - " " << additional_compiler_flags << - " -shared -o " << so_tmp_file_path << " " << cpp_file_path - << " 2>&1" - ") || echo Return code: $?"; - -#ifndef NDEBUG - LOG_TRACE(log, "Compile command: " << command.str()); -#endif - - std::string compile_result; - - { - auto process = ShellCommand::execute(command.str()); - readStringUntilEOF(compile_result, process->out); - process->wait(); - } - - if (!compile_result.empty()) - { - std::string error_message = "Cannot compile code:\n\n" + command.str() + "\n\n" + compile_result; - - Poco::File so_tmp_file(so_tmp_file_path); - if (so_tmp_file.exists() && so_tmp_file.canExecute()) - { - /// Compiler may emit information messages. This is suspicious, but we still can use compiled result. - LOG_WARNING(log, error_message); - } - else - throw Exception(error_message, ErrorCodes::CANNOT_COMPILE_CODE); - } - - /// If there was an error before, the file with the code remains for viewing. 
- Poco::File(cpp_file_path).remove(); - - Poco::File(so_tmp_file_path).renameTo(so_file_path); - SharedLibraryPtr lib(new SharedLibrary(so_file_path)); - - { - std::lock_guard lock(mutex); - libraries[hashed_key] = lib; - } - - LOG_INFO(log, "Compiled code " << file_name); - ProfileEvents::increment(ProfileEvents::CompileSuccess); - - on_ready(lib); - -#endif -} - - -} diff --git a/dbms/src/Interpreters/Compiler.h b/dbms/src/Interpreters/Compiler.h deleted file mode 100644 index b79cf26e0f0..00000000000 --- a/dbms/src/Interpreters/Compiler.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace DB -{ - -/** Lets you compile a piece of code that uses the server's header files into the dynamic library. - * Conducts statistic of calls, and initiates compilation only on the N-th call for one key. - * Compilation is performed asynchronously, in separate threads, if there are free threads. - * NOTE: There is no cleaning of obsolete and unnecessary results. - */ -class Compiler -{ -public: - /** path - path to the directory with temporary files - the results of the compilation. - * The compilation results are saved when the server is restarted, - * but use the revision number as part of the key. That is, they become obsolete when the server is updated. - */ - Compiler(const std::string & path_, size_t threads); - ~Compiler(); - - using HashedKey = UInt128; - - using CodeGenerator = std::function; - using ReadyCallback = std::function; - - /** Increase the counter for the given key `key` by one. - * If the compilation result already exists (already open, or there is a file with the library), - * then return ready SharedLibrary. - * Otherwise, if min_count_to_compile == 0, then initiate the compilation in the same thread, wait for it, and return the result. - * Otherwise, if the counter has reached min_count_to_compile, - * initiate compilation in a separate thread, if there are free threads, and return nullptr. - * Otherwise, return nullptr. - */ - SharedLibraryPtr getOrCount( - const std::string & key, - UInt32 min_count_to_compile, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready); - -private: - using Counts = std::unordered_map; - using Libraries = std::unordered_map; - using Files = std::unordered_set; - - const std::string path; - ThreadPool pool; - - /// Number of calls to `getOrCount`. - Counts counts; - - /// Compiled and open libraries. Or nullptr for libraries in the compilation process. - Libraries libraries; - - /// Compiled files remaining from previous runs, but not yet open. - Files files; - - std::mutex mutex; - - Logger * log = &Logger::get("Compiler"); - - - void compile( - HashedKey hashed_key, - std::string file_name, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready); -}; - -} diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 53392f9ad9a..67a81a94b3a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -140,7 +139,6 @@ struct ContextShared std::optional background_pool; /// The thread pool for the background work performed by the tables. 
std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. - std::optional compiler; /// Used for dynamic compilation of queries' parts if it necessary. std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; @@ -1634,17 +1632,6 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrcompiler) - shared->compiler.emplace(shared->path + "build/", 1); - - return *shared->compiler; -} - - void Context::initializeSystemLogs() { auto lock = getLock(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index dcc3fa9b3ee..2d583c3c353 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1654,7 +1654,6 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre Aggregator::Params params(header, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, @@ -1721,7 +1720,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const Aggregator::Params params(header_before_aggregation, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, @@ -1943,7 +1941,6 @@ void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificato Aggregator::Params params(header, keys, aggregates, false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath(), settings.max_threads); @@ -1973,7 +1970,6 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modif Aggregator::Params params(header_before_transform, keys, aggregates, false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? 
&context.getCompiler() : nullptr, settings.min_count_to_compile, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath(), settings.max_threads); diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h deleted file mode 100644 index 9a238c77032..00000000000 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ /dev/null @@ -1,215 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - - -/** An aggregation loop template that allows you to generate a custom variant for a specific combination of aggregate functions. - * It differs from the usual one in that calls to aggregate functions should be inlined, and the update loop of the aggregate functions should be unrolled. - * - * Since there are too many possible combinations, it is not possible to generate them all in advance. - * This template is intended to instantiate it in runtime, - * by running the compiler, compiling shared library, and using it with `dlopen`. - */ - - -struct AggregateFunctionsUpdater -{ - AggregateFunctionsUpdater( - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions_, - const Sizes & offsets_of_aggregate_states_, - Aggregator::AggregateColumns & aggregate_columns_, - AggregateDataPtr & value_, - size_t row_num_, - Arena * arena_) - : aggregate_functions(aggregate_functions_), - offsets_of_aggregate_states(offsets_of_aggregate_states_), - aggregate_columns(aggregate_columns_), - value(value_), row_num(row_num_), arena(arena_) - { - } - - template - void operator()() ALWAYS_INLINE; - - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions; - const Sizes & offsets_of_aggregate_states; - Aggregator::AggregateColumns & aggregate_columns; - AggregateDataPtr & value; - size_t row_num; - Arena * arena; -}; - -template -void AggregateFunctionsUpdater::operator()() -{ - static_cast(aggregate_functions[column_num])->add( - value + offsets_of_aggregate_states[column_num], - aggregate_columns[column_num].data(), - row_num, arena); -} - -struct AggregateFunctionsCreator -{ - AggregateFunctionsCreator( - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions_, - const Sizes & offsets_of_aggregate_states_, - AggregateDataPtr & aggregate_data_) - : aggregate_functions(aggregate_functions_), - offsets_of_aggregate_states(offsets_of_aggregate_states_), - aggregate_data(aggregate_data_) - { - } - - template - void operator()() ALWAYS_INLINE; - - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions; - const Sizes & offsets_of_aggregate_states; - AggregateDataPtr & aggregate_data; -}; - -template -void AggregateFunctionsCreator::operator()() -{ - AggregateFunction * func = static_cast(aggregate_functions[column_num]); - - try - { - /** An exception may occur if there is a shortage of memory. - * To ensure that everything is properly destroyed, we "roll back" some of the created states. - * The code is not very convenient. - */ - func->create(aggregate_data + offsets_of_aggregate_states[column_num]); - } - catch (...) 
- { - for (size_t rollback_j = 0; rollback_j < column_num; ++rollback_j) - func->destroy(aggregate_data + offsets_of_aggregate_states[rollback_j]); - - throw; - } -} - - -template -void NO_INLINE Aggregator::executeSpecialized( - Method & method, - Arena * aggregates_pool, - size_t rows, - ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool no_more_keys, - AggregateDataPtr overflow_row) const -{ - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - executeSpecializedCase( - method, state, aggregates_pool, rows, aggregate_columns, overflow_row); - else - executeSpecializedCase( - method, state, aggregates_pool, rows, aggregate_columns, overflow_row); -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wuninitialized" - -template -void NO_INLINE Aggregator::executeSpecializedCase( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - size_t rows, - AggregateColumns & aggregate_columns, - AggregateDataPtr overflow_row) const -{ - /// For all rows. - for (size_t i = 0; i < rows; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - - if (!no_more_keys) /// Insert. - { - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - AggregateFunctionsList::forEach(AggregateFunctionsCreator( - aggregate_functions, offsets_of_aggregate_states, aggregate_data)); - - emplace_result.setMapped(aggregate_data); - } - else - aggregate_data = emplace_result.getMapped(); - } - else - { - /// Add only if the key already exists. - auto find_result = state.findKey(method.data, i, *aggregates_pool); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); - } - - /// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do. - if (!aggregate_data && !overflow_row) - continue; - - auto value = aggregate_data ? aggregate_data : overflow_row; - - /// Add values into the aggregate functions. - AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i, aggregates_pool)); - } -} - -#pragma GCC diagnostic pop - -template -void NO_INLINE Aggregator::executeSpecializedWithoutKey( - AggregatedDataWithoutKey & res, - size_t rows, - AggregateColumns & aggregate_columns, - Arena * arena) const -{ - for (size_t i = 0; i < rows; ++i) - { - AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i, arena)); - } -} - -} - - -/** The main code is compiled with gcc 7. - * But SpecializedAggregator is compiled using clang 6 into the .so file. - * This is done because gcc can not get functions inlined, - * which were de-virtualized, in a particular case, and the performance is lower. - * And also it's easier to distribute clang for deploy to the servers. 
- * - * After switching from gcc 4.8 and gnu++1x to gcc 4.9 and gnu++1y (and then to gcc 5), - * an error occurred with `dlopen`: undefined symbol: __cxa_pure_virtual - * - * Most likely, this is due to the changed version of this symbol: - * gcc creates a symbol in .so - * U __cxa_pure_virtual@@CXXABI_1.3 - * but clang creates a symbol - * U __cxa_pure_virtual - * - * But it does not matter for us how the __cxa_pure_virtual function will be implemented, - * because it is not called during normal program execution, - * and if called - then the program is guaranteed buggy. - * - * Therefore, we can work around the problem this way - */ -extern "C" void __attribute__((__visibility__("default"), __noreturn__)) __cxa_pure_virtual() { abort(); } diff --git a/dbms/src/Interpreters/config_compile.h.in b/dbms/src/Interpreters/config_compile.h.in deleted file mode 100644 index e8db534a62d..00000000000 --- a/dbms/src/Interpreters/config_compile.h.in +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#cmakedefine CMAKE_LIBRARY_ARCHITECTURE "@CMAKE_LIBRARY_ARCHITECTURE@" -#cmakedefine PATH_SHARE "@PATH_SHARE@" -#cmakedefine INTERNAL_COMPILER_FLAGS "@INTERNAL_COMPILER_FLAGS@" -#cmakedefine INTERNAL_COMPILER_BIN_ROOT "@INTERNAL_COMPILER_BIN_ROOT@" -#cmakedefine INTERNAL_LINKER_EXECUTABLE "@INTERNAL_LINKER_EXECUTABLE@" -#cmakedefine INTERNAL_COMPILER_EXECUTABLE "@INTERNAL_COMPILER_EXECUTABLE@" -#cmakedefine INTERNAL_COMPILER_ENV "@INTERNAL_COMPILER_ENV@" -#if !defined(INTERNAL_COMPILER_ENV) -# define INTERNAL_COMPILER_ENV "" -#endif -#cmakedefine INTERNAL_COMPILER_HEADERS "@INTERNAL_COMPILER_HEADERS@" -#if !defined(INTERNAL_COMPILER_HEADERS) -# define INTERNAL_COMPILER_HEADERS "" -#endif -#cmakedefine INTERNAL_COMPILER_HEADERS_ROOT "@INTERNAL_COMPILER_HEADERS_ROOT@" -#if !defined(INTERNAL_COMPILER_HEADERS_ROOT) -# define INTERNAL_COMPILER_HEADERS_ROOT "" -#endif - -#cmakedefine01 INTERNAL_COMPILER_CUSTOM_ROOT -#cmakedefine INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR "@INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR@" -#cmakedefine INTERNAL_Poco_Foundation_INCLUDE_DIR "@INTERNAL_Poco_Foundation_INCLUDE_DIR@" -#cmakedefine INTERNAL_Poco_Util_INCLUDE_DIR "@INTERNAL_Poco_Util_INCLUDE_DIR@" -#cmakedefine INTERNAL_Boost_INCLUDE_DIRS "@INTERNAL_Boost_INCLUDE_DIRS@" diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index b4f9fff1d36..3fac5424c00 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -41,9 +41,6 @@ add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) target_link_libraries (two_level_hash_map PRIVATE dbms) -add_executable (compiler_test compiler_test.cpp) -target_link_libraries (compiler_test PRIVATE dbms) - add_executable (logical_expressions_optimizer logical_expressions_optimizer.cpp) target_link_libraries (logical_expressions_optimizer PRIVATE dbms clickhouse_parsers) diff --git a/dbms/src/Interpreters/tests/compiler_test.cpp b/dbms/src/Interpreters/tests/compiler_test.cpp deleted file mode 100644 index c56cf5775d6..00000000000 --- a/dbms/src/Interpreters/tests/compiler_test.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include - -#include - - -int main(int, char **) -{ - using namespace DB; - - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Logger::root().setChannel(channel); - Logger::root().setLevel("trace"); - - /// Check exception handling and catching - try - { - Compiler compiler(".", 
1); - - auto lib = compiler.getOrCount("catch_me_if_you_can", 0, "", []() -> std::string - { - return - "#include \n" - "void f() __attribute__((__visibility__(\"default\")));\n" - "void f()" - "{" - "try { throw std::runtime_error(\"Catch me if you can\"); }" - "catch (const std::runtime_error & e) { std::cout << \"Caught in .so: \" << e.what() << std::endl; throw; }\n" - "}" - ; - }, [](SharedLibraryPtr &){}); - - auto f = lib->template get("_Z1fv"); - - try - { - f(); - } - catch (const std::exception & e) - { - std::cout << "Caught in main(): " << e.what() << "\n"; - return 0; - } - catch (...) - { - std::cout << "Unknown exception\n"; - return -1; - } - } - catch (...) - { - std::cerr << getCurrentExceptionMessage(true) << "\n"; - return -1; - } - - return 0; -} diff --git a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference b/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference deleted file mode 100644 index 207dc069e43..00000000000 --- a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 Hello -2 Hello diff --git a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql b/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql deleted file mode 100644 index 5902b94b753..00000000000 --- a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql +++ /dev/null @@ -1,2 +0,0 @@ -SET compile = 1, min_count_to_compile = 0, max_threads = 1, send_logs_level = 'none'; -SELECT arrayJoin([1, 2, 1]) AS UserID, argMax('Hello', today()) AS res GROUP BY UserID; diff --git a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference b/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference deleted file mode 100644 index 6ed281c757a..00000000000 --- a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -1 diff --git a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh b/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh deleted file mode 100755 index fbf5efcda2c..00000000000 --- a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. $CURDIR/../shell_config.sh - -SETTINGS="--compile=1 --min_count_to_compile=0 --max_threads=1 --max_memory_usage=8000000 --server_logs_file=/dev/null" -output=$($CLICKHOUSE_CLIENT -q "SELECT length(groupArray(number)) FROM (SELECT * FROM system.numbers LIMIT 1000000)" $SETTINGS 2>&1) - -[[ $? -eq 0 ]] && echo "Expected non-zero RC" -if ! echo "$output" | grep -Fc -e 'Memory limit (for query) exceeded' -e 'Cannot compile code' ; then - echo -e 'There is no expected exception "Memory limit (for query) exceeded: would use..." 
or "Cannot compile code..."' "Whereas got:\n$output" -fi - -$CLICKHOUSE_CLIENT -q "SELECT 1" From cc0157b29378cc6af380976850a75d9c026f768c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 02:07:05 +0300 Subject: [PATCH 171/181] Added a test --- dbms/src/Functions/trim.cpp | 2 +- .../queries/0_stateless/00997_trim.reference | 0 dbms/tests/queries/0_stateless/00997_trim.sql | 20 +++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00997_trim.reference create mode 100644 dbms/tests/queries/0_stateless/00997_trim.sql diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 81916604d63..46f69530005 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -85,7 +85,7 @@ private: char_data += num_chars; } - if constexpr (mode::trim_left) + if constexpr (mode::trim_right) { const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end); if (found) diff --git a/dbms/tests/queries/0_stateless/00997_trim.reference b/dbms/tests/queries/0_stateless/00997_trim.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00997_trim.sql b/dbms/tests/queries/0_stateless/00997_trim.sql new file mode 100644 index 00000000000..7519877ec5e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_trim.sql @@ -0,0 +1,20 @@ +WITH + '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' AS x, + replaceRegexpAll(x, '.', ' ') AS spaces, + concat(substring(spaces, 1, rand(1) % 62), substring(x, 1, rand(2) % 62), substring(spaces, 1, rand(3) % 62)) AS s, + trimLeft(s) AS sl, + trimRight(s) AS sr, + trimBoth(s) AS t, + replaceRegexpOne(s, '^ +', '') AS slr, + replaceRegexpOne(s, ' +$', '') AS srr, + replaceRegexpOne(s, '^ *(.*?) *$', '\\1') AS tr +SELECT + replaceAll(s, ' ', '_'), + replaceAll(sl, ' ', '_'), + replaceAll(slr, ' ', '_'), + replaceAll(sr, ' ', '_'), + replaceAll(srr, ' ', '_'), + replaceAll(t, ' ', '_'), + replaceAll(tr, ' ', '_') +FROM numbers(100000) +WHERE NOT ((sl = slr) AND (sr = srr) AND (t = tr)) From 7703d321138c2854142f10709cb6451e4b5a024e Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Aug 2019 13:53:22 +0800 Subject: [PATCH 172/181] remove symlinks --- docs/zh/database_engines/index.md | 1 - docs/zh/database_engines/mysql.md | 1 - 2 files changed, 2 deletions(-) delete mode 120000 docs/zh/database_engines/index.md delete mode 120000 docs/zh/database_engines/mysql.md diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md deleted file mode 120000 index bbdb762a4ad..00000000000 --- a/docs/zh/database_engines/index.md +++ /dev/null @@ -1 +0,0 @@ -../../en/database_engines/index.md \ No newline at end of file diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md deleted file mode 120000 index 51ac4126e2d..00000000000 --- a/docs/zh/database_engines/mysql.md +++ /dev/null @@ -1 +0,0 @@ -../../en/database_engines/mysql.md \ No newline at end of file From 7d7c13632cb3c400e3920f06493d888da3b654e0 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Aug 2019 13:54:01 +0800 Subject: [PATCH 173/181] Translate database engine documentation, update table engine documentation. 
--- docs/zh/database_engines/index.md | 11 +++ docs/zh/database_engines/mysql.md | 124 ++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 docs/zh/database_engines/index.md create mode 100644 docs/zh/database_engines/mysql.md diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md new file mode 100644 index 00000000000..f8ae05e2520 --- /dev/null +++ b/docs/zh/database_engines/index.md @@ -0,0 +1,11 @@ +# 数据库引擎 + +您使用的所有表都是由数据库引擎所提供的 + +默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). + +除此之外,您还可以选择使用以下的数据库引擎: + +- [MySQL](mysql.md) + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/) diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md new file mode 100644 index 00000000000..38dfcb5ef64 --- /dev/null +++ b/docs/zh/database_engines/mysql.md @@ -0,0 +1,124 @@ +# MySQL + +MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 + +`MySQL`数据库引擎会将对其的查询转换为MySQL语法并发送到MySQL服务器中,因此您可以执行诸如`SHOW TABLES`或`SHOW CREATE TABLE`之类的操作。 + +但您无法对其执行以下操作: + +- `ATTACH`/`DETACH` +- `DROP` +- `RENAME` +- `CREATE TABLE` +- `ALTER` + + +## CREATE DATABASE + +``` sql +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] +ENGINE = MySQL('host:port', 'database', 'user', 'password') +``` + +**MySQL数据库引擎参数** + +- `host:port` — 链接的MySQL地址。 +- `database` — 链接的MySQL数据库。 +- `user` — 链接的MySQL用户。 +- `password` — 链接的MySQL用户密码。 + + +## 支持的类型对应 + +MySQL | ClickHouse +------|------------ +UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) +TINYINT | [Int8](../data_types/int_uint.md) +UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) +SMALLINT | [Int16](../data_types/int_uint.md) +UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) +INT, MEDIUMINT | [Int32](../data_types/int_uint.md) +UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) +BIGINT | [Int64](../data_types/int_uint.md) +FLOAT | [Float32](../data_types/float.md) +DOUBLE | [Float64](../data_types/float.md) +DATE | [Date](../data_types/date.md) +DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) +BINARY | [FixedString](../data_types/fixedstring.md) + +其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 + +同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 + + +## 使用示例 + +在MySQL中创建表: + +``` +mysql> USE test; +Database changed + +mysql> CREATE TABLE `mysql_table` ( + -> `int_id` INT NOT NULL AUTO_INCREMENT, + -> `float` FLOAT NOT NULL, + -> PRIMARY KEY (`int_id`)); +Query OK, 0 rows affected (0,09 sec) + +mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2); +Query OK, 1 row affected (0,00 sec) + +mysql> select * from mysql_table; ++--------+-------+ +| int_id | value | ++--------+-------+ +| 1 | 2 | ++--------+-------+ +1 row in set (0,00 sec) +``` + +在ClickHouse中创建MySQL类型的数据库,同时与MySQL服务器交换数据: + +```sql +CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') +``` +```sql +SHOW DATABASES +``` +```text +┌─name─────┐ +│ default │ +│ mysql_db │ +│ system │ +└──────────┘ +``` +```sql +SHOW TABLES FROM mysql_db +``` +```text +┌─name─────────┐ +│ mysql_table │ +└──────────────┘ +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +└────────┴───────┘ +``` +```sql +INSERT INTO mysql_db.mysql_table VALUES (3,4) +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +│ 3 │ 4 │ +└────────┴───────┘ 
+``` + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/mysql/) From 2464dd0b9fc73deb1d265b16f751e884e291af08 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 24 Aug 2019 11:51:02 +0300 Subject: [PATCH 174/181] fix --- .../queries/0_stateless/00943_materialize_index.sh | 6 +++--- .../0_stateless/00944_clear_index_in_partition.sh | 2 +- .../00975_indices_mutation_replicated_zookeeper.sh | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index feab59b368e..bc59b41b005 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -39,13 +39,13 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_2.txt" "test" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" -wait_for_mutation "minmax_idx" "mutation_3.txt" "test" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" @@ -58,7 +58,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" -wait_for_mutation "minmax_idx" "mutation_4.txt" "test" +wait_for_mutation "minmax_idx" "mutation_5.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 5a7bdd8e3ae..74f15e63545 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -43,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_2.txt" "test" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 5e6159475f8..765dfb6abe5 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ 
b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh
@@ -45,20 +45,20 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test.indices_mutaions1 VALUES (9, 1, 2)"
 
 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
 
 $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;"
-sleep 0.5
+sleep 1
 
 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
 
 $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 MATERIALIZE INDEX idx IN PARTITION 1;"
-wait_for_mutation "indices_mutaions1" "mutation_2.txt" "test"
-wait_for_mutation "indices_mutaions2" "mutation_2.txt" "test"
+wait_for_mutation "indices_mutaions1" "0000000000" "test"
+wait_for_mutation "indices_mutaions2" "0000000000" "test"
 
 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
 
 $CLICKHOUSE_CLIENT --query="DROP TABLE test.indices_mutaions1"
 $CLICKHOUSE_CLIENT --query="DROP TABLE test.indices_mutaions2"

From b7fdfcc7976bc31a822c98fc8d9219f626c54817 Mon Sep 17 00:00:00 2001
From: Weiqing Xu
Date: Sat, 24 Aug 2019 17:27:36 +0800
Subject: [PATCH 175/181] Fix HDFS HA not working in DEBUG mode

Describe the bug: when using an HDFS HA nameservice as the URI, the port
will be 0, and hdfsBuilderSetNameNodePort will be called to set it.
hdfsBuilderSetNameNodePort calls assert to check that the port is greater
than 0, so in Release mode it works OK, but in Debug mode the assert fails.
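(Illustrative sketch, not part of the patch: a minimal standalone program
showing why the port ends up as 0. It assumes a hypothetical HA nameservice
name `nameservice1`, and it takes on faith the commit's claim that libhdfs3's
hdfsBuilderSetNameNodePort asserts a positive port in debug builds.)

    // An HA nameservice URI names a logical service rather than host:port,
    // so Poco::URI reports port 0 for it ("hdfs" has no well-known port).
    #include <iostream>
    #include <Poco/URI.h>

    int main()
    {
        Poco::URI uri("hdfs://nameservice1/some/dir");  // hypothetical HA nameservice
        std::cout << uri.getHost() << ":" << uri.getPort() << "\n";  // prints "nameservice1:0"

        // Passing this 0 on to hdfsBuilderSetNameNodePort() is what trips the
        // debug-build assert; the fix below skips the call when the port is 0.
        return 0;
    }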
How to reproduce: compile ClickHouse in DEBUG mode; it will throw an error
when using an HDFS HA nameservice URL.
---
 dbms/src/IO/HDFSCommon.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/IO/HDFSCommon.cpp b/dbms/src/IO/HDFSCommon.cpp
index 0f1a58942d6..a94fbeabd60 100644
--- a/dbms/src/IO/HDFSCommon.cpp
+++ b/dbms/src/IO/HDFSCommon.cpp
@@ -40,7 +40,10 @@ HDFSBuilderPtr createHDFSBuilder(const Poco::URI & uri)
 hdfsBuilderSetUserName(builder.get(), user.c_str());
 }
 hdfsBuilderSetNameNode(builder.get(), host.c_str());
-hdfsBuilderSetNameNodePort(builder.get(), port);
+if (port != 0)
+{
+hdfsBuilderSetNameNodePort(builder.get(), port);
+}
 return builder;
 }

From 720bb3ac08f74dbc7fd42738d8835562973d1671 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 24 Aug 2019 02:07:05 +0300
Subject: [PATCH 176/181] Added a test
---
 libs/libcommon/include/common/find_symbols.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h
index 920a7df04c5..162c73251fa 100644
--- a/libs/libcommon/include/common/find_symbols.h
+++ b/libs/libcommon/include/common/find_symbols.h
@@ -17,7 +17,7 @@
 * but with the following differencies:
 * - works with any memory ranges, including containing zero bytes;
 * - doesn't require terminating zero byte: end of memory range is passed explicitly;
- * - if not found, returns pointer to end instead of NULL;
+ * - if not found, returns pointer to end instead of nullptr;
 * - maximum number of symbols to search is 16.
 *
 * Uses SSE 2 in case of small number of symbols for search and SSE 4.2 in the case of large number of symbols,
@@ -188,6 +188,7 @@ inline const char * find_first_symbols_sse42_impl(const char * const begin, cons
 || (num_chars >= 11 && maybe_negate(*pos == c11))
 || (num_chars >= 12 && maybe_negate(*pos == c12))
 || (num_chars >= 13 && maybe_negate(*pos == c13))
+ || (num_chars >= 14 && maybe_negate(*pos == c14))
 || (num_chars >= 15 && maybe_negate(*pos == c15))
 || (num_chars >= 16 && maybe_negate(*pos == c16)))
 return pos;

From 60fde1d29eb4f33a1abdc4eb273c278cf6a83f15 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 24 Aug 2019 15:03:54 +0300
Subject: [PATCH 177/181] Removed useless statements from debian directory
---
 debian/clickhouse-common-static.install | 1 -
 debian/clickhouse-server.docs | 1 +
 debian/clickhouse-server.install | 2 --
 debian/rules | 16 ----------------
 4 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/debian/clickhouse-common-static.install b/debian/clickhouse-common-static.install
index 6666b090272..81b1dc4eb1b 100644
--- a/debian/clickhouse-common-static.install
+++ b/debian/clickhouse-common-static.install
@@ -1,4 +1,3 @@
 usr/bin/clickhouse
 usr/bin/clickhouse-odbc-bridge
 etc/security/limits.d/clickhouse.conf
-usr/share/clickhouse/*

diff --git a/debian/clickhouse-server.docs b/debian/clickhouse-server.docs
index 95969d08c43..e12d6533be2 100644
--- a/debian/clickhouse-server.docs
+++ b/debian/clickhouse-server.docs
@@ -1,3 +1,4 @@
 LICENSE
 AUTHORS
 README.md
+CHANGELOG.md

diff --git a/debian/clickhouse-server.install b/debian/clickhouse-server.install
index f69969a6084..b1475fdf162 100644
--- a/debian/clickhouse-server.install
+++ b/debian/clickhouse-server.install
@@ -1,6 +1,4 @@
 usr/bin/clickhouse-server
-usr/bin/clickhouse-clang
-usr/bin/clickhouse-lld
 usr/bin/clickhouse-copier
 usr/bin/clickhouse-report
 etc/clickhouse-server/config.xml

diff --git a/debian/rules
b/debian/rules index a49ffc3f66e..c21f0999bbc 100755 --- a/debian/rules +++ b/debian/rules @@ -32,11 +32,6 @@ endif CMAKE_FLAGS += -DENABLE_UTILS=0 -#DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8) - -#DEB_CC ?= gcc-7 -#DEB_CXX ?= g++-7 - ifdef DEB_CXX DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) DEB_HOST_GNU_TYPE := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) @@ -88,12 +83,8 @@ override_dh_auto_configure: override_dh_auto_build: # Fix for ninja. Do not add -O. $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) -# #cd $(BUILDDIR) && cmake --build . -- -j$(THREADS_COUNT) # cmake return true on error override_dh_auto_test: -# #TODO, use ENABLE_TESTS=1 -# #./debian/tests_wrapper.sh -# cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -R GLIBC_required_version cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server override_dh_clean: @@ -117,11 +108,6 @@ override_dh_install: mkdir -p $(DESTDIR)/etc/systemd/system/ cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/ - # fake metrika files when private dir is empty - mkdir -p $(DESTDIR)/etc/clickhouse-server/metrika - touch $(DESTDIR)/etc/clickhouse-server/metrika/config.xml - touch $(DESTDIR)/etc/clickhouse-server/metrika/users.xml - dh_install --list-missing --sourcedir=$(DESTDIR) override_dh_auto_install: @@ -130,7 +116,5 @@ override_dh_auto_install: override_dh_shlibdeps: true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency. -#TODO: faster packing of non-release builds: ifdef RELEASE_COMPATIBLE override_dh_builddeb: dh_builddeb -- -Z gzip # Older systems don't have "xz", so use "gzip" instead. -#TODO: endif From cd620d2de517acff4943443a25fac71ac063b068 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 15:18:03 +0300 Subject: [PATCH 178/181] Fixed race condition in test (once again) --- .../queries/0_stateless/00600_replace_running_query.reference | 1 - dbms/tests/queries/0_stateless/00600_replace_running_query.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 804267a1c11..a01672aae85 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -2,6 +2,5 @@ 1 1 1 -finished 42 readonly SELECT 2, count() FROM system.numbers 1 44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 513f6d8440e..dbbf41dd772 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -30,7 +30,7 @@ ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is alrea # Trying to replace query of a different user $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & From 2dbfabd08c839a54a27ed99dead67b71ed8660ff Mon Sep 17 00:00:00 2001 From: Nikita Vasilev 
<31595000+nikvas0@users.noreply.github.com> Date: Sun, 25 Aug 2019 01:01:36 +0300 Subject: [PATCH 179/181] fix Set index check useless (#6651) * fixed useless detection * fixed useless detection * fix * fix * fix --- .../Storages/MergeTree/MergeTreeIndexSet.cpp | 25 ++----------------- .../00997_set_index_array.reference | 1 + .../0_stateless/00997_set_index_array.sql | 24 ++++++++++++++++++ 3 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00997_set_index_array.reference create mode 100644 dbms/tests/queries/0_stateless/00997_set_index_array.sql diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 40aba822353..954ac774583 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -405,25 +405,6 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const return true; } -static bool checkAtomName(const String & name) -{ - static std::set atoms = { - "notEquals", - "equals", - "less", - "greater", - "lessOrEquals", - "greaterOrEquals", - "in", - "notIn", - "like", - "startsWith", - "endsWith", - "multiSearchAny" - }; - return atoms.find(name) != atoms.end(); -} - bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic) const { if (const auto * func = node->as()) @@ -439,16 +420,14 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic return checkASTUseless(args[0], atomic) || checkASTUseless(args[1], atomic); else if (func->name == "not") return checkASTUseless(args[0], atomic); - else if (!atomic && checkAtomName(func->name)) - return checkASTUseless(node, true); else return std::any_of(args.begin(), args.end(), - [this, &atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); + [this](const auto & arg) { return checkASTUseless(arg, true); }); } else if (const auto * literal = node->as()) return !atomic && literal->value.get(); else if (const auto * identifier = node->as()) - return key_columns.find(identifier->getColumnName()) == key_columns.end(); + return key_columns.find(identifier->getColumnName()) == std::end(key_columns); else return true; } diff --git a/dbms/tests/queries/0_stateless/00997_set_index_array.reference b/dbms/tests/queries/0_stateless/00997_set_index_array.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_set_index_array.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00997_set_index_array.sql b/dbms/tests/queries/0_stateless/00997_set_index_array.sql new file mode 100644 index 00000000000..c57507ce22d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_set_index_array.sql @@ -0,0 +1,24 @@ +SET allow_experimental_data_skipping_indices = 1; + +DROP TABLE IF EXISTS test.set_array; + +CREATE TABLE test.set_array +( + primary_key String, + index_array Array(UInt64), + INDEX additional_index_array (index_array) TYPE set(10000) GRANULARITY 1 +) ENGINE = MergeTree() +ORDER BY (primary_key); + +INSERT INTO test.set_array +select + toString(intDiv(number, 1000000)) as primary_key, + array(number) as index_array +from system.numbers +limit 10000000; + +SET max_rows_to_read = 8192; + +select count() from test.set_array where has(index_array, 333); + +DROP TABLE test.set_array; \ No newline at end of file From 7144a3f827a34a30f50c7818681c9fe21d40ac93 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Aug 2019 01:06:13 
+0300 Subject: [PATCH 180/181] Speed up MemoryTracker by function inlining --- dbms/src/Common/CurrentThread.cpp | 6 ------ dbms/src/Common/CurrentThread.h | 7 ++++++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 446772f218d..ca39bec414c 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -51,12 +51,6 @@ MemoryTracker * CurrentThread::getMemoryTracker() return ¤t_thread->memory_tracker; } -Int64 & CurrentThread::getUntrackedMemory() -{ - /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() - return current_thread->untracked_memory; -} - void CurrentThread::updateProgressIn(const Progress & value) { if (unlikely(!current_thread)) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 01e46fbeadc..1e0140c6330 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -52,7 +52,12 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); - static Int64 & getUntrackedMemory(); + + static inline Int64 & getUntrackedMemory() + { + /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() + return current_thread->untracked_memory; + } /// Update read and write rows (bytes) statistics (used in system.query_thread_log) static void updateProgressIn(const Progress & value); From 6d3250867c1151ebeb8fb3a078fb73a6e1f6509a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Aug 2019 01:08:45 +0300 Subject: [PATCH 181/181] Fixed build of tests --- dbms/src/Interpreters/tests/aggregate.cpp | 2 +- dbms/src/Processors/tests/processors_test_aggregation.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/tests/aggregate.cpp b/dbms/src/Interpreters/tests/aggregate.cpp index e0068e9a56b..73e71d178ea 100644 --- a/dbms/src/Interpreters/tests/aggregate.cpp +++ b/dbms/src/Interpreters/tests/aggregate.cpp @@ -79,7 +79,7 @@ int main(int argc, char ** argv) Aggregator::Params params( stream->getHeader(), {0, 1}, aggregate_descriptions, - false, 0, OverflowMode::THROW, nullptr, 0, 0, 0, 0, false, "", 1); + false, 0, OverflowMode::THROW, 0, 0, 0, false, "", 1); Aggregator aggregator(params); diff --git a/dbms/src/Processors/tests/processors_test_aggregation.cpp b/dbms/src/Processors/tests/processors_test_aggregation.cpp index a645804eba8..2306de4edc0 100644 --- a/dbms/src/Processors/tests/processors_test_aggregation.cpp +++ b/dbms/src/Processors/tests/processors_test_aggregation.cpp @@ -224,8 +224,6 @@ try overflow_row, max_rows_to_group_by, OverflowMode::THROW, - nullptr, /// No compiler - 0, /// min_count_to_compile group_by_two_level_threshold, group_by_two_level_threshold_bytes, max_bytes_before_external_group_by, @@ -298,8 +296,6 @@ try overflow_row, max_rows_to_group_by, OverflowMode::THROW, - nullptr, /// No compiler - 0, /// min_count_to_compile group_by_two_level_threshold, group_by_two_level_threshold_bytes, max_bytes_before_external_group_by,