diff --git a/README.md b/README.md index f5e47153553..e5d998b54e7 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # ClickHouse ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time. -🎤🥂 **ClickHouse Meetup in [Sunnyvale](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/248898966/) & [San Francisco](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/249162518/), April 23-27** 🍰🔥🐻 - Learn more about ClickHouse at [https://clickhouse.yandex/](https://clickhouse.yandex/) [![Build Status](https://travis-ci.org/yandex/ClickHouse.svg?branch=master)](https://travis-ci.org/yandex/ClickHouse) diff --git a/dbms/src/Analyzers/TypeAndConstantInference.cpp b/dbms/src/Analyzers/TypeAndConstantInference.cpp index bc62e0b2c29..d0693d1f567 100644 --- a/dbms/src/Analyzers/TypeAndConstantInference.cpp +++ b/dbms/src/Analyzers/TypeAndConstantInference.cpp @@ -26,6 +26,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB @@ -251,7 +256,7 @@ void processFunction(const String & column_name, ASTPtr & ast, TypeAndConstantIn size_t result_position = argument_numbers.size(); block_with_constants.insert({nullptr, expression_info.data_type, column_name}); - function_ptr->execute(block_with_constants, argument_numbers, result_position); + function_ptr->execute(block_with_constants, argument_numbers, result_position, 1); const auto & result_column = block_with_constants.getByPosition(result_position).column; if (result_column->isColumnConst()) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 2e4a692451f..2e11bbdb38b 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -91,6 +91,11 @@ public: return data->getInt(0); } + UInt8 getBoolRepresentation(size_t) const override + { + return data->getBoolRepresentation(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); diff --git a/dbms/src/Columns/ColumnFunction.cpp b/dbms/src/Columns/ColumnFunction.cpp index 4675e7915f7..9cf77e00a2f 100644 --- a/dbms/src/Columns/ColumnFunction.cpp +++ b/dbms/src/Columns/ColumnFunction.cpp @@ -194,7 +194,7 @@ ColumnWithTypeAndName ColumnFunction::reduce() const for (size_t i = 0; i < captured_columns.size(); ++i) arguments[i] = i; - function->execute(block, arguments, captured_columns.size()); + function->execute(block, arguments, captured_columns.size(), size_); return block.getByPosition(captured_columns.size()); } diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 4ac0f87b8da..a96e9651909 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -46,6 +46,7 @@ public: bool isNullAt(size_t n) const override { return static_cast(*null_map).getData()[n] != 0;} Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + UInt8 getBoolRepresentation(size_t n) const override { return isNullAt(n) ? 0 : nested_column->getBoolRepresentation(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } StringRef getDataAt(size_t n) const override; void insertData(const char * pos, size_t length) override; diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 5ce33e82028..b276b411d2e 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -231,6 +231,11 @@ public: return UInt64(data[n]); } + UInt8 getBoolRepresentation(size_t n) const override + { + return UInt8(!!data[n]); + } + Int64 getInt(size_t n) const override { return Int64(data[n]); diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 40577a11d3f..544c8c31165 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -95,6 +95,11 @@ public: throw Exception("Method getUInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual UInt8 getBoolRepresentation(size_t /*n*/) const + { + throw Exception("Method getBoolRepresentation is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + virtual Int64 getInt(size_t /*n*/) const { throw Exception("Method getInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 7ee9d799612..fe229f30104 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -378,6 +378,9 @@ namespace ErrorCodes extern const int POCO_EXCEPTION = 1000; extern const int STD_EXCEPTION = 1001; extern const int UNKNOWN_EXCEPTION = 1002; + + extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND = 2001; + extern const int ILLEGAL_PROJECTION_MANIPULATOR = 2002; } } diff --git a/dbms/src/DataStreams/CSVRowOutputStream.cpp b/dbms/src/DataStreams/CSVRowOutputStream.cpp index 438f2718ce2..dd1428e3280 100644 --- a/dbms/src/DataStreams/CSVRowOutputStream.cpp +++ b/dbms/src/DataStreams/CSVRowOutputStream.cpp @@ -7,8 +7,8 @@ namespace DB { -CSVRowOutputStream::CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_, bool with_types_) - : ostr(ostr_), sample(sample_), with_names(with_names_), with_types(with_types_) +CSVRowOutputStream::CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const char delimiter_, bool with_names_, bool with_types_) + : ostr(ostr_), sample(sample_), delimiter(delimiter_), with_names(with_names_), with_types(with_types_) { size_t columns = sample.columns(); data_types.resize(columns); @@ -32,7 +32,7 @@ void CSVRowOutputStream::writePrefix() for (size_t i = 0; i < columns; ++i) { writeCSVString(sample.safeGetByPosition(i).name, ostr); - writeChar(i == columns - 1 ? '\n' : ',', ostr); + writeChar(i == columns - 1 ? '\n' : delimiter, ostr); } } @@ -41,7 +41,7 @@ void CSVRowOutputStream::writePrefix() for (size_t i = 0; i < columns; ++i) { writeCSVString(sample.safeGetByPosition(i).type->getName(), ostr); - writeChar(i == columns - 1 ? '\n' : ',', ostr); + writeChar(i == columns - 1 ? '\n' : delimiter, ostr); } } } @@ -55,7 +55,7 @@ void CSVRowOutputStream::writeField(const IColumn & column, const IDataType & ty void CSVRowOutputStream::writeFieldDelimiter() { - writeChar(',', ostr); + writeChar(delimiter, ostr); } diff --git a/dbms/src/DataStreams/CSVRowOutputStream.h b/dbms/src/DataStreams/CSVRowOutputStream.h index 161eab16985..d819960d7b1 100644 --- a/dbms/src/DataStreams/CSVRowOutputStream.h +++ b/dbms/src/DataStreams/CSVRowOutputStream.h @@ -19,7 +19,7 @@ public: /** with_names - output in the first line a header with column names * with_types - output in the next line header with the names of the types */ - CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_ = false, bool with_types_ = false); + CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const char delimiter_, bool with_names_ = false, bool with_types_ = false); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -44,6 +44,7 @@ protected: WriteBuffer & ostr; const Block sample; + const char delimiter; bool with_names; bool with_types; DataTypes data_types; diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index a985c9f3dc0..73c7dbdaa3b 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -81,13 +81,12 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu { return wrap_row_stream(std::make_shared(buf, sample, context, settings.input_format_values_interpret_expressions)); } - else if (name == "CSV") + else if (name == "CSV" || name == "CSVWithNames") { - return wrap_row_stream(std::make_shared(buf, sample, ',')); - } - else if (name == "CSVWithNames") - { - return wrap_row_stream(std::make_shared(buf, sample, ',', true)); + char csv_delimiter = settings.format_csv_delimiter; + bool with_names = name == "CSVWithNames"; + + return wrap_row_stream(std::make_shared(buf, sample, csv_delimiter, with_names)); } else if (name == "TSKV") { @@ -152,10 +151,13 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf return std::make_shared(std::make_shared(buf, sample, true, true), sample); else if (name == "TabSeparatedRaw" || name == "TSVRaw") return std::make_shared(std::make_shared(buf, sample), sample); - else if (name == "CSV") - return std::make_shared(std::make_shared(buf, sample), sample); - else if (name == "CSVWithNames") - return std::make_shared(std::make_shared(buf, sample, true), sample); + else if (name == "CSV" || name == "CSVWithNames") + { + char csv_delimiter = settings.format_csv_delimiter; + bool with_names = name == "CSVWithNames"; + + return std::make_shared(std::make_shared(buf, sample, csv_delimiter, with_names), sample); + } else if (name == "Pretty") return std::make_shared(buf, sample, false, settings.output_format_pretty_max_rows, context); else if (name == "PrettyCompact") diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index a3eef469dac..3970c195509 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -194,9 +194,9 @@ void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_nu } -void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char /*delimiter*/) const +void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const { - read(*this, column, [&istr](ColumnFixedString::Chars_t & data) { readCSVStringInto(data, istr); }); + read(*this, column, [&istr, delimiter](ColumnFixedString::Chars_t & data) { readCSVStringInto(data, istr, delimiter); }); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 5e693c71445..1b5386217a1 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -285,9 +285,9 @@ void DataTypeString::serializeTextCSV(const IColumn & column, size_t row_num, Wr } -void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char /*delimiter*/) const +void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const { - read(column, [&](ColumnString::Chars_t & data) { readCSVStringInto(data, istr); }); + read(column, [&](ColumnString::Chars_t & data) { readCSVStringInto(data, istr, delimiter); }); } diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index cbc5288eac5..1febb4aa20c 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -68,6 +68,13 @@ generate_function_register(Array FunctionArrayResize ) +generate_function_register(Projection + FunctionOneOrZero + FunctionProject + FunctionBuildProjectionComposition + FunctionRestoreProjection +) + add_headers_and_sources(clickhouse_functions .) add_headers_and_sources(clickhouse_functions ./GatherUtils) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 33aa6928b5c..5c2e23248b8 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -44,7 +44,6 @@ Columns convertConstTupleToConstantElements(const ColumnConst & column) static Block createBlockWithNestedColumnsImpl(const Block & block, const std::unordered_set & args) { Block res; - size_t rows = block.rows(); size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) @@ -70,7 +69,7 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un const auto & nested_col = static_cast( static_cast(*col.column).getDataColumn()).getNestedColumnPtr(); - res.insert({ ColumnConst::create(nested_col, rows), nested_type, col.name}); + res.insert({ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name}); } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Functions/FunctionsArithmetic.h b/dbms/src/Functions/FunctionsArithmetic.h index b015e203986..16367f0b8b8 100644 --- a/dbms/src/Functions/FunctionsArithmetic.h +++ b/dbms/src/Functions/FunctionsArithmetic.h @@ -64,9 +64,9 @@ struct BinaryOperationImplBase c[i] = Op::template apply(a, b[i]); } - static void constant_constant(A a, B b, ResultType & c) + static ResultType constant_constant(A a, B b) { - c = Op::template apply(a, b); + return Op::template apply(a, b); } }; @@ -476,27 +476,13 @@ struct IntExp10Impl } }; - -/// this one is just for convenience -template using If = std::conditional_t; -/// these ones for better semantics -template using Then = T; -template using Else = T; - /// Used to indicate undefined operation struct InvalidType; -template -struct DataTypeFromFieldType -{ - using Type = DataTypeNumber; -}; +template struct Case : std::bool_constant { using type = T; }; -template <> -struct DataTypeFromFieldType -{ - using Type = InvalidType; -}; +/// Switch, ...> -- select the first Ti for which Ci is true; InvalidType if none. +template using Switch = typename std::disjunction>::type; template constexpr bool IsIntegral = false; template <> constexpr bool IsIntegral = true; @@ -512,270 +498,74 @@ template constexpr bool IsDateOrDateTime = false; template <> constexpr bool IsDateOrDateTime = true; template <> constexpr bool IsDateOrDateTime = true; -/** Returns appropriate result type for binary operator on dates (or datetimes): - * Date + Integral -> Date - * Integral + Date -> Date - * Date - Date -> Int32 - * Date - Integral -> Date - * least(Date, Date) -> Date - * greatest(Date, Date) -> Date - * All other operations are not defined and return InvalidType, operations on - * distinct date types are also undefined (e.g. DataTypeDate - DataTypeDateTime) - */ +template using DataTypeFromFieldType = std::conditional_t, InvalidType, DataTypeNumber>; + template