From b1935500a852054ee83a12ebcc7b0a4640146fa0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2011 03:41:37 +0000 Subject: [PATCH] dbms: development. --- dbms/include/DB/Columns/ColumnArray.h | 17 +- dbms/include/DB/Columns/ColumnConst.h | 8 +- dbms/include/DB/Columns/ColumnFixedArray.h | 5 + dbms/include/DB/Columns/ColumnNullable.h | 2 +- dbms/include/DB/Columns/ColumnVector.h | 9 +- dbms/include/DB/Columns/IColumn.h | 4 + dbms/include/DB/Core/Field.h | 33 +- .../DB/Functions/FunctionsArithmetic.h | 13 +- .../DB/Functions/FunctionsComparison.h | 371 +++++++++++------- dbms/include/DB/Functions/NumberTraits.h | 14 +- dbms/src/Core/Block.cpp | 16 +- dbms/src/Interpreters/Expression.cpp | 2 +- dbms/src/Interpreters/tests/expression.cpp | 34 +- 13 files changed, 342 insertions(+), 186 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnArray.h b/dbms/include/DB/Columns/ColumnArray.h index d4c373c5e56..4a88f60664e 100644 --- a/dbms/include/DB/Columns/ColumnArray.h +++ b/dbms/include/DB/Columns/ColumnArray.h @@ -57,23 +57,26 @@ public: void cut(size_t start, size_t length) { - if (start + length > offsets.size()) + if (length == 0 || start + length > offsets.size()) throw Exception("Parameter out of bound in IColumnArray::cut() method.", ErrorCodes::PARAMETER_OUT_OF_BOUND); + size_t nested_offset = start == 0 ? 0 : offsets[start - 1]; + size_t nested_length = offsets[start + length - 1] - nested_offset; + + data->cut(nested_offset, nested_length); + if (start == 0) offsets.resize(length); else { Offsets_t tmp(length); - memcpy(&tmp[0], &offsets[start], length * sizeof(offsets[0])); + + for (size_t i = 0; i < length; ++i) + tmp[i] = offsets[start + i] - nested_offset; + tmp.swap(offsets); } - - size_t nested_offset = start == 0 ? 0 : offsets[start - 1]; - size_t nested_length = offsets[start + length] - nested_offset; - - data->cut(nested_offset, nested_length); } void insert(const Field & x) diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index 477ee793238..ddc002b5a49 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -1,5 +1,4 @@ -#ifndef DBMS_CORE_COLUMN_CONST_H -#define DBMS_CORE_COLUMN_CONST_H +#pragma once #include @@ -24,6 +23,7 @@ public: ColumnConst(size_t s_, const T & data_) : s(s_), data(data_) {} + bool isNumeric() const { return IsNumber::value; } ColumnPtr cloneEmpty() const { return new ColumnConst(0, data); } size_t size() const { return s; } Field operator[](size_t n) const { return typename NearestFieldType::Type(data); } @@ -48,6 +48,6 @@ private: }; -} +typedef ColumnConst ColumnConstString; -#endif +} diff --git a/dbms/include/DB/Columns/ColumnFixedArray.h b/dbms/include/DB/Columns/ColumnFixedArray.h index 97d29415001..f770308ccbd 100644 --- a/dbms/include/DB/Columns/ColumnFixedArray.h +++ b/dbms/include/DB/Columns/ColumnFixedArray.h @@ -82,6 +82,11 @@ public: return *data; } + size_t getN() const + { + return n; + } + protected: ColumnPtr data; const size_t n; diff --git a/dbms/include/DB/Columns/ColumnNullable.h b/dbms/include/DB/Columns/ColumnNullable.h index bebba127095..0f071c07c14 100644 --- a/dbms/include/DB/Columns/ColumnNullable.h +++ b/dbms/include/DB/Columns/ColumnNullable.h @@ -48,7 +48,7 @@ public: void cut(size_t start, size_t length) { - if (start + length > nulls.size()) + if (length == 0 || start + length > nulls.size()) throw Exception("Parameter out of bound in IColumnNullable::cut() method.", ErrorCodes::PARAMETER_OUT_OF_BOUND); diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index c905f9387cb..f0986d4843b 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -24,6 +24,8 @@ public: ColumnVector() {} ColumnVector(size_t n) : data(n) {} + bool isNumeric() const { return IsNumber::value; } + ColumnPtr cloneEmpty() const { return new ColumnVector; @@ -41,8 +43,11 @@ public: void cut(size_t start, size_t length) { - if (start + length > data.size()) - throw Exception("Parameter out of bound in IColumnVector::cut() method.", + if (length == 0 || start + length > data.size()) + throw Exception("Parameters start = " + + Poco::NumberFormatter::format(start) + ", length = " + + Poco::NumberFormatter::format(length) + " are out of bound in IColumnVector::cut() method" + " (data.size() = " + Poco::NumberFormatter::format(data.size()) + ").", ErrorCodes::PARAMETER_OUT_OF_BOUND); if (start == 0) diff --git a/dbms/include/DB/Columns/IColumn.h b/dbms/include/DB/Columns/IColumn.h index 579a7910116..b620042a14e 100644 --- a/dbms/include/DB/Columns/IColumn.h +++ b/dbms/include/DB/Columns/IColumn.h @@ -15,6 +15,10 @@ using Poco::SharedPtr; class IColumn { public: + /** Столбец представляет собой вектор чисел или числовую константу. + */ + virtual bool isNumeric() const { return false; } + /** Создать пустой столбец такого же типа */ virtual SharedPtr cloneEmpty() const = 0; diff --git a/dbms/include/DB/Core/Field.h b/dbms/include/DB/Core/Field.h index 18f1b020e4e..e08fe356b64 100644 --- a/dbms/include/DB/Core/Field.h +++ b/dbms/include/DB/Core/Field.h @@ -139,20 +139,31 @@ public: template struct NearestFieldType; -template <> struct NearestFieldType { typedef UInt64 Type; }; -template <> struct NearestFieldType { typedef UInt64 Type; }; -template <> struct NearestFieldType { typedef UInt64 Type; }; -template <> struct NearestFieldType { typedef UInt64 Type; }; +template <> struct NearestFieldType { typedef UInt64 Type; }; +template <> struct NearestFieldType { typedef UInt64 Type; }; +template <> struct NearestFieldType { typedef UInt64 Type; }; +template <> struct NearestFieldType { typedef UInt64 Type; }; +template <> struct NearestFieldType { typedef Int64 Type; }; +template <> struct NearestFieldType { typedef Int64 Type; }; +template <> struct NearestFieldType { typedef Int64 Type; }; +template <> struct NearestFieldType { typedef Int64 Type; }; +template <> struct NearestFieldType { typedef Float64 Type; }; +template <> struct NearestFieldType { typedef Float64 Type; }; +template <> struct NearestFieldType { typedef String Type; }; -template <> struct NearestFieldType { typedef Int64 Type; }; -template <> struct NearestFieldType { typedef Int64 Type; }; -template <> struct NearestFieldType { typedef Int64 Type; }; -template <> struct NearestFieldType { typedef Int64 Type; }; -template <> struct NearestFieldType { typedef Float64 Type; }; -template <> struct NearestFieldType { typedef Float64 Type; }; +template struct IsNumber { static const bool value = false; }; -template <> struct NearestFieldType { typedef String Type; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; +template <> struct IsNumber { static const bool value = true; }; } diff --git a/dbms/include/DB/Functions/FunctionsArithmetic.h b/dbms/include/DB/Functions/FunctionsArithmetic.h index ea6bb5e06e9..cdfec2fff2c 100644 --- a/dbms/include/DB/Functions/FunctionsArithmetic.h +++ b/dbms/include/DB/Functions/FunctionsArithmetic.h @@ -13,7 +13,6 @@ namespace DB /** Арифметические функции: +, -, *, /, %, * div (целочисленное деление), - * TODO: <<, >>, <<<, >>>, &, |, ^, ~ */ template @@ -185,26 +184,30 @@ struct ModuloImpl { size_t size = a.size(); for (size_t i = 0; i < size; ++i) - c[i] = a[i] % b[i]; + c[i] = typename NumberTraits::ToInteger::Type(a[i]) + % typename NumberTraits::ToInteger::Type(b[i]); } static void vector_constant(const std::vector & a, B b, std::vector & c) { size_t size = a.size(); for (size_t i = 0; i < size; ++i) - c[i] = a[i] % b; + c[i] = typename NumberTraits::ToInteger::Type(a[i]) + % typename NumberTraits::ToInteger::Type(b); } static void constant_vector(A a, const std::vector & b, std::vector & c) { size_t size = b.size(); for (size_t i = 0; i < size; ++i) - c[i] = a % b[i]; + c[i] = typename NumberTraits::ToInteger::Type(a) + % typename NumberTraits::ToInteger::Type(b[i]); } static void constant_constant(A a, B b, ResultType & c) { - c = a % b; + c = typename NumberTraits::ToInteger::Type(a) + % typename NumberTraits::ToInteger::Type(b); } }; diff --git a/dbms/include/DB/Functions/FunctionsComparison.h b/dbms/include/DB/Functions/FunctionsComparison.h index 4e3c0fa1e5a..7d889398da4 100644 --- a/dbms/include/DB/Functions/FunctionsComparison.h +++ b/dbms/include/DB/Functions/FunctionsComparison.h @@ -2,12 +2,18 @@ #include +#include +#include +#include +#include + #include #include #include #include #include #include + #include @@ -25,42 +31,50 @@ namespace DB * внутри каждой группы, но не из разных групп. */ +/** Игнорируем warning о сравнении signed и unsigned. + * (Результат может быть некорректным.) + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" + template struct EqualsNumImpl { - typedef UInt8 ResultType; - - static void vector_vector(const std::vector & a, const std::vector & b, std::vector & c) + static void vector_vector(const std::vector & a, const std::vector & b, std::vector & c) { size_t size = a.size(); for (size_t i = 0; i < size; ++i) c[i] = a[i] == b[i]; } - static void vector_constant(const std::vector & a, B b, std::vector & c) + static void vector_constant(const std::vector & a, B b, std::vector & c) { size_t size = a.size(); for (size_t i = 0; i < size; ++i) c[i] = a[i] == b; } - static void constant_constant(A a, B b, ResultType & c) + static void constant_vector(A a, const std::vector & b, std::vector & c) + { + size_t size = b.size(); + for (size_t i = 0; i < size; ++i) + c[i] = a == b[i]; + } + + static void constant_constant(A a, B b, UInt8 & c) { c = a == b; } }; -template struct EqualsStringImpl { - typedef UInt8 ResultType; - static void string_vector_string_vector( const std::vector & a_data, const std::vector & a_offsets, const std::vector & b_data, const std::vector & b_offsets, - std::vector & c) + std::vector & c) { - size_t size = a_data.size(); + size_t size = a_offsets.size(); for (size_t i = 0; i < size; ++i) c[i] = (i == 0) ? (a_offsets[0] == b_offsets[0] && !memcmp(&a_data[0], &b_data[0], a_offsets[0])) @@ -71,130 +85,135 @@ struct EqualsStringImpl static void string_vector_fixed_string_vector( const std::vector & a_data, const std::vector & a_offsets, const std::vector & b_data, size_t b_n, - std::vector & c) + std::vector & c) { - size_t size = a_data.size(); + size_t size = a_offsets.size(); for (size_t i = 0; i < size; ++i) c[i] = (i == 0) - ? (a_offsets[0] == b_n && !memcmp(&a_data[0], &b_data[0], b_n)) - : (a_offsets[i] - a_offsets[i - 1] == b_n + ? (a_offsets[0] == b_n + 1 && !memcmp(&a_data[0], &b_data[0], b_n)) + : (a_offsets[i] - a_offsets[i - 1] == b_n + 1 && !memcmp(&a_data[a_offsets[i - 1]], &b_data[b_n * i], b_n)); } static void string_vector_constant( const std::vector & a_data, const std::vector & a_offsets, const std::string & b, - std::vector & c) + std::vector & c) { - size_t size = a_data.size(); + size_t size = a_offsets.size(); size_t b_n = b.size(); - const UInt8 * b_data = reinterpret_cast(b.data()) + const UInt8 * b_data = reinterpret_cast(b.data()); for (size_t i = 0; i < size; ++i) c[i] = (i == 0) - ? (a_offsets[0] == b_n && !memcmp(&a_data[0], b_data, b_n)) - : (a_offsets[i] - a_offsets[i - 1] == b_n + ? (a_offsets[0] == b_n + 1 && !memcmp(&a_data[0], b_data, b_n)) + : (a_offsets[i] - a_offsets[i - 1] == b_n + 1 && !memcmp(&a_data[a_offsets[i - 1]], b_data, b_n)); } + static void fixed_string_vector_string_vector( + const std::vector & a_data, size_t a_n, + const std::vector & b_data, const std::vector & b_offsets, + std::vector & c) + { + size_t size = b_offsets.size(); + for (size_t i = 0; i < size; ++i) + c[i] = (i == 0) + ? (b_offsets[0] == a_n + 1 && !memcmp(&b_data[0], &a_data[0], a_n)) + : (b_offsets[i] - b_offsets[i - 1] == a_n + 1 + && !memcmp(&b_data[b_offsets[i - 1]], &a_data[a_n * i], a_n)); + } + static void fixed_string_vector_fixed_string_vector( const std::vector & a_data, size_t a_n, const std::vector & b_data, size_t b_n, - std::vector & c) + std::vector & c) { size_t size = a_data.size(); - for (size_t i = 0; i < size; i += n) - c[i] = a_n == b_n && !memcmp(&a_data[i], &b_data[i], n); + for (size_t i = 0; i < size; i += a_n) + c[i] = a_n == b_n && !memcmp(&a_data[i], &b_data[i], a_n); } static void fixed_string_vector_constant( const std::vector & a_data, size_t a_n, const std::string & b, - std::vector & c) + std::vector & c) { size_t size = a_data.size(); - for (size_t i = 0; i < size; i += n) - c[i] = !memcmp(&a_data[i], &b_data[i], n); + const UInt8 * b_data = reinterpret_cast(b.data()); + size_t b_n = b.size(); + for (size_t i = 0; i < size; i += a_n) + c[i] = a_n == b_n && !memcmp(&a_data[i], b_data, a_n); + } + + static void constant_string_vector( + const std::string & a, + const std::vector & b_data, const std::vector & b_offsets, + std::vector & c) + { + size_t size = b_offsets.size(); + size_t a_n = a.size(); + const UInt8 * a_data = reinterpret_cast(a.data()); + for (size_t i = 0; i < size; ++i) + c[i] = (i == 0) + ? (b_offsets[0] == a_n + 1 && !memcmp(&b_data[0], a_data, a_n)) + : (b_offsets[i] - b_offsets[i - 1] == a_n + 1 + && !memcmp(&b_data[b_offsets[i - 1]], a_data, a_n)); + } + + static void constant_fixed_string_vector( + const std::string & a, + const std::vector & b_data, size_t b_n, + std::vector & c) + { + size_t size = b_data.size(); + const UInt8 * a_data = reinterpret_cast(a.data()); + size_t a_n = a.size(); + for (size_t i = 0; i < size; i += b_n) + c[i] = a_n == b_n && !memcmp(&b_data[i], a_data, b_n); } static void constant_constant( const std::string & a, const std::string & b, - std::string & c) + UInt8 & c) { c = a == b; } }; +#pragma GCC diagnostic pop -template