From 665fabde9e60eea5f6364200cdad56d00ed39298 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sat, 18 Apr 2020 19:20:34 +0300 Subject: [PATCH 001/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- .../AggregateFunctionWelchTTest.cpp | 28 ++ .../AggregateFunctionWelchTTest.h | 251 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 1 + .../registerAggregateFunctions.h | 1 + 4 files changed, 281 insertions(+) create mode 100644 src/AggregateFunctions/AggregateFunctionWelchTTest.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionWelchTTest.h diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp new file mode 100644 index 00000000000..46b533a2c0b --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + + + return std::make_shared(argument_types, parameters); + +} + +} + +void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) +{ + + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h new file mode 100644 index 00000000000..a52528df431 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -0,0 +1,251 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +// hard-codded values - part of the algorithm + +#define SIGN_LVL_CNT 6 + +Float64 
CriticalValuesTable[SIGN_LVL_CNT][102] = { + // for significance level = 0.2 + {0.2, 3.078, 1.886, 1.638, 1.533, 1.476, 1.44, 1.415, 1.397, 1.383, 1.372, 1.363, 1.356, 1.35, 1.345, 1.341, 1.337, 1.333, 1.33, 1.328, 1.325, 1.323, 1.321, 1.319, 1.318, 1.316, 1.315, 1.314, 1.313, 1.311, 1.31, 1.309, 1.309, 1.308, 1.307, 1.306, 1.306, 1.305, 1.304, 1.304, 1.303, 1.303, 1.302, 1.302, 1.301, 1.301, 1.3, 1.3, 1.299, 1.299, 1.299, 1.298, 1.298, 1.298, 1.297, 1.297, 1.297, 1.297, 1.296, 1.296, 1.296, 1.296, 1.295, 1.295, 1.295, 1.295, 1.295, 1.294, 1.294, 1.294, 1.294, 1.294, 1.293, 1.293, 1.293, 1.293, 1.293, 1.293, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.29, 1.29, 1.29, 1.29, 1.29, 1.282} + + // for significance level = 0.1 + {0.1, 6.314, 2.92, 2.353, 2.132, 2.015, 1.943, 1.895, 1.86, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.74, 1.734, 1.729, 1.725, 1.721, 1.717, 1.714, 1.711, 1.708, 1.706, 1.703, 1.701, 1.699, 1.697, 1.696, 1.694, 1.692, 1.691, 1.69, 1.688, 1.687, 1.686, 1.685, 1.684, 1.683, 1.682, 1.681, 1.68, 1.679, 1.679, 1.678, 1.677, 1.677, 1.676, 1.675, 1.675, 1.674, 1.674, 1.673, 1.673, 1.672, 1.672, 1.671, 1.671, 1.67, 1.67, 1.669, 1.669, 1.669, 1.668, 1.668, 1.668, 1.667, 1.667, 1.667, 1.666, 1.666, 1.666, 1.665, 1.665, 1.665, 1.665, 1.664, 1.664, 1.664, 1.664, 1.663, 1.663, 1.663, 1.663, 1.663, 1.662, 1.662, 1.662, 1.662, 1.662, 1.661, 1.661, 1.661, 1.661, 1.661, 1.661, 1.66, 1.66, 1.645} + + // for significance level = 0.05 + {0.05, 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.16, 2.145, 2.131, 2.12, 2.11, 2.101, 2.093, 2.086, 2.08, 2.074, 2.069, 2.064, 2.06, 2.056, 2.052, 2.048, 2.045, 2.042, 2.04, 2.037, 2.035, 2.032, 2.03, 2.028, 2.026, 2.024, 2.023, 2.021, 2.02, 2.018, 2.017, 2.015, 2.014, 2.013, 2.012, 2.011, 2.01, 2.009, 2.008, 2.007, 2.006, 2.005, 2.004, 2.003, 2.002, 2.002, 2.001, 2.0, 2.0, 1.999, 1.998, 
1.998, 1.997, 1.997, 1.996, 1.995, 1.995, 1.994, 1.994, 1.993, 1.993, 1.993, 1.992, 1.992, 1.991, 1.991, 1.99, 1.99, 1.99, 1.989, 1.989, 1.989, 1.988, 1.988, 1.988, 1.987, 1.987, 1.987, 1.986, 1.986, 1.986, 1.986, 1.985, 1.985, 1.985, 1.984, 1.984, 1.984, 1.96} + + // for significance level = 0.02 + {0.02, 31.821, 6.965, 4.541, 3.747, 3.365, 3.143, 2.998, 2.896, 2.821, 2.764, 2.718, 2.681, 2.65, 2.624, 2.602, 2.583, 2.567, 2.552, 2.539, 2.528, 2.518, 2.508, 2.5, 2.492, 2.485, 2.479, 2.473, 2.467, 2.462, 2.457, 2.453, 2.449, 2.445, 2.441, 2.438, 2.434, 2.431, 2.429, 2.426, 2.423, 2.421, 2.418, 2.416, 2.414, 2.412, 2.41, 2.408, 2.407, 2.405, 2.403, 2.402, 2.4, 2.399, 2.397, 2.396, 2.395, 2.394, 2.392, 2.391, 2.39, 2.389, 2.388, 2.387, 2.386, 2.385, 2.384, 2.383, 2.382, 2.382, 2.381, 2.38, 2.379, 2.379, 2.378, 2.377, 2.376, 2.376, 2.375, 2.374, 2.374, 2.373, 2.373, 2.372, 2.372, 2.371, 2.37, 2.37, 2.369, 2.369, 2.368, 2.368, 2.368, 2.367, 2.367, 2.366, 2.366, 2.365, 2.365, 2.365, 2.364, 2.326} + + // for significance level = 0.01 + {0.01, 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.25, 3.169, 3.106, 3.055, 3.012, 2.977, 2.947, 2.921, 2.898, 2.878, 2.861, 2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756, 2.75, 2.744, 2.738, 2.733, 2.728, 2.724, 2.719, 2.715, 2.712, 2.708, 2.704, 2.701, 2.698, 2.695, 2.692, 2.69, 2.687, 2.685, 2.682, 2.68, 2.678, 2.676, 2.674, 2.672, 2.67, 2.668, 2.667, 2.665, 2.663, 2.662, 2.66, 2.659, 2.657, 2.656, 2.655, 2.654, 2.652, 2.651, 2.65, 2.649, 2.648, 2.647, 2.646, 2.645, 2.644, 2.643, 2.642, 2.641, 2.64, 2.64, 2.639, 2.638, 2.637, 2.636, 2.636, 2.635, 2.634, 2.634, 2.633, 2.632, 2.632, 2.631, 2.63, 2.63, 2.629, 2.629, 2.628, 2.627, 2.627, 2.626, 2.626, 2.576} + + // for significance level = 0.002 + {0.002, 318.313, 22.327, 10.215, 7.173, 5.893, 5.208, 4.782, 4.499, 4.296, 4.143, 4.024, 3.929, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527, 3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 
3.396, 3.385, 3.375, 3.365, 3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291, 3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248, 3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22, 3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201, 3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187, 3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175, 3.175, 3.174, 3.09} +} + +// our algorithm implementation via vectors: +// https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 +// col, col, bool +template +struct AggregateFunctionWelchTTestData final { + + size_t size_x = 0; + size_t size_y = 0; + X sum_x = 0; + Y sum_y = 0; + X square_sum_x = 0; + Y square_sum_y = 0; + Float64 mean_x = 0; + Float64 mean_y = 0; + + /* + not yet sure how to use them + void add_x(X x) { + mean_x = (Float64)(sum_x + x) / (size_x + 1); + size_x ++; + sum_x += x; + square_sum_x += x * x; + } + + void add_y(Y y) { + mean_y = (sum_y + y) / (size_y + 1); + size_y ++; + sum_y += y; + square_sum_y += y * y; + } + */ + + void add(X x, Y y) { + sum_x += x; + sum_y += y; + size_x++; + size_y++; + mean_x = (Float64) sum_x / size_x; + mean_y = (Float64) sum_y / size_y; + square_sum_x += x * x; + square_sum_y += y * y; + } + + void merge(const AggregateFunctionWelchTTestData &other) { + sum_x += other.sum_x; + sum_y += other.sum_y; + size_x += other.size_x; + size_y += other.size_y; + mean_x = (Float64) sum_x / size_x; + mean_y = (Float64) sum_y / size_y; + square_sum_x += other.square_sum_x; + square_sum_y += other.square_sum_y; + } + + void serialize(WriteBuffer &buf) const { + writeBinary(mean_x, buf); + writeBinary(mean_y, buf); + writeBinary(sum_x, buf); + writeBinary(sum_y, buf); + writeBinary(square_sum_x, buf); + writeBinary(square_sum_y, buf); + writeBinary(size_x, buf); + writeBinary(size_y, buf); + } + + void deserialize(ReadBuffer &buf) { + 
readBinary(mean_x, buf); + readBinary(mean_y, buf); + readBinary(sum_x, buf); + readBinary(sum_y, buf); + readBinary(square_sum_x, buf); + readBinary(square_sum_y, buf); + readBinary(size_x, buf); + readBinary(size_y, buf); + } + + Float64 get_sx() const { + return (Float64)(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); + } + + Float64 get_sy() const { + return (Float64)(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); + } + + Float64 get_T(Float64 sx, Float64 sy) const { + return (Float64)(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); + } + + Float64 get_degrees_of_freed(Float64 sx, Float64 sy) const { + return (Float64)(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / + ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); + } + + Ret get_result(Float64 t, Float64 dof, Float64 parametr) const + { + //find our table + int table; + for (int i = 0; i < SIGN_LVL_CNT; ++i) + { + if (CriticalValuesTable[i][0] == parametr) + { + table = i; + } + } + + //round or make infinity dof + dof = (int) dof; + if (dof > 100) + { + dof = 101; + } + //check if abs of t is greater than table[dof] + t = abs(t) + if(t > CriticalValuesTable[table][dof]) { + return (UInt8) 1; + //in this case we reject the null hypothesis + } + else { + return (UInt8) 0; + } + } +}; + +template +class AggregateFunctionWelchTTest final : public + IAggregateFunctionDataHelper< + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest + > +{ +public: + AggregateFunctionWelchTTest( + const DataTypes & arguments, + const Array & params + ): + IAggregateFunctionDataHelper< + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest + > {arguments, params} + { + // notice: arguments has been in factory + } + + String getName() const override + { + return "WelchTTest"; + } + + void add( + AggregateDataPtr place, + const IColumn ** columns, + size_t row_num, + Arena * + ) const 
override + { + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); + + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; + + this->data(place).add(x, y); + } + + void merge( + AggregateDataPtr place, + ConstAggregateDataPtr rhs, Arena * + ) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize( + ConstAggregateDataPtr place, + WriteBuffer & buf + ) const override + { + this->data(place).serialize(buf); + } + + void deserialize( + AggregateDataPtr place, + ReadBuffer & buf, Arena * + ) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto( + ConstAggregateDataPtr place, + IColumn & to + ) const override + { + Float64 significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + + Float64 sx = this->data(place).get_sx(); + Float64 sy = this->data(place).get_sy(); + Float64 t_value = this->data(place).get_T(sx, sy); + Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); + Ret result = this->data(place).get_result(t_value, dof, significance_level); + + auto & column = static_cast(to); + column.getData().push_back(result); + } + + +} +}; + +}; \ No newline at end of file diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index a9ab1d4f8ea..adc72ec9169 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -45,6 +45,7 @@ void registerAggregateFunctions() registerAggregateFunctionMoving(factory); registerAggregateFunctionCategoricalIV(factory); registerAggregateFunctionAggThrow(factory); + registerAggregateFunctionWelchTTest(factory); } { diff --git a/src/AggregateFunctions/registerAggregateFunctions.h b/src/AggregateFunctions/registerAggregateFunctions.h index 88cdf4a504d..046b125dec5 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.h +++ 
b/src/AggregateFunctions/registerAggregateFunctions.h @@ -35,6 +35,7 @@ void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &) void registerAggregateFunctionMoving(AggregateFunctionFactory &); void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &); void registerAggregateFunctionAggThrow(AggregateFunctionFactory &); +void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); From 69c1f33b0b22bd103cbce8f0ce747dc12937d1a1 Mon Sep 17 00:00:00 2001 From: antikvist Date: Mon, 27 Apr 2020 00:09:56 +0300 Subject: [PATCH 002/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- .../AggregateFunctionWelchTTest.h | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index a52528df431..be1e176d540 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -4,13 +4,17 @@ #include #include #include +#include #include #include #include #include #include #include -#include + +#include +#include +#include namespace DB @@ -21,19 +25,19 @@ namespace DB Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // for significance level = 0.2 - {0.2, 3.078, 1.886, 1.638, 1.533, 1.476, 1.44, 1.415, 1.397, 1.383, 1.372, 1.363, 1.356, 1.35, 1.345, 1.341, 1.337, 1.333, 1.33, 1.328, 1.325, 1.323, 1.321, 1.319, 1.318, 1.316, 1.315, 1.314, 1.313, 1.311, 1.31, 1.309, 1.309, 1.308, 1.307, 1.306, 1.306, 1.305, 1.304, 1.304, 1.303, 1.303, 1.302, 1.302, 1.301, 1.301, 1.3, 1.3, 1.299, 1.299, 1.299, 1.298, 1.298, 1.298, 1.297, 1.297, 1.297, 1.297, 1.296, 1.296, 1.296, 1.296, 
1.295, 1.295, 1.295, 1.295, 1.295, 1.294, 1.294, 1.294, 1.294, 1.294, 1.293, 1.293, 1.293, 1.293, 1.293, 1.293, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.29, 1.29, 1.29, 1.29, 1.29, 1.282} + {0.2, 3.078, 1.886, 1.638, 1.533, 1.476, 1.44, 1.415, 1.397, 1.383, 1.372, 1.363, 1.356, 1.35, 1.345, 1.341, 1.337, 1.333, 1.33, 1.328, 1.325, 1.323, 1.321, 1.319, 1.318, 1.316, 1.315, 1.314, 1.313, 1.311, 1.31, 1.309, 1.309, 1.308, 1.307, 1.306, 1.306, 1.305, 1.304, 1.304, 1.303, 1.303, 1.302, 1.302, 1.301, 1.301, 1.3, 1.3, 1.299, 1.299, 1.299, 1.298, 1.298, 1.298, 1.297, 1.297, 1.297, 1.297, 1.296, 1.296, 1.296, 1.296, 1.295, 1.295, 1.295, 1.295, 1.295, 1.294, 1.294, 1.294, 1.294, 1.294, 1.293, 1.293, 1.293, 1.293, 1.293, 1.293, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.292, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.29, 1.29, 1.29, 1.29, 1.29, 1.282}, // for significance level = 0.1 - {0.1, 6.314, 2.92, 2.353, 2.132, 2.015, 1.943, 1.895, 1.86, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.74, 1.734, 1.729, 1.725, 1.721, 1.717, 1.714, 1.711, 1.708, 1.706, 1.703, 1.701, 1.699, 1.697, 1.696, 1.694, 1.692, 1.691, 1.69, 1.688, 1.687, 1.686, 1.685, 1.684, 1.683, 1.682, 1.681, 1.68, 1.679, 1.679, 1.678, 1.677, 1.677, 1.676, 1.675, 1.675, 1.674, 1.674, 1.673, 1.673, 1.672, 1.672, 1.671, 1.671, 1.67, 1.67, 1.669, 1.669, 1.669, 1.668, 1.668, 1.668, 1.667, 1.667, 1.667, 1.666, 1.666, 1.666, 1.665, 1.665, 1.665, 1.665, 1.664, 1.664, 1.664, 1.664, 1.663, 1.663, 1.663, 1.663, 1.663, 1.662, 1.662, 1.662, 1.662, 1.662, 1.661, 1.661, 1.661, 1.661, 1.661, 1.661, 1.66, 1.66, 1.645} + {0.1, 6.314, 2.92, 2.353, 2.132, 2.015, 1.943, 1.895, 1.86, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.74, 1.734, 1.729, 1.725, 1.721, 1.717, 1.714, 1.711, 1.708, 1.706, 1.703, 1.701, 1.699, 1.697, 1.696, 1.694, 1.692, 1.691, 1.69, 1.688, 1.687, 1.686, 1.685, 1.684, 
1.683, 1.682, 1.681, 1.68, 1.679, 1.679, 1.678, 1.677, 1.677, 1.676, 1.675, 1.675, 1.674, 1.674, 1.673, 1.673, 1.672, 1.672, 1.671, 1.671, 1.67, 1.67, 1.669, 1.669, 1.669, 1.668, 1.668, 1.668, 1.667, 1.667, 1.667, 1.666, 1.666, 1.666, 1.665, 1.665, 1.665, 1.665, 1.664, 1.664, 1.664, 1.664, 1.663, 1.663, 1.663, 1.663, 1.663, 1.662, 1.662, 1.662, 1.662, 1.662, 1.661, 1.661, 1.661, 1.661, 1.661, 1.661, 1.66, 1.66, 1.645}, // for significance level = 0.05 - {0.05, 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.16, 2.145, 2.131, 2.12, 2.11, 2.101, 2.093, 2.086, 2.08, 2.074, 2.069, 2.064, 2.06, 2.056, 2.052, 2.048, 2.045, 2.042, 2.04, 2.037, 2.035, 2.032, 2.03, 2.028, 2.026, 2.024, 2.023, 2.021, 2.02, 2.018, 2.017, 2.015, 2.014, 2.013, 2.012, 2.011, 2.01, 2.009, 2.008, 2.007, 2.006, 2.005, 2.004, 2.003, 2.002, 2.002, 2.001, 2.0, 2.0, 1.999, 1.998, 1.998, 1.997, 1.997, 1.996, 1.995, 1.995, 1.994, 1.994, 1.993, 1.993, 1.993, 1.992, 1.992, 1.991, 1.991, 1.99, 1.99, 1.99, 1.989, 1.989, 1.989, 1.988, 1.988, 1.988, 1.987, 1.987, 1.987, 1.986, 1.986, 1.986, 1.986, 1.985, 1.985, 1.985, 1.984, 1.984, 1.984, 1.96} + {0.05, 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.16, 2.145, 2.131, 2.12, 2.11, 2.101, 2.093, 2.086, 2.08, 2.074, 2.069, 2.064, 2.06, 2.056, 2.052, 2.048, 2.045, 2.042, 2.04, 2.037, 2.035, 2.032, 2.03, 2.028, 2.026, 2.024, 2.023, 2.021, 2.02, 2.018, 2.017, 2.015, 2.014, 2.013, 2.012, 2.011, 2.01, 2.009, 2.008, 2.007, 2.006, 2.005, 2.004, 2.003, 2.002, 2.002, 2.001, 2.0, 2.0, 1.999, 1.998, 1.998, 1.997, 1.997, 1.996, 1.995, 1.995, 1.994, 1.994, 1.993, 1.993, 1.993, 1.992, 1.992, 1.991, 1.991, 1.99, 1.99, 1.99, 1.989, 1.989, 1.989, 1.988, 1.988, 1.988, 1.987, 1.987, 1.987, 1.986, 1.986, 1.986, 1.986, 1.985, 1.985, 1.985, 1.984, 1.984, 1.984, 1.96}, // for significance level = 0.02 - {0.02, 31.821, 6.965, 4.541, 3.747, 3.365, 3.143, 2.998, 2.896, 2.821, 2.764, 2.718, 2.681, 2.65, 2.624, 
2.602, 2.583, 2.567, 2.552, 2.539, 2.528, 2.518, 2.508, 2.5, 2.492, 2.485, 2.479, 2.473, 2.467, 2.462, 2.457, 2.453, 2.449, 2.445, 2.441, 2.438, 2.434, 2.431, 2.429, 2.426, 2.423, 2.421, 2.418, 2.416, 2.414, 2.412, 2.41, 2.408, 2.407, 2.405, 2.403, 2.402, 2.4, 2.399, 2.397, 2.396, 2.395, 2.394, 2.392, 2.391, 2.39, 2.389, 2.388, 2.387, 2.386, 2.385, 2.384, 2.383, 2.382, 2.382, 2.381, 2.38, 2.379, 2.379, 2.378, 2.377, 2.376, 2.376, 2.375, 2.374, 2.374, 2.373, 2.373, 2.372, 2.372, 2.371, 2.37, 2.37, 2.369, 2.369, 2.368, 2.368, 2.368, 2.367, 2.367, 2.366, 2.366, 2.365, 2.365, 2.365, 2.364, 2.326} + {0.02, 31.821, 6.965, 4.541, 3.747, 3.365, 3.143, 2.998, 2.896, 2.821, 2.764, 2.718, 2.681, 2.65, 2.624, 2.602, 2.583, 2.567, 2.552, 2.539, 2.528, 2.518, 2.508, 2.5, 2.492, 2.485, 2.479, 2.473, 2.467, 2.462, 2.457, 2.453, 2.449, 2.445, 2.441, 2.438, 2.434, 2.431, 2.429, 2.426, 2.423, 2.421, 2.418, 2.416, 2.414, 2.412, 2.41, 2.408, 2.407, 2.405, 2.403, 2.402, 2.4, 2.399, 2.397, 2.396, 2.395, 2.394, 2.392, 2.391, 2.39, 2.389, 2.388, 2.387, 2.386, 2.385, 2.384, 2.383, 2.382, 2.382, 2.381, 2.38, 2.379, 2.379, 2.378, 2.377, 2.376, 2.376, 2.375, 2.374, 2.374, 2.373, 2.373, 2.372, 2.372, 2.371, 2.37, 2.37, 2.369, 2.369, 2.368, 2.368, 2.368, 2.367, 2.367, 2.366, 2.366, 2.365, 2.365, 2.365, 2.364, 2.326}, // for significance level = 0.01 - {0.01, 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.25, 3.169, 3.106, 3.055, 3.012, 2.977, 2.947, 2.921, 2.898, 2.878, 2.861, 2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756, 2.75, 2.744, 2.738, 2.733, 2.728, 2.724, 2.719, 2.715, 2.712, 2.708, 2.704, 2.701, 2.698, 2.695, 2.692, 2.69, 2.687, 2.685, 2.682, 2.68, 2.678, 2.676, 2.674, 2.672, 2.67, 2.668, 2.667, 2.665, 2.663, 2.662, 2.66, 2.659, 2.657, 2.656, 2.655, 2.654, 2.652, 2.651, 2.65, 2.649, 2.648, 2.647, 2.646, 2.645, 2.644, 2.643, 2.642, 2.641, 2.64, 2.64, 2.639, 2.638, 2.637, 2.636, 2.636, 2.635, 2.634, 2.634, 2.633, 2.632, 2.632, 2.631, 2.63, 2.63, 2.629, 
2.629, 2.628, 2.627, 2.627, 2.626, 2.626, 2.576} + {0.01, 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.25, 3.169, 3.106, 3.055, 3.012, 2.977, 2.947, 2.921, 2.898, 2.878, 2.861, 2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756, 2.75, 2.744, 2.738, 2.733, 2.728, 2.724, 2.719, 2.715, 2.712, 2.708, 2.704, 2.701, 2.698, 2.695, 2.692, 2.69, 2.687, 2.685, 2.682, 2.68, 2.678, 2.676, 2.674, 2.672, 2.67, 2.668, 2.667, 2.665, 2.663, 2.662, 2.66, 2.659, 2.657, 2.656, 2.655, 2.654, 2.652, 2.651, 2.65, 2.649, 2.648, 2.647, 2.646, 2.645, 2.644, 2.643, 2.642, 2.641, 2.64, 2.64, 2.639, 2.638, 2.637, 2.636, 2.636, 2.635, 2.634, 2.634, 2.633, 2.632, 2.632, 2.631, 2.63, 2.63, 2.629, 2.629, 2.628, 2.627, 2.627, 2.626, 2.626, 2.576}, // for significance level = 0.002 {0.002, 318.313, 22.327, 10.215, 7.173, 5.893, 5.208, 4.782, 4.499, 4.296, 4.143, 4.024, 3.929, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527, 3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365, 3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291, 3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248, 3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22, 3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201, 3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187, 3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175, 3.175, 3.174, 3.09} @@ -145,19 +149,19 @@ struct AggregateFunctionWelchTTestData final { } //round or make infinity dof - dof = (int) dof; + dof = static_cast(dof); if (dof > 100) { dof = 101; } //check if abs of t is greater than table[dof] - t = abs(t) + t = abs(t); if(t > CriticalValuesTable[table][dof]) { - return (UInt8) 1; + return static_cast(1); //in this case we reject the null hypothesis } else { - return (UInt8) 0; + return static_cast(0); } } }; From 62460faf97e2a10d3245d7b11d139240fcafa84b Mon Sep 
17 00:00:00 2001 From: antikvist Date: Mon, 27 Apr 2020 01:59:41 +0300 Subject: [PATCH 003/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 2 +- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 46b533a2c0b..a4e0a54775b 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -9,7 +9,7 @@ namespace DB namespace { -AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) +AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argument_types, const Array & parameters) { diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index be1e176d540..0279849be2c 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -41,7 +41,7 @@ Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // for significance level = 0.002 {0.002, 318.313, 22.327, 10.215, 7.173, 5.893, 5.208, 4.782, 4.499, 4.296, 4.143, 4.024, 3.929, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527, 3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365, 3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291, 3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248, 3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22, 3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201, 3.199, 3.198, 3.197, 3.195, 
3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187, 3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175, 3.175, 3.174, 3.09} -} +}; // our algorithm implementation via vectors: // https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 From dcfb99b9877528fe09db02a1afff6346de2c8e57 Mon Sep 17 00:00:00 2001 From: antikvist Date: Mon, 27 Apr 2020 19:16:02 +0300 Subject: [PATCH 004/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- .../AggregateFunctionWelchTTest.h | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 0279849be2c..ecc397cf731 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -17,6 +17,11 @@ #include +#include + +#include + + namespace DB { // hard-codded values - part of the algorithm @@ -80,8 +85,8 @@ struct AggregateFunctionWelchTTestData final { sum_y += y; size_x++; size_y++; - mean_x = (Float64) sum_x / size_x; - mean_y = (Float64) sum_y / size_y; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; square_sum_x += x * x; square_sum_y += y * y; } @@ -91,8 +96,8 @@ struct AggregateFunctionWelchTTestData final { sum_y += other.sum_y; size_x += other.size_x; size_y += other.size_y; - mean_x = (Float64) sum_x / size_x; - mean_y = (Float64) sum_y / size_y; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; square_sum_x += other.square_sum_x; square_sum_y += other.square_sum_y; } @@ -120,19 +125,19 @@ struct AggregateFunctionWelchTTestData final { } Float64 get_sx() const { - return (Float64)(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); + return 
static_cast(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); } Float64 get_sy() const { - return (Float64)(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); + return static_cast(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); } Float64 get_T(Float64 sx, Float64 sy) const { - return (Float64)(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); + return static_cast(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); } Float64 get_degrees_of_freed(Float64 sx, Float64 sy) const { - return (Float64)(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / + return static_cast(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } From 7cfe5ef42b80a45243e70d3b73cf459054e46890 Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 1 May 2020 00:36:37 +0300 Subject: [PATCH 005/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index ecc397cf731..e716553e065 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -144,7 +144,7 @@ struct AggregateFunctionWelchTTestData final { Ret get_result(Float64 t, Float64 dof, Float64 parametr) const { //find our table - int table; + int table = 0; for (int i = 0; i < SIGN_LVL_CNT; ++i) { if (CriticalValuesTable[i][0] == parametr) @@ -249,12 +249,11 @@ public: Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); Ret result = this->data(place).get_result(t_value, dof, significance_level); - auto & column = static_cast(to); + //check the type + auto & column = static_cast &>(to); column.getData().push_back(result); } -} }; - -}; \ No newline at end of file +}; From 4f56cc32ae388c5dc002afe071b0b24a9d1adafb Mon Sep 17 00:00:00 2001 From: antikvist Date: Wed, 6 May 2020 00:48:05 +0300 Subject: [PATCH 006/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 6 +++--- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index a4e0a54775b..c4a0c6d4e2b 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -9,11 +9,11 @@ namespace DB namespace { +template AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argument_types, const Array & parameters) { - - - return std::make_shared(argument_types, parameters); + Float64 significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + return std::make_shared>(argument_types, parameters); } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index e716553e065..a1416f9ab41 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -154,14 +154,14 @@ struct AggregateFunctionWelchTTestData final { } //round or make infinity dof - dof = static_cast(dof); - if (dof > 100) + i_dof = static_cast(dof); + if (i_dof > 100) { - dof = 101; + i_dof = 101; } //check if abs of t is greater than table[dof] t = abs(t); - if(t > CriticalValuesTable[table][dof]) { + if(t > CriticalValuesTable[table][i_dof]) { return static_cast(1); //in this case we reject the null hypothesis } From f0ac5b441f7dccbdfe31512c90ae02da1d9921fa Mon Sep 17 00:00:00 2001 From: antikvist Date: Wed, 6 May 2020 02:07:51 +0300 Subject: [PATCH 007/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 1 - src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index c4a0c6d4e2b..2c8d1e0aed8 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -12,7 +12,6 @@ namespace template AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argument_types, const Array & parameters) { - Float64 significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); return std::make_shared>(argument_types, parameters); } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index a1416f9ab41..9445ccc506d 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -154,7 +154,7 @@ struct AggregateFunctionWelchTTestData final { } //round or make infinity dof - i_dof = static_cast(dof); + int i_dof = static_cast(dof); if (i_dof > 100) { i_dof = 101; From 3dde788146bcef2697cd2701b60d82f603d9c887 Mon Sep 17 00:00:00 2001 From: antikvist Date: Thu, 7 May 2020 14:17:58 +0300 Subject: [PATCH 008/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 9445ccc506d..5203ba1f988 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -186,7 +186,7 @@ public: IAggregateFunctionDataHelper< AggregateFunctionWelchTTestData, AggregateFunctionWelchTTest - > {arguments, params} + > ({argument}, params) { // notice: arguments has been in factory } From 92afa6c0decda463505263f2956005c58e831862 Mon Sep 17 00:00:00 2001 From: antikvist Date: Thu, 7 May 2020 17:14:29 +0300 Subject: [PATCH 009/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 5203ba1f988..22dda8ea244 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -186,7 +186,7 @@ public: IAggregateFunctionDataHelper< AggregateFunctionWelchTTestData, AggregateFunctionWelchTTest - > ({argument}, params) + > ({arguments}, params) { // notice: arguments has been in factory } From 7975d8d5b0c9c4534f108344deb1fdd279cf1eaf Mon Sep 17 00:00:00 2001 From: antikvist Date: Thu, 7 May 2020 23:11:25 +0300 Subject: [PATCH 010/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- .../AggregateFunctionCount.cpp | 2 +- .../AggregateFunctionWelchTTest.h | 47 +++++++++++-------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index 6c22fec87a2..7ede78e720f 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -22,7 +22,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { - factory.registerFunction("count", createAggregateFunctionCount, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("count", createAggregateFunctionCount); } } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 22dda8ea244..e2e720a12ef 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -12,14 +14,8 @@ #include #include -#include -#include -#include - - #include -#include namespace DB @@ -52,7 +48,8 @@ Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 // col, col, bool template -struct AggregateFunctionWelchTTestData final { +struct AggregateFunctionWelchTTestData final +{ size_t size_x = 0; size_t size_y = 0; @@ -65,14 +62,16 @@ struct AggregateFunctionWelchTTestData final { /* not yet sure how to use them - void add_x(X x) { + void add_x(X x) + { mean_x = (Float64)(sum_x + x) / (size_x + 1); size_x ++; sum_x += x; square_sum_x += x * x; } - void add_y(Y y) { + void add_y(Y y) + { mean_y = (sum_y + y) / (size_y + 1); size_y ++; sum_y += y; @@ -80,7 +79,8 @@ struct AggregateFunctionWelchTTestData final { 
} */ - void add(X x, Y y) { + void add(X x, Y y) + { sum_x += x; sum_y += y; size_x++; @@ -91,7 +91,8 @@ struct AggregateFunctionWelchTTestData final { square_sum_y += y * y; } - void merge(const AggregateFunctionWelchTTestData &other) { + void merge(const AggregateFunctionWelchTTestData &other) + { sum_x += other.sum_x; sum_y += other.sum_y; size_x += other.size_x; @@ -102,7 +103,8 @@ struct AggregateFunctionWelchTTestData final { square_sum_y += other.square_sum_y; } - void serialize(WriteBuffer &buf) const { + void serialize(WriteBuffer &buf) const + { writeBinary(mean_x, buf); writeBinary(mean_y, buf); writeBinary(sum_x, buf); @@ -113,7 +115,8 @@ struct AggregateFunctionWelchTTestData final { writeBinary(size_y, buf); } - void deserialize(ReadBuffer &buf) { + void deserialize(ReadBuffer &buf) + { readBinary(mean_x, buf); readBinary(mean_y, buf); readBinary(sum_x, buf); @@ -124,19 +127,23 @@ struct AggregateFunctionWelchTTestData final { readBinary(size_y, buf); } - Float64 get_sx() const { + Float64 get_sx() const + { return static_cast(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); } - Float64 get_sy() const { + Float64 get_sy() const + { return static_cast(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); } - Float64 get_T(Float64 sx, Float64 sy) const { + Float64 get_T(Float64 sx, Float64 sy) const + { return static_cast(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); } - Float64 get_degrees_of_freed(Float64 sx, Float64 sy) const { + Float64 get_degrees_of_freed(Float64 sx, Float64 sy) const + { return static_cast(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } @@ -161,11 +168,13 @@ struct AggregateFunctionWelchTTestData final { } //check if abs of t is greater than table[dof] t = abs(t); - if(t > CriticalValuesTable[table][i_dof]) { + if(t > CriticalValuesTable[table][i_dof]) + { 
return static_cast(1); //in this case we reject the null hypothesis } - else { + else + { return static_cast(0); } } From b390043f31fe9a4023e9a00b248560155f114b72 Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 8 May 2020 00:39:51 +0300 Subject: [PATCH 011/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- .../AggregateFunctionWelchTTest.cpp | 12 ++++++++++-- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 12 +++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 2c8d1e0aed8..c8f2a46b1e4 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -12,7 +12,15 @@ namespace template AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argument_types, const Array & parameters) { - return std::make_shared>(argument_types, parameters); + // default value + Float64 significance_level = 0.1; + if (!params.empty()) + { + significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + } + + + return std::make_shared>(significance_level, argument_types, parameters); } @@ -21,7 +29,7 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argumen void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); } } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index e2e720a12ef..367970fa4e0 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ 
b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -168,7 +168,7 @@ struct AggregateFunctionWelchTTestData final } //check if abs of t is greater than table[dof] t = abs(t); - if(t > CriticalValuesTable[table][i_dof]) + if (t > CriticalValuesTable[table][i_dof]) { return static_cast(1); //in this case we reject the null hypothesis @@ -187,15 +187,22 @@ class AggregateFunctionWelchTTest final : public AggregateFunctionWelchTTest > { + + +private: + Float64 significance_level; + + public: AggregateFunctionWelchTTest( + Float64 sglvl_, const DataTypes & arguments, const Array & params ): IAggregateFunctionDataHelper< AggregateFunctionWelchTTestData, AggregateFunctionWelchTTest - > ({arguments}, params) + > ({arguments}, params), significance_level(sglvl_) { // notice: arguments has been in factory } @@ -250,7 +257,6 @@ public: IColumn & to ) const override { - Float64 significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); Float64 sx = this->data(place).get_sx(); Float64 sy = this->data(place).get_sy(); From ab7d1fb86fe2645bb7d3ed43cb79b41517f64b08 Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 8 May 2020 00:44:31 +0300 Subject: [PATCH 012/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 367970fa4e0..4b199e799f5 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -264,7 +264,7 @@ public: Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); Ret result = this->data(place).get_result(t_value, dof, significance_level); - //check the type + auto & column = static_cast &>(to); column.getData().push_back(result); } From 4b4ff06cab3642cd061f80a8d0eddb74ff0d79db Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 8 May 2020 02:22:12 +0300 Subject: [PATCH 013/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 5 +++-- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index c8f2a46b1e4..151071091e4 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "registerAggregateFunctions.h" namespace DB @@ -14,9 +15,9 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argumen { // default value Float64 significance_level = 0.1; - if (!params.empty()) + if (!parameters.empty()) { - significance_level = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + significance_level = applyVisitor(FieldVisitorConvertToNumber(), parameters[0]); } diff --git 
a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 4b199e799f5..855b3fcd917 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -17,7 +17,6 @@ #include - namespace DB { // hard-codded values - part of the algorithm @@ -269,6 +268,5 @@ public: column.getData().push_back(result); } - }; }; From df4c312e141f938099ab5e9d00d17f40c442389d Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 8 May 2020 13:17:59 +0300 Subject: [PATCH 014/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. -Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 151071091e4..bc759b9f8f1 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argumen void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); } } From 6d6f0b00ba532c3bb637d5a8f6d2dcaf845f257e Mon Sep 17 00:00:00 2001 From: antikvist Date: Sat, 9 May 2020 01:55:09 +0300 Subject: [PATCH 015/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- .../AggregateFunctionWelchTTest.cpp | 25 ++++++++--- .../AggregateFunctionWelchTTest.h | 43 +++++++++++-------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index bc759b9f8f1..1634d0149da 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -3,26 +3,39 @@ #include #include "registerAggregateFunctions.h" +#include +#include +#include + namespace DB { namespace { +//template +static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) +{ + return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); +} -template -AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argument_types, const Array & parameters) +//template +AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, + const DataTypes & argument_types, + const Array & parameters) { // default value Float64 significance_level = 0.1; + if (parameters.size() > 1) + throw Exception("Aggregate function " + name + " requires two parameters or less.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!parameters.empty()) { significance_level = applyVisitor(FieldVisitorConvertToNumber(), parameters[0]); } - - return std::make_shared>(significance_level, argument_types, parameters); - + AggregateFunctionPtr res (createWithExtraTypes(significance_level, argument_types, parameters)); + return res; } } @@ -30,7 +43,7 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const DataTypes & argumen void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); + factory.registerFunction("WelchTTest", 
createAggregateFunctionWelchTTest); } } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 855b3fcd917..bbf02200745 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -46,16 +46,16 @@ Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // our algorithm implementation via vectors: // https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 // col, col, bool -template +//template struct AggregateFunctionWelchTTestData final { size_t size_x = 0; size_t size_y = 0; - X sum_x = 0; - Y sum_y = 0; - X square_sum_x = 0; - Y square_sum_y = 0; + Float64 sum_x = static_cast(0); + Float64 sum_y = static_cast(0); + Float64 square_sum_x = static_cast(0); + Float64 square_sum_y = static_cast(0); Float64 mean_x = 0; Float64 mean_y = 0; @@ -78,7 +78,7 @@ struct AggregateFunctionWelchTTestData final } */ - void add(X x, Y y) + void add(Float64 x, Float64 y) { sum_x += x; sum_y += y; @@ -147,7 +147,7 @@ struct AggregateFunctionWelchTTestData final ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } - Ret get_result(Float64 t, Float64 dof, Float64 parametr) const + UInt8 get_result(Float64 t, Float64 dof, Float64 parametr) const { //find our table int table = 0; @@ -179,11 +179,11 @@ struct AggregateFunctionWelchTTestData final } }; -template -class AggregateFunctionWelchTTest final : public +//template +class AggregateFunctionWelchTTest : public IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest > { @@ -199,8 +199,8 @@ public: const Array & params ): IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest > ({arguments}, params), significance_level(sglvl_) { // notice: 
arguments has been in factory @@ -211,6 +211,11 @@ public: return "WelchTTest"; } + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + void add( AggregateDataPtr place, const IColumn ** columns, @@ -218,11 +223,11 @@ public: Arena * ) const override { - auto col_x = assert_cast *>(columns[0]); - auto col_y = assert_cast *>(columns[1]); + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); - X x = col_x->getData()[row_num]; - Y y = col_y->getData()[row_num]; + Float64 x = col_x->getData()[row_num]; + Float64 y = col_y->getData()[row_num]; this->data(place).add(x, y); } @@ -261,10 +266,10 @@ public: Float64 sy = this->data(place).get_sy(); Float64 t_value = this->data(place).get_T(sx, sy); Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); - Ret result = this->data(place).get_result(t_value, dof, significance_level); + UInt8 result = this->data(place).get_result(t_value, dof, significance_level); - auto & column = static_cast &>(to); + auto & column = static_cast &>(to); column.getData().push_back(result); } From f6e6c48d9c477f92f7669996e4742d1125061ea5 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sun, 17 May 2020 12:25:41 +0300 Subject: [PATCH 016/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionCount.cpp | 2 +- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index 7ede78e720f..6c22fec87a2 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -22,7 +22,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { - factory.registerFunction("count", createAggregateFunctionCount); + factory.registerFunction("count", createAggregateFunctionCount, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 1634d0149da..90b1c445a14 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -14,7 +14,7 @@ namespace DB namespace { //template -static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) +IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) { return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); } From 6069750b0e6bf95ab180d033c0fe376102d540fd Mon Sep 17 00:00:00 2001 From: antikvist Date: Sun, 17 May 2020 14:15:49 +0300 Subject: [PATCH 017/174] #WelchTTest aggregate function implementation What's new: -Main classes for aggreagate function added. -Data class with needed mathods added. 
-Registered function in registerAggregateFunctions.h --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 90b1c445a14..1634d0149da 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -14,7 +14,7 @@ namespace DB namespace { //template -IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) +static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) { return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); } From 1903e6cec227f216159b23656213587beaea4981 Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 12 Jun 2020 02:43:55 +0300 Subject: [PATCH 018/174] pray to ClickHouse gods --- .../AggregateFunctionWelchTTest.cpp | 21 +++++--- .../AggregateFunctionWelchTTest.h | 50 +++++++++---------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 1634d0149da..3a2bc831cc1 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -4,30 +4,34 @@ #include "registerAggregateFunctions.h" #include -#include #include +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + namespace DB { namespace { -//template +template static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) { return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); } -//template +template AggregateFunctionPtr 
createAggregateFunctionWelchTTest(const std::string & name, - const DataTypes & argument_types, - const Array & parameters) + const DataTypes & argument_types, + const Array & parameters) { // default value Float64 significance_level = 0.1; if (parameters.size() > 1) - throw Exception("Aggregate function " + name + " requires two parameters or less.", + throw Exception("Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!parameters.empty()) { @@ -40,10 +44,11 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, } +template void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); } -} +} \ No newline at end of file diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index bbf02200745..d625cc908ec 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -46,16 +46,17 @@ Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // our algorithm implementation via vectors: // https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 // col, col, bool -//template +template +//template struct AggregateFunctionWelchTTestData final { size_t size_x = 0; size_t size_y = 0; - Float64 sum_x = static_cast(0); - Float64 sum_y = static_cast(0); - Float64 square_sum_x = static_cast(0); - Float64 square_sum_y = static_cast(0); + X sum_x = static_cast(0); + Y sum_y = static_cast(0); + X square_sum_x = static_cast(0); + Y square_sum_y = static_cast(0); Float64 mean_x = 0; Float64 mean_y = 0; @@ -78,7 +79,7 @@ struct AggregateFunctionWelchTTestData final } */ - void add(Float64 x, Float64 y) + void add(X x, Y y) { sum_x += x; sum_y += y; @@ -147,7 +148,7 @@ struct 
AggregateFunctionWelchTTestData final ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } - UInt8 get_result(Float64 t, Float64 dof, Float64 parametr) const + Ret get_result(Float64 t, Float64 dof, Float64 parametr) const { //find our table int table = 0; @@ -169,29 +170,27 @@ struct AggregateFunctionWelchTTestData final t = abs(t); if (t > CriticalValuesTable[table][i_dof]) { - return static_cast(1); + return static_cast(1); //in this case we reject the null hypothesis } else { - return static_cast(0); + return static_cast(0); } } }; -//template +template class AggregateFunctionWelchTTest : public - IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest - > + IAggregateFunctionDataHelper< + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest + > { - private: Float64 significance_level; - public: AggregateFunctionWelchTTest( Float64 sglvl_, @@ -213,7 +212,7 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(); + return std::make_shared>(); } void add( @@ -223,11 +222,11 @@ public: Arena * ) const override { - auto col_x = assert_cast *>(columns[0]); - auto col_y = assert_cast *>(columns[1]); + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); - Float64 x = col_x->getData()[row_num]; - Float64 y = col_y->getData()[row_num]; + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; this->data(place).add(x, y); } @@ -257,7 +256,7 @@ public: } void insertResultInto( - ConstAggregateDataPtr place, + AggregateDataPtr place, IColumn & to ) const override { @@ -266,12 +265,11 @@ public: Float64 sy = this->data(place).get_sy(); Float64 t_value = this->data(place).get_T(sx, sy); Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); - UInt8 result = this->data(place).get_result(t_value, dof, significance_level); + Ret result = this->data(place).get_result(t_value, dof, significance_level); - - auto 
& column = static_cast &>(to); + auto & column = static_cast &>(to); column.getData().push_back(result); } }; -}; +}; \ No newline at end of file From bbfccd491ece148818b5d0b09ffdd6abaedae908 Mon Sep 17 00:00:00 2001 From: antikvist Date: Fri, 12 Jun 2020 16:51:33 +0300 Subject: [PATCH 019/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 2 +- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 3a2bc831cc1..853a1182340 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -20,7 +20,7 @@ namespace template static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) { - return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); + return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); } template diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index d625cc908ec..bbd02d844c4 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -198,8 +198,8 @@ public: const Array & params ): IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest > ({arguments}, params), significance_level(sglvl_) { // notice: arguments has been in factory From 13faa3d83f8e50a2e1ea1c23b0026a11c9d72163 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sat, 13 Jun 2020 19:23:17 +0300 Subject: [PATCH 020/174] welch t-test --- .../AggregateFunctionWelchTTest.cpp | 26 ++++++++++------- .../AggregateFunctionWelchTTest.h | 28 
+++++++++---------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 853a1182340..9f451fd5d88 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -7,6 +7,8 @@ #include +// the return type is boolean (we use UInt8 as we do not have boolean in clickhouse) + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -17,13 +19,11 @@ namespace DB namespace { -template -static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) -{ - return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); -} +//static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) +//{ +// return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); +//} -template AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) @@ -38,17 +38,23 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, significance_level = applyVisitor(FieldVisitorConvertToNumber(), parameters[0]); } - AggregateFunctionPtr res (createWithExtraTypes(significance_level, argument_types, parameters)); + AggregateFunctionPtr res; + DataTypePtr data_type = argument_types[0]; +// if (isDecimal(data_type)) +// res.reset(createWithDecimalType(*data_type, significance_level, argument_types, parameters)); +// else + res.reset(createWithNumericType(*data_type, significance_level, argument_types, parameters)); + + //AggregateFunctionPtr res (createWithExtraTypes(significance_level, argument_types, parameters)); return res; } } -template + void 
registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); } } \ No newline at end of file diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index bbd02d844c4..13b9c992162 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -46,8 +46,8 @@ Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { // our algorithm implementation via vectors: // https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 // col, col, bool -template -//template +template +//template struct AggregateFunctionWelchTTestData final { @@ -148,7 +148,7 @@ struct AggregateFunctionWelchTTestData final ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } - Ret get_result(Float64 t, Float64 dof, Float64 parametr) const + UInt8 get_result(Float64 t, Float64 dof, Float64 parametr) const { //find our table int table = 0; @@ -170,21 +170,21 @@ struct AggregateFunctionWelchTTestData final t = abs(t); if (t > CriticalValuesTable[table][i_dof]) { - return static_cast(1); + return static_cast(1); //in this case we reject the null hypothesis } else { - return static_cast(0); + return static_cast(0); } } }; -template +template class AggregateFunctionWelchTTest : public IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest > { @@ -198,8 +198,8 @@ public: const Array & params ): IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest + AggregateFunctionWelchTTestData, + AggregateFunctionWelchTTest > ({arguments}, params), significance_level(sglvl_) { // notice: arguments has been in factory @@ -212,7 +212,7 
@@ public: DataTypePtr getReturnType() const override { - return std::make_shared>(); + return std::make_shared>(); } void add( @@ -265,11 +265,11 @@ public: Float64 sy = this->data(place).get_sy(); Float64 t_value = this->data(place).get_T(sx, sy); Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); - Ret result = this->data(place).get_result(t_value, dof, significance_level); + UInt8 result = this->data(place).get_result(t_value, dof, significance_level); - auto & column = static_cast &>(to); + auto & column = static_cast &>(to); column.getData().push_back(result); } }; -}; \ No newline at end of file +}; \ No newline at end of file From cb8eec8def65f817773f623e57d8e02518d3c2bd Mon Sep 17 00:00:00 2001 From: antikvist Date: Sun, 14 Jun 2020 00:55:01 +0300 Subject: [PATCH 021/174] welch t-test --- .../AggregateFunctionWelchTTest.cpp | 24 +++++++++++-------- .../AggregateFunctionWelchTTest.h | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 9f451fd5d88..8d2963aba74 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -12,6 +12,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int NOT_IMPLEMENTED; } namespace DB @@ -31,21 +32,24 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, // default value Float64 significance_level = 0.1; if (parameters.size() > 1) - throw Exception("Aggregate function " + name + " requires one parameter or less.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + { + throw Exception("Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (!parameters.empty()) { significance_level = applyVisitor(FieldVisitorConvertToNumber(), parameters[0]); } AggregateFunctionPtr res; - 
DataTypePtr data_type = argument_types[0]; -// if (isDecimal(data_type)) -// res.reset(createWithDecimalType(*data_type, significance_level, argument_types, parameters)); -// else - res.reset(createWithNumericType(*data_type, significance_level, argument_types, parameters)); - //AggregateFunctionPtr res (createWithExtraTypes(significance_level, argument_types, parameters)); + if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) + { + throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); + } + + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, argument_types, parameters)); + return res; } @@ -54,7 +58,7 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest); + factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); } -} \ No newline at end of file +} diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 13b9c992162..29f8e17b6be 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -272,4 +272,4 @@ public: } }; -}; \ No newline at end of file +}; From 9638eb7490bd8cfad234353d791115cd9988cbf6 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sun, 14 Jun 2020 19:18:04 +0300 Subject: [PATCH 022/174] welch t-test --- .../AggregateFunctionWelchTTest.cpp | 15 ++++++--------- .../AggregateFunctionWelchTTest.h | 12 ++++++------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 8d2963aba74..c7349f28d90 100644 --- 
a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -20,10 +20,6 @@ namespace DB namespace { -//static IAggregateFunction * createWithExtraTypes(Float64 significance_level, const DataTypes & argument_types, const Array & parameters) -//{ -// return new AggregateFunctionWelchTTest(significance_level, argument_types, parameters); -//} AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, @@ -43,12 +39,13 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, AggregateFunctionPtr res; - if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) - { - throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); - } +// if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) +// { +// throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); +// } - res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, argument_types, parameters)); + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, + argument_types, parameters)); return res; } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 29f8e17b6be..210b8990693 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -53,12 +53,12 @@ struct AggregateFunctionWelchTTestData final size_t size_x = 0; size_t size_y = 0; - X sum_x = static_cast(0); - Y sum_y = static_cast(0); - X square_sum_x = static_cast(0); - Y square_sum_y = static_cast(0); - Float64 mean_x = 0; - Float64 mean_y = 0; + X sum_x = static_cast(0); + Y sum_y = static_cast(0); + X square_sum_x = static_cast(0); + Y square_sum_y = static_cast(0); + Float64 
mean_x = static_cast(0); + Float64 mean_y = static_cast(0); /* not yet sure how to use them From 3b30ea2f373d841f936b6b2bd5be25e335b82d70 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sun, 14 Jun 2020 23:00:02 +0300 Subject: [PATCH 023/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index c7349f28d90..e7ba204046e 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -25,6 +25,8 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) { + assertBinary(name, argument_types); + // default value Float64 significance_level = 0.1; if (parameters.size() > 1) @@ -39,10 +41,10 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, AggregateFunctionPtr res; -// if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) -// { -// throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); -// } + if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) + { + throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); + } res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, argument_types, parameters)); From 2f073ab3f785fc267715940d685a1b7743c588f4 Mon Sep 17 00:00:00 2001 From: antikvist Date: Mon, 15 Jun 2020 21:18:22 +0300 Subject: [PATCH 024/174] welch t-test --- .../AggregateFunctionWelchTTest.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 
e7ba204046e..b1c8f73d2e9 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -43,11 +43,17 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) { - throw Exception("Aggregate function " + name + " does not support decimal types.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); + } + + else{ + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, + argument_types, parameters)); } - res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, - argument_types, parameters)); + if(!res){ + throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); + } return res; } From ccce2537f686152e09c7143517707e5dcec8fe95 Mon Sep 17 00:00:00 2001 From: antikvist Date: Mon, 15 Jun 2020 21:21:48 +0300 Subject: [PATCH 025/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index b1c8f73d2e9..28ad414146e 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -45,12 +45,13 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, { throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); } - + else{ res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, argument_types, parameters)); } + if(!res){ throw Exception("Aggregate function " + name + " only supports numerical types.", 
ErrorCodes::NOT_IMPLEMENTED); } From 965bf4bd6535676f99b667b8630fcdce7f4f9e5b Mon Sep 17 00:00:00 2001 From: antikvist Date: Tue, 16 Jun 2020 01:38:35 +0300 Subject: [PATCH 026/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 28ad414146e..3d2e98e2a0e 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -46,13 +46,15 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); } - else{ + else + { res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, argument_types, parameters)); } - if(!res){ + if (!res) + { throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); } From 51ff2f4e92d163593c192437e98a5a03bf67011e Mon Sep 17 00:00:00 2001 From: antikvist Date: Tue, 16 Jun 2020 09:14:54 +0300 Subject: [PATCH 027/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 210b8990693..897c583d913 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -272,4 +272,5 @@ public: } }; + }; From a7f8d6b380f290f25725fef6ab97c10dba0c37b1 Mon Sep 17 00:00:00 2001 From: antikvist Date: Tue, 16 Jun 2020 12:45:46 +0300 Subject: [PATCH 028/174] welch t-test --- src/AggregateFunctions/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index 
bfa32b6dd78..edae91cc745 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -49,6 +49,7 @@ SRCS( registerAggregateFunctions.cpp UniqCombinedBiasData.cpp UniqVariadicHash.cpp + AggregateFunctionWelchTTest.cpp ) END() From 21c5ecb597f3c3e4e65cbf4d48b3e604f9428931 Mon Sep 17 00:00:00 2001 From: antikvist Date: Tue, 16 Jun 2020 22:58:06 +0300 Subject: [PATCH 029/174] welch t-test --- .../AggregateFunctionWelchTTest.h | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 897c583d913..266da9fde5b 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -16,6 +16,10 @@ #include +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} namespace DB { @@ -127,6 +131,16 @@ struct AggregateFunctionWelchTTestData final readBinary(size_y, buf); } + size_t get_size_y() const + { + return size_y; + } + + size_t get_size_x() const + { + return size_x; + } + Float64 get_sx() const { return static_cast(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); @@ -162,10 +176,17 @@ struct AggregateFunctionWelchTTestData final //round or make infinity dof int i_dof = static_cast(dof); + if (i_dof > 100) { i_dof = 101; } + + if(i_dof < 100) + { + i_dof = 1; + } + //check if abs of t is greater than table[dof] t = abs(t); if (t > CriticalValuesTable[table][i_dof]) @@ -260,6 +281,13 @@ public: IColumn & to ) const override { + size_t size_x = this->data(place).get_size_x(); + size_t size_y = this->data(place).get_size_y(); + + if(size_x < 2 || size_y < 2) + { + throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); + } Float64 sx = this->data(place).get_sx(); Float64 sy = this->data(place).get_sy(); From d0f92b5492b635881b72f32d2b4462b4f44b907c Mon Sep 17 00:00:00 2001 
From: antikvist Date: Wed, 17 Jun 2020 18:43:22 +0300 Subject: [PATCH 030/174] welch t-test --- .../AggregateFunctionWelchTTest.h | 5 +++-- .../0_stateless/01319_welch_ttest.reference | 3 +++ tests/queries/0_stateless/01319_welch_ttest.sql | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01319_welch_ttest.reference create mode 100644 tests/queries/0_stateless/01319_welch_ttest.sql diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 266da9fde5b..e2ae1761c51 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -189,7 +190,7 @@ struct AggregateFunctionWelchTTestData final //check if abs of t is greater than table[dof] t = abs(t); - if (t > CriticalValuesTable[table][i_dof]) + if (t >= CriticalValuesTable[table][i_dof]) { return static_cast(1); //in this case we reject the null hypothesis @@ -284,7 +285,7 @@ public: size_t size_x = this->data(place).get_size_x(); size_t size_y = this->data(place).get_size_y(); - if(size_x < 2 || size_y < 2) + if( size_x < 2 || size_y < 2) { throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } diff --git a/tests/queries/0_stateless/01319_welch_ttest.reference b/tests/queries/0_stateless/01319_welch_ttest.reference new file mode 100644 index 00000000000..e22493782f0 --- /dev/null +++ b/tests/queries/0_stateless/01319_welch_ttest.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/01319_welch_ttest.sql b/tests/queries/0_stateless/01319_welch_ttest.sql new file mode 100644 index 00000000000..ea103cc433b --- /dev/null +++ b/tests/queries/0_stateless/01319_welch_ttest.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS welch_ttest; +CREATE TABLE welch_ttest (left Float64, right 
Float64) ENGINE = Memory; + +INSERT INTO welch_ttest VALUES (2224.779, 2465.0984), (2588.11, 1909.0328), (1979.625, 1175.8747), (2137.442, 2171.378), (2565.818, 2193.2821), (1754.023, 2854.9475), (1654.947, 2060.1777), (1789.256, 2258.2366), (2320.659, 1856.0535), (2039.532, 1501.8126), (1983.497, 2987.6542), (2232.903, 1681.9778), (2513.93, 2479.6776), (2066.382, 1259.8584), (2492.715, 1120.9043), (1988.287, 1982.1213), (1840.036, 3012.3949), (2249.749, 2252.373), (1766.982, 2591.3122), (1724.84, 1940.589), (0, 1995.185), (0, 2535.1344), (0, 597.3155), (0, 2343.2192), (0, 3154.84), (0, 1125.1966), (0, 1227.8842), (0, 1692.805), (0, 2539.6772), (0, 1936.1927), (0, 1783.7795), (0, 1703.4384), (0, 2077.194), (0, 1614.4071), (0, 2360.0365), (0, 1619.2781), (0, 2033.5109), (0, 2333.7834), (0, 2144.0485), (0, 2583.8709), (0, 1116.7213), (0, 1601.9383), (0, 1570.0431), (0, 1963.0777), (0, 1639.2533), (0, 2277.5223), (0, 1991.9286), (0, 2044.3338), (0, 1794.4781), (0, 1597.9119) +SELECT WelchTTest(0.1)(left, right) from welch_ttest; + +DROP TABLE IF EXISTS welch_ttest; +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; + +INSERT INTO welch_ttest VALUES (2224.779, 2465.0984), (2588.11, 1909.0328), (1979.625, 1175.8747), (2137.442, 2171.378), (2565.818, 2193.2821), (1754.023, 2854.9475), (1654.947, 2060.1777), (1789.256, 2258.2366), (2320.659, 1856.0535), (2039.532, 1501.8126), (1983.497, 2987.6542), (2232.903, 1681.9778), (2513.93, 2479.6776), (2066.382, 1259.8584), (2492.715, 1120.9043), (1988.287, 1982.1213), (1840.036, 3012.3949), (2249.749, 2252.373), (1766.982, 2591.3122), (1724.84, 1940.589), (0, 1995.185), (0, 2535.1344), (0, 597.3155), (0, 2343.2192), (0, 3154.84), (0, 1125.1966), (0, 1227.8842), (0, 1692.805), (0, 2539.6772), (0, 1936.1927), (0, 1783.7795), (0, 1703.4384), (0, 2077.194), (0, 1614.4071), (0, 2360.0365), (0, 1619.2781), (0, 2033.5109), (0, 2333.7834), (0, 2144.0485), (0, 2583.8709), (0, 1116.7213), (0, 1601.9383), (0, 
1570.0431), (0, 1963.0777), (0, 1639.2533), (0, 2277.5223), (0, 1991.9286), (0, 2044.3338), (0, 1794.4781), (0, 1597.9119) +SELECT WelchTTest(0.02)(left, right) from welch_ttest; + +DROP TABLE IF EXISTS welch_ttest; +CREATE TABLE welch_ttest (left Int64, right Int64) ENGINE = Memory; + +INSERT INTO welch_ttest VALUES (1, 1), (1, 1), (1, 1); +SELECT WelchTTest(0.1)(left, right) from welch_ttest; \ No newline at end of file From d92160e734fe71b03e66b330512f2f261274becf Mon Sep 17 00:00:00 2001 From: antikvist Date: Wed, 17 Jun 2020 18:59:58 +0300 Subject: [PATCH 031/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index e2ae1761c51..456effc53b8 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -183,7 +183,7 @@ struct AggregateFunctionWelchTTestData final i_dof = 101; } - if(i_dof < 100) + if (i_dof < 100) { i_dof = 1; } @@ -285,7 +285,7 @@ public: size_t size_x = this->data(place).get_size_x(); size_t size_y = this->data(place).get_size_y(); - if( size_x < 2 || size_y < 2) + if (size_x < 2 || size_y < 2) { throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } From e4792df9a96be8a99a68292affb7c406e40d15a5 Mon Sep 17 00:00:00 2001 From: antikvist Date: Wed, 17 Jun 2020 21:56:39 +0300 Subject: [PATCH 032/174] welch t-test --- tests/queries/0_stateless/01319_welch_ttest.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01319_welch_ttest.sql b/tests/queries/0_stateless/01319_welch_ttest.sql index ea103cc433b..26bd686efab 100644 --- a/tests/queries/0_stateless/01319_welch_ttest.sql +++ b/tests/queries/0_stateless/01319_welch_ttest.sql @@ -14,4 +14,5 @@ DROP TABLE IF EXISTS 
welch_ttest; CREATE TABLE welch_ttest (left Int64, right Int64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (1, 1), (1, 1), (1, 1); -SELECT WelchTTest(0.1)(left, right) from welch_ttest; \ No newline at end of file +SELECT WelchTTest(0.1)(left, right) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; \ No newline at end of file From ae8ee1cbfaba57dcee2210d98ccf2ac7f054e6a4 Mon Sep 17 00:00:00 2001 From: antikvist Date: Wed, 17 Jun 2020 22:22:26 +0300 Subject: [PATCH 033/174] welch t-test --- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 456effc53b8..cc1417e6659 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -154,6 +154,16 @@ struct AggregateFunctionWelchTTestData final Float64 get_T(Float64 sx, Float64 sy) const { + if (sx == 0 && sy == 0) + { + throw Exception("division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + } + + if (sx == -sy && size_x == size_y) + { + throw Exception("division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + } + return static_cast(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); } From 08f9444842dfa6c0782237b839d491a54200c21e Mon Sep 17 00:00:00 2001 From: antikvist Date: Thu, 18 Jun 2020 00:02:50 +0300 Subject: [PATCH 034/174] welch t-test --- tests/queries/0_stateless/01319_welch_ttest.reference | 1 - tests/queries/0_stateless/01319_welch_ttest.sql | 5 ----- 2 files changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/01319_welch_ttest.reference b/tests/queries/0_stateless/01319_welch_ttest.reference index e22493782f0..b261da18d51 100644 --- a/tests/queries/0_stateless/01319_welch_ttest.reference +++ b/tests/queries/0_stateless/01319_welch_ttest.reference @@ -1,3 +1,2 @@ 1 0 -0 
diff --git a/tests/queries/0_stateless/01319_welch_ttest.sql b/tests/queries/0_stateless/01319_welch_ttest.sql index 26bd686efab..b8e881a069b 100644 --- a/tests/queries/0_stateless/01319_welch_ttest.sql +++ b/tests/queries/0_stateless/01319_welch_ttest.sql @@ -10,9 +10,4 @@ CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (2224.779, 2465.0984), (2588.11, 1909.0328), (1979.625, 1175.8747), (2137.442, 2171.378), (2565.818, 2193.2821), (1754.023, 2854.9475), (1654.947, 2060.1777), (1789.256, 2258.2366), (2320.659, 1856.0535), (2039.532, 1501.8126), (1983.497, 2987.6542), (2232.903, 1681.9778), (2513.93, 2479.6776), (2066.382, 1259.8584), (2492.715, 1120.9043), (1988.287, 1982.1213), (1840.036, 3012.3949), (2249.749, 2252.373), (1766.982, 2591.3122), (1724.84, 1940.589), (0, 1995.185), (0, 2535.1344), (0, 597.3155), (0, 2343.2192), (0, 3154.84), (0, 1125.1966), (0, 1227.8842), (0, 1692.805), (0, 2539.6772), (0, 1936.1927), (0, 1783.7795), (0, 1703.4384), (0, 2077.194), (0, 1614.4071), (0, 2360.0365), (0, 1619.2781), (0, 2033.5109), (0, 2333.7834), (0, 2144.0485), (0, 2583.8709), (0, 1116.7213), (0, 1601.9383), (0, 1570.0431), (0, 1963.0777), (0, 1639.2533), (0, 2277.5223), (0, 1991.9286), (0, 2044.3338), (0, 1794.4781), (0, 1597.9119) SELECT WelchTTest(0.02)(left, right) from welch_ttest; -DROP TABLE IF EXISTS welch_ttest; -CREATE TABLE welch_ttest (left Int64, right Int64) ENGINE = Memory; - -INSERT INTO welch_ttest VALUES (1, 1), (1, 1), (1, 1); -SELECT WelchTTest(0.1)(left, right) from welch_ttest; DROP TABLE IF EXISTS welch_ttest; \ No newline at end of file From 4660da3e5e509bd2abd8674fc2babb4dcbeee1c4 Mon Sep 17 00:00:00 2001 From: antikvist Date: Thu, 18 Jun 2020 22:18:52 +0300 Subject: [PATCH 035/174] welch t-test --- .../AggregateFunctionWelchTTest.h | 34 +++++++++---------- .../0_stateless/01319_welch_ttest.reference | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git 
a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index cc1417e6659..2f56e5e6b6c 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -132,27 +132,27 @@ struct AggregateFunctionWelchTTestData final readBinary(size_y, buf); } - size_t get_size_y() const + size_t getSizeY() const { return size_y; } - size_t get_size_x() const + size_t getSizeX() const { return size_x; } - Float64 get_sx() const + Float64 getSx() const { return static_cast(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); } - Float64 get_sy() const + Float64 getSy() const { return static_cast(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); } - Float64 get_T(Float64 sx, Float64 sy) const + Float64 getT(Float64 sx, Float64 sy) const { if (sx == 0 && sy == 0) { @@ -167,13 +167,13 @@ struct AggregateFunctionWelchTTestData final return static_cast(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); } - Float64 get_degrees_of_freed(Float64 sx, Float64 sy) const + Float64 getDegreesOfFreedom(Float64 sx, Float64 sy) const { return static_cast(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); } - UInt8 get_result(Float64 t, Float64 dof, Float64 parametr) const + UInt8 getResult(Float64 t, Float64 dof, Float64 parametr) const { //find our table int table = 0; @@ -193,7 +193,7 @@ struct AggregateFunctionWelchTTestData final i_dof = 101; } - if (i_dof < 100) + if (i_dof < 1) { i_dof = 1; } @@ -202,12 +202,12 @@ struct AggregateFunctionWelchTTestData final t = abs(t); if (t >= CriticalValuesTable[table][i_dof]) { - return static_cast(1); + return static_cast(0); //in this case we reject the null hypothesis } else { - return static_cast(0); + return static_cast(1); } } }; @@ -292,19 +292,19 @@ public: IColumn & to ) 
const override { - size_t size_x = this->data(place).get_size_x(); - size_t size_y = this->data(place).get_size_y(); + size_t size_x = this->data(place).getSizeX(); + size_t size_y = this->data(place).getSizeY(); if (size_x < 2 || size_y < 2) { throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } - Float64 sx = this->data(place).get_sx(); - Float64 sy = this->data(place).get_sy(); - Float64 t_value = this->data(place).get_T(sx, sy); - Float64 dof = this->data(place).get_degrees_of_freed(sx, sy); - UInt8 result = this->data(place).get_result(t_value, dof, significance_level); + Float64 sx = this->data(place).getSx(); + Float64 sy = this->data(place).getSy(); + Float64 t_value = this->data(place).getT(sx, sy); + Float64 dof = this->data(place).getDegreesOfFreedom(sx, sy); + UInt8 result = this->data(place).getResult(t_value, dof, significance_level); auto & column = static_cast &>(to); column.getData().push_back(result); diff --git a/tests/queries/0_stateless/01319_welch_ttest.reference b/tests/queries/0_stateless/01319_welch_ttest.reference index b261da18d51..aa47d0d46d4 100644 --- a/tests/queries/0_stateless/01319_welch_ttest.reference +++ b/tests/queries/0_stateless/01319_welch_ttest.reference @@ -1,2 +1,2 @@ -1 +0 0 From 839ee63294515a2899d5a5855bfe824231d17319 Mon Sep 17 00:00:00 2001 From: antikvist Date: Sat, 20 Jun 2020 19:31:00 +0300 Subject: [PATCH 036/174] welch --- .../{01319_welch_ttest.reference => 01322_welch_ttest.reference} | 0 .../0_stateless/{01319_welch_ttest.sql => 01322_welch_ttest.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01319_welch_ttest.reference => 01322_welch_ttest.reference} (100%) rename tests/queries/0_stateless/{01319_welch_ttest.sql => 01322_welch_ttest.sql} (100%) diff --git a/tests/queries/0_stateless/01319_welch_ttest.reference b/tests/queries/0_stateless/01322_welch_ttest.reference similarity index 100% rename from 
tests/queries/0_stateless/01319_welch_ttest.reference rename to tests/queries/0_stateless/01322_welch_ttest.reference diff --git a/tests/queries/0_stateless/01319_welch_ttest.sql b/tests/queries/0_stateless/01322_welch_ttest.sql similarity index 100% rename from tests/queries/0_stateless/01319_welch_ttest.sql rename to tests/queries/0_stateless/01322_welch_ttest.sql From 3d89f0e9df325b6830153735263b5efaa829cf0a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 21 Jul 2020 15:41:14 +0300 Subject: [PATCH 037/174] Perf test: bind server to one NUMA node --- docker/test/performance-comparison/compare.sh | 12 ++++++++++-- tests/performance/decimal_aggregates.xml | 2 +- tests/performance/jit_large_requests.xml | 4 +--- tests/performance/string_sort.xml | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 378e87f443b..4bde4f945a6 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -55,12 +55,18 @@ function restart set -m # Spawn servers in their own process groups - left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log & + numactl --membind=0 --cpunodebind=0 --localalloc \ + left/clickhouse-server --config-file=left/config/config.xml \ + -- --path left/db --user_files_path left/db/user_files \ + &>> left-server-log.log & left_pid=$! kill -0 $left_pid disown $left_pid - right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log & + numactl --membind=0 --cpunodebind=0 --localalloc \ + right/clickhouse-server --config-file=right/config/config.xml \ + -- --path right/db --user_files_path right/db/user_files \ + &>> right-server-log.log & right_pid=$! 
kill -0 $right_pid disown $right_pid @@ -909,6 +915,8 @@ case "$stage" in time configure ;& "restart") + numactl --hardware ||: + lscpu ||: time restart ;& "run_tests") diff --git a/tests/performance/decimal_aggregates.xml b/tests/performance/decimal_aggregates.xml index 0c8df88c73c..142d9388404 100644 --- a/tests/performance/decimal_aggregates.xml +++ b/tests/performance/decimal_aggregates.xml @@ -1,6 +1,6 @@ - 30G + 35G CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory diff --git a/tests/performance/jit_large_requests.xml b/tests/performance/jit_large_requests.xml index 805b7f2edb1..6aed7bea544 100644 --- a/tests/performance/jit_large_requests.xml +++ b/tests/performance/jit_large_requests.xml @@ -1,6 +1,4 @@ - - CREATE TABLE jit_test ( a UInt64, @@ -43,7 +41,7 @@ SETTINGS compile_expressions = 0; - + SELECT COUNT() FROM diff --git a/tests/performance/string_sort.xml b/tests/performance/string_sort.xml index ce5a54e2680..5d859398ece 100644 --- a/tests/performance/string_sort.xml +++ b/tests/performance/string_sort.xml @@ -43,7 +43,7 @@ - + From 9f49bf2d82154db6417585505bfab0897d3d1d4e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 22 Jul 2020 01:09:54 +0300 Subject: [PATCH 038/174] fixup --- docker/test/performance-comparison/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 912a8bd12cd..df666af8e8e 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -17,6 +17,7 @@ RUN apt-get update \ libc6-dbg \ moreutils \ ncdu \ + numactl \ p7zip-full \ parallel \ psmisc \ From 2b7c0167cb0fda79b59eecf752a56611eae36ab0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 22 Jul 2020 21:02:56 +0300 Subject: [PATCH 039/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 4bde4f945a6..caaff129cfb 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -55,7 +55,7 @@ function restart set -m # Spawn servers in their own process groups - numactl --membind=0 --cpunodebind=0 --localalloc \ + numactl --cpunodebind=0 --localalloc \ left/clickhouse-server --config-file=left/config/config.xml \ -- --path left/db --user_files_path left/db/user_files \ &>> left-server-log.log & @@ -63,7 +63,7 @@ function restart kill -0 $left_pid disown $left_pid - numactl --membind=0 --cpunodebind=0 --localalloc \ + numactl --cpunodebind=0 --localalloc \ right/clickhouse-server --config-file=right/config/config.xml \ -- --path right/db --user_files_path right/db/user_files \ &>> right-server-log.log & From 15cd448afaf98c57f5300674e451eebc5b1f427e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 23 Jul 2020 17:59:32 +0300 Subject: [PATCH 040/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index caaff129cfb..84aa31fb76b 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -55,7 +55,7 @@ function restart set -m # Spawn servers in their own process groups - numactl --cpunodebind=0 --localalloc \ + numactl --cpunodebind=0 --membind=0 \ left/clickhouse-server --config-file=left/config/config.xml \ -- --path left/db --user_files_path left/db/user_files \ &>> left-server-log.log & @@ -63,7 +63,7 @@ function restart kill -0 $left_pid disown $left_pid - numactl --cpunodebind=0 --localalloc \ + numactl --cpunodebind=0 --membind=0 \ 
right/clickhouse-server --config-file=right/config/config.xml \ -- --path right/db --user_files_path right/db/user_files \ &>> right-server-log.log & From 0e205075394aec860a959c5c4125bf01f04a1c86 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 30 Jul 2020 17:26:47 +0300 Subject: [PATCH 041/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 84aa31fb76b..387e259823f 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -63,7 +63,7 @@ function restart kill -0 $left_pid disown $left_pid - numactl --cpunodebind=0 --membind=0 \ + numactl --cpunodebind=1 --membind=1 \ right/clickhouse-server --config-file=right/config/config.xml \ -- --path right/db --user_files_path right/db/user_files \ &>> right-server-log.log & From 42e5f8ec44b14e65eb3c8efc097a39356b9bf750 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 31 Jul 2020 03:49:36 +0300 Subject: [PATCH 042/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 387e259823f..29576be2903 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -952,7 +952,7 @@ case "$stage" in # to collect the logs. Prefer not to restart, because addresses might change # and we won't be able to process trace_log data. Start in a subshell, so that # it doesn't interfere with the watchdog through `wait`. 
- ( get_profiles || restart || get_profiles ||: ) + ( get_profiles || restart && get_profiles ||: ) # Kill the whole process group, because somehow when the subshell is killed, # the sleep inside remains alive and orphaned. From ff3f378e8adfe21cfdcb412f6e014cc42a4eec79 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 6 Aug 2020 00:58:56 +0300 Subject: [PATCH 043/174] try to rebuild package for 2b7c0167cb0fda79b59eecf752a56611eae36ab0 --- docker/test/performance-comparison/compare.sh | 6 +- .../test/performance-comparison/perf.py.orig | 190 ++++++++++++++++++ 2 files changed, 193 insertions(+), 3 deletions(-) create mode 100755 docker/test/performance-comparison/perf.py.orig diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 29576be2903..caaff129cfb 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -55,7 +55,7 @@ function restart set -m # Spawn servers in their own process groups - numactl --cpunodebind=0 --membind=0 \ + numactl --cpunodebind=0 --localalloc \ left/clickhouse-server --config-file=left/config/config.xml \ -- --path left/db --user_files_path left/db/user_files \ &>> left-server-log.log & @@ -63,7 +63,7 @@ function restart kill -0 $left_pid disown $left_pid - numactl --cpunodebind=1 --membind=1 \ + numactl --cpunodebind=0 --localalloc \ right/clickhouse-server --config-file=right/config/config.xml \ -- --path right/db --user_files_path right/db/user_files \ &>> right-server-log.log & @@ -952,7 +952,7 @@ case "$stage" in # to collect the logs. Prefer not to restart, because addresses might change # and we won't be able to process trace_log data. Start in a subshell, so that # it doesn't interfere with the watchdog through `wait`. 
- ( get_profiles || restart && get_profiles ||: ) + ( get_profiles || restart || get_profiles ||: ) # Kill the whole process group, because somehow when the subshell is killed, # the sleep inside remains alive and orphaned. diff --git a/docker/test/performance-comparison/perf.py.orig b/docker/test/performance-comparison/perf.py.orig new file mode 100755 index 00000000000..c25a3041a67 --- /dev/null +++ b/docker/test/performance-comparison/perf.py.orig @@ -0,0 +1,190 @@ +#!/usr/bin/python3 + +import os +import sys +import itertools +import clickhouse_driver +import xml.etree.ElementTree as et +import argparse +import pprint +import string +import time +import traceback + +stage_start_seconds = time.perf_counter() + +def report_stage_end(stage_name): + global stage_start_seconds + print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds)) + stage_start_seconds = time.perf_counter() + +report_stage_end('start') + +parser = argparse.ArgumentParser(description='Run performance test.') +# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. +parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') +parser.add_argument('--host', nargs='*', default=['127.0.0.1', '127.0.0.1'], help="Server hostname. Parallel to '--port'.") +parser.add_argument('--port', nargs='*', default=[9001, 9002], help="Server port. Parallel to '--host'.") +parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 7)), help='Number of query runs per server. 
Defaults to CHPC_RUNS environment variable.') +parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.') +args = parser.parse_args() + +tree = et.parse(args.file[0]) +root = tree.getroot() + +# Skip long tests +for tag in root.findall('.//tag'): + if tag.text == 'long': + print('skipped\tTest is tagged as long.') + sys.exit(0) + +# Check main metric +main_metric_element = root.find('main_metric/*') +if main_metric_element is not None and main_metric_element.tag != 'min_time': + raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag)) + +# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't. +infinite_sign = root.find('.//average_speed_not_changing_for_ms') +if infinite_sign is not None: + raise Exception('Looks like the test is infinite (sign 1)') + +# Open connections +servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] +connections = [clickhouse_driver.Client(**server) for server in servers] + +for s in servers: + print('server\t{}\t{}'.format(s['host'], s['port'])) + +report_stage_end('connect') + +# Process query parameters +subst_elems = root.findall('substitutions/substitution') +available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } +for e in subst_elems: + available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')] + +# Take care to keep the order of queries -- sometimes we have DROP IF EXISTS +# followed by CREATE in create queries section, so the order matters. 
+def substitute_parameters(query_templates): + result = [] + for q in query_templates: + keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n) + values = [available_parameters[k] for k in keys] + result.extend([ + q.format(**dict(zip(keys, values_combo))) + for values_combo in itertools.product(*values)]) + return result + +report_stage_end('substitute') + +# Run drop queries, ignoring errors. Do this before all other activity, because +# clickhouse_driver disconnects on error (this is not configurable), and the new +# connection loses the changes in settings. +drop_query_templates = [q.text for q in root.findall('drop_query')] +drop_queries = substitute_parameters(drop_query_templates) +for c in connections: + for q in drop_queries: + try: + c.execute(q) + except: + traceback.print_exc() + pass + +report_stage_end('drop1') + +# Apply settings +settings = root.findall('settings/*') +for c in connections: + for s in settings: + c.execute("set {} = '{}'".format(s.tag, s.text)) + +report_stage_end('settings') + +# Check tables that should exist. If they don't exist, just skip this test. 
+tables = [e.text for e in root.findall('preconditions/table_exists')] +for t in tables: + for c in connections: + try: + res = c.execute("show create table {}".format(t)) + except: + print('skipped\t' + traceback.format_exception_only(*sys.exc_info()[:2])[-1]) + traceback.print_exc() + sys.exit(0) + +report_stage_end('preconditions') + +# Run create queries +create_query_templates = [q.text for q in root.findall('create_query')] +create_queries = substitute_parameters(create_query_templates) +for c in connections: + for q in create_queries: + c.execute(q) + +# Run fill queries +fill_query_templates = [q.text for q in root.findall('fill_query')] +fill_queries = substitute_parameters(fill_query_templates) +for c in connections: + for q in fill_queries: + c.execute(q) + +report_stage_end('fill') + +# Run test queries +def tsv_escape(s): + return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') + +test_query_templates = [q.text for q in root.findall('query')] +test_queries = substitute_parameters(test_query_templates) + +report_stage_end('substitute2') + +for q in test_queries: + # Prewarm: run once on both servers. Helps to bring the data into memory, + # precompile the queries, etc. +<<<<<<< HEAD + for conn_index, c in enumerate(connections): + res = c.execute(q, query_id = 'prewarm {} {}'.format(0, q)) + print('prewarm\t' + tsv_escape(q) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) +======= + try: + for conn_index, c in enumerate(connections): + prewarm_id = f'{query_prefix}.prewarm0' + res = c.execute(q, query_id = prewarm_id) + print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') + except KeyboardInterrupt: + raise + except: + # If prewarm fails for some query -- skip it, and try to test the others. + # This might happen if the new test introduces some function that the + # old server doesn't support. Still, report it as an error. 
+ # FIXME the driver reconnects on error and we lose settings, so this might + # lead to further errors or unexpected behavior. + print(traceback.format_exc(), file=sys.stderr) + continue +>>>>>>> 4b1bb43543... Merge pull request #11076 from ClickHouse/aku/join-error-messages + + # Now, perform measured runs. + # Track the time spent by the client to process this query, so that we can notice + # out the queries that take long to process on the client side, e.g. by sending + # excessive data. + start_seconds = time.perf_counter() + server_seconds = 0 + for run in range(0, args.runs): + for conn_index, c in enumerate(connections): + res = c.execute(q) + print('query\t' + tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) + server_seconds += c.last_query.elapsed + + client_seconds = time.perf_counter() - start_seconds + print('client-time\t{}\t{}\t{}'.format(tsv_escape(q), client_seconds, server_seconds)) + +report_stage_end('benchmark') + +# Run drop queries +drop_query_templates = [q.text for q in root.findall('drop_query')] +drop_queries = substitute_parameters(drop_query_templates) +for c in connections: + for q in drop_queries: + c.execute(q) + +report_stage_end('drop2') From 5d31442fa88271072a5478b95f4903977cef4fb1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 26 Aug 2020 03:26:54 +0300 Subject: [PATCH 044/174] Delete perf.py.orig --- .../test/performance-comparison/perf.py.orig | 190 ------------------ 1 file changed, 190 deletions(-) delete mode 100755 docker/test/performance-comparison/perf.py.orig diff --git a/docker/test/performance-comparison/perf.py.orig b/docker/test/performance-comparison/perf.py.orig deleted file mode 100755 index c25a3041a67..00000000000 --- a/docker/test/performance-comparison/perf.py.orig +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/python3 - -import os -import sys -import itertools -import clickhouse_driver -import xml.etree.ElementTree as et -import argparse -import pprint 
-import string -import time -import traceback - -stage_start_seconds = time.perf_counter() - -def report_stage_end(stage_name): - global stage_start_seconds - print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds)) - stage_start_seconds = time.perf_counter() - -report_stage_end('start') - -parser = argparse.ArgumentParser(description='Run performance test.') -# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. -parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') -parser.add_argument('--host', nargs='*', default=['127.0.0.1', '127.0.0.1'], help="Server hostname. Parallel to '--port'.") -parser.add_argument('--port', nargs='*', default=[9001, 9002], help="Server port. Parallel to '--host'.") -parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 7)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.') -parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.') -args = parser.parse_args() - -tree = et.parse(args.file[0]) -root = tree.getroot() - -# Skip long tests -for tag in root.findall('.//tag'): - if tag.text == 'long': - print('skipped\tTest is tagged as long.') - sys.exit(0) - -# Check main metric -main_metric_element = root.find('main_metric/*') -if main_metric_element is not None and main_metric_element.tag != 'min_time': - raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag)) - -# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't. 
-infinite_sign = root.find('.//average_speed_not_changing_for_ms') -if infinite_sign is not None: - raise Exception('Looks like the test is infinite (sign 1)') - -# Open connections -servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] -connections = [clickhouse_driver.Client(**server) for server in servers] - -for s in servers: - print('server\t{}\t{}'.format(s['host'], s['port'])) - -report_stage_end('connect') - -# Process query parameters -subst_elems = root.findall('substitutions/substitution') -available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } -for e in subst_elems: - available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')] - -# Take care to keep the order of queries -- sometimes we have DROP IF EXISTS -# followed by CREATE in create queries section, so the order matters. -def substitute_parameters(query_templates): - result = [] - for q in query_templates: - keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n) - values = [available_parameters[k] for k in keys] - result.extend([ - q.format(**dict(zip(keys, values_combo))) - for values_combo in itertools.product(*values)]) - return result - -report_stage_end('substitute') - -# Run drop queries, ignoring errors. Do this before all other activity, because -# clickhouse_driver disconnects on error (this is not configurable), and the new -# connection loses the changes in settings. -drop_query_templates = [q.text for q in root.findall('drop_query')] -drop_queries = substitute_parameters(drop_query_templates) -for c in connections: - for q in drop_queries: - try: - c.execute(q) - except: - traceback.print_exc() - pass - -report_stage_end('drop1') - -# Apply settings -settings = root.findall('settings/*') -for c in connections: - for s in settings: - c.execute("set {} = '{}'".format(s.tag, s.text)) - -report_stage_end('settings') - -# Check tables that should exist. If they don't exist, just skip this test. 
-tables = [e.text for e in root.findall('preconditions/table_exists')] -for t in tables: - for c in connections: - try: - res = c.execute("show create table {}".format(t)) - except: - print('skipped\t' + traceback.format_exception_only(*sys.exc_info()[:2])[-1]) - traceback.print_exc() - sys.exit(0) - -report_stage_end('preconditions') - -# Run create queries -create_query_templates = [q.text for q in root.findall('create_query')] -create_queries = substitute_parameters(create_query_templates) -for c in connections: - for q in create_queries: - c.execute(q) - -# Run fill queries -fill_query_templates = [q.text for q in root.findall('fill_query')] -fill_queries = substitute_parameters(fill_query_templates) -for c in connections: - for q in fill_queries: - c.execute(q) - -report_stage_end('fill') - -# Run test queries -def tsv_escape(s): - return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') - -test_query_templates = [q.text for q in root.findall('query')] -test_queries = substitute_parameters(test_query_templates) - -report_stage_end('substitute2') - -for q in test_queries: - # Prewarm: run once on both servers. Helps to bring the data into memory, - # precompile the queries, etc. -<<<<<<< HEAD - for conn_index, c in enumerate(connections): - res = c.execute(q, query_id = 'prewarm {} {}'.format(0, q)) - print('prewarm\t' + tsv_escape(q) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) -======= - try: - for conn_index, c in enumerate(connections): - prewarm_id = f'{query_prefix}.prewarm0' - res = c.execute(q, query_id = prewarm_id) - print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') - except KeyboardInterrupt: - raise - except: - # If prewarm fails for some query -- skip it, and try to test the others. - # This might happen if the new test introduces some function that the - # old server doesn't support. Still, report it as an error. 
- # FIXME the driver reconnects on error and we lose settings, so this might - # lead to further errors or unexpected behavior. - print(traceback.format_exc(), file=sys.stderr) - continue ->>>>>>> 4b1bb43543... Merge pull request #11076 from ClickHouse/aku/join-error-messages - - # Now, perform measured runs. - # Track the time spent by the client to process this query, so that we can notice - # out the queries that take long to process on the client side, e.g. by sending - # excessive data. - start_seconds = time.perf_counter() - server_seconds = 0 - for run in range(0, args.runs): - for conn_index, c in enumerate(connections): - res = c.execute(q) - print('query\t' + tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) - server_seconds += c.last_query.elapsed - - client_seconds = time.perf_counter() - start_seconds - print('client-time\t{}\t{}\t{}'.format(tsv_escape(q), client_seconds, server_seconds)) - -report_stage_end('benchmark') - -# Run drop queries -drop_query_templates = [q.text for q in root.findall('drop_query')] -drop_queries = substitute_parameters(drop_query_templates) -for c in connections: - for q in drop_queries: - c.execute(q) - -report_stage_end('drop2') From ab19bb25fd8c286713580649ad1c183d493ea5dc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 22 Sep 2020 14:31:33 +0300 Subject: [PATCH 045/174] disable percpu arenas --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index dd7f9f3e2bb..563d41301b1 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -81,7 +81,7 @@ if (NOT EXTERNAL_JEMALLOC_LIBRARY_FOUND OR NOT EXTERNAL_JEMALLOC_LIBRARY_WORKS) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
- set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000") + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") endif() From 833c07f1f76b71527262362545fef3973fad686c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 23 Sep 2020 11:31:18 +0300 Subject: [PATCH 046/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 2fd5641b9fd..7851cf8e81d 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -7,6 +7,10 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# https://github.com/jemalloc/jemalloc/wiki/Getting-Started +export MALLOC_CONF="percpu_arena:disabled" +echo "$MALLOC_CONF" > /etc/malloc.conf ||: + function wait_for_server # port, pid { for _ in {1..60} From bb51aade56d478588a8db60315c30870208aeec3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 23 Sep 2020 12:02:22 +0300 Subject: [PATCH 047/174] Update docker/test/performance-comparison/compare.sh Co-authored-by: Azat Khuzhin --- docker/test/performance-comparison/compare.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 7851cf8e81d..35bb5890488 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -8,8 +8,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # https://github.com/jemalloc/jemalloc/wiki/Getting-Started 
-export MALLOC_CONF="percpu_arena:disabled" -echo "$MALLOC_CONF" > /etc/malloc.conf ||: +ln -s "percpu_arena:disabled" > /etc/malloc.conf function wait_for_server # port, pid { From d96c89972cc9e3ff7e8baca7a3899c1aeffed891 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 23 Sep 2020 12:04:13 +0300 Subject: [PATCH 048/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 35bb5890488..ed89e6f875c 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -8,7 +8,8 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # https://github.com/jemalloc/jemalloc/wiki/Getting-Started -ln -s "percpu_arena:disabled" > /etc/malloc.conf +export MALLOC_CONF="percpu_arena:disabled" +ln -s "percpu_arena:disabled" /etc/malloc.conf function wait_for_server # port, pid { From 26abe8cb30819eea1c1f3383cc99620ab2e9da9a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 24 Sep 2020 12:24:30 +0300 Subject: [PATCH 049/174] Update compare.sh --- docker/test/performance-comparison/compare.sh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index ed89e6f875c..7708d11d81f 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -8,8 +8,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # https://github.com/jemalloc/jemalloc/wiki/Getting-Started -export MALLOC_CONF="percpu_arena:disabled" -ln -s "percpu_arena:disabled" /etc/malloc.conf +export MALLOC_CONF="confirm_conf:true" 
function wait_for_server # port, pid { @@ -83,18 +82,16 @@ function restart set -m # Spawn servers in their own process groups - numactl --cpunodebind=0 --localalloc \ - left/clickhouse-server --config-file=left/config/config.xml \ - -- --path left/db --user_files_path left/db/user_files \ - &>> left-server-log.log & + left/clickhouse-server --config-file=left/config/config.xml \ + -- --path left/db --user_files_path left/db/user_files \ + &>> left-server-log.log & left_pid=$! kill -0 $left_pid disown $left_pid - numactl --cpunodebind=0 --localalloc \ - right/clickhouse-server --config-file=right/config/config.xml \ - -- --path right/db --user_files_path right/db/user_files \ - &>> right-server-log.log & + right/clickhouse-server --config-file=right/config/config.xml \ + -- --path right/db --user_files_path right/db/user_files \ + &>> right-server-log.log & right_pid=$! kill -0 $right_pid disown $right_pid From 425150e78308941c5e7a7c097ac66c84721008df Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 25 Sep 2020 13:19:37 +0300 Subject: [PATCH 050/174] bind to different nodes --- docker/test/performance-comparison/compare.sh | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 7708d11d81f..98040b037f6 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -7,9 +7,6 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# https://github.com/jemalloc/jemalloc/wiki/Getting-Started -export MALLOC_CONF="confirm_conf:true" - function wait_for_server # port, pid { for _ in {1..60} @@ -80,24 +77,31 @@ function restart while killall clickhouse-server; do echo . 
; sleep 1 ; done echo all killed - set -m # Spawn servers in their own process groups + # https://github.com/jemalloc/jemalloc/wiki/Getting-Started + export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true" - left/clickhouse-server --config-file=left/config/config.xml \ - -- --path left/db --user_files_path left/db/user_files \ - &>> left-server-log.log & + set -m # Spawn servers in their own process groups + + numactl --cpunodebind=1 --localalloc \ + left/clickhouse-server --config-file=left/config/config.xml \ + -- --path left/db --user_files_path left/db/user_files \ + &>> left-server-log.log & left_pid=$! kill -0 $left_pid disown $left_pid - right/clickhouse-server --config-file=right/config/config.xml \ - -- --path right/db --user_files_path right/db/user_files \ - &>> right-server-log.log & + numactl --cpunodebind=0 --localalloc \ + right/clickhouse-server --config-file=right/config/config.xml \ + -- --path right/db --user_files_path right/db/user_files \ + &>> right-server-log.log & right_pid=$! 
kill -0 $right_pid disown $right_pid set +m + unset MALLOC_CONF + wait_for_server 9001 $left_pid echo left ok From bde19bf240571ca6d05450412a838ae4bea2f782 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 28 Sep 2020 15:26:51 +0300 Subject: [PATCH 051/174] restart the build From 824d5b093c453506cb64f2abc67ce3034a184da6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 30 Sep 2020 14:32:49 +0300 Subject: [PATCH 052/174] bind everything to node 0 --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- docker/test/performance-comparison/Dockerfile | 9 ++++++++- docker/test/performance-comparison/compare.sh | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 563d41301b1..dd7f9f3e2bb 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -81,7 +81,7 @@ if (NOT EXTERNAL_JEMALLOC_LIBRARY_FOUND OR NOT EXTERNAL_JEMALLOC_LIBRARY_WORKS) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") endif() diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index a4f8af2f388..99f2f9b2b4b 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -37,7 +37,14 @@ RUN apt-get update \ COPY * / -CMD /entrypoint.sh +# Bind everything to node 0 early. We have to bind both servers and the tmpfs +# on which the database is stored. 
How to do it through Yandex Sandbox API is +# unclear, but by default tmpfs uses 'process allocation policy', not sure +# which process but hopefully the one that writes to it, so just bind the +# downloader script as well. +# We could also try to remount it with proper options in Sandbox task. +# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt +CMD ['numactl', '--cpunodebind=0', '--localalloc', '/entrypoint.sh'] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 0134d03aea1..2f03ecc9ad7 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -77,23 +77,25 @@ function restart while killall clickhouse-server; do echo . ; sleep 1 ; done echo all killed + # Disable percpu arenas because they segfault when the process is bound to + # a particular NUMA node: https://github.com/jemalloc/jemalloc/pull/1939 + # + # About the jemalloc settings: # https://github.com/jemalloc/jemalloc/wiki/Getting-Started export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true" set -m # Spawn servers in their own process groups - numactl --cpunodebind=1 --localalloc \ - left/clickhouse-server --config-file=left/config/config.xml \ - -- --path left/db --user_files_path left/db/user_files \ - &>> left-server-log.log & + left/clickhouse-server --config-file=left/config/config.xml \ + -- --path left/db --user_files_path left/db/user_files \ + &>> left-server-log.log & left_pid=$! 
kill -0 $left_pid disown $left_pid - numactl --cpunodebind=0 --localalloc \ - right/clickhouse-server --config-file=right/config/config.xml \ - -- --path right/db --user_files_path right/db/user_files \ - &>> right-server-log.log & + right/clickhouse-server --config-file=right/config/config.xml \ + -- --path right/db --user_files_path right/db/user_files \ + &>> right-server-log.log & right_pid=$! kill -0 $right_pid disown $right_pid From aa543a2d3d51f16c6d043a36bb9f4249ba35cd05 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 30 Sep 2020 17:40:24 +0300 Subject: [PATCH 053/174] quotes --- docker/test/performance-comparison/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 99f2f9b2b4b..535f7de9e29 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -44,7 +44,7 @@ COPY * / # downloader script as well. # We could also try to remount it with proper options in Sandbox task. 
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ['numactl', '--cpunodebind=0', '--localalloc', '/entrypoint.sh'] +CMD ["numactl", "--cpunodebind=0", "--localalloc", "/entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison From 22a0ec0892c6058acb9b0e09f3f4b57f5b99647e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 30 Sep 2020 17:55:40 +0300 Subject: [PATCH 054/174] try split debug -Og build in fasttest --- docker/test/fasttest/run.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4a47fcfe4dc..e16a70fc3b2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -124,7 +124,20 @@ git submodule foreach git clean -xfd function run_cmake { -CMAKE_LIBS_CONFIG=("-DENABLE_LIBRARIES=0" "-DENABLE_TESTS=0" "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1") +CMAKE_LIBS_CONFIG=( + "-DENABLE_LIBRARIES=0" + "-DENABLE_TESTS=0" + "-DENABLE_UTILS=0" + "-DENABLE_EMBEDDED_COMPILER=0" + "-DENABLE_THINLTO=0" + "-DUSE_UNWIND=1" + "-DUSE_STATIC_LIBRARIES=0" + "-DSPLIT_SHARED_LIBRARIES=1" + "-DCLICKHOUSE_SPLIT_BINARY=1" + "-DCMAKE_BUILD_TYPE=Debug" + "-DCMAKE_C_FLAGS=-Og" + "-DCMAKE_CXX_FLAGS=-Og" +) # TODO remove this? we don't use ccache anyway. An option would be to download it # from S3 simultaneously with cloning. 
From c5d1f51f5836da49a33c531dad6cd46dccc271d5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 30 Sep 2020 19:14:20 +0300 Subject: [PATCH 055/174] just split --- docker/test/fasttest/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e16a70fc3b2..f769b342846 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -134,9 +134,9 @@ CMAKE_LIBS_CONFIG=( "-DUSE_STATIC_LIBRARIES=0" "-DSPLIT_SHARED_LIBRARIES=1" "-DCLICKHOUSE_SPLIT_BINARY=1" - "-DCMAKE_BUILD_TYPE=Debug" - "-DCMAKE_C_FLAGS=-Og" - "-DCMAKE_CXX_FLAGS=-Og" +# "-DCMAKE_BUILD_TYPE=Debug" +# "-DCMAKE_C_FLAGS=-Og" +# "-DCMAKE_CXX_FLAGS=-Og" ) # TODO remove this? we don't use ccache anyway. An option would be to download it @@ -255,7 +255,7 @@ TESTS_TO_SKIP=( 00974_query_profiler # Look at DistributedFilesToInsert, so cannot run in parallel. - 01460_DistributedFilesToInsert + 01457_DistributedFilesToInsert ) time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" From 74f8e41b55a2504c3419cb33e7c2429a2c1e116a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Oct 2020 13:56:56 +0300 Subject: [PATCH 056/174] calculate on all nodes --- docker/test/performance-comparison/compare.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 2f03ecc9ad7..08b18758874 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -462,7 +462,10 @@ wait unset IFS ) -parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log +# The comparison script might be bound to one NUMA node for better test +# stability, and the calculation runs out of memory because of this. Use +# all nodes. 
+numactl --all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log clickhouse-local --query " -- Join the metric names back to the metric statistics we've calculated, and make From 6802db6954e1ce36a6c1eea997f47f7331c139a9 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Mon, 12 Oct 2020 09:59:35 +0300 Subject: [PATCH 057/174] Database or Table Engine description template upd --- docs/README.md | 2 +- ...ate-table-engine.md => template-engine.md} | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) rename docs/_description_templates/{template-table-engine.md => template-engine.md} (59%) diff --git a/docs/README.md b/docs/README.md index c7fa0221726..8b3066501bf 100644 --- a/docs/README.md +++ b/docs/README.md @@ -195,7 +195,7 @@ Templates: - [Function](_description_templates/template-function.md) - [Setting](_description_templates/template-setting.md) -- [Table engine](_description_templates/template-table-engine.md) +- [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) diff --git a/docs/_description_templates/template-table-engine.md b/docs/_description_templates/template-engine.md similarity index 59% rename from docs/_description_templates/template-table-engine.md rename to docs/_description_templates/template-engine.md index c1bfcb3ec86..35181881134 100644 --- a/docs/_description_templates/template-table-engine.md +++ b/docs/_description_templates/template-engine.md @@ -1,8 +1,14 @@ # EngineName {#enginename} -- What the engine does. +- What the Database/Table engine does. - Relations with other engines if they exist. +## Creating a Database {#creating-a-database} +``` sql + CREATE DATABASE ... +``` +or + ## Creating a Table {#creating-a-table} ``` sql CREATE TABLE ... 
@@ -10,12 +16,19 @@ **Engine Parameters** -**Query Clauses** +**Query Clauses** (for Table engines only) -## Virtual columns {#virtual-columns} +## Virtual columns {#virtual-columns} (for Table engines only) List and virtual columns with description, if they exist. +## Data Types Support {#data_types-support} (for Database engines only) + +| EngineName | ClickHouse | +|-----------------------|------------------------------------| +| NativeDataTypeName | [ClickHouseDataTypeName](link#) | + + ## Specifics and recommendations {#specifics-and-recommendations} Algorithms From 20ebd4fd5bde96ff52ba2fa680d1b6a12b08191b Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Mon, 12 Oct 2020 19:37:04 +0300 Subject: [PATCH 058/174] better --- .../AggregateFunctionWelchTTest.cpp | 26 +-- .../AggregateFunctionWelchTTest.h | 205 +++++------------- .../0_stateless/01322_welch_ttest.reference | 8 +- .../queries/0_stateless/01322_welch_ttest.sql | 19 +- 4 files changed, 81 insertions(+), 177 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 3d2e98e2a0e..d9fce97680c 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -21,41 +21,25 @@ namespace DB namespace { -AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, - const DataTypes & argument_types, - const Array & parameters) +AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) { assertBinary(name, argument_types); - - // default value - Float64 significance_level = 0.1; - if (parameters.size() > 1) - { - throw Exception("Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } - - if (!parameters.empty()) - { - significance_level = applyVisitor(FieldVisitorConvertToNumber(), parameters[0]); 
- } + assertNoParameters(name, parameters); AggregateFunctionPtr res; if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) { - throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); } - else { - res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], significance_level, - argument_types, parameters)); + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); } - if (!res) { - throw Exception("Aggregate function " + name + " only supports numerical types.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); } return res; diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 2f56e5e6b6c..e445278e9e7 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -24,38 +25,10 @@ extern const int BAD_ARGUMENTS; namespace DB { -// hard-codded values - part of the algorithm -#define SIGN_LVL_CNT 6 - -Float64 CriticalValuesTable[SIGN_LVL_CNT][102] = { - // for significance level = 0.2 - {0.2, 3.078, 1.886, 1.638, 1.533, 1.476, 1.44, 1.415, 1.397, 1.383, 1.372, 1.363, 1.356, 1.35, 1.345, 1.341, 1.337, 1.333, 1.33, 1.328, 1.325, 1.323, 1.321, 1.319, 1.318, 1.316, 1.315, 1.314, 1.313, 1.311, 1.31, 1.309, 1.309, 1.308, 1.307, 1.306, 1.306, 1.305, 1.304, 1.304, 1.303, 1.303, 1.302, 1.302, 1.301, 1.301, 1.3, 1.3, 1.299, 1.299, 1.299, 1.298, 1.298, 1.298, 1.297, 1.297, 1.297, 1.297, 1.296, 1.296, 1.296, 1.296, 1.295, 1.295, 1.295, 1.295, 1.295, 1.294, 1.294, 1.294, 1.294, 1.294, 1.293, 1.293, 1.293, 1.293, 1.293, 1.293, 1.292, 1.292, 1.292, 1.292, 1.292, 
1.292, 1.292, 1.292, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.291, 1.29, 1.29, 1.29, 1.29, 1.29, 1.282}, - - // for significance level = 0.1 - {0.1, 6.314, 2.92, 2.353, 2.132, 2.015, 1.943, 1.895, 1.86, 1.833, 1.812, 1.796, 1.782, 1.771, 1.761, 1.753, 1.746, 1.74, 1.734, 1.729, 1.725, 1.721, 1.717, 1.714, 1.711, 1.708, 1.706, 1.703, 1.701, 1.699, 1.697, 1.696, 1.694, 1.692, 1.691, 1.69, 1.688, 1.687, 1.686, 1.685, 1.684, 1.683, 1.682, 1.681, 1.68, 1.679, 1.679, 1.678, 1.677, 1.677, 1.676, 1.675, 1.675, 1.674, 1.674, 1.673, 1.673, 1.672, 1.672, 1.671, 1.671, 1.67, 1.67, 1.669, 1.669, 1.669, 1.668, 1.668, 1.668, 1.667, 1.667, 1.667, 1.666, 1.666, 1.666, 1.665, 1.665, 1.665, 1.665, 1.664, 1.664, 1.664, 1.664, 1.663, 1.663, 1.663, 1.663, 1.663, 1.662, 1.662, 1.662, 1.662, 1.662, 1.661, 1.661, 1.661, 1.661, 1.661, 1.661, 1.66, 1.66, 1.645}, - - // for significance level = 0.05 - {0.05, 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.16, 2.145, 2.131, 2.12, 2.11, 2.101, 2.093, 2.086, 2.08, 2.074, 2.069, 2.064, 2.06, 2.056, 2.052, 2.048, 2.045, 2.042, 2.04, 2.037, 2.035, 2.032, 2.03, 2.028, 2.026, 2.024, 2.023, 2.021, 2.02, 2.018, 2.017, 2.015, 2.014, 2.013, 2.012, 2.011, 2.01, 2.009, 2.008, 2.007, 2.006, 2.005, 2.004, 2.003, 2.002, 2.002, 2.001, 2.0, 2.0, 1.999, 1.998, 1.998, 1.997, 1.997, 1.996, 1.995, 1.995, 1.994, 1.994, 1.993, 1.993, 1.993, 1.992, 1.992, 1.991, 1.991, 1.99, 1.99, 1.99, 1.989, 1.989, 1.989, 1.988, 1.988, 1.988, 1.987, 1.987, 1.987, 1.986, 1.986, 1.986, 1.986, 1.985, 1.985, 1.985, 1.984, 1.984, 1.984, 1.96}, - - // for significance level = 0.02 - {0.02, 31.821, 6.965, 4.541, 3.747, 3.365, 3.143, 2.998, 2.896, 2.821, 2.764, 2.718, 2.681, 2.65, 2.624, 2.602, 2.583, 2.567, 2.552, 2.539, 2.528, 2.518, 2.508, 2.5, 2.492, 2.485, 2.479, 2.473, 2.467, 2.462, 2.457, 2.453, 2.449, 2.445, 2.441, 2.438, 2.434, 2.431, 2.429, 2.426, 2.423, 2.421, 2.418, 2.416, 2.414, 2.412, 2.41, 2.408, 2.407, 2.405, 
2.403, 2.402, 2.4, 2.399, 2.397, 2.396, 2.395, 2.394, 2.392, 2.391, 2.39, 2.389, 2.388, 2.387, 2.386, 2.385, 2.384, 2.383, 2.382, 2.382, 2.381, 2.38, 2.379, 2.379, 2.378, 2.377, 2.376, 2.376, 2.375, 2.374, 2.374, 2.373, 2.373, 2.372, 2.372, 2.371, 2.37, 2.37, 2.369, 2.369, 2.368, 2.368, 2.368, 2.367, 2.367, 2.366, 2.366, 2.365, 2.365, 2.365, 2.364, 2.326}, - - // for significance level = 0.01 - {0.01, 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.25, 3.169, 3.106, 3.055, 3.012, 2.977, 2.947, 2.921, 2.898, 2.878, 2.861, 2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756, 2.75, 2.744, 2.738, 2.733, 2.728, 2.724, 2.719, 2.715, 2.712, 2.708, 2.704, 2.701, 2.698, 2.695, 2.692, 2.69, 2.687, 2.685, 2.682, 2.68, 2.678, 2.676, 2.674, 2.672, 2.67, 2.668, 2.667, 2.665, 2.663, 2.662, 2.66, 2.659, 2.657, 2.656, 2.655, 2.654, 2.652, 2.651, 2.65, 2.649, 2.648, 2.647, 2.646, 2.645, 2.644, 2.643, 2.642, 2.641, 2.64, 2.64, 2.639, 2.638, 2.637, 2.636, 2.636, 2.635, 2.634, 2.634, 2.633, 2.632, 2.632, 2.631, 2.63, 2.63, 2.629, 2.629, 2.628, 2.627, 2.627, 2.626, 2.626, 2.576}, - - // for significance level = 0.002 - {0.002, 318.313, 22.327, 10.215, 7.173, 5.893, 5.208, 4.782, 4.499, 4.296, 4.143, 4.024, 3.929, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527, 3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365, 3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291, 3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248, 3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22, 3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201, 3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187, 3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175, 3.175, 3.174, 3.09} -}; - -// our algorithm implementation via vectors: -// https://gist.github.com/ltybc-coder/792748cfdb2f7cadef424ffb7b011c71 -// col, col, bool 
template -//template struct AggregateFunctionWelchTTestData final { - size_t size_x = 0; size_t size_y = 0; X sum_x = static_cast(0); @@ -65,25 +38,6 @@ struct AggregateFunctionWelchTTestData final Float64 mean_x = static_cast(0); Float64 mean_y = static_cast(0); - /* - not yet sure how to use them - void add_x(X x) - { - mean_x = (Float64)(sum_x + x) / (size_x + 1); - size_x ++; - sum_x += x; - square_sum_x += x * x; - } - - void add_y(Y y) - { - mean_y = (sum_y + y) / (size_y + 1); - size_y ++; - sum_y += y; - square_sum_y += y * y; - } - */ - void add(X x, Y y) { sum_x += x; @@ -142,100 +96,80 @@ struct AggregateFunctionWelchTTestData final return size_x; } - Float64 getSx() const + Float64 getSxSquared() const { - return static_cast(square_sum_x + size_x * mean_x * mean_x - 2 * mean_x * sum_x) / (size_x - 1); + /// The original formulae looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2} + /// But we made some mathematical transformations not to store original sequences. + /// Also we dropped sqrt, because later it will be squared later. + return static_cast(square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x) / (size_x - 1); } - Float64 getSy() const + Float64 getSySquared() const { - return static_cast(square_sum_y + size_y * mean_y * mean_y - 2 * mean_y * sum_y) / (size_y - 1); + /// The original formulae looks like \frac{1}{size_y - 1} \sum_{i = 1}^{size_y}{(y_i - \bar{y}) ^ 2} + /// But we made some mathematical transformations not to store original sequences. + /// Also we dropped sqrt, because later it will be squared later. 
+ return static_cast(square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y) / (size_y - 1); } - Float64 getT(Float64 sx, Float64 sy) const + Float64 getTStatisticSquared() const { - if (sx == 0 && sy == 0) + if (size_x == 0 || size_y == 0) { - throw Exception("division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); } - if (sx == -sy && size_x == size_y) - { - throw Exception("division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); - } - - return static_cast(mean_x - mean_y) / std::sqrt(sx / size_x + sy / size_y); + return std::pow(mean_x - mean_y, 2) / (getSxSquared() / size_x + getSySquared() / size_y); } - Float64 getDegreesOfFreedom(Float64 sx, Float64 sy) const + Float64 getDegreesOfFreedom() const { - return static_cast(sx / size_x + sy / size_y) * (sx / size_x + sy / size_y) / - ((sx * sx / (size_x * size_x * (size_x - 1))) + (sy * sy / (size_y * size_y * (size_y - 1)))); + auto sx = getSxSquared(); + auto sy = getSySquared(); + Float64 numerator = std::pow(sx / size_x + sy / size_y, 2); + Float64 denominator_first = std::pow(sx, 2) / (std::pow(size_x, 2) * (size_x - 1)); + Float64 denominator_second = std::pow(sy, 2) / (std::pow(size_y, 2) * (size_y - 1)); + return numerator / (denominator_first + denominator_second); } - UInt8 getResult(Float64 t, Float64 dof, Float64 parametr) const + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func, size_t iterations = 1e6) { - //find our table - int table = 0; - for (int i = 0; i < SIGN_LVL_CNT; ++i) - { - if (CriticalValuesTable[i][0] == parametr) - { - table = i; - } - } + double h = (b - a) / iterations; + Float64 sum_odds = 0.0; + for (size_t i = 1; i < iterations; i += 2) + sum_odds += func(a + i * h); + Float64 sum_evens = 0.0; + for (size_t i = 2; i < iterations; i += 2) + sum_evens += func(a + i * 
h); + return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3; + } - //round or make infinity dof - int i_dof = static_cast(dof); + Float64 getPValue() const + { + const Float64 v = getDegreesOfFreedom(); + const Float64 t = getTStatisticSquared(); + auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); }; + Float64 numenator = integrateSimpson(0, v / (t + v), f); + Float64 denominator = std::exp(std::lgammal(v/2) + std::lgammal(0.5) - std::lgammal(v/2 + 0.5)); + return numenator / denominator; + } - if (i_dof > 100) - { - i_dof = 101; - } - - if (i_dof < 1) - { - i_dof = 1; - } - - //check if abs of t is greater than table[dof] - t = abs(t); - if (t >= CriticalValuesTable[table][i_dof]) - { - return static_cast(0); - //in this case we reject the null hypothesis - } - else - { - return static_cast(1); - } + Float64 getResult() const + { + return getPValue(); } }; +/// Returns p-value template -class AggregateFunctionWelchTTest : public - IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest - > +class AggregateFunctionWelchTTest : + public IAggregateFunctionDataHelper,AggregateFunctionWelchTTest> { -private: - Float64 significance_level; - public: - AggregateFunctionWelchTTest( - Float64 sglvl_, - const DataTypes & arguments, - const Array & params - ): - IAggregateFunctionDataHelper< - AggregateFunctionWelchTTestData, - AggregateFunctionWelchTTest - > ({arguments}, params), significance_level(sglvl_) - { - // notice: arguments has been in factory - } + AggregateFunctionWelchTTest(const DataTypes & arguments) + : IAggregateFunctionDataHelper, AggregateFunctionWelchTTest> ({arguments}, {}) + {} String getName() const override { @@ -244,15 +178,10 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared>(); + return std::make_shared>(); } - void add( - AggregateDataPtr place, - const IColumn ** columns, - size_t row_num, - Arena * - ) const override + void 
add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { auto col_x = assert_cast *>(columns[0]); auto col_y = assert_cast *>(columns[1]); @@ -263,34 +192,22 @@ public: this->data(place).add(x, y); } - void merge( - AggregateDataPtr place, - ConstAggregateDataPtr rhs, Arena * - ) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); } - void serialize( - ConstAggregateDataPtr place, - WriteBuffer & buf - ) const override + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); } - void deserialize( - AggregateDataPtr place, - ReadBuffer & buf, Arena * - ) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } - void insertResultInto( - AggregateDataPtr place, - IColumn & to - ) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override { size_t size_x = this->data(place).getSizeX(); size_t size_y = this->data(place).getSizeY(); @@ -300,14 +217,8 @@ public: throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } - Float64 sx = this->data(place).getSx(); - Float64 sy = this->data(place).getSy(); - Float64 t_value = this->data(place).getT(sx, sy); - Float64 dof = this->data(place).getDegreesOfFreedom(sx, sy); - UInt8 result = this->data(place).getResult(t_value, dof, significance_level); - - auto & column = static_cast &>(to); - column.getData().push_back(result); + auto & column = static_cast &>(to); + column.getData().push_back(this->data(place).getResult()); } }; diff --git a/tests/queries/0_stateless/01322_welch_ttest.reference b/tests/queries/0_stateless/01322_welch_ttest.reference index aa47d0d46d4..015dd503b7e 100644 --- a/tests/queries/0_stateless/01322_welch_ttest.reference 
+++ b/tests/queries/0_stateless/01322_welch_ttest.reference @@ -1,2 +1,6 @@ -0 -0 +0.021378001462867 +0.021378 +0.090773324285671 +0.09077332 +0.00339907162713746 +0.00339907 diff --git a/tests/queries/0_stateless/01322_welch_ttest.sql b/tests/queries/0_stateless/01322_welch_ttest.sql index b8e881a069b..073e71f69fe 100644 --- a/tests/queries/0_stateless/01322_welch_ttest.sql +++ b/tests/queries/0_stateless/01322_welch_ttest.sql @@ -1,13 +1,18 @@ DROP TABLE IF EXISTS welch_ttest; CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; - -INSERT INTO welch_ttest VALUES (2224.779, 2465.0984), (2588.11, 1909.0328), (1979.625, 1175.8747), (2137.442, 2171.378), (2565.818, 2193.2821), (1754.023, 2854.9475), (1654.947, 2060.1777), (1789.256, 2258.2366), (2320.659, 1856.0535), (2039.532, 1501.8126), (1983.497, 2987.6542), (2232.903, 1681.9778), (2513.93, 2479.6776), (2066.382, 1259.8584), (2492.715, 1120.9043), (1988.287, 1982.1213), (1840.036, 3012.3949), (2249.749, 2252.373), (1766.982, 2591.3122), (1724.84, 1940.589), (0, 1995.185), (0, 2535.1344), (0, 597.3155), (0, 2343.2192), (0, 3154.84), (0, 1125.1966), (0, 1227.8842), (0, 1692.805), (0, 2539.6772), (0, 1936.1927), (0, 1783.7795), (0, 1703.4384), (0, 2077.194), (0, 1614.4071), (0, 2360.0365), (0, 1619.2781), (0, 2033.5109), (0, 2333.7834), (0, 2144.0485), (0, 2583.8709), (0, 1116.7213), (0, 1601.9383), (0, 1570.0431), (0, 1963.0777), (0, 1639.2533), (0, 2277.5223), (0, 1991.9286), (0, 2044.3338), (0, 1794.4781), (0, 1597.9119) -SELECT WelchTTest(0.1)(left, right) from welch_ttest; - +INSERT INTO welch_ttest VALUES (27.5,27.1), (21.0,22.0), (19.0,20.8), (23.6,23.4), (17.0,23.4), (17.9,23.5), (16.9,25.8), (20.1,22.0), (21.9,24.8), (22.6,20.2), (23.1,21.9), (19.6,22.1), (19.0,22.9), (21.7,20.5), (21.4,24.4); +SELECT '0.021378001462867'; +SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; DROP TABLE IF EXISTS welch_ttest; + CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = 
Memory; +INSERT INTO welch_ttest VALUES (30.02,29.89), (29.99,29.93), (30.11,29.72), (29.97,29.98), (30.01,30.02), (29.99,29.98); +SELECT '0.090773324285671'; +SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; -INSERT INTO welch_ttest VALUES (2224.779, 2465.0984), (2588.11, 1909.0328), (1979.625, 1175.8747), (2137.442, 2171.378), (2565.818, 2193.2821), (1754.023, 2854.9475), (1654.947, 2060.1777), (1789.256, 2258.2366), (2320.659, 1856.0535), (2039.532, 1501.8126), (1983.497, 2987.6542), (2232.903, 1681.9778), (2513.93, 2479.6776), (2066.382, 1259.8584), (2492.715, 1120.9043), (1988.287, 1982.1213), (1840.036, 3012.3949), (2249.749, 2252.373), (1766.982, 2591.3122), (1724.84, 1940.589), (0, 1995.185), (0, 2535.1344), (0, 597.3155), (0, 2343.2192), (0, 3154.84), (0, 1125.1966), (0, 1227.8842), (0, 1692.805), (0, 2539.6772), (0, 1936.1927), (0, 1783.7795), (0, 1703.4384), (0, 2077.194), (0, 1614.4071), (0, 2360.0365), (0, 1619.2781), (0, 2033.5109), (0, 2333.7834), (0, 2144.0485), (0, 2583.8709), (0, 1116.7213), (0, 1601.9383), (0, 1570.0431), (0, 1963.0777), (0, 1639.2533), (0, 2277.5223), (0, 1991.9286), (0, 2044.3338), (0, 1794.4781), (0, 1597.9119) -SELECT WelchTTest(0.02)(left, right) from welch_ttest; - +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (0.010268,0.159258), (0.000167,0.136278), (0.000167,0.122389); +SELECT '0.00339907162713746'; +SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; DROP TABLE IF EXISTS welch_ttest; \ No newline at end of file From e65a2a1cbd6d784e8a0307f9d84e0dde9c241e16 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Mon, 12 Oct 2020 21:10:01 +0300 Subject: [PATCH 059/174] add student t test --- .../AggregateFunctionStudentTTest.cpp | 56 +++++ .../AggregateFunctionStudentTTest.h | 230 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 1 + .../registerAggregateFunctions.h | 1 + 4 files changed, 288 
insertions(+) create mode 100644 src/AggregateFunctions/AggregateFunctionStudentTTest.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionStudentTTest.h diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp new file mode 100644 index 00000000000..b6f32409946 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include "registerAggregateFunctions.h" + +#include +#include + + +// the return type is boolean (we use UInt8 as we do not have boolean in clickhouse) + +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int NOT_IMPLEMENTED; +} + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + assertBinary(name, argument_types); + assertNoParameters(name, parameters); + + AggregateFunctionPtr res; + + if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + else + { + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + } + + if (!res) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + + return res; +} + +} + + +void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory) +{ + factory.registerFunction("StudentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h new file mode 100644 index 00000000000..b03f9178709 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -0,0 +1,230 @@ +#pragma once + 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace DB +{ + +template +struct AggregateFunctionStudentTTestData final +{ + size_t size_x = 0; + size_t size_y = 0; + X sum_x = static_cast(0); + Y sum_y = static_cast(0); + X square_sum_x = static_cast(0); + Y square_sum_y = static_cast(0); + Float64 mean_x = static_cast(0); + Float64 mean_y = static_cast(0); + + void add(X x, Y y) + { + sum_x += x; + sum_y += y; + size_x++; + size_y++; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += x * x; + square_sum_y += y * y; + } + + void merge(const AggregateFunctionStudentTTestData &other) + { + sum_x += other.sum_x; + sum_y += other.sum_y; + size_x += other.size_x; + size_y += other.size_y; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += other.square_sum_x; + square_sum_y += other.square_sum_y; + } + + void serialize(WriteBuffer &buf) const + { + writeBinary(mean_x, buf); + writeBinary(mean_y, buf); + writeBinary(sum_x, buf); + writeBinary(sum_y, buf); + writeBinary(square_sum_x, buf); + writeBinary(square_sum_y, buf); + writeBinary(size_x, buf); + writeBinary(size_y, buf); + } + + void deserialize(ReadBuffer &buf) + { + readBinary(mean_x, buf); + readBinary(mean_y, buf); + readBinary(sum_x, buf); + readBinary(sum_y, buf); + readBinary(square_sum_x, buf); + readBinary(square_sum_y, buf); + readBinary(size_x, buf); + readBinary(size_y, buf); + } + + size_t getSizeY() const + { + return size_y; + } + + size_t getSizeX() const + { + return size_x; + } + + Float64 getSSquared() const + { + /// TODO: Update comment with Tex. + /// The original formulae looks like ... + /// But we made some mathematical transformations not to store original sequences. 
+ /// Also we dropped sqrt, because later it will be squared later. + const Float64 all_x = square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x; + const Float64 all_y = square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y; + return static_cast(all_x + all_y) / (size_x + size_y - 2); + } + + + Float64 getTStatisticSquared() const + { + if (size_x == 0 || size_y == 0) + { + throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); + } + + if (mean_x - mean_y < 1e-8) + { + return static_cast(0.0); + } + + return std::pow(mean_x - mean_y, 2) / getStandartErrorSquared(); + } + + + Float64 getStandartErrorSquared() const + { + return getSSquared() * (1 / size_x + 1 / size_y); + } + + Float64 getDegreesOfFreedom() const + { + return static_cast(size_x + size_y - 2); + } + + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func, size_t iterations = 1e6) + { + double h = (b - a) / iterations; + Float64 sum_odds = 0.0; + for (size_t i = 1; i < iterations; i += 2) + sum_odds += func(a + i * h); + Float64 sum_evens = 0.0; + for (size_t i = 2; i < iterations; i += 2) + sum_evens += func(a + i * h); + return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3; + } + + Float64 getPValue() const + { + const Float64 v = getDegreesOfFreedom(); + const Float64 t = getTStatisticSquared(); + std::cout << "getDegreesOfFreedom " << v << " getTStatisticSquared " << t << std::endl; + auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); }; + Float64 numenator = integrateSimpson(0, v / (t + v), f); + Float64 denominator = std::exp(std::lgammal(v/2) + std::lgammal(0.5) - std::lgammal(v/2 + 0.5)); + return numenator / denominator; + } + + Float64 getResult() const + { + return getPValue(); + } +}; + +/// Returns p-value +/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf +template +class AggregateFunctionStudentTTest : + public 
IAggregateFunctionDataHelper,AggregateFunctionStudentTTest> +{ + +public: + AggregateFunctionStudentTTest(const DataTypes & arguments) + : IAggregateFunctionDataHelper, AggregateFunctionStudentTTest> ({arguments}, {}) + {} + + String getName() const override + { + return "StudentTTest"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared>(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); + + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; + + this->data(place).add(x, y); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).serialize(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override + { + size_t size_x = this->data(place).getSizeX(); + size_t size_y = this->data(place).getSizeY(); + + if (size_x < 2 || size_y < 2) + { + throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); + } + + auto & column = static_cast &>(to); + column.getData().push_back(this->data(place).getResult()); + } + +}; + +}; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index fd914443ba2..9fd02ba9d6c 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -46,6 +46,7 @@ void registerAggregateFunctions() registerAggregateFunctionCategoricalIV(factory); registerAggregateFunctionAggThrow(factory); 
registerAggregateFunctionWelchTTest(factory); + registerAggregateFunctionStudentTTest(factory); registerAggregateFunctionRankCorrelation(factory); } diff --git a/src/AggregateFunctions/registerAggregateFunctions.h b/src/AggregateFunctions/registerAggregateFunctions.h index de239258fa0..abbba56ed32 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.h +++ b/src/AggregateFunctions/registerAggregateFunctions.h @@ -36,6 +36,7 @@ void registerAggregateFunctionMoving(AggregateFunctionFactory &); void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &); void registerAggregateFunctionAggThrow(AggregateFunctionFactory &); void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); +void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &); void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; From dbaada559782898e6a992b048e9f6fc58df22f2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 19 Sep 2020 15:15:47 +0300 Subject: [PATCH 060/174] Another test (cherry picked from commit da87861285e63369bd79e176ce375a8d6ea18b85) --- .../01502_log_tinylog_deadlock_race.reference | 6 ++ .../01502_log_tinylog_deadlock_race.sh | 85 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference create mode 100755 tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference new file mode 100644 index 00000000000..4bf85ae79f3 --- /dev/null +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference @@ -0,0 +1,6 @@ +Testing TinyLog +Done TinyLog +Testing StripeLog +Done StripeLog +Testing Log +Done Log diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh 
b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh new file mode 100755 index 00000000000..a5b2ff6db8f --- /dev/null +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +set -e + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + + +function thread_create { + while true; do + $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS $1 (x UInt64, s Array(Nullable(String))) ENGINE = $2" + sleep 0.0$RANDOM + done +} + +function thread_drop { + while true; do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS $1" + sleep 0.0$RANDOM + done +} + +function thread_rename { + while true; do + $CLICKHOUSE_CLIENT --query "RENAME TABLE $1 TO $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|57)' + sleep 0.0$RANDOM + done +} + +function thread_select { + while true; do + $CLICKHOUSE_CLIENT --query "SELECT * FROM $1 FORMAT Null" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT rand64(1), [toString(rand64(2))] FROM numbers($2)" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert_select { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT * FROM $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +export -f thread_create +export -f thread_drop +export -f thread_rename +export -f thread_select +export -f thread_insert +export -f thread_insert_select + + +# Do randomized queries and expect nothing extraordinary happens. 
+ +function test_with_engine { + echo "Testing $1" + + timeout 10 bash -c "thread_create t1 $1" & + timeout 10 bash -c "thread_create t2 $1" & + timeout 10 bash -c 'thread_drop t1' & + timeout 10 bash -c 'thread_drop t2' & + timeout 10 bash -c 'thread_rename t1 t2' & + timeout 10 bash -c 'thread_rename t2 t1' & + timeout 10 bash -c 'thread_select t1' & + timeout 10 bash -c 'thread_select t2' & + timeout 10 bash -c 'thread_insert t1 5' & + timeout 10 bash -c 'thread_insert t2 10' & + timeout 10 bash -c 'thread_insert_select t1 t2' & + timeout 10 bash -c 'thread_insert_select t2 t1' & + + wait + echo "Done $1" +} + +test_with_engine TinyLog +test_with_engine StripeLog +test_with_engine Log From b3fc6d9b3385a47cc9553c32f1181797b64486e2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 13 Oct 2020 16:31:02 +0300 Subject: [PATCH 061/174] fix IF [NOT] EXISTS failure --- src/Interpreters/InterpreterDropQuery.cpp | 24 ++++++++----------- src/Interpreters/InterpreterDropQuery.h | 2 +- .../01502_log_tinylog_deadlock_race.reference | 8 ++----- .../01502_log_tinylog_deadlock_race.sh | 7 +++--- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index c70431e5238..a9bc738f614 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -47,7 +47,7 @@ BlockIO InterpreterDropQuery::execute() if (!drop.table.empty()) { if (!drop.is_dictionary) - return executeToTable({drop.database, drop.table, drop.uuid}, drop); + return executeToTable(drop); else return executeToDictionary(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); } @@ -58,29 +58,25 @@ BlockIO InterpreterDropQuery::execute() } -BlockIO InterpreterDropQuery::executeToTable( - const StorageID & table_id_, - const ASTDropQuery & query) +BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) { - if (query.temporary || 
table_id_.database_name.empty()) + auto table_id = StorageID(query); + if (query.temporary || table_id.database_name.empty()) { - if (context.tryResolveStorageID(table_id_, Context::ResolveExternal)) - return executeToTemporaryTable(table_id_.getTableName(), query.kind); + if (context.tryResolveStorageID(table_id, Context::ResolveExternal)) + return executeToTemporaryTable(table_id.getTableName(), query.kind); + else + table_id.database_name = context.getCurrentDatabase(); } if (query.temporary) { if (query.if_exists) return {}; - throw Exception("Temporary table " + backQuoteIfNeed(table_id_.table_name) + " doesn't exist", + throw Exception("Temporary table " + backQuoteIfNeed(table_id.table_name) + " doesn't exist", ErrorCodes::UNKNOWN_TABLE); } - auto table_id = query.if_exists ? context.tryResolveStorageID(table_id_, Context::ResolveOrdinary) - : context.resolveStorageID(table_id_, Context::ResolveOrdinary); - if (!table_id) - return {}; - auto ddl_guard = (!query.no_ddl_lock ? DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name) : nullptr); /// If table was already dropped by anyone, an exception will be thrown @@ -255,7 +251,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { query.table = iterator->name(); - executeToTable({query.database, query.table}, query); + executeToTable(query); } } diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index b54736b5c21..c5d9aacdfd5 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -31,7 +31,7 @@ private: BlockIO executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists); - BlockIO executeToTable(const StorageID & table_id, const ASTDropQuery & query); + BlockIO executeToTable(const ASTDropQuery & query); BlockIO 
executeToDictionary(const String & database_name, const String & dictionary_name, ASTDropQuery::Kind kind, bool if_exists, bool is_temporary, bool no_ddl_lock); diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference index 4bf85ae79f3..c62a2b18918 100644 --- a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference @@ -1,6 +1,2 @@ -Testing TinyLog -Done TinyLog -Testing StripeLog -Done StripeLog -Testing Log -Done Log +Testing Memory +Done Memory diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh index a5b2ff6db8f..f0b5f0a3568 100755 --- a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh @@ -80,6 +80,7 @@ function test_with_engine { echo "Done $1" } -test_with_engine TinyLog -test_with_engine StripeLog -test_with_engine Log +#test_with_engine TinyLog +#test_with_engine StripeLog +#test_with_engine Log +test_with_engine Memory From cb8d132cca22da7531f345371f75ad6d4e793d61 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 13 Oct 2020 18:00:36 +0300 Subject: [PATCH 062/174] fix deadlock with DDLGuard --- src/Interpreters/DatabaseCatalog.cpp | 27 +++++++++--- src/Interpreters/DatabaseCatalog.h | 4 +- src/Interpreters/InterpreterDropQuery.cpp | 1 + .../0_stateless/01150_ddl_guard_rwr.reference | 0 .../0_stateless/01150_ddl_guard_rwr.sh | 43 +++++++++++++++++++ 5 files changed, 67 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/01150_ddl_guard_rwr.reference create mode 100755 tests/queries/0_stateless/01150_ddl_guard_rwr.sh diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 203e2292c08..03f0e057821 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ 
b/src/Interpreters/DatabaseCatalog.cpp @@ -530,7 +530,7 @@ std::unique_ptr DatabaseCatalog::getDDLGuard(const String & database, std::unique_lock lock(ddl_guards_mutex); auto db_guard_iter = ddl_guards.try_emplace(database).first; DatabaseGuard & db_guard = db_guard_iter->second; - return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table); + return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table, database); } std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) @@ -832,7 +832,7 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) } -DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem) +DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) : map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_)) { it = map.emplace(elem, Entry{std::make_unique(), 0}).first; @@ -841,14 +841,19 @@ DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_locksecond.mutex); bool is_database = elem.empty(); if (!is_database) - db_mutex.lock_shared(); + { + + bool locked_database_for_read = db_mutex.try_lock_shared(); + if (!locked_database_for_read) + { + removeTableLock(); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} is currently dropped or renamed", database_name); + } + } } -DDLGuard::~DDLGuard() +void DDLGuard::removeTableLock() { - bool is_database = it->first.empty(); - if (!is_database) - db_mutex.unlock_shared(); guards_lock.lock(); --it->second.counter; if (!it->second.counter) @@ -858,4 +863,12 @@ DDLGuard::~DDLGuard() } } +DDLGuard::~DDLGuard() +{ + bool is_database = it->first.empty(); + if (!is_database) + db_mutex.unlock_shared(); + removeTableLock(); +} + } diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 7bc6923bde4..c6f50117564 100644 
--- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -51,7 +51,7 @@ public: /// NOTE: using std::map here (and not std::unordered_map) to avoid iterator invalidation on insertion. using Map = std::map; - DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem); + DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name); ~DDLGuard(); private: @@ -60,6 +60,8 @@ private: Map::iterator it; std::unique_lock guards_lock; std::unique_lock table_lock; + + void removeTableLock(); }; diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index a9bc738f614..b29f2893db9 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -247,6 +247,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS ASTDropQuery query; query.kind = kind; + query.if_exists = true; query.database = database_name; for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { diff --git a/tests/queries/0_stateless/01150_ddl_guard_rwr.reference b/tests/queries/0_stateless/01150_ddl_guard_rwr.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01150_ddl_guard_rwr.sh b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh new file mode 100755 index 00000000000..c14e4c38f54 --- /dev/null +++ b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS test_01150" +$CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01150" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t1 (x UInt64, s Array(Nullable(String))) ENGINE = Memory" +$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t2 (x UInt64, s Array(Nullable(String))) ENGINE = Memory" + +function thread_detach_attach { + while true; do + $CLICKHOUSE_CLIENT --query "DETACH DATABASE test_01150" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (219)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "ATTACH DATABASE test_01150" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (82)' + sleep 0.0$RANDOM + done +} + +function thread_rename { + while true; do + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t1 TO test_01150.t2_tmp, test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + done +} + +export -f thread_detach_attach +export -f thread_rename + +timeout 20 bash -c "thread_detach_attach" & +timeout 20 bash -c 'thread_rename' & +wait +sleep 1 + +$CLICKHOUSE_CLIENT --query "ATTACH DATABASE IF NOT EXISTS test_01150" +$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01150"; From 8a64b65e51713d429bd744ca7288ef858041a2bd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 13 Oct 2020 20:45:59 +0300 Subject: [PATCH 063/174] fix --- src/Interpreters/InterpreterDropQuery.cpp | 2 ++ 
.../0_stateless/01516_drop_table_stress.sh | 18 +++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index b29f2893db9..a250ab1afd4 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -88,6 +88,8 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) if (query_ptr->as().is_view && !table->isView()) throw Exception("Table " + table_id.getNameForLogs() + " is not a View", ErrorCodes::LOGICAL_ERROR); + table_id = table->getStorageID(); + if (query.kind == ASTDropQuery::Kind::Detach) { context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id); diff --git a/tests/queries/0_stateless/01516_drop_table_stress.sh b/tests/queries/0_stateless/01516_drop_table_stress.sh index 3d6218c4549..3e2fd613a36 100755 --- a/tests/queries/0_stateless/01516_drop_table_stress.sh +++ b/tests/queries/0_stateless/01516_drop_table_stress.sh @@ -12,21 +12,17 @@ function drop_database() function drop_table() { - ${CLICKHOUSE_CLIENT} -nm <&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS db_01516.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS db_01516.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } function create() { - ${CLICKHOUSE_CLIENT} -nm <&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS db_01516.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS db_01516.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or 
renamed" } for _ in {1..100}; do From 744013d4b8e1624c528697b594982d3b696345d6 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Tue, 13 Oct 2020 21:46:15 +0300 Subject: [PATCH 064/174] test in comparison with scipy --- .../AggregateFunctionStudentTTest.h | 56 ++++++++++++++----- .../AggregateFunctionWelchTTest.h | 43 ++++++++++++-- src/AggregateFunctions/ya.make | 4 +- .../0_stateless/01322_student_ttest.reference | 4 ++ .../0_stateless/01322_student_ttest.sql | 19 +++++++ .../0_stateless/01322_welch_ttest.reference | 10 +++- .../queries/0_stateless/01322_welch_ttest.sql | 27 +++++++-- 7 files changed, 137 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/01322_student_ttest.reference create mode 100644 tests/queries/0_stateless/01322_student_ttest.sql diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h index b03f9178709..ac05a11d334 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.h +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -115,18 +115,22 @@ struct AggregateFunctionStudentTTestData final throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); } - if (mean_x - mean_y < 1e-8) - { - return static_cast(0.0); - } - return std::pow(mean_x - mean_y, 2) / getStandartErrorSquared(); } + Float64 getTStatistic() const + { + if (size_x == 0 || size_y == 0) + { + throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); + } + + return (mean_x - mean_y) / std::sqrt(getStandartErrorSquared()); + } Float64 getStandartErrorSquared() const { - return getSSquared() * (1 / size_x + 1 / size_y); + return getSSquared() * (1.0 / static_cast(size_x) + 1.0 / static_cast(size_y)); } Float64 getDegreesOfFreedom() const @@ -150,20 +154,23 @@ struct AggregateFunctionStudentTTestData final { const Float64 v = getDegreesOfFreedom(); const Float64 t 
= getTStatisticSquared(); - std::cout << "getDegreesOfFreedom " << v << " getTStatisticSquared " << t << std::endl; + std::cout << "getDegreesOfFreedom() " << getDegreesOfFreedom() << std::endl; + std::cout << "getTStatisticSquared() " << getTStatisticSquared() << std::endl; auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); }; Float64 numenator = integrateSimpson(0, v / (t + v), f); Float64 denominator = std::exp(std::lgammal(v/2) + std::lgammal(0.5) - std::lgammal(v/2 + 0.5)); + std::cout << "numenator " << numenator << std::endl; + std::cout << "denominator " << denominator << std::endl; return numenator / denominator; } - Float64 getResult() const + std::pair getResult() const { - return getPValue(); + return std::make_pair(getTStatistic(), getPValue()); } }; -/// Returns p-value +/// Returns tuple of (t-statistic, p-value) /// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf template class AggregateFunctionStudentTTest : @@ -182,7 +189,22 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared>(); + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t-statistic", + "p-value" + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -221,8 +243,16 @@ public: throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } - auto & column = static_cast &>(to); - column.getData().push_back(this->data(place).getResult()); + Float64 t_statistic = 0.0; + Float64 p_value = 0.0; + std::tie(t_statistic, p_value) = this->data(place).getResult(); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + + 
column_stat.getData().push_back(t_statistic); + column_value.getData().push_back(p_value); } }; diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index e445278e9e7..36641b826b1 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -122,6 +122,16 @@ struct AggregateFunctionWelchTTestData final return std::pow(mean_x - mean_y, 2) / (getSxSquared() / size_x + getSySquared() / size_y); } + Float64 getTStatistic() const + { + if (size_x == 0 || size_y == 0) + { + throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + } + + return (mean_x - mean_y) / std::sqrt(getSxSquared() / size_x + getSySquared() / size_y); + } + Float64 getDegreesOfFreedom() const { auto sx = getSxSquared(); @@ -154,9 +164,9 @@ struct AggregateFunctionWelchTTestData final return numenator / denominator; } - Float64 getResult() const + std::pair getResult() const { - return getPValue(); + return std::make_pair(getTStatistic(), getPValue()); } }; @@ -178,7 +188,22 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared>(); + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t-statistic", + "p-value" + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -217,8 +242,16 @@ public: throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); } - auto & column = static_cast &>(to); - column.getData().push_back(this->data(place).getResult()); + Float64 t_statistic = 0.0; + Float64 p_value = 0.0; + std::tie(t_statistic, p_value) = this->data(place).getResult(); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast 
&>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + + column_stat.getData().push_back(t_statistic); + column_value.getData().push_back(p_value); } }; diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index f5a869b2f78..1578e0c80ea 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -42,6 +42,7 @@ SRCS( AggregateFunctionState.cpp AggregateFunctionStatistics.cpp AggregateFunctionStatisticsSimple.cpp + AggregateFunctionStudentTTest.cpp AggregateFunctionSum.cpp AggregateFunctionSumMap.cpp AggregateFunctionTimeSeriesGroupSum.cpp @@ -49,12 +50,13 @@ SRCS( AggregateFunctionUniqCombined.cpp AggregateFunctionUniq.cpp AggregateFunctionUniqUpTo.cpp + AggregateFunctionWelchTTest.cpp AggregateFunctionWindowFunnel.cpp parseAggregateFunctionParameters.cpp registerAggregateFunctions.cpp UniqCombinedBiasData.cpp UniqVariadicHash.cpp - AggregateFunctionWelchTTest.cpp + ) END() diff --git a/tests/queries/0_stateless/01322_student_ttest.reference b/tests/queries/0_stateless/01322_student_ttest.reference new file mode 100644 index 00000000000..02e44744629 --- /dev/null +++ b/tests/queries/0_stateless/01322_student_ttest.reference @@ -0,0 +1,4 @@ +-2.610898982580138 0.00916587538237954 +-2.610898982580134 0.0091658753823792 +-28.740781574102936 7.667329672103986e-133 +-28.74078157410298 0 diff --git a/tests/queries/0_stateless/01322_student_ttest.sql b/tests/queries/0_stateless/01322_student_ttest.sql new file mode 100644 index 00000000000..3636e239fe8 --- /dev/null +++ b/tests/queries/0_stateless/01322_student_ttest.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS student_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=1, sigma (not sigma^2)=5, size=500 + Second: a=1, sigma = 5, size = 500 */ +CREATE TABLE student_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO student_ttest VALUES (0.88854,-2.90702), (-5.76966,3.61651), 
(6.76618,4.27458), (3.55546,4.82133), (-9.76948,9.59483), (4.92323,1.00424), (-0.36352,2.04147), (0.97018,-3.58214), (4.61656,6.59543), (-6.78292,-1.00532), (4.02008,-3.59794), (12.41838,-2.82434), (5.14417,-3.13194), (3.86836,9.90977), (-1.26199,0.523), (12.44106,4.62779), (3.28349,-2.56872), (1.77261,2.25807), (-8.94748,1.04044), (-1.01449,-2.35744), (-1.26377,10.81531), (6.79682,-9.68469), (6.32333,3.80885), (-8.21214,12.70435), (-1.68565,-6.01112), (9.7557,1.89065), (3.66694,5.08892), (1.39967,3.45254), (-5.52035,11.58151), (-10.95601,0.85035), (0.93877,8.38397), (1.45933,1.17169), (-5.40551,4.74621), (-0.83857,-1.66614), (8.50794,4.2414), (-6.68686,1.68765), (5.03099,1.85223), (1.56251,9.10111), (4.17381,-2.38085), (-2.92644,-14.79595), (5.11068,-3.8938), (2.09617,-3.41864), (11.7787,-3.15282), (6.50336,-0.56684), (0.62098,12.87997), (-7.97121,6.89115), (3.81902,12.921), (0.33151,-7.94908), (10.68584,2.45687), (0.56007,2.14957), (-7.38621,7.55081), (5.05882,-3.71534), (2.34616,-2.41064), (11.3806,-0.80734), (5.95276,-4.75651), (-3.01429,2.05241), (5.98169,-5.44523), (0.96985,-2.75054), (-1.15932,-13.00131), (2.11547,-2.74451), (2.49668,-1.39004), (-12.49569,-3.02854), (-4.94667,7.65112), (-3.64215,1.1245), (-8.35595,6.74117), (3.211,-0.75777), (2.33805,8.93451), (2.38608,-8.85559), (-3.2862,-0.36405), (-0.80454,4.02742), (-0.53483,6.88718), (10.66445,-1.05124), (-0.37619,3.04085), (0.48246,3.32368), (7.41919,1.147), (0.42414,3.41554), (-2.32335,-3.47851), (-0.70223,-0.47684), (-5.9332,-0.55605), (-1.20561,-0.17006), (3.39865,2.26218), (9.61739,12.45494), (-0.78651,-1.84097), (-4.00256,1.64934), (-7.99646,-7.07496), (8.72923,-9.99462), (0.71859,6.09954), (-1.62726,-1.05319), (5.11234,3.04757), (-0.95625,0.93899), (-3.75573,-4.63243), (1.03141,-7.43322), (-3.33588,-7.298), (1.51804,-6.59016), (-3.30935,-6.11649), (-1.97507,0.56682), (4.06456,2.00661), (3.27195,-2.79814), (-7.81761,2.84482), (-3.81785,3.65348), (-4.18311,-4.22807), (-11.33313,-4.54336), 
(-0.25221,-3.63343), (7.2514,2.96878), (5.30301,6.11661), (2.46762,-1.70919), (4.22716,-4.71133), (0.33916,6.09652), (9.7638,-6.83454), (-7.58684,0.18006), (-4.09888,1.51676), (4.26617,-5.31646), (-0.56744,-3.21215), (4.65125,-5.07599), (-1.30301,-2.36591), (4.53771,3.55724), (9.96929,4.8904), (3.72939,-3.22586), (-2.29818,-1.74928), (3.09417,5.73458), (0.82251,1.41188), (5.29975,2.86255), (2.8685,2.90179), (-5.73321,-2.19949), (-1.85651,1.72727), (-1.07984,1.76939), (9.78342,-0.12848), (-13.49652,-0.52), (3.68791,3.48333), (1.9998,7.8262), (1.11674,0.09099), (9.43869,7.77017), (4.07029,9.49484), (5.32715,1.42825), (7.16504,1.99624), (6.66096,4.00419), (-5.7111,1.07925), (-0.38575,-0.09987), (4.49165,-5.48733), (-3.36489,-1.83517), (7.71814,2.38059), (-1.58966,1.42075), (-1.61063,-1.11968), (-0.91602,-6.46035), (0.73459,7.66576), (-3.24463,4.6307), (6.3947,5.55989), (-2.77845,3.16684), (4.45899,5.07671), (-8.84186,-10.20566), (2.62276,-4.73386), (1.774,1.28353), (4.3692,6.75679), (0.05942,12.09895), (-1.44042,7.0049), (-2.53594,7.16156), (-2.24752,-0.64311), (4.98874,-0.66747), (4.05434,3.99996), (-2.56483,9.07298), (-6.79286,-4.60971), (-2.06165,0.70744), (-0.26056,2.56774), (1.89567,9.32424), (-3.15145,3.95087), (-7.31321,7.11372), (0.28936,-0.89284), (-0.63111,8.6155), (0.22611,-0.14141), (-9.3377,-4.86319), (-5.76638,-6.95801), (3.87306,4.44883), (6.7011,4.6156), (9.03915,-2.3579), (-1.21835,-5.1186), (0.82892,8.12819), (2.80656,2.78392), (-1.34746,-4.30221), (-1.99912,-1.47506), (0.6036,6.8598), (-3.46117,0.47636), (5.23732,0.95383), (-1.86702,7.79779), (-5.86115,-2.61767), (6.48523,-10.5087), (-7.40158,-2.74299), (-1.38913,3.87369), (4.94613,-1.07093), (-2.07818,4.98864), (2.39808,-7.50772), (4.89238,6.41316), (4.39481,1.39061), (5.20425,-3.1747), (13.62598,-2.13621), (-2.86293,-0.02203), (-3.62396,0.89025), (-4.28695,-5.87746), (4.66425,3.60026), (2.20871,-0.23178), (1.60382,-2.1897), (-9.87024,-5.85101), (-7.37302,-1.6053), (-4.17814,3.6184), 
(2.5148,-8.53795), (3.21708,-0.35987), (-11.48089,2.15301), (1.19821,-6.60692), (-0.07436,9.54341), (-1.10652,1.11511), (4.03395,2.94025), (-4.35883,12.05657), (2.04013,3.75156), (0.52264,7.95597), (8.14004,-0.99449), (-8.86949,0.90597), (-0.35807,-7.90627), (-10.71113,3.50863), (-2.13755,-1.47493), (0.50715,4.11671), (6.30826,10.06325), (2.37527,-1.06059), (0.20872,-1.37737), (-5.85729,-0.42542), (-4.97217,-3.90267), (-9.78434,9.35037), (-1.53277,-7.91219), (0.14827,-4.69945), (-1.053,3.63776), (1.74558,3.46492), (11.17194,2.84518), (9.35487,-3.04301), (-9.17209,8.82764), (10.41814,7.80134), (7.41206,7.87755), (3.71775,7.01035), (-2.04674,2.43271), (6.18037,11.36418), (5.6383,-6.92659), (-0.90058,5.95541), (-1.27073,3.59436), (-2.3473,5.18429), (-8.44271,4.20225), (2.75551,0.5029), (-1.15521,4.03074), (4.08722,5.23152), (-1.70399,10.65409), (7.24114,-0.69845), (-8.43976,11.70096), (-1.53052,5.80692), (-0.00526,-8.1819), (-4.04813,4.31485), (-2.84299,5.7227), (-5.201,5.67398), (7.75774,-1.75826), (-2.85791,7.54164), (-3.86071,-1.79026), (-1.80029,-1.7395), (-5.26015,5.65042), (-3.158,0.38765), (7.71014,-4.64719), (-4.84866,-10.22048), (-8.38785,-2.05447), (7.67021,-2.43441), (4.96521,-5.38551), (-0.40919,5.47764), (-3.25711,8.26637), (3.07685,-3.6421), (2.89376,-11.66269), (-10.47331,3.972), (-3.48942,5.46642), (1.13906,-3.72304), (-8.57454,5.75251), (-3.38963,5.12841), (-2.3195,0.59067), (-1.60694,5.21138), (-5.57406,-4.58702), (-0.93075,-8.737), (-11.76579,-2.12737), (10.68283,0.22888), (8.74324,-1.46448), (7.66409,2.40311), (4.76715,-5.21814), (0.44539,13.94749), (-1.35941,-2.77448), (4.18849,-3.7867), (-6.17097,3.4954), (0.27977,3.12586), (-1.45006,-7.01485), (-4.81694,-3.20727), (-3.0297,6.31415), (0.02145,2.37521), (2.46883,8.13787), (9.60317,2.15956), (-9.93898,-0.40842), (1.05549,-7.27283), (5.55366,4.27575), (-3.80722,-2.89126), (-4.18851,6.84344), (1.00351,7.0869), (3.11385,-5.18837), (-5.17623,2.67648), (-3.18396,-6.57021), (-6.65302,0.60429), 
(-0.50832,-1.04921), (-4.04375,7.12873), (4.52707,1.68973), (6.63124,-2.58404), (-3.72082,-3.83114), (5.79825,-7.26546), (-2.0158,-5.07153), (-2.78369,-0.80395), (-1.91821,2.09455), (6.31714,4.33374), (-1.80869,8.54335), (8.55586,0.80566), (2.40826,-8.38085), (-8.46361,7.54812), (5.04452,8.78007), (-0.84665,1.5857), (2.30903,8.43855), (-3.71837,-1.90846), (-0.69419,-1.2434), (3.6733,7.16172), (-1.96098,-3.44129), (2.36747,-6.37542), (-12.03622,-4.99486), (4.38481,4.99033), (2.93955,-1.83734), (2.16804,-2.83289), (-0.08218,-4.13997), (-3.97934,1.40163), (-7.43985,8.57867), (0.91666,-1.87639), (7.23432,3.41667), (-6.13303,6.31762), (-10.23217,1.58473), (-6.21681,1.63625), (-0.80934,-6.93618), (0.17914,3.58046), (2.13338,-6.8097), (6.97656,4.69978), (6.90455,-1.72912), (6.25943,5.29491), (-6.04019,-1.63062), (-7.30909,5.83818), (1.4589,17.0769), (12.00208,4.54301), (2.22457,-1.33801), (-2.45912,5.64339), (-6.92213,1.26913), (4.05547,-1.01553), (0.04709,4.8316), (-7.70952,3.08635), (-1.47883,-2.27738), (1.3701,-1.13761), (-4.92928,10.08698), (-2.75872,5.33827), (-0.09178,2.84345), (2.62642,-1.51132), (-1.14623,13.46078), (2.76609,8.58965), (4.94404,-2.36683), (-7.01764,-1.8217), (-10.91568,1.96981), (-2.49738,2.31718), (0.73576,3.66493), (2.25436,1.93104), (-1.72956,5.20332), (2.41054,3.20519), (5.72149,3.34631), (-6.41371,7.0087), (3.38217,-7.96126), (1.24133,-0.62182), (10.03634,-4.65227), (-2.37303,10.6572), (-1.35543,4.50891), (-1.4387,9.74298), (-4.0976,3.85707), (-0.82501,6.41144), (-1.93498,1.48649), (5.59955,2.28076), (5.46656,2.75342), (2.43568,-5.40401), (-0.23926,7.11389), (-4.9945,5.74368), (-4.96655,6.78345), (-0.59258,3.83773), (2.02497,0.70959), (0.67583,0.57434), (3.16522,1.5888), (-1.9673,3.94889), (-6.75319,5.8234), (-6.69723,7.78366), (0.81148,9.08354), (4.44531,-7.99182), (-4.43522,-2.77033), (-5.28602,-10.29342), (-3.58829,1.76251), (-7.97395,2.09266), (-2.84891,4.20614), (-3.95112,-3.63064), (3.54945,-2.17794), (12.12376,-2.66225), 
(-3.12347,-2.74707), (3.65209,-1.93431), (9.34031,1.38629), (-0.26348,4.12816), (-5.23968,-1.58902), (2.22336,-5.08864), (-10.70405,-2.30491), (-4.41319,2.64605), (-5.94912,1.16158), (1.8147,2.63534), (7.69287,1.4956), (9.46125,-4.60768), (4.72497,0.60771), (-0.57565,3.29549), (-1.12303,-1.42592), (2.90272,0.8883), (-4.4584,-1.10612), (4.28819,-2.57296), (11.64512,5.88085), (-1.80395,7.40745), (2.51605,13.48116), (-3.18439,5.53539), (-0.70213,-1.46014), (-7.68383,3.73304), (-8.32268,3.5435), (-8.71115,-3.89151), (9.96933,4.16265), (0.95675,2.32663), (3.35114,5.31735), (-2.66008,6.33485), (7.75456,2.1339), (0.73568,0.82708), (0.3483,-2.95155), (-1.09203,-6.76019), (-7.76963,-4.20179), (5.81902,8.78354), (-3.41424,1.41863), (-0.39209,7.65689), (4.67608,-6.52601), (0.68753,-4.4426), (5.17179,-4.49483), (4.98983,-3.91479), (-0.12659,-2.84562), (3.25267,2.58974), (1.50184,2.24424), (2.94507,-4.65846), (-0.42333,8.4062), (-3.66227,8.20262), (8.90812,-8.63752), (4.74411,4.97966), (2.22018,-0.35563), (-2.07976,-4.72116), (4.8711,-2.95997), (0.5023,2.73959), (6.31569,-0.23956), (-4.36903,10.13915), (3.82146,11.83775), (-6.99477,-2.50332), (3.61225,-0.58181), (14.69335,-7.62836), (0.58368,2.26478), (4.65341,-3.50179), (-3.14272,-2.08023), (2.67048,4.07256), (4.64963,-1.40826), (-2.70828,-2.33644), (1.42923,3.00197), (5.84498,4.23668), (-4.76568,-2.24647), (0.19907,1.0445), (1.67486,-0.31901), (5.32145,8.62657), (-8.03477,3.92817), (3.46776,0.08462), (4.66374,10.15884), (-5.37394,0.4113), (5.39045,4.45847), (-1.44756,5.82941), (-1.64419,6.59202), (3.39699,-3.73441), (-2.94659,-5.86969), (-2.38437,-4.56543), (-0.23958,-1.32636), (6.88389,-0.17884), (-2.7172,-3.56181), (-1.53419,-0.66932), (7.38841,6.87538), (-5.44178,0.73527), (-0.89287,-0.24177), (2.93546,-0.8657), (-0.26901,-0.22977), (-4.70044,1.02095), (2.25846,6.16311), (-9.28813,-5.68027), (6.04268,-3.7619), (4.41693,4.22959), (1.75714,-1.5249); +SELECT '-2.610898982580138', '0.00916587538237954'; +SELECT 
roundBankers(StudentTTest(left, right).1, 16) as t_stat, roundBankers(StudentTTest(left, right).2, 16) as p_value from student_ttest; +DROP TABLE IF EXISTS student_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=1, sigma (not sigma^2)=5, size=500 + Second: a=1, sigma = 5, size = 500 */ +CREATE TABLE student_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO student_ttest VALUES (4.52546,8.69444), (3.73628,3.81414), (-0.39478,12.38442), (5.15633,8.9738), (0.50539,9.19594), (-5.34036,7.21009), (0.19336,4.97743), (8.35729,4.94756), (6.95818,19.80911), (-2.93812,13.75358), (8.30807,16.56373), (-3.3517,9.72882), (4.16279,4.64509), (-3.17231,17.76854), (1.93545,4.80693), (11.06606,8.79505), (-4.22678,10.88868), (-1.99975,6.21932), (-4.51178,15.11614), (-4.50711,13.24703), (1.89786,14.76476), (-6.19638,-0.6117), (-3.70188,17.48993), (5.01334,12.11847), (1.79036,4.87439), (2.14435,18.56479), (3.0282,1.23712), (2.35528,5.41596), (-12.18535,4.54994), (5.59709,11.37668), (-12.92336,9.5982), (-0.04281,6.59822), (-0.16923,1.16703), (0.88924,8.88418), (-4.68414,10.95047), (8.01099,5.52787), (2.61686,-1.11647), (-2.76895,14.49946), (3.32165,3.27585), (-0.85135,-0.42025), (1.21368,6.37906), (4.38673,2.5242), (6.20964,8.1405), (-1.23172,6.46732), (4.65516,9.89332), (-1.87143,10.4374), (0.86429,-1.06465), (2.51184,6.84902), (-1.88822,10.96576), (-1.61802,7.83319), (1.93653,14.39823), (-3.66631,7.02594), (-1.05294,13.46629), (-10.74718,10.39531), (16.49295,11.27348), (-7.65494,9.32187), (-3.39303,12.32667), (-4.89418,8.98905), (3.2521,9.54757), (0.05831,5.98325), (-3.00409,3.47248), (5.76702,9.26966), (2.67674,5.77816), (10.52623,6.32966), (-0.54501,9.49313), (-4.89835,6.21337), (3.52457,10.00242), (-0.0451,6.25167), (-6.61226,15.64671), (9.02391,2.78968), (5.52571,6.55442), (4.54352,3.68819), (-3.8394,9.55934), (-7.75295,4.166), (5.91167,12.32471), (1.38897,7.10969), (6.24166,16.31723), (5.58536,12.99482), 
(4.7591,10.11585), (-2.58336,10.29455), (-1.91263,18.27524), (3.31575,12.84435), (5.3507,13.11954), (-15.22081,12.84147), (-0.84775,15.55658), (-4.538,11.45329), (6.71177,7.50912), (0.52882,8.56226), (2.0242,8.63104), (5.69146,15.68026), (4.63328,21.6361), (0.22984,6.23925), (-2.84052,8.65714), (7.91867,9.9423), (1.11001,12.28213), (-0.11251,3.11279), (-0.20905,13.58128), (0.03287,16.51407), (-1.59397,16.60476), (-5.39405,12.02022), (-7.1233,12.11035), (4.51517,9.47832), (-0.70967,6.40742), (5.67299,8.87252), (-0.33835,15.14265), (-1.83047,2.23572), (-0.62877,11.57144), (-7.23148,18.87737), (0.1802,12.1833), (11.73325,11.17519), (2.17603,16.80422), (-0.11683,6.81423), (-1.29102,12.12546), (-0.23201,8.06153), (-6.8643,10.97228), (-6.85153,7.30596), (-4.77163,15.44026), (6.11721,8.00993), (5.96406,12.60196), (3.59135,13.96832), (-0.60095,14.03207), (3.11163,4.53758), (-0.18831,8.08297), (0.67657,4.90451), (-3.16117,8.14253), (0.26957,19.88605), (2.18653,13.85254), (-5.94611,23.01839), (-4.39352,6.02084), (-3.71525,9.60319), (5.11103,1.90511), (1.33998,10.35237), (1.01629,16.27082), (-3.36917,12.52379), (-3.99661,11.37435), (8.19336,13.61823), (2.89168,15.77622), (-11.10373,15.17254), (11.68005,6.711), (3.08282,4.74205), (-6.81506,10.09812), (-2.34587,6.61722), (-2.68725,10.34164), (0.3577,8.96602), (-3.05682,12.32157), (9.08062,11.75711), (-0.77913,13.49499), (10.35215,8.57713), (6.82565,11.50313), (-1.24674,1.13097), (5.18822,7.83205), (-3.70743,5.77957), (1.40319,15.5519), (5.89432,10.82676), (1.43152,11.51218), (6.70638,9.29779), (9.76613,9.77021), (4.27604,9.94114), (-2.63141,15.54513), (-7.8133,19.10736), (-0.06668,15.04205), (1.05391,9.03114), (4.41797,24.0104), (0.09337,9.94205), (6.16075,2.5925), (7.49413,8.82726), (-3.52872,10.0209), (-2.17126,8.1635), (-3.87605,4.24074), (3.26607,7.67291), (-3.28045,5.21642), (2.1429,11.2808), (1.53386,6.88172), (0.21169,5.98743), (-0.63674,17.97249), (5.84893,6.46323), (-0.63498,15.37416), (8.29526,2.89957), 
(-1.08358,17.13044), (-2.306,11.06355), (2.86991,3.09625), (-0.76074,-2.33019), (5.49191,7.42675), (1.82883,15.06792), (-3.70497,8.81116), (-0.53232,19.17446), (-11.49722,18.77181), (3.44877,14.06443), (-1.8596,12.81241), (-10.34851,2.72299), (1.13093,18.67739), (-10.93389,11.63275), (-3.39703,2.23891), (0.19749,13.01195), (-3.68389,7.43402), (-4.67863,8.14599), (10.78916,16.65328), (0.37675,1.362), (3.98094,3.87957), (-3.64775,11.16134), (-4.8443,6.25357), (1.102,4.21945), (8.72112,12.50047), (-1.47361,6.45486), (6.24183,18.99924), (6.83569,18.09508), (-3.11684,13.59528), (4.91306,3.39681), (-0.03628,13.33157), (5.1282,5.8945), (-2.38558,5.61212), (2.33351,8.41149), (-0.97191,13.78608), (-0.05588,6.08609), (-4.70019,12.76962), (-5.12371,3.26206), (0.65606,0.25528), (-0.11574,11.9083), (4.4238,4.35071), (6.93399,11.19855), (3.68712,13.87404), (-0.01187,6.87986), (1.8332,8.32566), (5.81322,22.51334), (-4.04709,2.5226), (-8.26397,16.84498), (-2.11273,6.26108), (5.28396,13.84824), (0.73054,6.03262), (6.43559,14.12668), (4.35565,16.01939), (-1.05545,8.19237), (5.00087,18.01595), (-2.72239,9.45609), (7.32313,6.90459), (2.11548,12.83115), (-3.40953,10.603), (6.97051,13.70439), (-0.45567,6.1633), (1.31699,4.1151), (-1.49871,8.20499), (7.14772,11.67903), (0.79277,7.30851), (6.9698,6.50941), (2.08733,7.3949), (-3.55962,12.80075), (0.75601,5.62043), (1.21,18.2542), (-2.17877,17.9393), (1.83206,16.4569), (5.72463,8.78811), (7.42257,4.85949), (0.97829,-3.36394), (7.54238,5.38683), (9.91081,12.26083), (-4.61743,10.27907), (-4.40799,11.5144), (9.99854,11.57335), (8.53725,1.94203), (3.2905,7.78228), (0.38634,11.79385), (-2.53374,10.18415), (4.94758,14.67613), (4.79624,4.70301), (5.57664,12.72151), (-6.44871,-3.35508), (3.34431,17.63775), (0.14209,2.53883), (10.88431,14.01483), (0.31846,12.4387), (-0.54703,11.15408), (-4.67791,7.74882), (-5.68011,13.60956), (-4.93362,7.81991), (1.2271,10.90969), (5.27512,8.19828), (-3.84611,-1.18523), (6.81706,0.5916), (10.33033,0.35805), 
(5.13979,12.98364), (3.66534,11.38628), (-2.07219,13.94644), (10.65442,2.03781), (-3.31751,10.74447), (-1.82011,12.35656), (-0.39886,7.08701), (1.77052,2.69871), (1.29049,19.66653), (7.92344,7.88636), (-2.92595,10.36916), (-2.67107,1.632), (5.64708,11.86081), (0.34639,13.47602), (-3.04356,6.60204), (3.98828,7.01303), (-1.36695,20.19992), (-8.48462,18.88249), (-4.04669,11.34367), (9.84561,12.97305), (-6.1537,9.5776), (0.82433,17.91364), (1.92449,18.3247), (2.51288,9.9211), (0.40965,7.14257), (2.89183,6.59133), (3.84347,12.35274), (0.66829,10.57523), (-3.45094,12.12859), (1.3544,9.47177), (-9.85456,0.60659), (5.25689,4.72996), (-5.26018,4.51121), (-6.16912,13.28893), (-1.77163,8.09014), (3.96687,8.02511), (0.70893,13.85406), (-5.45342,1.75412), (-3.89706,6.00641), (3.11868,6.35554), (4.41714,7.11293), (7.64841,8.30442), (0.00489,12.63024), (3.2263,12.38966), (-5.33042,7.6801), (2.52189,11.33744), (-7.40308,4.67713), (0.67891,7.62276), (2.49343,2.14478), (5.43133,15.32988), (-0.67541,1.52299), (-0.60299,17.00017), (-6.32903,8.29701), (-3.44336,10.92961), (-0.23963,6.78449), (6.94686,7.02698), (6.59442,11.51719), (-4.18532,9.97926), (-1.8228,7.44251), (-0.29443,7.58541), (2.99821,4.76058), (2.51942,12.88959), (-3.49176,9.974), (-0.57979,17.03689), (8.69471,11.14554), (-1.19427,11.7392), (-3.17119,11.50029), (-2.99566,19.41759), (-3.34493,9.65127), (-2.33826,9.87673), (-5.04164,14.13485), (-0.48214,9.78034), (7.45097,1.57826), (3.04787,3.72091), (2.92632,9.4054), (1.39694,23.22816), (4.38686,-0.12571), (3.25753,6.97343), (7.14218,10.09049), (-4.04341,11.78393), (-9.19352,3.01909), (2.78473,16.09448), (0.33331,6.25485), (9.89238,7.13164), (6.00566,7.75879), (-1.7511,9.56834), (4.77815,6.14824), (5.07457,13.53454), (2.56132,8.26364), (2.38317,8.7095), (-1.63486,10.61607), (-1.46871,10.64418), (-5.8681,23.9106), (-2.96227,11.38978), (-1.90638,11.4383), (-13.3052,18.41498), (-2.14705,3.70959), (-9.62069,19.95918), (2.29313,9.53847), (0.22162,14.04957), (-1.83956,13.70151), 
(4.1853,5.45046), (6.05965,10.95061), (-0.23737,9.55156), (6.07452,17.92345), (4.34629,6.23976), (4.02922,8.71029), (3.62622,13.58736), (-3.95825,8.78527), (-1.63412,11.14213), (-1.25727,12.23717), (5.06323,16.44557), (-0.66176,0.47144), (2.36606,9.7198), (-5.77792,13.50981), (4.535,14.27806), (1.02031,13.50793), (4.49345,7.47381), (-4.99791,11.07844), (2.46716,9.89844), (3.65471,21.48548), (11.2283,6.92085), (6.69743,4.44074), (-5.60375,19.98074), (0.28683,7.92826), (-0.85737,16.6313), (4.26726,17.17618), (-3.4322,13.80807), (-2.07039,5.37083), (-2.26798,9.73962), (-0.99818,10.66273), (0.41335,8.90639), (5.18124,12.24596), (-5.01858,16.89203), (2.05561,12.69184), (-0.12117,15.59077), (0.99471,6.94287), (6.89979,-0.1801), (-4.18527,3.25318), (-6.35104,8.08804), (3.89734,13.78384), (-1.979,0.46434), (3.15404,7.78224), (3.52672,9.10987), (2.48372,-0.89391), (-6.13089,14.3696), (2.2968,3.01763), (-2.74324,8.03559), (-0.12876,7.24609), (-1.51135,11.86271), (-3.92434,6.28196), (-1.71254,8.9725), (-1.25878,14.46114), (2.03021,9.50216), (4.31726,16.30413), (-3.02908,1.02795), (9.7093,1.88717), (-3.36284,9.80106), (6.70938,4.53487), (0.42762,16.34543), (5.04726,7.71098), (2.78386,2.74639), (6.83022,6.51875), (-3.02109,10.42308), (-0.65382,13.57901), (-15.58675,0.52784), (5.89746,4.4708), (-4.11598,6.39619), (-1.37208,14.57666), (10.08082,2.71602), (5.35686,12.53905), (1.93331,11.4292), (10.47444,12.44641), (-2.36872,14.50894), (6.50752,17.64374), (2.54603,11.03218), (-0.4332,9.82789), (5.26572,10.11104), (2.09016,2.16137), (1.15513,10.24054), (14.95941,12.86909), (-3.85505,15.22845), (-2.36239,5.05411), (1.64338,10.84836), (-4.25074,11.15717), (7.29744,0.91782), (-1.18964,13.29961), (5.60612,15.11314), (-3.77011,11.54004), (6.67642,-0.94238), (-0.06862,19.32581), (5.60514,10.20744), (3.7341,6.54857), (9.59001,8.69108), (3.30093,8.2296), (-2.75658,8.4474), (4.71994,6.81178), (0.74699,5.99415), (2.91095,13.99336), (-7.36829,8.7469), (-5.29487,8.62349), (3.31079,1.84212), 
(1.06974,4.4762), (-1.18424,9.25421), (-7.415,10.44229), (3.40595,12.21649), (-7.63085,10.45968), (1.13336,15.34722), (-0.0096,5.50868), (0.8928,10.93609), (-0.5943,2.78631), (7.48306,11.86145), (10.11943,18.67385), (5.60459,10.64051), (4.00189,12.75565), (2.35823,6.63666), (0.33475,12.19343), (3.47072,9.08636), (-6.68867,11.67256), (3.31031,20.31392), (2.17159,11.66443); +SELECT -28.740781574102936, 7.667329672103986e-133; +SELECT roundBankers(StudentTTest(left, right).1, 16) as t_stat, roundBankers(StudentTTest(left, right).2, 16) as p_value from student_ttest; +DROP TABLE IF EXISTS student_ttest; diff --git a/tests/queries/0_stateless/01322_welch_ttest.reference b/tests/queries/0_stateless/01322_welch_ttest.reference index 015dd503b7e..d06853a0a5e 100644 --- a/tests/queries/0_stateless/01322_welch_ttest.reference +++ b/tests/queries/0_stateless/01322_welch_ttest.reference @@ -1,6 +1,10 @@ 0.021378001462867 -0.021378 +0.0213780014628671 0.090773324285671 -0.09077332 +0.0907733242891952 0.00339907162713746 -0.00339907 +0.0033990715715539 +-0.5028215369186904 0.6152361677168877 +-0.5028215369187079 0.6152361677170834 +14.971190998235835 5.898143508382202e-44 +14.971190998235837 0 diff --git a/tests/queries/0_stateless/01322_welch_ttest.sql b/tests/queries/0_stateless/01322_welch_ttest.sql index 073e71f69fe..2a045e70b32 100644 --- a/tests/queries/0_stateless/01322_welch_ttest.sql +++ b/tests/queries/0_stateless/01322_welch_ttest.sql @@ -1,18 +1,37 @@ +/*Check only p-value first*/ DROP TABLE IF EXISTS welch_ttest; CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (27.5,27.1), (21.0,22.0), (19.0,20.8), (23.6,23.4), (17.0,23.4), (17.9,23.5), (16.9,25.8), (20.1,22.0), (21.9,24.8), (22.6,20.2), (23.1,21.9), (19.6,22.1), (19.0,22.9), (21.7,20.5), (21.4,24.4); SELECT '0.021378001462867'; -SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; +SELECT roundBankers(WelchTTest(left, right).2, 16) from 
welch_ttest; DROP TABLE IF EXISTS welch_ttest; CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (30.02,29.89), (29.99,29.93), (30.11,29.72), (29.97,29.98), (30.01,30.02), (29.99,29.98); SELECT '0.090773324285671'; -SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; +SELECT roundBankers(WelchTTest(left, right).2, 16) from welch_ttest; DROP TABLE IF EXISTS welch_ttest; CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (0.010268,0.159258), (0.000167,0.136278), (0.000167,0.122389); SELECT '0.00339907162713746'; -SELECT roundBankers(WelchTTest(left, right), 8) from welch_ttest; -DROP TABLE IF EXISTS welch_ttest; \ No newline at end of file +SELECT roundBankers(WelchTTest(left, right).2, 16) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=10, sigma (not sigma^2)=5, size=500 + Second: a=10, sigma = 10, size = 500 */ +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (14.72789,-8.65656), (9.61661,22.98234), (13.57615,23.80821), (3.98392,13.33939), (11.98889,-4.05537), (10.99422,23.5155), (5.44792,-6.45272), (20.29346,17.7903), (7.05926,11.463), (9.22732,5.28021), (12.06847,8.39157), (13.52612,6.02464), (8.24597,14.43732), (9.35245,15.76584), (10.12297,1.54391), (15.80624,1.24897), (13.68613,27.1507), (10.72729,7.71091), (5.62078,15.71846), (6.12229,32.97808), (6.03801,-1.79334), (8.95585,-9.23439), (24.04613,11.27838), (9.04757,0.72703), (2.68263,18.51557), (15.43935,9.16619), (2.89423,17.29624), (4.01423,-1.30208), (4.30568,-3.48018), (11.99948,10.12082), (8.40574,-8.01318), (10.86642,-14.22264), (9.4266,16.58174), (-8.12752,-0.55975), (7.91634,5.61449), (7.3967,1.44626), (2.26431,7.89158), (14.20118,1.13369), (6.68233,-0.82609), (15.46221,12.23365), (7.88467,12.45443), (11.20011,14.46915), 
(8.92027,13.72627), (10.27926,18.41459), (5.14395,29.66702), (5.62178,1.51619), (12.84383,10.40078), (9.98009,3.33266), (-0.69789,6.12036), (11.41386,11.86553), (7.76863,6.59422), (7.21743,22.0948), (1.81176,1.79623), (9.43762,14.29513), (19.22117,19.69162), (2.97128,-7.98033), (14.32851,5.48433), (7.54959,-2.28474), (3.81545,9.91876), (10.1281,10.64097), (2.48596,0.22523), (10.0461,17.01773), (3.59714,22.37388), (9.73522,14.04215), (18.8077,23.1244), (3.15148,18.96958), (12.26062,8.42663), (5.66707,3.7165), (6.58623,14.29366), (17.30902,23.50886), (9.91391,26.33722), (5.36946,26.72396), (15.73637,13.26287), (16.96281,12.97607), (11.54063,17.41838), (18.37358,8.63875), (11.38255,17.08943), (10.53256,23.15356), (8.08833,-4.4965), (16.27556,7.58895), (2.42969,26.04074), (9.56127,6.84245), (7.32998,20.56287), (9.19511,3.84735), (9.66903,-2.76304), (4.15029,13.1615), (8.83511,8.21954), (14.60617,-3.49943), (14.06143,22.12419), (5.39556,7.08323), (10.11871,16.12937), (10.56619,-0.32672), (14.4462,16.5942), (10.42106,7.68977), (7.75551,11.39484), (11.00418,-5.11987), (4.47226,20.87404), (16.35461,8.01007), (18.55174,3.26497), (11.82044,5.61253), (7.39454,20.69182), (11.27767,0.0296), (6.83827,21.904), (7.76858,22.46572), (15.97614,3.63685), (14.53781,-5.10846), (12.99546,14.86389), (16.91151,5.47188), (9.65012,18.44095), (14.25487,16.71368), (14.03618,6.36704), (2.57382,8.82663), (2.50779,14.6727), (14.24787,7.98383), (13.34666,2.65568), (7.31102,21.45827), (10.22981,11.77948), (17.4435,4.71979), (21.2074,3.17951), (6.64191,13.90226), (18.7086,15.50578), (14.78686,10.8026), (9.85287,16.91369), (4.48263,9.90552), (14.17469,13.87322), (14.4342,4.12366), (19.2481,-3.78985), (3.47165,1.7599), (8.28712,3.43715), (8.81657,-3.45246), (0.92319,23.64571), (20.41106,-4.96877), (6.76127,3.93514), (22.00242,1.49914), (8.66129,12.71519), (10.9929,5.11521), (17.95494,4.79872), (17.20996,20.89391), (12.18888,5.363), (12.14257,8.02765), (15.81243,14.30804), (4.43362,11.49002), 
(1.17567,14.25281), (15.60881,7.6573), (9.34833,15.49686), (6.33513,3.29327), (-0.83095,2.27236), (12.43268,12.58104), (6.63207,19.19128), (11.96877,15.25901), (14.81029,6.5221), (21.84876,10.10965), (3.75896,12.75249), (6.91307,16.50977), (13.73015,-8.6697), (8.63753,8.28553), (15.71679,1.44315), (1.74565,4.65869), (9.16895,0.98149), (5.70685,0.16623), (5.00117,17.66332), (13.06888,4.35346), (7.51204,6.52742), (15.34885,-1.06631), (5.20264,-5.28454), (8.59043,14.25583), (6.45619,8.74058), (14.61979,1.89553), (11.7075,-0.92959), (14.04901,10.30289), (4.20525,-6.3744), (15.1733,-8.1706), (3.12934,10.95369), (8.08049,4.94384), (15.41273,28.40568), (16.90751,3.7004), (5.86893,2.52363), (7.1086,4.07997), (4.418,7.8849), (12.0614,17.95409), (7.07887,16.67021), (3.61585,11.34377), (11.73001,-0.07446), (10.80449,22.00223), (8.40311,3.31778), (9.91276,18.50719), (16.4164,-3.58655), (5.25034,6.5394), (15.20283,12.40459), (10.42909,16.59866), (9.53888,7.54176), (14.68939,-1.51044), (6.60007,12.69758), (18.31058,2.9842), (7.01885,2.49187), (18.71631,2.04113), (10.50002,-2.46544), (10.7517,15.18368), (4.23224,-0.04058), (2.28924,-0.4127), (8.56059,10.5526), (8.25095,12.03982), (9.15673,12.10923), (13.28409,11.54954), (8.4513,-1.18613), (2.83911,11.30984), (2.79676,23.54105), (9.11055,10.67321), (7.18529,24.09196), (-4.1258,7.5008), (5.28306,12.52233), (6.82757,4.30673), (10.89035,9.35793), (5.24822,4.44472), (11.935,-7.00679), (6.45675,8.56241), (10.18088,23.73891), (4.9932,15.62708), (18.09939,16.09205), (8.11738,12.52074), (5.37883,14.58927), (10.50339,-4.80187), (16.64093,8.47964), (14.77263,7.75477), (13.71385,12.6893), (6.98746,7.14147), (10.74635,12.12654), (5.49432,12.32334), (13.46078,7.98909), (10.67565,3.26652), (9.0291,20.53684), (11.51417,32.3369), (13.07118,19.74911), (9.5049,-4.62897), (8.50611,8.26483), (6.47606,20.88451), (13.06526,-2.12982), (19.08658,25.61459), (9.49741,5.32091), (10.60865,-4.1196), (2.28996,7.57937), (8.12846,21.15847), (5.62241,6.46355), 
(4.07712,7.74846), (17.98526,19.62636), (9.466,28.34629), (11.38904,26.73919), (5.91826,20.40427), (1.52059,3.03378), (18.79161,10.2537), (18.20669,7.47745), (-1.67829,10.79184), (18.01586,3.91962), (16.31577,19.97973), (7.88281,18.87711), (8.46179,12.56157), (10.31113,11.46033), (14.88377,3.78661), (1.31835,-9.45748), (2.53176,12.06033), (9.48625,-0.74615), (3.97936,13.2815), (11.52319,24.78052), (13.24178,5.83337), (7.58739,17.4111), (10.00959,19.70331), (9.73361,11.78446), (8.35716,-1.366), (1.65491,1.37458), (11.11521,16.31483), (6.08355,32.63464), (10.04582,-3.79736), (11.58237,19.17984), (16.40249,-0.27705), (1.9691,-3.69456), (13.22776,28.38058), (2.67059,-1.36876), (9.83651,-25.63301), (2.12539,3.58644), (9.27114,-6.85667), (9.0699,13.42225), (2.78179,12.04671), (12.49311,28.99468), (12.97662,7.87662), (15.06359,2.61119), (16.91565,-3.56022), (5.92011,1.50022), (5.81304,14.55836), (8.46425,9.35831), (9.48705,16.9366), (4.68191,29.23126), (5.70028,15.31386), (-0.78798,13.46112), (10.03442,7.39667), (15.45433,11.15599), (9.43845,9.80499), (3.05825,22.64923), (6.92126,8.67693), (14.05905,18.67335), (19.71579,-3.19127), (15.0131,22.94716), (4.50386,17.86834), (1.31061,16.98267), (10.81197,15.91653), (14.32942,11.79718), (9.26469,18.50208), (7.27679,8.90755), (22.69295,10.44843), (12.03763,4.67433), (7.34876,6.82287), (16.60689,10.82228), (7.48786,-4.18631), (15.78602,20.3872), (17.21048,11.84735), (13.93482,21.25376), (9.69911,10.55032), (12.24315,12.19023), (10.58131,0.63369), (19.57006,7.92381), (9.8856,17.90933), (11.70302,15.30781), (7.89864,10.01877), (12.24831,0.88744), (16.93707,22.20967), (9.65467,-4.23117), (4.221,21.50819), (15.45229,11.27421), (12.83088,-16.23179), (7.58313,33.43085), (12.895,5.15093), (10.02471,1.34505), (13.36059,6.027), (5.07864,-10.43035), (9.72017,27.45998), (11.05809,19.24886), (15.28528,-4.44761), (13.99834,5.453), (19.26989,12.73758), (9.41846,11.2897), (11.65425,31.032), (8.49638,7.39168), (6.38592,11.95245), 
(-4.69837,26.279), (12.22061,-1.0255), (9.41331,10.36675), (13.2075,11.58439), (12.97005,27.8405), (11.44352,13.1707), (9.79805,31.39133), (6.93116,27.08301), (10.07691,-2.14368), (22.05892,4.08476), (7.80353,21.5573), (-2.17276,16.69822), (0.61509,7.69955), (8.35842,8.32793), (17.77108,6.49235), (14.70841,-7.3284), (1.27992,10.58264), (15.62699,-6.17006), (9.32914,34.55782), (15.41866,10.93221), (10.82009,44.24299), (3.29902,14.6224), (9.21998,-7.42798), (7.93845,15.52351), (10.33344,11.33982), (12.06399,10.46716), (5.5308,13.0986), (8.38727,-4.25988), (18.11104,9.55316), (8.86565,0.75489), (19.41825,25.99212), (9.52376,-0.81401), (3.94552,3.49551), (9.37587,22.99402), (15.44954,10.99628), (15.90527,23.70223), (13.18927,2.71482), (7.01646,22.82309), (9.06005,31.25686), (9.06431,4.86318), (5.76006,-1.06476), (9.18705,15.10298), (-3.48446,-0.61015), (15.89817,17.81246), (12.94719,-1.55788), (23.69426,18.09709), (17.47755,9.11271), (15.61528,9.94682), (0.54832,-7.33194), (14.32916,-4.67293), (9.55305,21.81717), (13.79891,7.16318), (0.82544,13.25649), (13.34875,13.88776), (9.07614,4.95793), (5.19621,17.65303), (2.1451,14.47382), (9.87726,13.19373), (8.45439,31.86093), (-1.41842,5.73161), (7.93598,10.96492), (11.23151,6.97951), (17.84458,1.75136), (7.02237,10.96144), (10.7842,15.08137), (4.42832,9.95311), (4.45044,7.07729), (1.50938,3.08148), (21.21651,22.37954), (6.2097,8.51951), (6.84354,2.88746), (18.53804,26.73509), (12.01072,-2.88939), (4.8345,-2.82367), (20.41587,-0.35783), (14.48353,14.22076), (8.71116,11.50295), (12.42818,7.10171), (14.89244,8.28488), (8.03033,0.54178), (5.25917,13.8022), (2.30092,15.62157), (10.22504,10.79173), (15.37573,28.18946), (7.13666,30.43524), (4.45018,2.54914), (10.18405,9.89421), (3.91025,13.08631), (14.52304,4.68761), (13.14771,5.61516), (11.99219,22.88072), (9.21345,7.4735), (8.85106,11.27382), (12.91887,2.39559), (15.62308,-3.31889), (11.88034,9.61957), (15.12097,23.01381), (11.58168,-1.23467), (16.83051,9.07691), 
(5.25405,15.78056), (2.19976,12.28421), (4.56716,9.44888), (16.46053,13.16928), (5.61995,4.33357), (8.67704,2.21737), (5.62789,33.17833), (9.84815,13.25407), (13.05834,-2.47961), (11.74205,6.41401), (3.88393,18.8439), (16.15321,-4.63375), (4.83925,-8.2909), (13.00334,12.18221), (4.4028,-2.95356), (4.35794,19.61659), (4.47478,12.45056), (2.38713,-4.17198), (4.25235,21.9641), (10.87509,11.96416), (9.82411,12.74573), (13.61518,10.47873), (10.25507,12.73295), (4.0335,11.31373), (10.69881,9.9827), (5.70321,5.87138), (6.96244,4.24372), (9.35874,-23.72256), (6.28076,28.41337), (8.29015,4.88103), (6.88653,3.61902), (7.70687,8.93586), (8.2001,16.40759), (6.73415,27.84494), (3.82052,5.6001), (3.94469,14.51379), (15.82384,13.5576), (2.54004,12.92213), (10.74876,3.90686), (12.60517,17.07104), (17.7024,15.84268), (4.6722,17.38777), (13.67341,16.54766), (6.4565,5.94487), (12.95699,17.02804), (4.56912,7.66386), (5.58464,10.43088), (4.0638,6.16059), (13.05559,20.46178), (5.38269,20.02888), (0.16354,20.95949), (7.23962,6.50808), (7.38577,7.22366), (8.50951,8.06659), (13.72574,16.08241), (17.80421,13.83514), (3.01135,-0.33454), (8.02608,12.98848), (14.23847,12.99024); +SELECT -0.5028215369186904, 0.6152361677168877; +SELECT roundBankers(WelchTTest(left, right).1, 16) as t_stat, roundBankers(WelchTTest(left, right).2, 16) as p_value from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=10, sigma (not sigma^2)=5, size=500 + Second: a=1, sigma = 12, size = 500 */ +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (4.82025,-2.69857), (6.13896,15.80943), (15.20277,7.31555), (14.15351,3.96517), (7.21338,4.77809), (8.55506,9.6472), (13.80816,-26.41717), (11.28411,-10.85635), (7.4612,-1.4376), (7.43759,-0.96308), (12.9832,2.84315), (-5.74783,5.79467), (12.47114,-3.06091), (15.14223,-14.62902), (3.40603,22.08022), (9.27323,-2.11982), (7.88547,-4.84824), 
(8.56456,-10.50447), (4.59731,2.4891), (7.91213,9.90324), (7.33894,-22.66866), (21.74811,-0.97103), (11.92111,-16.57608), (0.18828,-3.78749), (10.47314,25.84511), (20.37396,5.30797), (11.04991,-18.19466), (13.30083,11.72708), (14.28065,0.2891), (2.86942,-9.83474), (24.96072,6.69942), (14.20164,18.09604), (18.28769,18.52651), (10.50949,1.38201), (9.22273,7.64615), (11.77608,17.66598), (8.56872,-2.44141), (13.74535,-9.01598), (11.65209,27.69142), (12.51894,4.06946), (17.76256,-15.0077), (13.52122,-10.49648), (8.70796,-4.88322), (6.04749,-25.09805), (16.33064,-4.64024), (8.35636,20.94434), (14.03496,24.12126), (11.05834,-14.10962), (14.49261,10.6512), (2.59383,14.50687), (8.01022,-19.88081), (4.05458,-11.55271), (13.26384,13.16921), (14.62058,16.63864), (10.52489,-24.08114), (8.46357,-9.09949), (6.4147,-10.54702), (9.70071,0.20813), (12.47581,8.19066), (4.38333,-2.70523), (17.54172,-0.23954), (10.12109,7.19398), (7.73186,-7.1618), (14.0279,-7.44322), (11.6621,-17.92031), (17.47045,-1.58146), (15.50223,9.18338), (15.46034,3.25838), (13.39964,-14.30234), (14.98025,1.84695), (15.87912,31.13794), (17.67374,-0.85067), (9.64073,19.02787), (12.84904,-3.09594), (7.70278,13.45584), (13.03156,-5.48104), (9.04512,-22.74928), (15.97014,-8.03697), (8.96389,17.31143), (11.48009,-16.65231), (9.71153,-18.58713), (13.00084,-16.52641), (12.39803,14.95261), (13.08188,12.56762), (5.82244,15.00188), (10.81871,1.85858), (8.2539,2.1926), (7.52114,-2.4095), (9.11488,21.56873), (8.37482,3.35509), (14.48652,-4.98672), (11.42152,35.08603), (16.03111,-10.01602), (13.14057,-3.85153), (-2.26351,-6.81974), (15.50394,19.56525), (14.88603,-9.35488), (13.37257,0.24268), (11.84026,-3.51488), (7.66558,-0.37066), (6.24584,24.20888), (3.6312,-11.73537), (2.7018,0.01282), (5.63656,0.03963), (5.82643,-9.65589), (10.06745,-0.37429), (-0.5831,5.61255), (14.84202,0.49984), (9.5524,-10.15066), (19.71713,-14.54314), (14.23109,16.56889), (8.69105,-7.73873), (5.33742,-3.76422), (7.30372,1.40722), 
(7.93342,2.28818), (15.20884,-13.12643), (7.53839,5.17082), (13.45311,4.79089), (11.04473,-17.42643), (10.76673,8.72548), (15.44145,-3.70285), (14.06596,16.77893), (9.14873,13.382), (12.88372,19.98418), (8.74994,0.00483), (10.53263,-4.75951), (16.16694,2.35391), (8.37197,21.65809), (3.43739,-9.2714), (4.72799,-18.38253), (9.08802,7.23097), (11.2531,14.97927), (5.16115,-4.02197), (10.20895,-29.8189), (18.70884,-12.8554), (15.88924,-7.60124), (3.38758,-14.90158), (6.46449,-3.31486), (10.21088,31.38144), (14.08458,-8.61288), (15.74508,15.31895), (19.31896,-10.19488), (13.19641,13.796), (11.95409,-0.32912), (10.70718,-0.0684), (1.05245,-30.06834), (10.04772,24.93912), (17.01369,-3.26506), (10.2286,-8.29751), (19.58323,-5.39189), (7.02892,-25.08603), (4.16866,-1.45318), (8.94326,16.72724), (4.99854,-3.38467), (8.88352,-26.00478), (18.65422,7.28369), (17.32328,16.96226), (9.33492,16.5858), (14.94788,10.46583), (8.05863,3.84345), (14.6737,-2.99382), (10.93801,1.42078), (0.54036,-11.0123), (-0.34242,2.09909), (5.89076,1.21064), (3.15189,15.36079), (1.94421,-21.61349), (6.38698,22.7726), (10.50654,10.50512), (8.95362,-6.95825), (6.23711,9.20036), (11.75359,15.66902), (12.42155,3.28098), (-1.55472,-9.05692), (4.6688,0.32882), (10.48087,-1.64934), (11.74615,-4.81406), (9.26822,-5.06006), (7.55517,19.97493), (12.76005,2.88646), (16.47102,-0.34552), (11.31297,7.55186), (14.37437,-22.96115), (2.38799,31.29166), (6.44577,6.18798), (5.07471,-2.52715), (11.55123,-11.58799), (7.76795,14.13596), (10.60116,13.45069), (14.40885,12.15179), (11.58158,3.44491), (8.81648,-8.78006), (12.92299,18.32087), (11.26939,11.91757), (17.95014,-2.00179), (2.95002,10.88411), (17.41959,9.09327), (11.12455,6.62484), (8.78541,8.87178), (14.36413,11.52254), (12.98554,-14.15988), (12.58505,-17.19515), (15.49789,14.03089), (11.70999,-2.4095), (0.65596,-16.83575), (11.08202,2.71469), (14.75752,4.84351), (6.84385,-1.17651), (9.27245,-3.37529), (13.78243,-19.92137), (17.4863,4.48952), (4.01777,-12.4906), 
(11.82861,-5.65277), (13.86551,8.50819), (6.16591,-19.61261), (8.71589,12.54156), (16.77195,11.06784), (17.23243,-12.59285), (-2.12941,3.43683), (5.66629,-3.00325), (12.45153,12.49082), (1.63971,7.20955), (13.84031,17.6547), (4.6144,15.8619), (5.26169,24.3048), (9.27769,-8.05434), (9.14288,-6.06901), (9.71953,-15.69515), (9.38446,-11.13917), (1.64788,-3.90757), (11.72922,-2.57038), (13.68926,5.14065), (9.42952,17.8497), (12.05574,-8.64665), (9.09148,-18.68331), (5.32273,5.8567), (20.25258,-20.93884), (10.14599,4.40583), (10.82156,14.35985), (5.75736,4.18134), (7.13567,4.3635), (9.29746,9.35428), (5.1618,2.8908), (10.076,16.01017), (21.65669,-1.48499), (13.35486,-9.97949), (6.79957,1.03055), (8.76243,-2.79697), (14.59294,6.85977), (16.90609,4.73213), (10.50337,2.7815), (-0.07923,-2.46866), (13.51648,18.39425), (12.0676,-0.80378), (0.86482,-0.22982), (9.03563,-16.11608), (5.38751,3.0862), (17.16866,3.20779), (2.78702,10.50146), (11.15548,-0.21305), (12.30843,11.21012), (8.04897,-0.99825), (9.95814,18.39633), (11.29308,-3.39003), (14.13032,-0.64411), (21.05877,-1.39932), (3.57386,15.45319), (7.96631,-0.66044), (3.30484,-15.2223), (18.61856,-34.39907), (16.35184,-3.57836), (7.65236,16.82828), (18.02895,1.66624), (9.79458,15.43475), (16.7274,8.17776), (8.84453,5.50486), (13.05709,10.43082), (10.91447,-6.63332), (8.40171,2.28008), (16.95211,16.37203), (11.82194,5.16313), (19.87978,-8.85281), (12.88455,13.26692), (-0.00947,-7.46842), (12.28109,8.43091), (6.96462,-13.18172), (13.75282,-0.72401), (14.39141,22.3881), (11.07193,10.65448), (12.88039,2.81289), (11.38253,10.92405), (21.02707,-8.95358), (7.51955,19.80653), (6.31984,-12.86527), (15.6543,5.38826), (14.80315,-6.83501), (8.38024,-15.7647), (21.7516,-27.67412), (14.31336,8.6499), (15.04703,-4.89542), (5.73787,16.76167), (13.16911,12.84284), (12.40695,-17.27324), (9.88968,-4.18726), (8.46703,-14.62366), (8.70637,-5.49863), (8.03551,-16.22846), (5.9757,10.60329), (12.22951,6.46781), (3.14736,1.70458), 
(10.51266,10.77448), (18.593,0.8463), (10.82213,13.0482), (7.14216,-4.36264), (6.81154,3.22647), (-0.6486,2.38828), (20.56136,6.7946), (11.35367,-0.25254), (11.38205,1.2497), (17.14,1.6544), (14.91215,4.1019), (15.50207,11.27839), (5.93162,-5.04127), (3.74869,18.11674), (14.11532,0.51231), (7.38954,-0.51029), (5.45764,13.52556), (18.33733,16.10171), (9.91923,5.68197), (2.38991,-2.85904), (14.16756,-8.89167), (2.39791,6.24489), (6.92586,10.85319), (5.32474,-0.39816), (2.28812,3.87079), (5.71718,-3.1867), (5.84197,1.55322), (2.76206,16.86779), (19.05928,-14.60321), (11.51788,-1.81952), (6.56648,-3.11624), (3.35735,1.24193), (7.55948,10.18179), (19.99908,4.69796), (13.00634,0.69032), (18.36886,11.7723), (11.14675,7.62896), (16.72931,9.89741), (12.50106,9.11484), (6.00605,-3.84676), (23.06653,-0.4777), (5.39694,0.95958), (9.53167,-7.95056), (12.76944,-10.97474), (7.20604,-6.54861), (13.25391,34.74933), (13.7341,27.39463), (10.85292,4.18299), (-7.75835,6.02476), (10.29728,-1.99397), (13.70099,1.26478), (10.17959,23.37106), (9.98399,10.49682), (12.69389,-11.04354), (-0.28848,-12.22284), (-2.18319,-9.87635), (13.36378,28.90511), (10.09232,6.77613), (5.49489,0.55352), (5.46156,0.37031), (0.94225,7.1418), (12.79205,3.24897), (10.09593,-1.60918), (6.06218,3.1675), (0.89463,-17.97072), (11.88986,-5.61743), (10.79733,14.1422), (1.51371,14.87695), (2.20967,-4.65961), (15.45732,-0.99174), (16.5262,-2.96623), (5.99724,-9.02263), (8.3613,-17.2088), (15.68183,2.78608), (15.32117,6.74239), (14.15674,4.8524), (6.64553,7.46731), (4.20777,1.04894), (-0.10521,-12.8023), (-0.88169,-17.18188), (1.85913,-5.08801), (9.73673,22.13942), (0.30926,-0.36384), (6.17559,17.80564), (11.76602,7.67504), (5.68385,1.59779), (14.57088,4.10942), (12.81509,0.61074), (9.85682,-14.40767), (12.06376,10.59906), (6.08874,16.57017), (11.63921,-15.17526), (14.86722,-6.98549), (10.41035,-0.64548), (2.93794,3.23756), (12.21841,14.65504), (0.23804,4.583), (3.14845,12.72378), (7.29748,5.26547), (3.06134,0.81781), 
(13.77684,9.38273), (16.21992,10.37636), (5.33511,10.70325), (9.68959,-0.83043), (9.44169,-7.53149), (18.08012,-9.09147), (4.04224,-19.51381), (8.77918,-28.44508), (10.18324,6.44392), (9.38914,11.10201), (11.76995,-2.86184), (14.19963,8.30673), (6.88817,8.8797), (16.56123,10.68053), (15.39885,15.62919), (5.21241,8.00579), (4.44408,6.4651), (17.87587,-4.50029), (12.53337,18.04514), (13.60916,11.12996), (6.60104,-5.14007), (7.35453,9.43857), (18.61572,3.13476), (6.10437,4.9772), (13.08682,-17.45782), (12.15404,0.05552), (4.90789,-1.90283), (2.13353,2.67908), (12.49593,-2.62243), (11.93056,-3.22767), (13.29408,-8.70222), (5.70038,-23.11605), (8.40271,21.6757), (5.19456,12.70076), (-5.51028,4.4322), (14.0329,11.69344), (10.38365,9.18052), (6.56812,-2.2549), (4.21129,-2.15615), (9.7157,20.29765), (9.88553,-0.29536), (13.45346,15.50109), (4.97752,8.79187), (12.77595,5.11533), (8.56465,-20.44436), (4.27703,-3.00909), (18.12502,-4.48291), (12.45735,21.84462), (12.42912,1.94225), (12.08125,-2.81908), (10.85779,17.19418), (4.36013,-9.33528), (11.85062,-0.17346), (8.47776,0.03958), (9.60822,-35.17786), (11.3069,8.36887), (14.25525,-9.02292), (1.55168,-10.98804), (14.57782,0.29335), (7.84786,4.29634), (9.87774,3.87718), (14.75575,-9.08532), (3.68774,7.13922), (9.37667,-7.62463), (20.28676,-10.5666), (12.10027,4.68165), (8.01819,-3.30172), (18.78158,13.04852), (20.85402,13.45616), (18.98069,2.41043), (16.1429,-0.36501), (9.24047,-15.67383), (14.12487,17.92217), (10.18841,8.42106), (-3.04478,3.22063), (5.7552,-7.31753), (9.30376,21.99596), (11.42837,-36.8273), (6.02364,-20.46391), (8.86984,5.74179), (10.91177,-15.83178), (10.04418,14.90454), (18.10774,-8.84645), (7.49384,3.72036), (9.11556,4.6877), (9.7051,16.35418), (5.23268,3.15441), (9.04647,2.39907), (8.81547,-17.58664), (2.65098,-13.18269); +SELECT 14.971190998235835, 5.898143508382202e-44; +SELECT roundBankers(WelchTTest(left, right).1, 16) as t_stat, roundBankers(WelchTTest(left, right).2, 16) as p_value from welch_ttest; 
+DROP TABLE IF EXISTS welch_ttest; From a750b76818b6fc6273a6d7fc8e5785d9ff0068c7 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Tue, 13 Oct 2020 21:50:04 +0300 Subject: [PATCH 065/174] better --- tests/queries/0_stateless/01322_student_ttest.sql | 2 +- tests/queries/0_stateless/01322_welch_ttest.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01322_student_ttest.sql b/tests/queries/0_stateless/01322_student_ttest.sql index 3636e239fe8..babc8457bc0 100644 --- a/tests/queries/0_stateless/01322_student_ttest.sql +++ b/tests/queries/0_stateless/01322_student_ttest.sql @@ -14,6 +14,6 @@ DROP TABLE IF EXISTS student_ttest; Second: a=1, sigma = 5, size = 500 */ CREATE TABLE student_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO student_ttest VALUES (4.52546,8.69444), (3.73628,3.81414), (-0.39478,12.38442), (5.15633,8.9738), (0.50539,9.19594), (-5.34036,7.21009), (0.19336,4.97743), (8.35729,4.94756), (6.95818,19.80911), (-2.93812,13.75358), (8.30807,16.56373), (-3.3517,9.72882), (4.16279,4.64509), (-3.17231,17.76854), (1.93545,4.80693), (11.06606,8.79505), (-4.22678,10.88868), (-1.99975,6.21932), (-4.51178,15.11614), (-4.50711,13.24703), (1.89786,14.76476), (-6.19638,-0.6117), (-3.70188,17.48993), (5.01334,12.11847), (1.79036,4.87439), (2.14435,18.56479), (3.0282,1.23712), (2.35528,5.41596), (-12.18535,4.54994), (5.59709,11.37668), (-12.92336,9.5982), (-0.04281,6.59822), (-0.16923,1.16703), (0.88924,8.88418), (-4.68414,10.95047), (8.01099,5.52787), (2.61686,-1.11647), (-2.76895,14.49946), (3.32165,3.27585), (-0.85135,-0.42025), (1.21368,6.37906), (4.38673,2.5242), (6.20964,8.1405), (-1.23172,6.46732), (4.65516,9.89332), (-1.87143,10.4374), (0.86429,-1.06465), (2.51184,6.84902), (-1.88822,10.96576), (-1.61802,7.83319), (1.93653,14.39823), (-3.66631,7.02594), (-1.05294,13.46629), (-10.74718,10.39531), (16.49295,11.27348), (-7.65494,9.32187), (-3.39303,12.32667), (-4.89418,8.98905), (3.2521,9.54757), 
(0.05831,5.98325), (-3.00409,3.47248), (5.76702,9.26966), (2.67674,5.77816), (10.52623,6.32966), (-0.54501,9.49313), (-4.89835,6.21337), (3.52457,10.00242), (-0.0451,6.25167), (-6.61226,15.64671), (9.02391,2.78968), (5.52571,6.55442), (4.54352,3.68819), (-3.8394,9.55934), (-7.75295,4.166), (5.91167,12.32471), (1.38897,7.10969), (6.24166,16.31723), (5.58536,12.99482), (4.7591,10.11585), (-2.58336,10.29455), (-1.91263,18.27524), (3.31575,12.84435), (5.3507,13.11954), (-15.22081,12.84147), (-0.84775,15.55658), (-4.538,11.45329), (6.71177,7.50912), (0.52882,8.56226), (2.0242,8.63104), (5.69146,15.68026), (4.63328,21.6361), (0.22984,6.23925), (-2.84052,8.65714), (7.91867,9.9423), (1.11001,12.28213), (-0.11251,3.11279), (-0.20905,13.58128), (0.03287,16.51407), (-1.59397,16.60476), (-5.39405,12.02022), (-7.1233,12.11035), (4.51517,9.47832), (-0.70967,6.40742), (5.67299,8.87252), (-0.33835,15.14265), (-1.83047,2.23572), (-0.62877,11.57144), (-7.23148,18.87737), (0.1802,12.1833), (11.73325,11.17519), (2.17603,16.80422), (-0.11683,6.81423), (-1.29102,12.12546), (-0.23201,8.06153), (-6.8643,10.97228), (-6.85153,7.30596), (-4.77163,15.44026), (6.11721,8.00993), (5.96406,12.60196), (3.59135,13.96832), (-0.60095,14.03207), (3.11163,4.53758), (-0.18831,8.08297), (0.67657,4.90451), (-3.16117,8.14253), (0.26957,19.88605), (2.18653,13.85254), (-5.94611,23.01839), (-4.39352,6.02084), (-3.71525,9.60319), (5.11103,1.90511), (1.33998,10.35237), (1.01629,16.27082), (-3.36917,12.52379), (-3.99661,11.37435), (8.19336,13.61823), (2.89168,15.77622), (-11.10373,15.17254), (11.68005,6.711), (3.08282,4.74205), (-6.81506,10.09812), (-2.34587,6.61722), (-2.68725,10.34164), (0.3577,8.96602), (-3.05682,12.32157), (9.08062,11.75711), (-0.77913,13.49499), (10.35215,8.57713), (6.82565,11.50313), (-1.24674,1.13097), (5.18822,7.83205), (-3.70743,5.77957), (1.40319,15.5519), (5.89432,10.82676), (1.43152,11.51218), (6.70638,9.29779), (9.76613,9.77021), (4.27604,9.94114), (-2.63141,15.54513), 
(-7.8133,19.10736), (-0.06668,15.04205), (1.05391,9.03114), (4.41797,24.0104), (0.09337,9.94205), (6.16075,2.5925), (7.49413,8.82726), (-3.52872,10.0209), (-2.17126,8.1635), (-3.87605,4.24074), (3.26607,7.67291), (-3.28045,5.21642), (2.1429,11.2808), (1.53386,6.88172), (0.21169,5.98743), (-0.63674,17.97249), (5.84893,6.46323), (-0.63498,15.37416), (8.29526,2.89957), (-1.08358,17.13044), (-2.306,11.06355), (2.86991,3.09625), (-0.76074,-2.33019), (5.49191,7.42675), (1.82883,15.06792), (-3.70497,8.81116), (-0.53232,19.17446), (-11.49722,18.77181), (3.44877,14.06443), (-1.8596,12.81241), (-10.34851,2.72299), (1.13093,18.67739), (-10.93389,11.63275), (-3.39703,2.23891), (0.19749,13.01195), (-3.68389,7.43402), (-4.67863,8.14599), (10.78916,16.65328), (0.37675,1.362), (3.98094,3.87957), (-3.64775,11.16134), (-4.8443,6.25357), (1.102,4.21945), (8.72112,12.50047), (-1.47361,6.45486), (6.24183,18.99924), (6.83569,18.09508), (-3.11684,13.59528), (4.91306,3.39681), (-0.03628,13.33157), (5.1282,5.8945), (-2.38558,5.61212), (2.33351,8.41149), (-0.97191,13.78608), (-0.05588,6.08609), (-4.70019,12.76962), (-5.12371,3.26206), (0.65606,0.25528), (-0.11574,11.9083), (4.4238,4.35071), (6.93399,11.19855), (3.68712,13.87404), (-0.01187,6.87986), (1.8332,8.32566), (5.81322,22.51334), (-4.04709,2.5226), (-8.26397,16.84498), (-2.11273,6.26108), (5.28396,13.84824), (0.73054,6.03262), (6.43559,14.12668), (4.35565,16.01939), (-1.05545,8.19237), (5.00087,18.01595), (-2.72239,9.45609), (7.32313,6.90459), (2.11548,12.83115), (-3.40953,10.603), (6.97051,13.70439), (-0.45567,6.1633), (1.31699,4.1151), (-1.49871,8.20499), (7.14772,11.67903), (0.79277,7.30851), (6.9698,6.50941), (2.08733,7.3949), (-3.55962,12.80075), (0.75601,5.62043), (1.21,18.2542), (-2.17877,17.9393), (1.83206,16.4569), (5.72463,8.78811), (7.42257,4.85949), (0.97829,-3.36394), (7.54238,5.38683), (9.91081,12.26083), (-4.61743,10.27907), (-4.40799,11.5144), (9.99854,11.57335), (8.53725,1.94203), (3.2905,7.78228), 
(0.38634,11.79385), (-2.53374,10.18415), (4.94758,14.67613), (4.79624,4.70301), (5.57664,12.72151), (-6.44871,-3.35508), (3.34431,17.63775), (0.14209,2.53883), (10.88431,14.01483), (0.31846,12.4387), (-0.54703,11.15408), (-4.67791,7.74882), (-5.68011,13.60956), (-4.93362,7.81991), (1.2271,10.90969), (5.27512,8.19828), (-3.84611,-1.18523), (6.81706,0.5916), (10.33033,0.35805), (5.13979,12.98364), (3.66534,11.38628), (-2.07219,13.94644), (10.65442,2.03781), (-3.31751,10.74447), (-1.82011,12.35656), (-0.39886,7.08701), (1.77052,2.69871), (1.29049,19.66653), (7.92344,7.88636), (-2.92595,10.36916), (-2.67107,1.632), (5.64708,11.86081), (0.34639,13.47602), (-3.04356,6.60204), (3.98828,7.01303), (-1.36695,20.19992), (-8.48462,18.88249), (-4.04669,11.34367), (9.84561,12.97305), (-6.1537,9.5776), (0.82433,17.91364), (1.92449,18.3247), (2.51288,9.9211), (0.40965,7.14257), (2.89183,6.59133), (3.84347,12.35274), (0.66829,10.57523), (-3.45094,12.12859), (1.3544,9.47177), (-9.85456,0.60659), (5.25689,4.72996), (-5.26018,4.51121), (-6.16912,13.28893), (-1.77163,8.09014), (3.96687,8.02511), (0.70893,13.85406), (-5.45342,1.75412), (-3.89706,6.00641), (3.11868,6.35554), (4.41714,7.11293), (7.64841,8.30442), (0.00489,12.63024), (3.2263,12.38966), (-5.33042,7.6801), (2.52189,11.33744), (-7.40308,4.67713), (0.67891,7.62276), (2.49343,2.14478), (5.43133,15.32988), (-0.67541,1.52299), (-0.60299,17.00017), (-6.32903,8.29701), (-3.44336,10.92961), (-0.23963,6.78449), (6.94686,7.02698), (6.59442,11.51719), (-4.18532,9.97926), (-1.8228,7.44251), (-0.29443,7.58541), (2.99821,4.76058), (2.51942,12.88959), (-3.49176,9.974), (-0.57979,17.03689), (8.69471,11.14554), (-1.19427,11.7392), (-3.17119,11.50029), (-2.99566,19.41759), (-3.34493,9.65127), (-2.33826,9.87673), (-5.04164,14.13485), (-0.48214,9.78034), (7.45097,1.57826), (3.04787,3.72091), (2.92632,9.4054), (1.39694,23.22816), (4.38686,-0.12571), (3.25753,6.97343), (7.14218,10.09049), (-4.04341,11.78393), (-9.19352,3.01909), 
(2.78473,16.09448), (0.33331,6.25485), (9.89238,7.13164), (6.00566,7.75879), (-1.7511,9.56834), (4.77815,6.14824), (5.07457,13.53454), (2.56132,8.26364), (2.38317,8.7095), (-1.63486,10.61607), (-1.46871,10.64418), (-5.8681,23.9106), (-2.96227,11.38978), (-1.90638,11.4383), (-13.3052,18.41498), (-2.14705,3.70959), (-9.62069,19.95918), (2.29313,9.53847), (0.22162,14.04957), (-1.83956,13.70151), (4.1853,5.45046), (6.05965,10.95061), (-0.23737,9.55156), (6.07452,17.92345), (4.34629,6.23976), (4.02922,8.71029), (3.62622,13.58736), (-3.95825,8.78527), (-1.63412,11.14213), (-1.25727,12.23717), (5.06323,16.44557), (-0.66176,0.47144), (2.36606,9.7198), (-5.77792,13.50981), (4.535,14.27806), (1.02031,13.50793), (4.49345,7.47381), (-4.99791,11.07844), (2.46716,9.89844), (3.65471,21.48548), (11.2283,6.92085), (6.69743,4.44074), (-5.60375,19.98074), (0.28683,7.92826), (-0.85737,16.6313), (4.26726,17.17618), (-3.4322,13.80807), (-2.07039,5.37083), (-2.26798,9.73962), (-0.99818,10.66273), (0.41335,8.90639), (5.18124,12.24596), (-5.01858,16.89203), (2.05561,12.69184), (-0.12117,15.59077), (0.99471,6.94287), (6.89979,-0.1801), (-4.18527,3.25318), (-6.35104,8.08804), (3.89734,13.78384), (-1.979,0.46434), (3.15404,7.78224), (3.52672,9.10987), (2.48372,-0.89391), (-6.13089,14.3696), (2.2968,3.01763), (-2.74324,8.03559), (-0.12876,7.24609), (-1.51135,11.86271), (-3.92434,6.28196), (-1.71254,8.9725), (-1.25878,14.46114), (2.03021,9.50216), (4.31726,16.30413), (-3.02908,1.02795), (9.7093,1.88717), (-3.36284,9.80106), (6.70938,4.53487), (0.42762,16.34543), (5.04726,7.71098), (2.78386,2.74639), (6.83022,6.51875), (-3.02109,10.42308), (-0.65382,13.57901), (-15.58675,0.52784), (5.89746,4.4708), (-4.11598,6.39619), (-1.37208,14.57666), (10.08082,2.71602), (5.35686,12.53905), (1.93331,11.4292), (10.47444,12.44641), (-2.36872,14.50894), (6.50752,17.64374), (2.54603,11.03218), (-0.4332,9.82789), (5.26572,10.11104), (2.09016,2.16137), (1.15513,10.24054), (14.95941,12.86909), (-3.85505,15.22845), 
(-2.36239,5.05411), (1.64338,10.84836), (-4.25074,11.15717), (7.29744,0.91782), (-1.18964,13.29961), (5.60612,15.11314), (-3.77011,11.54004), (6.67642,-0.94238), (-0.06862,19.32581), (5.60514,10.20744), (3.7341,6.54857), (9.59001,8.69108), (3.30093,8.2296), (-2.75658,8.4474), (4.71994,6.81178), (0.74699,5.99415), (2.91095,13.99336), (-7.36829,8.7469), (-5.29487,8.62349), (3.31079,1.84212), (1.06974,4.4762), (-1.18424,9.25421), (-7.415,10.44229), (3.40595,12.21649), (-7.63085,10.45968), (1.13336,15.34722), (-0.0096,5.50868), (0.8928,10.93609), (-0.5943,2.78631), (7.48306,11.86145), (10.11943,18.67385), (5.60459,10.64051), (4.00189,12.75565), (2.35823,6.63666), (0.33475,12.19343), (3.47072,9.08636), (-6.68867,11.67256), (3.31031,20.31392), (2.17159,11.66443); -SELECT -28.740781574102936, 7.667329672103986e-133; +SELECT '-28.740781574102936', '7.667329672103986e-133'; SELECT roundBankers(StudentTTest(left, right).1, 16) as t_stat, roundBankers(StudentTTest(left, right).2, 16) as p_value from student_ttest; DROP TABLE IF EXISTS student_ttest; diff --git a/tests/queries/0_stateless/01322_welch_ttest.sql b/tests/queries/0_stateless/01322_welch_ttest.sql index 2a045e70b32..5a5b52ab612 100644 --- a/tests/queries/0_stateless/01322_welch_ttest.sql +++ b/tests/queries/0_stateless/01322_welch_ttest.sql @@ -23,7 +23,7 @@ DROP TABLE IF EXISTS welch_ttest; Second: a=10, sigma = 10, size = 500 */ CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (14.72789,-8.65656), (9.61661,22.98234), (13.57615,23.80821), (3.98392,13.33939), (11.98889,-4.05537), (10.99422,23.5155), (5.44792,-6.45272), (20.29346,17.7903), (7.05926,11.463), (9.22732,5.28021), (12.06847,8.39157), (13.52612,6.02464), (8.24597,14.43732), (9.35245,15.76584), (10.12297,1.54391), (15.80624,1.24897), (13.68613,27.1507), (10.72729,7.71091), (5.62078,15.71846), (6.12229,32.97808), (6.03801,-1.79334), (8.95585,-9.23439), (24.04613,11.27838), (9.04757,0.72703), 
(2.68263,18.51557), (15.43935,9.16619), (2.89423,17.29624), (4.01423,-1.30208), (4.30568,-3.48018), (11.99948,10.12082), (8.40574,-8.01318), (10.86642,-14.22264), (9.4266,16.58174), (-8.12752,-0.55975), (7.91634,5.61449), (7.3967,1.44626), (2.26431,7.89158), (14.20118,1.13369), (6.68233,-0.82609), (15.46221,12.23365), (7.88467,12.45443), (11.20011,14.46915), (8.92027,13.72627), (10.27926,18.41459), (5.14395,29.66702), (5.62178,1.51619), (12.84383,10.40078), (9.98009,3.33266), (-0.69789,6.12036), (11.41386,11.86553), (7.76863,6.59422), (7.21743,22.0948), (1.81176,1.79623), (9.43762,14.29513), (19.22117,19.69162), (2.97128,-7.98033), (14.32851,5.48433), (7.54959,-2.28474), (3.81545,9.91876), (10.1281,10.64097), (2.48596,0.22523), (10.0461,17.01773), (3.59714,22.37388), (9.73522,14.04215), (18.8077,23.1244), (3.15148,18.96958), (12.26062,8.42663), (5.66707,3.7165), (6.58623,14.29366), (17.30902,23.50886), (9.91391,26.33722), (5.36946,26.72396), (15.73637,13.26287), (16.96281,12.97607), (11.54063,17.41838), (18.37358,8.63875), (11.38255,17.08943), (10.53256,23.15356), (8.08833,-4.4965), (16.27556,7.58895), (2.42969,26.04074), (9.56127,6.84245), (7.32998,20.56287), (9.19511,3.84735), (9.66903,-2.76304), (4.15029,13.1615), (8.83511,8.21954), (14.60617,-3.49943), (14.06143,22.12419), (5.39556,7.08323), (10.11871,16.12937), (10.56619,-0.32672), (14.4462,16.5942), (10.42106,7.68977), (7.75551,11.39484), (11.00418,-5.11987), (4.47226,20.87404), (16.35461,8.01007), (18.55174,3.26497), (11.82044,5.61253), (7.39454,20.69182), (11.27767,0.0296), (6.83827,21.904), (7.76858,22.46572), (15.97614,3.63685), (14.53781,-5.10846), (12.99546,14.86389), (16.91151,5.47188), (9.65012,18.44095), (14.25487,16.71368), (14.03618,6.36704), (2.57382,8.82663), (2.50779,14.6727), (14.24787,7.98383), (13.34666,2.65568), (7.31102,21.45827), (10.22981,11.77948), (17.4435,4.71979), (21.2074,3.17951), (6.64191,13.90226), (18.7086,15.50578), (14.78686,10.8026), (9.85287,16.91369), (4.48263,9.90552), 
(14.17469,13.87322), (14.4342,4.12366), (19.2481,-3.78985), (3.47165,1.7599), (8.28712,3.43715), (8.81657,-3.45246), (0.92319,23.64571), (20.41106,-4.96877), (6.76127,3.93514), (22.00242,1.49914), (8.66129,12.71519), (10.9929,5.11521), (17.95494,4.79872), (17.20996,20.89391), (12.18888,5.363), (12.14257,8.02765), (15.81243,14.30804), (4.43362,11.49002), (1.17567,14.25281), (15.60881,7.6573), (9.34833,15.49686), (6.33513,3.29327), (-0.83095,2.27236), (12.43268,12.58104), (6.63207,19.19128), (11.96877,15.25901), (14.81029,6.5221), (21.84876,10.10965), (3.75896,12.75249), (6.91307,16.50977), (13.73015,-8.6697), (8.63753,8.28553), (15.71679,1.44315), (1.74565,4.65869), (9.16895,0.98149), (5.70685,0.16623), (5.00117,17.66332), (13.06888,4.35346), (7.51204,6.52742), (15.34885,-1.06631), (5.20264,-5.28454), (8.59043,14.25583), (6.45619,8.74058), (14.61979,1.89553), (11.7075,-0.92959), (14.04901,10.30289), (4.20525,-6.3744), (15.1733,-8.1706), (3.12934,10.95369), (8.08049,4.94384), (15.41273,28.40568), (16.90751,3.7004), (5.86893,2.52363), (7.1086,4.07997), (4.418,7.8849), (12.0614,17.95409), (7.07887,16.67021), (3.61585,11.34377), (11.73001,-0.07446), (10.80449,22.00223), (8.40311,3.31778), (9.91276,18.50719), (16.4164,-3.58655), (5.25034,6.5394), (15.20283,12.40459), (10.42909,16.59866), (9.53888,7.54176), (14.68939,-1.51044), (6.60007,12.69758), (18.31058,2.9842), (7.01885,2.49187), (18.71631,2.04113), (10.50002,-2.46544), (10.7517,15.18368), (4.23224,-0.04058), (2.28924,-0.4127), (8.56059,10.5526), (8.25095,12.03982), (9.15673,12.10923), (13.28409,11.54954), (8.4513,-1.18613), (2.83911,11.30984), (2.79676,23.54105), (9.11055,10.67321), (7.18529,24.09196), (-4.1258,7.5008), (5.28306,12.52233), (6.82757,4.30673), (10.89035,9.35793), (5.24822,4.44472), (11.935,-7.00679), (6.45675,8.56241), (10.18088,23.73891), (4.9932,15.62708), (18.09939,16.09205), (8.11738,12.52074), (5.37883,14.58927), (10.50339,-4.80187), (16.64093,8.47964), (14.77263,7.75477), (13.71385,12.6893), 
(6.98746,7.14147), (10.74635,12.12654), (5.49432,12.32334), (13.46078,7.98909), (10.67565,3.26652), (9.0291,20.53684), (11.51417,32.3369), (13.07118,19.74911), (9.5049,-4.62897), (8.50611,8.26483), (6.47606,20.88451), (13.06526,-2.12982), (19.08658,25.61459), (9.49741,5.32091), (10.60865,-4.1196), (2.28996,7.57937), (8.12846,21.15847), (5.62241,6.46355), (4.07712,7.74846), (17.98526,19.62636), (9.466,28.34629), (11.38904,26.73919), (5.91826,20.40427), (1.52059,3.03378), (18.79161,10.2537), (18.20669,7.47745), (-1.67829,10.79184), (18.01586,3.91962), (16.31577,19.97973), (7.88281,18.87711), (8.46179,12.56157), (10.31113,11.46033), (14.88377,3.78661), (1.31835,-9.45748), (2.53176,12.06033), (9.48625,-0.74615), (3.97936,13.2815), (11.52319,24.78052), (13.24178,5.83337), (7.58739,17.4111), (10.00959,19.70331), (9.73361,11.78446), (8.35716,-1.366), (1.65491,1.37458), (11.11521,16.31483), (6.08355,32.63464), (10.04582,-3.79736), (11.58237,19.17984), (16.40249,-0.27705), (1.9691,-3.69456), (13.22776,28.38058), (2.67059,-1.36876), (9.83651,-25.63301), (2.12539,3.58644), (9.27114,-6.85667), (9.0699,13.42225), (2.78179,12.04671), (12.49311,28.99468), (12.97662,7.87662), (15.06359,2.61119), (16.91565,-3.56022), (5.92011,1.50022), (5.81304,14.55836), (8.46425,9.35831), (9.48705,16.9366), (4.68191,29.23126), (5.70028,15.31386), (-0.78798,13.46112), (10.03442,7.39667), (15.45433,11.15599), (9.43845,9.80499), (3.05825,22.64923), (6.92126,8.67693), (14.05905,18.67335), (19.71579,-3.19127), (15.0131,22.94716), (4.50386,17.86834), (1.31061,16.98267), (10.81197,15.91653), (14.32942,11.79718), (9.26469,18.50208), (7.27679,8.90755), (22.69295,10.44843), (12.03763,4.67433), (7.34876,6.82287), (16.60689,10.82228), (7.48786,-4.18631), (15.78602,20.3872), (17.21048,11.84735), (13.93482,21.25376), (9.69911,10.55032), (12.24315,12.19023), (10.58131,0.63369), (19.57006,7.92381), (9.8856,17.90933), (11.70302,15.30781), (7.89864,10.01877), (12.24831,0.88744), (16.93707,22.20967), 
(9.65467,-4.23117), (4.221,21.50819), (15.45229,11.27421), (12.83088,-16.23179), (7.58313,33.43085), (12.895,5.15093), (10.02471,1.34505), (13.36059,6.027), (5.07864,-10.43035), (9.72017,27.45998), (11.05809,19.24886), (15.28528,-4.44761), (13.99834,5.453), (19.26989,12.73758), (9.41846,11.2897), (11.65425,31.032), (8.49638,7.39168), (6.38592,11.95245), (-4.69837,26.279), (12.22061,-1.0255), (9.41331,10.36675), (13.2075,11.58439), (12.97005,27.8405), (11.44352,13.1707), (9.79805,31.39133), (6.93116,27.08301), (10.07691,-2.14368), (22.05892,4.08476), (7.80353,21.5573), (-2.17276,16.69822), (0.61509,7.69955), (8.35842,8.32793), (17.77108,6.49235), (14.70841,-7.3284), (1.27992,10.58264), (15.62699,-6.17006), (9.32914,34.55782), (15.41866,10.93221), (10.82009,44.24299), (3.29902,14.6224), (9.21998,-7.42798), (7.93845,15.52351), (10.33344,11.33982), (12.06399,10.46716), (5.5308,13.0986), (8.38727,-4.25988), (18.11104,9.55316), (8.86565,0.75489), (19.41825,25.99212), (9.52376,-0.81401), (3.94552,3.49551), (9.37587,22.99402), (15.44954,10.99628), (15.90527,23.70223), (13.18927,2.71482), (7.01646,22.82309), (9.06005,31.25686), (9.06431,4.86318), (5.76006,-1.06476), (9.18705,15.10298), (-3.48446,-0.61015), (15.89817,17.81246), (12.94719,-1.55788), (23.69426,18.09709), (17.47755,9.11271), (15.61528,9.94682), (0.54832,-7.33194), (14.32916,-4.67293), (9.55305,21.81717), (13.79891,7.16318), (0.82544,13.25649), (13.34875,13.88776), (9.07614,4.95793), (5.19621,17.65303), (2.1451,14.47382), (9.87726,13.19373), (8.45439,31.86093), (-1.41842,5.73161), (7.93598,10.96492), (11.23151,6.97951), (17.84458,1.75136), (7.02237,10.96144), (10.7842,15.08137), (4.42832,9.95311), (4.45044,7.07729), (1.50938,3.08148), (21.21651,22.37954), (6.2097,8.51951), (6.84354,2.88746), (18.53804,26.73509), (12.01072,-2.88939), (4.8345,-2.82367), (20.41587,-0.35783), (14.48353,14.22076), (8.71116,11.50295), (12.42818,7.10171), (14.89244,8.28488), (8.03033,0.54178), (5.25917,13.8022), (2.30092,15.62157), 
(10.22504,10.79173), (15.37573,28.18946), (7.13666,30.43524), (4.45018,2.54914), (10.18405,9.89421), (3.91025,13.08631), (14.52304,4.68761), (13.14771,5.61516), (11.99219,22.88072), (9.21345,7.4735), (8.85106,11.27382), (12.91887,2.39559), (15.62308,-3.31889), (11.88034,9.61957), (15.12097,23.01381), (11.58168,-1.23467), (16.83051,9.07691), (5.25405,15.78056), (2.19976,12.28421), (4.56716,9.44888), (16.46053,13.16928), (5.61995,4.33357), (8.67704,2.21737), (5.62789,33.17833), (9.84815,13.25407), (13.05834,-2.47961), (11.74205,6.41401), (3.88393,18.8439), (16.15321,-4.63375), (4.83925,-8.2909), (13.00334,12.18221), (4.4028,-2.95356), (4.35794,19.61659), (4.47478,12.45056), (2.38713,-4.17198), (4.25235,21.9641), (10.87509,11.96416), (9.82411,12.74573), (13.61518,10.47873), (10.25507,12.73295), (4.0335,11.31373), (10.69881,9.9827), (5.70321,5.87138), (6.96244,4.24372), (9.35874,-23.72256), (6.28076,28.41337), (8.29015,4.88103), (6.88653,3.61902), (7.70687,8.93586), (8.2001,16.40759), (6.73415,27.84494), (3.82052,5.6001), (3.94469,14.51379), (15.82384,13.5576), (2.54004,12.92213), (10.74876,3.90686), (12.60517,17.07104), (17.7024,15.84268), (4.6722,17.38777), (13.67341,16.54766), (6.4565,5.94487), (12.95699,17.02804), (4.56912,7.66386), (5.58464,10.43088), (4.0638,6.16059), (13.05559,20.46178), (5.38269,20.02888), (0.16354,20.95949), (7.23962,6.50808), (7.38577,7.22366), (8.50951,8.06659), (13.72574,16.08241), (17.80421,13.83514), (3.01135,-0.33454), (8.02608,12.98848), (14.23847,12.99024); -SELECT -0.5028215369186904, 0.6152361677168877; +SELECT '-0.5028215369186904', '0.6152361677168877'; SELECT roundBankers(WelchTTest(left, right).1, 16) as t_stat, roundBankers(WelchTTest(left, right).2, 16) as p_value from welch_ttest; DROP TABLE IF EXISTS welch_ttest; @@ -32,6 +32,6 @@ DROP TABLE IF EXISTS welch_ttest; Second: a=1, sigma = 12, size = 500 */ CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; INSERT INTO welch_ttest VALUES (4.82025,-2.69857), 
(6.13896,15.80943), (15.20277,7.31555), (14.15351,3.96517), (7.21338,4.77809), (8.55506,9.6472), (13.80816,-26.41717), (11.28411,-10.85635), (7.4612,-1.4376), (7.43759,-0.96308), (12.9832,2.84315), (-5.74783,5.79467), (12.47114,-3.06091), (15.14223,-14.62902), (3.40603,22.08022), (9.27323,-2.11982), (7.88547,-4.84824), (8.56456,-10.50447), (4.59731,2.4891), (7.91213,9.90324), (7.33894,-22.66866), (21.74811,-0.97103), (11.92111,-16.57608), (0.18828,-3.78749), (10.47314,25.84511), (20.37396,5.30797), (11.04991,-18.19466), (13.30083,11.72708), (14.28065,0.2891), (2.86942,-9.83474), (24.96072,6.69942), (14.20164,18.09604), (18.28769,18.52651), (10.50949,1.38201), (9.22273,7.64615), (11.77608,17.66598), (8.56872,-2.44141), (13.74535,-9.01598), (11.65209,27.69142), (12.51894,4.06946), (17.76256,-15.0077), (13.52122,-10.49648), (8.70796,-4.88322), (6.04749,-25.09805), (16.33064,-4.64024), (8.35636,20.94434), (14.03496,24.12126), (11.05834,-14.10962), (14.49261,10.6512), (2.59383,14.50687), (8.01022,-19.88081), (4.05458,-11.55271), (13.26384,13.16921), (14.62058,16.63864), (10.52489,-24.08114), (8.46357,-9.09949), (6.4147,-10.54702), (9.70071,0.20813), (12.47581,8.19066), (4.38333,-2.70523), (17.54172,-0.23954), (10.12109,7.19398), (7.73186,-7.1618), (14.0279,-7.44322), (11.6621,-17.92031), (17.47045,-1.58146), (15.50223,9.18338), (15.46034,3.25838), (13.39964,-14.30234), (14.98025,1.84695), (15.87912,31.13794), (17.67374,-0.85067), (9.64073,19.02787), (12.84904,-3.09594), (7.70278,13.45584), (13.03156,-5.48104), (9.04512,-22.74928), (15.97014,-8.03697), (8.96389,17.31143), (11.48009,-16.65231), (9.71153,-18.58713), (13.00084,-16.52641), (12.39803,14.95261), (13.08188,12.56762), (5.82244,15.00188), (10.81871,1.85858), (8.2539,2.1926), (7.52114,-2.4095), (9.11488,21.56873), (8.37482,3.35509), (14.48652,-4.98672), (11.42152,35.08603), (16.03111,-10.01602), (13.14057,-3.85153), (-2.26351,-6.81974), (15.50394,19.56525), (14.88603,-9.35488), (13.37257,0.24268), 
(11.84026,-3.51488), (7.66558,-0.37066), (6.24584,24.20888), (3.6312,-11.73537), (2.7018,0.01282), (5.63656,0.03963), (5.82643,-9.65589), (10.06745,-0.37429), (-0.5831,5.61255), (14.84202,0.49984), (9.5524,-10.15066), (19.71713,-14.54314), (14.23109,16.56889), (8.69105,-7.73873), (5.33742,-3.76422), (7.30372,1.40722), (7.93342,2.28818), (15.20884,-13.12643), (7.53839,5.17082), (13.45311,4.79089), (11.04473,-17.42643), (10.76673,8.72548), (15.44145,-3.70285), (14.06596,16.77893), (9.14873,13.382), (12.88372,19.98418), (8.74994,0.00483), (10.53263,-4.75951), (16.16694,2.35391), (8.37197,21.65809), (3.43739,-9.2714), (4.72799,-18.38253), (9.08802,7.23097), (11.2531,14.97927), (5.16115,-4.02197), (10.20895,-29.8189), (18.70884,-12.8554), (15.88924,-7.60124), (3.38758,-14.90158), (6.46449,-3.31486), (10.21088,31.38144), (14.08458,-8.61288), (15.74508,15.31895), (19.31896,-10.19488), (13.19641,13.796), (11.95409,-0.32912), (10.70718,-0.0684), (1.05245,-30.06834), (10.04772,24.93912), (17.01369,-3.26506), (10.2286,-8.29751), (19.58323,-5.39189), (7.02892,-25.08603), (4.16866,-1.45318), (8.94326,16.72724), (4.99854,-3.38467), (8.88352,-26.00478), (18.65422,7.28369), (17.32328,16.96226), (9.33492,16.5858), (14.94788,10.46583), (8.05863,3.84345), (14.6737,-2.99382), (10.93801,1.42078), (0.54036,-11.0123), (-0.34242,2.09909), (5.89076,1.21064), (3.15189,15.36079), (1.94421,-21.61349), (6.38698,22.7726), (10.50654,10.50512), (8.95362,-6.95825), (6.23711,9.20036), (11.75359,15.66902), (12.42155,3.28098), (-1.55472,-9.05692), (4.6688,0.32882), (10.48087,-1.64934), (11.74615,-4.81406), (9.26822,-5.06006), (7.55517,19.97493), (12.76005,2.88646), (16.47102,-0.34552), (11.31297,7.55186), (14.37437,-22.96115), (2.38799,31.29166), (6.44577,6.18798), (5.07471,-2.52715), (11.55123,-11.58799), (7.76795,14.13596), (10.60116,13.45069), (14.40885,12.15179), (11.58158,3.44491), (8.81648,-8.78006), (12.92299,18.32087), (11.26939,11.91757), (17.95014,-2.00179), (2.95002,10.88411), 
(17.41959,9.09327), (11.12455,6.62484), (8.78541,8.87178), (14.36413,11.52254), (12.98554,-14.15988), (12.58505,-17.19515), (15.49789,14.03089), (11.70999,-2.4095), (0.65596,-16.83575), (11.08202,2.71469), (14.75752,4.84351), (6.84385,-1.17651), (9.27245,-3.37529), (13.78243,-19.92137), (17.4863,4.48952), (4.01777,-12.4906), (11.82861,-5.65277), (13.86551,8.50819), (6.16591,-19.61261), (8.71589,12.54156), (16.77195,11.06784), (17.23243,-12.59285), (-2.12941,3.43683), (5.66629,-3.00325), (12.45153,12.49082), (1.63971,7.20955), (13.84031,17.6547), (4.6144,15.8619), (5.26169,24.3048), (9.27769,-8.05434), (9.14288,-6.06901), (9.71953,-15.69515), (9.38446,-11.13917), (1.64788,-3.90757), (11.72922,-2.57038), (13.68926,5.14065), (9.42952,17.8497), (12.05574,-8.64665), (9.09148,-18.68331), (5.32273,5.8567), (20.25258,-20.93884), (10.14599,4.40583), (10.82156,14.35985), (5.75736,4.18134), (7.13567,4.3635), (9.29746,9.35428), (5.1618,2.8908), (10.076,16.01017), (21.65669,-1.48499), (13.35486,-9.97949), (6.79957,1.03055), (8.76243,-2.79697), (14.59294,6.85977), (16.90609,4.73213), (10.50337,2.7815), (-0.07923,-2.46866), (13.51648,18.39425), (12.0676,-0.80378), (0.86482,-0.22982), (9.03563,-16.11608), (5.38751,3.0862), (17.16866,3.20779), (2.78702,10.50146), (11.15548,-0.21305), (12.30843,11.21012), (8.04897,-0.99825), (9.95814,18.39633), (11.29308,-3.39003), (14.13032,-0.64411), (21.05877,-1.39932), (3.57386,15.45319), (7.96631,-0.66044), (3.30484,-15.2223), (18.61856,-34.39907), (16.35184,-3.57836), (7.65236,16.82828), (18.02895,1.66624), (9.79458,15.43475), (16.7274,8.17776), (8.84453,5.50486), (13.05709,10.43082), (10.91447,-6.63332), (8.40171,2.28008), (16.95211,16.37203), (11.82194,5.16313), (19.87978,-8.85281), (12.88455,13.26692), (-0.00947,-7.46842), (12.28109,8.43091), (6.96462,-13.18172), (13.75282,-0.72401), (14.39141,22.3881), (11.07193,10.65448), (12.88039,2.81289), (11.38253,10.92405), (21.02707,-8.95358), (7.51955,19.80653), (6.31984,-12.86527), 
(15.6543,5.38826), (14.80315,-6.83501), (8.38024,-15.7647), (21.7516,-27.67412), (14.31336,8.6499), (15.04703,-4.89542), (5.73787,16.76167), (13.16911,12.84284), (12.40695,-17.27324), (9.88968,-4.18726), (8.46703,-14.62366), (8.70637,-5.49863), (8.03551,-16.22846), (5.9757,10.60329), (12.22951,6.46781), (3.14736,1.70458), (10.51266,10.77448), (18.593,0.8463), (10.82213,13.0482), (7.14216,-4.36264), (6.81154,3.22647), (-0.6486,2.38828), (20.56136,6.7946), (11.35367,-0.25254), (11.38205,1.2497), (17.14,1.6544), (14.91215,4.1019), (15.50207,11.27839), (5.93162,-5.04127), (3.74869,18.11674), (14.11532,0.51231), (7.38954,-0.51029), (5.45764,13.52556), (18.33733,16.10171), (9.91923,5.68197), (2.38991,-2.85904), (14.16756,-8.89167), (2.39791,6.24489), (6.92586,10.85319), (5.32474,-0.39816), (2.28812,3.87079), (5.71718,-3.1867), (5.84197,1.55322), (2.76206,16.86779), (19.05928,-14.60321), (11.51788,-1.81952), (6.56648,-3.11624), (3.35735,1.24193), (7.55948,10.18179), (19.99908,4.69796), (13.00634,0.69032), (18.36886,11.7723), (11.14675,7.62896), (16.72931,9.89741), (12.50106,9.11484), (6.00605,-3.84676), (23.06653,-0.4777), (5.39694,0.95958), (9.53167,-7.95056), (12.76944,-10.97474), (7.20604,-6.54861), (13.25391,34.74933), (13.7341,27.39463), (10.85292,4.18299), (-7.75835,6.02476), (10.29728,-1.99397), (13.70099,1.26478), (10.17959,23.37106), (9.98399,10.49682), (12.69389,-11.04354), (-0.28848,-12.22284), (-2.18319,-9.87635), (13.36378,28.90511), (10.09232,6.77613), (5.49489,0.55352), (5.46156,0.37031), (0.94225,7.1418), (12.79205,3.24897), (10.09593,-1.60918), (6.06218,3.1675), (0.89463,-17.97072), (11.88986,-5.61743), (10.79733,14.1422), (1.51371,14.87695), (2.20967,-4.65961), (15.45732,-0.99174), (16.5262,-2.96623), (5.99724,-9.02263), (8.3613,-17.2088), (15.68183,2.78608), (15.32117,6.74239), (14.15674,4.8524), (6.64553,7.46731), (4.20777,1.04894), (-0.10521,-12.8023), (-0.88169,-17.18188), (1.85913,-5.08801), (9.73673,22.13942), (0.30926,-0.36384), 
(6.17559,17.80564), (11.76602,7.67504), (5.68385,1.59779), (14.57088,4.10942), (12.81509,0.61074), (9.85682,-14.40767), (12.06376,10.59906), (6.08874,16.57017), (11.63921,-15.17526), (14.86722,-6.98549), (10.41035,-0.64548), (2.93794,3.23756), (12.21841,14.65504), (0.23804,4.583), (3.14845,12.72378), (7.29748,5.26547), (3.06134,0.81781), (13.77684,9.38273), (16.21992,10.37636), (5.33511,10.70325), (9.68959,-0.83043), (9.44169,-7.53149), (18.08012,-9.09147), (4.04224,-19.51381), (8.77918,-28.44508), (10.18324,6.44392), (9.38914,11.10201), (11.76995,-2.86184), (14.19963,8.30673), (6.88817,8.8797), (16.56123,10.68053), (15.39885,15.62919), (5.21241,8.00579), (4.44408,6.4651), (17.87587,-4.50029), (12.53337,18.04514), (13.60916,11.12996), (6.60104,-5.14007), (7.35453,9.43857), (18.61572,3.13476), (6.10437,4.9772), (13.08682,-17.45782), (12.15404,0.05552), (4.90789,-1.90283), (2.13353,2.67908), (12.49593,-2.62243), (11.93056,-3.22767), (13.29408,-8.70222), (5.70038,-23.11605), (8.40271,21.6757), (5.19456,12.70076), (-5.51028,4.4322), (14.0329,11.69344), (10.38365,9.18052), (6.56812,-2.2549), (4.21129,-2.15615), (9.7157,20.29765), (9.88553,-0.29536), (13.45346,15.50109), (4.97752,8.79187), (12.77595,5.11533), (8.56465,-20.44436), (4.27703,-3.00909), (18.12502,-4.48291), (12.45735,21.84462), (12.42912,1.94225), (12.08125,-2.81908), (10.85779,17.19418), (4.36013,-9.33528), (11.85062,-0.17346), (8.47776,0.03958), (9.60822,-35.17786), (11.3069,8.36887), (14.25525,-9.02292), (1.55168,-10.98804), (14.57782,0.29335), (7.84786,4.29634), (9.87774,3.87718), (14.75575,-9.08532), (3.68774,7.13922), (9.37667,-7.62463), (20.28676,-10.5666), (12.10027,4.68165), (8.01819,-3.30172), (18.78158,13.04852), (20.85402,13.45616), (18.98069,2.41043), (16.1429,-0.36501), (9.24047,-15.67383), (14.12487,17.92217), (10.18841,8.42106), (-3.04478,3.22063), (5.7552,-7.31753), (9.30376,21.99596), (11.42837,-36.8273), (6.02364,-20.46391), (8.86984,5.74179), (10.91177,-15.83178), (10.04418,14.90454), 
(18.10774,-8.84645), (7.49384,3.72036), (9.11556,4.6877), (9.7051,16.35418), (5.23268,3.15441), (9.04647,2.39907), (8.81547,-17.58664), (2.65098,-13.18269); -SELECT 14.971190998235835, 5.898143508382202e-44; +SELECT '14.971190998235835', '5.898143508382202e-44'; SELECT roundBankers(WelchTTest(left, right).1, 16) as t_stat, roundBankers(WelchTTest(left, right).2, 16) as p_value from welch_ttest; DROP TABLE IF EXISTS welch_ttest; From f9204135d8212640803fc7fdd5ce69e8902fd176 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 16:54:02 +0300 Subject: [PATCH 066/174] add lgammal.c --- base/glibc-compatibility/musl/lgammal.c | 340 ++++++++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 base/glibc-compatibility/musl/lgammal.c diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c new file mode 100644 index 00000000000..cc4a5e3b54d --- /dev/null +++ b/base/glibc-compatibility/musl/lgammal.c @@ -0,0 +1,340 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* lgammal(x) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * + * 4. 
For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = pi/sin(pi*x), + * we have + * G(x) = pi/(sin(pi*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(pi*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); + * Note: one should avoid compute pi*(-x) directly in the + * computation of sin(pi*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1)=lgamma(2)=0 + * lgamma(x) ~ -log(x) for tiny x + * lgamma(0) = lgamma(inf) = inf + * lgamma(-integer) = +-inf + * + */ + +static const long double pi = 3.14159265358979323846264L, + +/* lgam(1+x) = 0.5 x + x a(x)/b(x) + -0.268402099609375 <= x <= 0 + peak relative error 6.6e-22 */ +a0 = -6.343246574721079391729402781192128239938E2L, +a1 = 1.856560238672465796768677717168371401378E3L, +a2 = 2.404733102163746263689288466865843408429E3L, +a3 = 8.804188795790383497379532868917517596322E2L, +a4 = 1.135361354097447729740103745999661157426E2L, +a5 = 3.766956539107615557608581581190400021285E0L, + +b0 = 8.214973713960928795704317259806842490498E3L, +b1 = 1.026343508841367384879065363925870888012E4L, +b2 = 4.553337477045763320522762343132210919277E3L, +b3 = 8.506975785032585797446253359230031874803E2L, +b4 = 6.042447899703295436820744186992189445813E1L, +/* b5 = 1.000000000000000000000000000000000000000E0 */ + + +tc = 1.4616321449683623412626595423257213284682E0L, +tf = -1.2148629053584961146050602565082954242826E-1, /* double precision */ +/* tt = (tail of tf), i.e. tf + tt has extended precision. 
*/ +tt = 3.3649914684731379602768989080467587736363E-18L, +/* lgam ( 1.4616321449683623412626595423257213284682E0 ) = +-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */ + +/* lgam (x + tc) = tf + tt + x g(x)/h(x) + -0.230003726999612341262659542325721328468 <= x + <= 0.2699962730003876587373404576742786715318 + peak relative error 2.1e-21 */ +g0 = 3.645529916721223331888305293534095553827E-18L, +g1 = 5.126654642791082497002594216163574795690E3L, +g2 = 8.828603575854624811911631336122070070327E3L, +g3 = 5.464186426932117031234820886525701595203E3L, +g4 = 1.455427403530884193180776558102868592293E3L, +g5 = 1.541735456969245924860307497029155838446E2L, +g6 = 4.335498275274822298341872707453445815118E0L, + +h0 = 1.059584930106085509696730443974495979641E4L, +h1 = 2.147921653490043010629481226937850618860E4L, +h2 = 1.643014770044524804175197151958100656728E4L, +h3 = 5.869021995186925517228323497501767586078E3L, +h4 = 9.764244777714344488787381271643502742293E2L, +h5 = 6.442485441570592541741092969581997002349E1L, +/* h6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam (x+1) = -0.5 x + x u(x)/v(x) + -0.100006103515625 <= x <= 0.231639862060546875 + peak relative error 1.3e-21 */ +u0 = -8.886217500092090678492242071879342025627E1L, +u1 = 6.840109978129177639438792958320783599310E2L, +u2 = 2.042626104514127267855588786511809932433E3L, +u3 = 1.911723903442667422201651063009856064275E3L, +u4 = 7.447065275665887457628865263491667767695E2L, +u5 = 1.132256494121790736268471016493103952637E2L, +u6 = 4.484398885516614191003094714505960972894E0L, + +v0 = 1.150830924194461522996462401210374632929E3L, +v1 = 3.399692260848747447377972081399737098610E3L, +v2 = 3.786631705644460255229513563657226008015E3L, +v3 = 1.966450123004478374557778781564114347876E3L, +v4 = 4.741359068914069299837355438370682773122E2L, +v5 = 4.508989649747184050907206782117647852364E1L, +/* v6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam (x+2) = .5 x + x 
s(x)/r(x) + 0 <= x <= 1 + peak relative error 7.2e-22 */ +s0 = 1.454726263410661942989109455292824853344E6L, +s1 = -3.901428390086348447890408306153378922752E6L, +s2 = -6.573568698209374121847873064292963089438E6L, +s3 = -3.319055881485044417245964508099095984643E6L, +s4 = -7.094891568758439227560184618114707107977E5L, +s5 = -6.263426646464505837422314539808112478303E4L, +s6 = -1.684926520999477529949915657519454051529E3L, + +r0 = -1.883978160734303518163008696712983134698E7L, +r1 = -2.815206082812062064902202753264922306830E7L, +r2 = -1.600245495251915899081846093343626358398E7L, +r3 = -4.310526301881305003489257052083370058799E6L, +r4 = -5.563807682263923279438235987186184968542E5L, +r5 = -3.027734654434169996032905158145259713083E4L, +r6 = -4.501995652861105629217250715790764371267E2L, +/* r6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2) + x >= 8 + Peak relative error 1.51e-21 +w0 = LS2PI - 0.5 */ +w0 = 4.189385332046727417803e-1L, +w1 = 8.333333333333331447505E-2L, +w2 = -2.777777777750349603440E-3L, +w3 = 7.936507795855070755671E-4L, +w4 = -5.952345851765688514613E-4L, +w5 = 8.412723297322498080632E-4L, +w6 = -1.880801938119376907179E-3L, +w7 = 4.885026142432270781165E-3L; + +/* sin(pi*x) assuming x > 2^-1000, if sin(pi*x)==0 the sign is arbitrary */ +static long double sin_pi(long double x) +{ + int n; + + /* spurious inexact if odd int */ + x *= 0.5; + x = 2.0*(x - floorl(x)); /* x mod 2.0 */ + + n = (int)(x*4.0); + n = (n+1)/2; + x -= n*0.5f; + x *= pi; + + switch (n) { + default: /* case 4: */ + case 0: return __sinl(x, 0.0, 0); + case 1: return __cosl(x, 0.0); + case 2: return __sinl(-x, 0.0, 0); + case 3: return -__cosl(x, 0.0); + } +} + +#include +#include + +long double __lgammal_r(long double x, int *sg) { + long double t, y, z, nadj, p, p1, p2, q, r, w; + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; + int sign = u.i.se >> 15; + int i; + + *sg = 1; + + 
/* purge off +-inf, NaN, +-0, tiny and negative arguments */ + if (ix >= 0x7fff0000) + return x * x; + if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */ + if (sign) { + *sg = -1; + x = -x; + } + return -logl(x); + } + if (sign) { + x = -x; + t = sin_pi(x); + if (t == 0.0) + return 1.0 / (x-x); /* -integer */ + if (t > 0.0) + *sg = -1; + else + t = -t; + nadj = logl(pi / (t * x)); + } + + /* purge off 1 and 2 (so the sign is ok with downward rounding) */ + if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) { + r = 0; + } else if (ix < 0x40008000) { /* x < 2.0 */ + if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */ + /* lgamma(x) = lgamma(x+1) - log(x) */ + r = -logl(x); + if (ix >= 0x3ffebb4a) { /* 7.31597900390625e-1 */ + y = x - 1.0; + i = 0; + } else if (ix >= 0x3ffced33) { /* 2.31639862060546875e-1 */ + y = x - (tc - 1.0); + i = 1; + } else { /* x < 0.23 */ + y = x; + i = 2; + } + } else { + r = 0.0; + if (ix >= 0x3fffdda6) { /* 1.73162841796875 */ + /* [1.7316,2] */ + y = x - 2.0; + i = 0; + } else if (ix >= 0x3fff9da6) { /* 1.23162841796875 */ + /* [1.23,1.73] */ + y = x - tc; + i = 1; + } else { + /* [0.9, 1.23] */ + y = x - 1.0; + i = 2; + } + } + switch (i) { + case 0: + p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5)))); + p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y)))); + r += 0.5 * y + y * p1/p2; + break; + case 1: + p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6))))); + p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y))))); + p = tt + y * p1/p2; + r += (tf + p); + break; + case 2: + p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6)))))); + p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y))))); + r += (-0.5 * y + p1 / p2); + } + } else if (ix < 0x40028000) { /* 8.0 */ + /* x < 8.0 */ + i = (int)x; + y = x - (double)i; + p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6)))))); + q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 
+ y * (r6 + y)))))); + r = 0.5 * y + p / q; + z = 1.0; + /* lgamma(1+s) = log(s) + lgamma(s) */ + switch (i) { + case 7: + z *= (y + 6.0); /* FALLTHRU */ + case 6: + z *= (y + 5.0); /* FALLTHRU */ + case 5: + z *= (y + 4.0); /* FALLTHRU */ + case 4: + z *= (y + 3.0); /* FALLTHRU */ + case 3: + z *= (y + 2.0); /* FALLTHRU */ + r += logl(z); + break; + } + } else if (ix < 0x40418000) { /* 2^66 */ + /* 8.0 <= x < 2**66 */ + t = logl(x); + z = 1.0 / x; + y = z * z; + w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7)))))); + r = (x - 0.5) * (t - 1.0) + w; + } else /* 2**66 <= x <= inf */ + r = x * (logl(x) - 1.0); + if (sign) + r = nadj - r; + return r; +} + +int signgam; + +long double lgammal(long double x) +{ + return lgammal_r(x, &signgam); +} + From 252b9b2ec1212ca4569f56b2012d69cc83c8606b Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 16:54:55 +0300 Subject: [PATCH 067/174] better --- base/glibc-compatibility/musl/lgammal.c | 26 ++----------------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index cc4a5e3b54d..1e720163a5d 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -187,31 +187,9 @@ w5 = 8.412723297322498080632E-4L, w6 = -1.880801938119376907179E-3L, w7 = 4.885026142432270781165E-3L; -/* sin(pi*x) assuming x > 2^-1000, if sin(pi*x)==0 the sign is arbitrary */ -static long double sin_pi(long double x) -{ - int n; - - /* spurious inexact if odd int */ - x *= 0.5; - x = 2.0*(x - floorl(x)); /* x mod 2.0 */ - - n = (int)(x*4.0); - n = (n+1)/2; - x -= n*0.5f; - x *= pi; - - switch (n) { - default: /* case 4: */ - case 0: return __sinl(x, 0.0, 0); - case 1: return __cosl(x, 0.0); - case 2: return __sinl(-x, 0.0, 0); - case 3: return -__cosl(x, 0.0); - } -} - #include #include +#include "libm.h" long double __lgammal_r(long double x, int *sg) { long double t, 
y, z, nadj, p, p1, p2, q, r, w; @@ -234,7 +212,7 @@ long double __lgammal_r(long double x, int *sg) { } if (sign) { x = -x; - t = sin_pi(x); + t = sin(pi * x); if (t == 0.0) return 1.0 / (x-x); /* -integer */ if (t > 0.0) From eb8d8f6ba40f8968152b7e0a5a451e4978c85e45 Mon Sep 17 00:00:00 2001 From: feng lv Date: Wed, 14 Oct 2020 23:02:51 +0800 Subject: [PATCH 068/174] interval op support string literal --- src/Parsers/ExpressionListParsers.cpp | 42 +++++++++++++++++++++++++-- src/Parsers/ExpressionListParsers.h | 2 ++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 26affe020b1..ab1f7abf4a1 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -645,6 +645,14 @@ bool ParserTimestampOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expe return true; } +bool ParserIntervalOperatorExpression::stringToIntervalKind(const String & literal, ASTPtr & number, IntervalKind & interval_kind) +{ + Tokens tokens(literal.data(), literal.data() + literal.size()); + Pos pos(tokens, 0); + Expected expected; + return (ParserNumber().parse(pos, number, expected) && parseIntervalKind(pos, expected, interval_kind)); +} + bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto begin = pos; @@ -653,12 +661,41 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec if (!ParserKeyword("INTERVAL").ignore(pos, expected)) return next_parser.parse(pos, node, expected); + ASTPtr string_literal; + if (ParserStringLiteral().parse(pos, string_literal, expected)) + { + String literal; + if (string_literal->as().value.tryGet(literal)) + { + IntervalKind interval_kind; + ASTPtr number; + + if (!stringToIntervalKind(literal, number, interval_kind)) + return false; + auto function = std::make_shared(); + + /// function arguments + auto exp_list = std::make_shared(); + + /// the first 
argument of the function is the previous element, the second is the next one + function->name = interval_kind.toNameOfFunctionToIntervalDataType(); + function->arguments = exp_list; + function->children.push_back(exp_list); + + exp_list->children.push_back(number); + + node = function; + return true; + } + } + ASTPtr expr; + /// Any expression can be inside, because operator surrounds it. if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) { - pos = begin; - return next_parser.parse(pos, node, expected); + pos = begin; + return next_parser.parse(pos, node, expected); } IntervalKind interval_kind; @@ -729,3 +766,4 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } } + diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 93a47648a0b..72961f700fd 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -5,6 +5,7 @@ #include #include +#include namespace DB { @@ -232,6 +233,7 @@ protected: const char * getName() const override { return "INTERVAL operator expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool stringToIntervalKind(const String & literal, ASTPtr & number, IntervalKind & interval_kind); }; class ParserAdditiveExpression : public IParserBase From 7b69592e496b2ad8cc5ac75dc408e0030c88f8ed Mon Sep 17 00:00:00 2001 From: feng lv Date: Wed, 14 Oct 2020 23:08:47 +0800 Subject: [PATCH 069/174] add test fix fix --- src/Parsers/ExpressionListParsers.cpp | 9 ++++----- ...23_interval_operator_support_string_literal.reference | 6 ++++++ .../01523_interval_operator_support_string_literal.sql | 6 ++++++ 3 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference create mode 100644 tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql diff --git a/src/Parsers/ExpressionListParsers.cpp 
b/src/Parsers/ExpressionListParsers.cpp index ab1f7abf4a1..4f4b97eff2d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -670,14 +670,14 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec IntervalKind interval_kind; ASTPtr number; + /// parse function arguments and interval kind from string literal if (!stringToIntervalKind(literal, number, interval_kind)) return false; + auto function = std::make_shared(); - /// function arguments auto exp_list = std::make_shared(); - /// the first argument of the function is the previous element, the second is the next one function->name = interval_kind.toNameOfFunctionToIntervalDataType(); function->arguments = exp_list; function->children.push_back(exp_list); @@ -690,12 +690,11 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec } ASTPtr expr; - /// Any expression can be inside, because operator surrounds it. if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) { - pos = begin; - return next_parser.parse(pos, node, expected); + pos = begin; + return next_parser.parse(pos, node, expected); } IntervalKind interval_kind; diff --git a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference new file mode 100644 index 00000000000..5ee4e7592f6 --- /dev/null +++ b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference @@ -0,0 +1,6 @@ +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql new file mode 100644 index 00000000000..ce418e13e9f --- /dev/null +++ b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql @@ -0,0 +1,6 @@ +SELECT INTERVAL 2 day; +SELECT INTERVAL '2 day'; +SELECT INTERVAL 2 hour; +SELECT 
INTERVAL '2 hour'; +SELECT INTERVAL 2 minute; +SELECT INTERVAL '2 minute'; From f8852fcced01b368dcc6984e610d976494a61fe7 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 14 Oct 2020 20:01:03 +0300 Subject: [PATCH 070/174] Update 01150_ddl_guard_rwr.sh --- tests/queries/0_stateless/01150_ddl_guard_rwr.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01150_ddl_guard_rwr.sh b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh index c14e4c38f54..43804075938 100755 --- a/tests/queries/0_stateless/01150_ddl_guard_rwr.sh +++ b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh @@ -39,5 +39,6 @@ timeout 20 bash -c 'thread_rename' & wait sleep 1 +$CLICKHOUSE_CLIENT --query "DETACH DATABASE IF EXISTS test_01150" $CLICKHOUSE_CLIENT --query "ATTACH DATABASE IF NOT EXISTS test_01150" $CLICKHOUSE_CLIENT --query "DROP DATABASE test_01150"; From ff7601a52cea90d5594a35969417747219dd9585 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 21:41:35 +0300 Subject: [PATCH 071/174] add python test and fix build --- base/glibc-compatibility/musl/lgammal.c | 4 +- .../AggregateFunctionStudentTTest.cpp | 2 +- .../AggregateFunctionStudentTTest.h | 28 ++--- .../AggregateFunctionWelchTTest.cpp | 2 +- .../AggregateFunctionWelchTTest.h | 8 +- .../queries/0_stateless/01322_ttest_scipy.py | 108 ++++++++++++++++++ .../0_stateless/01322_ttest_scipy.reference | 0 7 files changed, 126 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/01322_ttest_scipy.py create mode 100644 tests/queries/0_stateless/01322_ttest_scipy.reference diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index 1e720163a5d..534abf41894 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -309,10 +309,10 @@ long double __lgammal_r(long double x, int *sg) { return r; } -int signgam; +int signgam_lgammal; long double lgammal(long double x) { - return lgammal_r(x, 
&signgam); + return lgammal_r(x, &signgam_lgammal); } diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp index b6f32409946..b03e961a24a 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -50,7 +50,7 @@ AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & nam void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("StudentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h index ac05a11d334..2f3d35dbeab 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.h +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -98,8 +98,8 @@ struct AggregateFunctionStudentTTestData final Float64 getSSquared() const { - /// TODO: Update comment with Tex. - /// The original formulae looks like ... + /// The original formulae looks like + /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2} /// But we made some mathematical transformations not to store original sequences. /// Also we dropped sqrt, because later it will be squared later. 
const Float64 all_x = square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x; @@ -110,26 +110,19 @@ struct AggregateFunctionStudentTTestData final Float64 getTStatisticSquared() const { - if (size_x == 0 || size_y == 0) - { - throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); - } - return std::pow(mean_x - mean_y, 2) / getStandartErrorSquared(); } Float64 getTStatistic() const { - if (size_x == 0 || size_y == 0) - { - throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); - } - return (mean_x - mean_y) / std::sqrt(getStandartErrorSquared()); } Float64 getStandartErrorSquared() const { + if (size_x == 0 || size_y == 0) + throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); + return getSSquared() * (1.0 / static_cast(size_x) + 1.0 / static_cast(size_y)); } @@ -138,9 +131,10 @@ struct AggregateFunctionStudentTTestData final return static_cast(size_x + size_y - 2); } - static Float64 integrateSimpson(Float64 a, Float64 b, std::function func, size_t iterations = 1e6) + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func) { - double h = (b - a) / iterations; + const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b))); + const long double h = (b - a) / iterations; Float64 sum_odds = 0.0; for (size_t i = 1; i < iterations; i += 2) sum_odds += func(a + i * h); @@ -154,13 +148,9 @@ struct AggregateFunctionStudentTTestData final { const Float64 v = getDegreesOfFreedom(); const Float64 t = getTStatisticSquared(); - std::cout << "getDegreesOfFreedom() " << getDegreesOfFreedom() << std::endl; - std::cout << "getTStatisticSquared() " << getTStatisticSquared() << std::endl; auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); }; Float64 numenator = integrateSimpson(0, v / (t + v), f); Float64 denominator = std::exp(std::lgammal(v/2) + 
std::lgammal(0.5) - std::lgammal(v/2 + 0.5)); - std::cout << "numenator " << numenator << std::endl; - std::cout << "denominator " << denominator << std::endl; return numenator / denominator; } @@ -184,7 +174,7 @@ public: String getName() const override { - return "StudentTTest"; + return "studentTTest"; } DataTypePtr getReturnType() const override diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index d9fce97680c..00607171c41 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -50,7 +50,7 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { - factory.registerFunction("WelchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 36641b826b1..3332a6c363e 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -142,8 +142,9 @@ struct AggregateFunctionWelchTTestData final return numerator / (denominator_first + denominator_second); } - static Float64 integrateSimpson(Float64 a, Float64 b, std::function func, size_t iterations = 1e6) + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func) { + size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b))); double h = (b - a) / iterations; Float64 sum_odds = 0.0; for (size_t i = 1; i < iterations; i += 2) @@ -170,7 +171,8 @@ struct AggregateFunctionWelchTTestData final } }; -/// Returns p-value +/// Returns tuple of (t-statistic, p-value) +/// 
https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf template class AggregateFunctionWelchTTest : public IAggregateFunctionDataHelper,AggregateFunctionWelchTTest> @@ -183,7 +185,7 @@ public: String getName() const override { - return "WelchTTest"; + return "welchTTest"; } DataTypePtr getReturnType() const override diff --git a/tests/queries/0_stateless/01322_ttest_scipy.py b/tests/queries/0_stateless/01322_ttest_scipy.py new file mode 100644 index 00000000000..d8255cd8062 --- /dev/null +++ b/tests/queries/0_stateless/01322_ttest_scipy.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +import os +import io +import sys +import requests +import time +import pandas as pd +import numpy as np +from scipy import stats + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') +CLICKHOUSE_SERVER_URL_STR = 'http://' + ':'.join(str(s) for s in [CLICKHOUSE_HOST, CLICKHOUSE_PORT_HTTP]) + "/" + +class ClickHouseClient: + def __init__(self, host = CLICKHOUSE_SERVER_URL_STR): + self.host = host + + def query(self, query, connection_timeout = 1500): + NUMBER_OF_TRIES = 30 + DELAY = 10 + + for i in range(NUMBER_OF_TRIES): + r = requests.post( + self.host, + params = {'timeout_before_checking_execution_speed': 120, 'max_execution_time': 6000}, + timeout = connection_timeout, + data = query) + if r.status_code == 200: + return r.text + else: + print('ATTENTION: try #%d failed' % i) + if i != (NUMBER_OF_TRIES-1): + print(query) + print(r.text) + time.sleep(DELAY*(i+1)) + else: + raise ValueError(r.text) + + def query_return_df(self, query, connection_timeout = 1500): + data = self.query(query, connection_timeout) + df = pd.read_csv(io.StringIO(data), sep = '\t') + return df + + def query_with_data(self, query, content): + content = content.encode('utf-8') + r = requests.post(self.host, data=content) + result = r.text + if r.status_code == 200: + return result + else: + raise 
ValueError(r.text) + +def test_and_check(name, a, b, t_stat, p_value): + client = ClickHouseClient() + client.query("DROP TABLE IF EXISTS ttest;") + client.query("CREATE TABLE ttest (left Float64, right Float64) ENGINE = Memory;"); + client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, j) for i,j in zip(a, b)]))) + + real = client.query_return_df( + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ttest FORMAT TabSeparatedWithNames;") + real_t_stat = real['t_stat'][0] + real_p_value = real['p_value'][0] + assert(abs(real_t_stat - np.float64(t_stat) < 1e-4)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert(abs(real_p_value - np.float64(p_value)) < 1e-4), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + client.query("DROP TABLE IF EXISTS ttest;") + + +def test_student(): + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 5) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=65536), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + +def test_welch(): + rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 5) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 5) + rvs2 = 
np.round(stats.norm.rvs(loc=0, scale=3,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=65536), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + +if __name__ == "__main__": + test_student() + test_welch() + print("Ok.") \ No newline at end of file diff --git a/tests/queries/0_stateless/01322_ttest_scipy.reference b/tests/queries/0_stateless/01322_ttest_scipy.reference new file mode 100644 index 00000000000..e69de29bb2d From 1f200f8bd20a08f7180a00bfc77aaf56a5169aba Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 21:43:38 +0300 Subject: [PATCH 072/174] add reference --- tests/queries/0_stateless/01322_ttest_scipy.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01322_ttest_scipy.reference b/tests/queries/0_stateless/01322_ttest_scipy.reference index e69de29bb2d..587579af915 100644 --- a/tests/queries/0_stateless/01322_ttest_scipy.reference +++ b/tests/queries/0_stateless/01322_ttest_scipy.reference @@ -0,0 +1 @@ +Ok. 
From 575354a6d39c91d37fd8ed3795c78c97985a946c Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 21:52:23 +0300 Subject: [PATCH 073/174] add scipy to dockerfile --- docker/test/stateless/Dockerfile | 3 +++ docker/test/stateless_unbundled/Dockerfile | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 33eb1c29103..ca7ecf9cb9c 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update -y \ python3-lxml \ python3-requests \ python3-termcolor \ + python3-pip \ qemu-user-static \ sudo \ telnet \ @@ -23,6 +24,8 @@ RUN apt-get update -y \ unixodbc \ wget +RUN pip3 install numpy, scipy, pandas + RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ diff --git a/docker/test/stateless_unbundled/Dockerfile b/docker/test/stateless_unbundled/Dockerfile index f2fd28e4078..2f4663fa061 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -58,6 +58,7 @@ RUN apt-get --allow-unauthenticated update -y \ python3-lxml \ python3-requests \ python3-termcolor \ + python3-pip \ qemu-user-static \ sudo \ telnet \ @@ -66,7 +67,9 @@ RUN apt-get --allow-unauthenticated update -y \ unixodbc \ unixodbc-dev \ wget \ - zlib1g-dev + zlib1g-dev\ + +RUN pip3 install numpy, scipy, pandas RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ From 2d29eab934a6543653ebd1381215a9bf762452af Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 22:00:20 +0300 Subject: [PATCH 074/174] better --- docker/test/stateless_unbundled/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless_unbundled/Dockerfile 
b/docker/test/stateless_unbundled/Dockerfile index 2f4663fa061..528f1e0520c 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get --allow-unauthenticated update -y \ unixodbc \ unixodbc-dev \ wget \ - zlib1g-dev\ + zlib1g-dev RUN pip3 install numpy, scipy, pandas From b1e15530614f29320cd0b21541d00e3fc3523515 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 14 Oct 2020 22:43:08 +0300 Subject: [PATCH 075/174] style + docker + tests --- docker/test/stateless/Dockerfile | 2 +- docker/test/stateless_unbundled/Dockerfile | 2 +- src/AggregateFunctions/AggregateFunctionStudentTTest.cpp | 4 ---- src/AggregateFunctions/AggregateFunctionStudentTTest.h | 2 +- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 7 ------- src/AggregateFunctions/AggregateFunctionWelchTTest.h | 2 +- tests/queries/0_stateless/01322_ttest_scipy.sh | 8 ++++++++ 7 files changed, 12 insertions(+), 15 deletions(-) create mode 100755 tests/queries/0_stateless/01322_ttest_scipy.sh diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index ca7ecf9cb9c..8f2c7358bb8 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get update -y \ unixodbc \ wget -RUN pip3 install numpy, scipy, pandas +RUN pip3 install numpy scipy pandas RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless_unbundled/Dockerfile b/docker/test/stateless_unbundled/Dockerfile index 528f1e0520c..345ba905412 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -69,7 +69,7 @@ RUN apt-get --allow-unauthenticated update -y \ wget \ zlib1g-dev -RUN pip3 install numpy, scipy, pandas +RUN pip3 install numpy scipy pandas RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar 
--strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp index b03e961a24a..a2c36e43488 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -11,7 +11,6 @@ namespace ErrorCodes { -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; } @@ -44,13 +43,10 @@ AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & nam return res; } - } - void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory) { factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive); } - } diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h index 2f3d35dbeab..2a4ec40e3c1 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.h +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -163,7 +163,7 @@ struct AggregateFunctionStudentTTestData final /// Returns tuple of (t-statistic, p-value) /// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf template -class AggregateFunctionStudentTTest : +class AggregateFunctionStudentTTest : public IAggregateFunctionDataHelper,AggregateFunctionStudentTTest> { diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 00607171c41..483c99dde9b 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -6,12 +6,8 @@ #include #include - -// the return type is boolean (we use UInt8 as we do not have boolean in clickhouse) - namespace ErrorCodes { -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; } @@ -44,13 +40,10 @@ 
AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, return res; } - } - void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) { factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive); } - } diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index 3332a6c363e..b262ebb70af 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -174,7 +174,7 @@ struct AggregateFunctionWelchTTestData final /// Returns tuple of (t-statistic, p-value) /// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf template -class AggregateFunctionWelchTTest : +class AggregateFunctionWelchTTest : public IAggregateFunctionDataHelper,AggregateFunctionWelchTTest> { diff --git a/tests/queries/0_stateless/01322_ttest_scipy.sh b/tests/queries/0_stateless/01322_ttest_scipy.sh new file mode 100755 index 00000000000..10dc79614d4 --- /dev/null +++ b/tests/queries/0_stateless/01322_ttest_scipy.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/01322_ttest_scipy.py \ No newline at end of file From b931a3c9dad0375415cee6f511af831eb9b198ec Mon Sep 17 00:00:00 2001 From: feng lv Date: Thu, 15 Oct 2020 15:18:38 +0800 Subject: [PATCH 076/174] fix update test fix --- src/Parsers/ExpressionElementParsers.cpp | 24 +++--- src/Parsers/ExpressionListParsers.cpp | 80 +++++++++---------- src/Parsers/ExpressionListParsers.h | 2 +- ..._operator_support_string_literal.reference | 19 +++++ ...terval_operator_support_string_literal.sql | 19 +++++ 5 files changed, 87 insertions(+), 57 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1d861c6d78a..b26e73287d0 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -789,6 +789,7 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp ++pos; IntervalKind interval_kind; + ASTPtr interval_func_node; if (parseIntervalKind(pos, expected, interval_kind)) { /// function(unit, offset, timestamp) @@ -805,6 +806,13 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp if (!ParserExpression().parse(pos, timestamp_node, expected)) return false; + auto interval_expr_list_args = std::make_shared(); + interval_expr_list_args->children = {offset_node}; + + interval_func_node = std::make_shared(); + interval_func_node->as().name = interval_kind.toNameOfFunctionToIntervalDataType(); + interval_func_node->as().arguments = std::move(interval_expr_list_args); + interval_func_node->as().children.push_back(interval_func_node->as().arguments); } else { @@ -816,27 +824,13 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return false; ++pos; - if (!ParserKeyword("INTERVAL").ignore(pos, expected)) - return false; - - if (!ParserExpression().parse(pos, offset_node, expected)) - 
return false; - - if (!parseIntervalKind(pos, expected, interval_kind)) + if (!ParserIntervalOperatorExpression{}.parse(pos, interval_func_node, expected)) return false; } if (pos->type != TokenType::ClosingRoundBracket) return false; ++pos; - auto interval_expr_list_args = std::make_shared(); - interval_expr_list_args->children = {offset_node}; - - auto interval_func_node = std::make_shared(); - interval_func_node->name = interval_kind.toNameOfFunctionToIntervalDataType(); - interval_func_node->arguments = std::move(interval_expr_list_args); - interval_func_node->children.push_back(interval_func_node->arguments); - auto expr_list_args = std::make_shared(); expr_list_args->children = {timestamp_node, interval_func_node}; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4f4b97eff2d..d6678bb9a78 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -645,12 +645,45 @@ bool ParserTimestampOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expe return true; } -bool ParserIntervalOperatorExpression::stringToIntervalKind(const String & literal, ASTPtr & number, IntervalKind & interval_kind) +bool ParserIntervalOperatorExpression::parseArgumentAndIntervalKind( + Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected) { - Tokens tokens(literal.data(), literal.data() + literal.size()); - Pos pos(tokens, 0); - Expected expected; - return (ParserNumber().parse(pos, number, expected) && parseIntervalKind(pos, expected, interval_kind)); + auto begin = pos; + auto init_expected = expected; + ASTPtr string_literal; + //// A String literal followed INTERVAL keyword, + /// the literal can be a part of an expression or + /// include Number and INTERVAL TYPE at the same time + if (ParserStringLiteral{}.parse(pos, string_literal, expected)) + { + String literal; + if (string_literal->as().value.tryGet(literal)) + { + Tokens tokens(literal.data(), literal.data() + 
literal.size()); + Pos token_pos(tokens, 0); + Expected token_expected; + + if (!ParserNumber{}.parse(token_pos, expr, token_expected)) + return false; + else + { + /// case: INTERVAL '1' HOUR + /// back to begin + if (!token_pos.isValid()) + { + pos = begin; + expected = init_expected; + } + else + /// case: INTERVAL '1 HOUR' + return parseIntervalKind(token_pos, token_expected, interval_kind); + } + } + } + // case: INTERVAL expr HOUR + if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) + return false; + return parseIntervalKind(pos, expected, interval_kind); } bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -661,44 +694,9 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec if (!ParserKeyword("INTERVAL").ignore(pos, expected)) return next_parser.parse(pos, node, expected); - ASTPtr string_literal; - if (ParserStringLiteral().parse(pos, string_literal, expected)) - { - String literal; - if (string_literal->as().value.tryGet(literal)) - { - IntervalKind interval_kind; - ASTPtr number; - - /// parse function arguments and interval kind from string literal - if (!stringToIntervalKind(literal, number, interval_kind)) - return false; - - auto function = std::make_shared(); - - auto exp_list = std::make_shared(); - - function->name = interval_kind.toNameOfFunctionToIntervalDataType(); - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(number); - - node = function; - return true; - } - } - ASTPtr expr; - /// Any expression can be inside, because operator surrounds it. 
- if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - IntervalKind interval_kind; - if (!parseIntervalKind(pos, expected, interval_kind)) + if (!parseArgumentAndIntervalKind(pos, expr, interval_kind, expected)) { pos = begin; return next_parser.parse(pos, node, expected); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 72961f700fd..40efd0e02d2 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -233,7 +233,7 @@ protected: const char * getName() const override { return "INTERVAL operator expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool stringToIntervalKind(const String & literal, ASTPtr & number, IntervalKind & interval_kind); + bool parseArgumentAndIntervalKind(Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected); }; class ParserAdditiveExpression : public IParserBase diff --git a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference index 5ee4e7592f6..0451ef3afd5 100644 --- a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference +++ b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.reference @@ -4,3 +4,22 @@ 2 2 2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2009-02-14 00:31:30 +2009-02-14 00:31:30 +2009-02-14 00:31:30 +2009-02-15 23:31:30 +2009-02-15 23:31:30 +2009-02-15 23:31:30 diff --git a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql index ce418e13e9f..2af2ba4996e 100644 --- a/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql +++ b/tests/queries/0_stateless/01523_interval_operator_support_string_literal.sql @@ -1,6 
+1,25 @@ +SELECT INTERVAL 2 year; +SELECT INTERVAL '2' year; +SELECT INTERVAL '2 year'; +SELECT INTERVAL 2 month; +SELECT INTERVAL '2' month; +SELECT INTERVAL '2 month'; +SELECT INTERVAL 2 week; +SELECT INTERVAL '2' week; +SELECT INTERVAL '2 week'; SELECT INTERVAL 2 day; +SELECT INTERVAL '2' day; SELECT INTERVAL '2 day'; SELECT INTERVAL 2 hour; +SELECT INTERVAL '2' hour; SELECT INTERVAL '2 hour'; SELECT INTERVAL 2 minute; +SELECT INTERVAL '2' minute; SELECT INTERVAL '2 minute'; +SELECT INTERVAL '2' AS n minute; +SELECT DATE_ADD(hour, '1', toDateTime(1234567890, 'UTC')); +SELECT DATE_ADD(hour, 1, toDateTime(1234567890, 'UTC')); +SELECT DATE_ADD(hour, (SELECT 1), toDateTime(1234567890, 'UTC')); +SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL 2 day); +SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL '2 day'); +SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL '2' day); From fabe86c6d30907bdc2a4e370b4dfcc2c0face7bf Mon Sep 17 00:00:00 2001 From: feng lv Date: Thu, 15 Oct 2020 18:48:39 +0800 Subject: [PATCH 077/174] fix fix --- src/Parsers/ExpressionListParsers.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 40efd0e02d2..cf77b8b4da4 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -233,7 +233,9 @@ protected: const char * getName() const override { return "INTERVAL operator expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool parseArgumentAndIntervalKind(Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected); + +private: + static bool parseArgumentAndIntervalKind(Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected); }; class ParserAdditiveExpression : public IParserBase From 80f3de1359e119851f1de45e4250f4cf5f87c63d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 15 Oct 2020 20:39:04 +0800 Subject: [PATCH 078/174] 
ISSUES-15883 try fix collate name --- src/Parsers/MySQL/ASTAlterCommand.cpp | 6 +-- src/Parsers/MySQL/ASTDeclareColumn.cpp | 4 +- src/Parsers/MySQL/ASTDeclareOption.cpp | 39 +++++--------------- src/Parsers/MySQL/ASTDeclareOption.h | 4 +- src/Parsers/MySQL/ASTDeclareTableOptions.cpp | 8 ++-- 5 files changed, 21 insertions(+), 40 deletions(-) diff --git a/src/Parsers/MySQL/ASTAlterCommand.cpp b/src/Parsers/MySQL/ASTAlterCommand.cpp index b6f2b925de0..92461635265 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.cpp +++ b/src/Parsers/MySQL/ASTAlterCommand.cpp @@ -303,9 +303,9 @@ static inline bool parseOtherCommand(IParser::Pos & pos, ASTPtr & node, Expected OptionDescribe("ENABLE KEYS", "enable_keys", std::make_shared()), OptionDescribe("DISABLE KEYS", "enable_keys", std::make_shared()), /// TODO: with collate - OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), OptionDescribe("LOCK", "lock", std::make_shared()) } }; diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index 6d21f934858..3913c828ec3 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -51,8 +51,8 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), OptionDescribe("COMMENT", "comment", std::make_unique()), - OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), - OptionDescribe("COLLATE", "collate", std::make_unique()), + OptionDescribe("CHARACTER SET", "charset_name", 
std::make_unique()), + OptionDescribe("COLLATE", "collate", std::make_unique()), OptionDescribe("COLUMN_FORMAT", "column_format", std::make_unique()), OptionDescribe("STORAGE", "storage", std::make_unique()), OptionDescribe("AS", "generated", std::make_unique()), diff --git a/src/Parsers/MySQL/ASTDeclareOption.cpp b/src/Parsers/MySQL/ASTDeclareOption.cpp index 92ac5f0343e..17be639b630 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.cpp +++ b/src/Parsers/MySQL/ASTDeclareOption.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -94,41 +95,21 @@ bool ParserAlwaysFalse::parseImpl(IParser::Pos & /*pos*/, ASTPtr & node, Expecte return true; } -bool ParserCharsetName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &) +bool ParserCharsetOrCollateName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /// Identifier in backquotes or in double quotes - if (pos->type == TokenType::QuotedIdentifier) - { - ReadBufferFromMemory buf(pos->begin, pos->size()); - String s; + ParserIdentifier p_identifier; + ParserStringLiteral p_string_literal; - if (*pos->begin == '`') - readBackQuotedStringWithSQLStyle(s, buf); - else - readDoubleQuotedStringWithSQLStyle(s, buf); - - if (s.empty()) /// Identifiers "empty string" are not allowed. 
- return false; - - node = std::make_shared(s); - ++pos; + if (p_identifier.parse(pos, node, expected)) return true; - } - else if (pos->type == TokenType::BareWord) + else { - const char * begin = pos->begin; - - while (true) + if (p_string_literal.parse(pos, node, expected)) { - if (!isWhitespaceASCII(*pos->end) && pos->type != TokenType::EndOfStream) - ++pos; - else - break; + const auto & string_value = node->as()->value.safeGet(); + node = std::make_shared(string_value); + return true; } - - node = std::make_shared(String(begin, pos->end)); - ++pos; - return true; } return false; diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 24800371061..2502618b209 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -61,10 +61,10 @@ public: /// Copy and paste from ParserIdentifier, /// the difference is that multiple tokens are glued if there is no whitespace ASCII between them -struct ParserCharsetName : public IParserBase +struct ParserCharsetOrCollateName : public IParserBase { protected: - const char * getName() const override { return "charset name"; } + const char * getName() const override { return "charset or collate name"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected &) override; }; diff --git a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp index 87b99cdf1ac..c903c7d2fa7 100644 --- a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp +++ b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp @@ -68,12 +68,12 @@ bool ParserDeclareTableOptions::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp { OptionDescribe("AUTO_INCREMENT", "auto_increment", std::make_shared()), OptionDescribe("AVG_ROW_LENGTH", "avg_row_length", std::make_shared()), - OptionDescribe("CHARSET", "character_set", std::make_shared()), - OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), - OptionDescribe("CHARACTER SET", "character_set", 
std::make_shared()), + OptionDescribe("CHARSET", "character_set", std::make_shared()), + OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), + OptionDescribe("CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("DEFAULT CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("CHECKSUM", "checksum", std::make_shared>()), - OptionDescribe("COLLATE", "collate", std::make_shared()), + OptionDescribe("COLLATE", "collate", std::make_shared()), OptionDescribe("DEFAULT COLLATE", "collate", std::make_shared()), OptionDescribe("COMMENT", "comment", std::make_shared()), OptionDescribe("COMPRESSION", "compression", std::make_shared()), From c8aa007a455372ab0b71a60f3405aea9beb5c5d6 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 15 Oct 2020 20:42:10 +0800 Subject: [PATCH 079/174] ISSUES-15883 modify comment --- src/Parsers/MySQL/ASTDeclareOption.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 2502618b209..a9529924567 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -59,8 +59,7 @@ public: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// Copy and paste from ParserIdentifier, -/// the difference is that multiple tokens are glued if there is no whitespace ASCII between them +/// identifier, string literal, binary keyword struct ParserCharsetOrCollateName : public IParserBase { protected: From a80bbf6e0eff4db2222776b18a5f1a578784ba82 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Thu, 15 Oct 2020 22:54:17 +0300 Subject: [PATCH 080/174] fasttest + rm .py --- docker/test/fasttest/Dockerfile | 3 +++ ...test_scipy.py => 01322_ttest_scipy.python} | 0 .../queries/0_stateless/01322_ttest_scipy.sh | 2 +- .../01521_skip_unused_shards_bugfix.sql | 25 +++++++++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) rename 
tests/queries/0_stateless/{01322_ttest_scipy.py => 01322_ttest_scipy.python} (100%) create mode 100644 tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 3cfa57bd747..23d7504230a 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -53,6 +53,7 @@ RUN apt-get update \ ninja-build \ psmisc \ python3 \ + python3-pip \ python3-lxml \ python3-requests \ python3-termcolor \ @@ -63,6 +64,8 @@ RUN apt-get update \ unixodbc \ --yes --no-install-recommends +RUN pip3 install numpy scipy pandas + # This symlink required by gcc to find lld compiler RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld diff --git a/tests/queries/0_stateless/01322_ttest_scipy.py b/tests/queries/0_stateless/01322_ttest_scipy.python similarity index 100% rename from tests/queries/0_stateless/01322_ttest_scipy.py rename to tests/queries/0_stateless/01322_ttest_scipy.python diff --git a/tests/queries/0_stateless/01322_ttest_scipy.sh b/tests/queries/0_stateless/01322_ttest_scipy.sh index 10dc79614d4..31c1acf3e60 100755 --- a/tests/queries/0_stateless/01322_ttest_scipy.sh +++ b/tests/queries/0_stateless/01322_ttest_scipy.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/01322_ttest_scipy.py \ No newline at end of file +python3 "$CURDIR"/01322_ttest_scipy.python diff --git a/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql b/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql new file mode 100644 index 00000000000..a6dd118bad3 --- /dev/null +++ b/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS mv; +DROP DATABASE IF EXISTS dict_01521; +CREATE DATABASE dict_01521; + +CREATE TABLE dict_01521.sharding_table (key UInt64, val UInt64) Engine=Memory(); + +CREATE DICTIONARY dict_01521.sharding_dict 
+( + key UInt64 DEFAULT 0, + val UInt8 DEFAULT 1 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'sharding_table' PASSWORD '' DB 'dict_01521')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(HASHED()); + +INSERT INTO dict_01521.sharding_table VALUES (150, 1), (151, 2); + +CREATE TABLE table_first (a UInt64, b UInt64) ENGINE = Memory; +CREATE TABLE table_second (a UInt64, b UInt64) ENGINE = Memory; + +CREATE TABLE table_distr (a Int) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 't_local'); + + + From 4e285168dfa3cdcc6fad815fdde8e311af53275c Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 16 Oct 2020 09:11:05 +0800 Subject: [PATCH 081/174] ISSUES-15883 try fix test failure --- src/Parsers/MySQL/tests/gtest_column_parser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index ef6371f71d9..9f1f61c8c47 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -15,7 +15,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -29,7 +29,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - 
EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["column_format"]->as()->name, "FIXED"); EXPECT_EQ(declare_options->changes["storage"]->as()->name, "MEMORY"); EXPECT_TRUE(declare_options->changes["reference"]->as()); @@ -40,7 +40,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -52,7 +52,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["generated"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["is_stored"]->as()->value.safeGet(), 1); EXPECT_TRUE(declare_options->changes["reference"]->as()); From d95229317d8db772745ac06039bdd61aa0806802 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 16 Oct 2020 17:50:47 +0800 Subject: [PATCH 082/174] Change error message in function if & ifNull --- src/Functions/if.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index dd67f922ddf..97ab7fca0cd 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -160,7 +160,7 @@ struct NumIfImpl private: [[noreturn]] static void 
throwError() { - throw Exception("Invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("Incompatible types of arguments corresponding to two conditional branches", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } public: template static void vectorVector(Args &&...) { throwError(); } From 969b06508fbb9d3d49554f24f3992e94c4563e6b Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 16 Oct 2020 14:39:09 +0300 Subject: [PATCH 083/174] delete trash --- .../01521_skip_unused_shards_bugfix.sql | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql diff --git a/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql b/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql deleted file mode 100644 index a6dd118bad3..00000000000 --- a/tests/queries/0_stateless/01521_skip_unused_shards_bugfix.sql +++ /dev/null @@ -1,25 +0,0 @@ -DROP TABLE IF EXISTS mv; -DROP DATABASE IF EXISTS dict_01521; -CREATE DATABASE dict_01521; - -CREATE TABLE dict_01521.sharding_table (key UInt64, val UInt64) Engine=Memory(); - -CREATE DICTIONARY dict_01521.sharding_dict -( - key UInt64 DEFAULT 0, - val UInt8 DEFAULT 1 -) -PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'sharding_table' PASSWORD '' DB 'dict_01521')) -LIFETIME(MIN 0 MAX 0) -LAYOUT(HASHED()); - -INSERT INTO dict_01521.sharding_table VALUES (150, 1), (151, 2); - -CREATE TABLE table_first (a UInt64, b UInt64) ENGINE = Memory; -CREATE TABLE table_second (a UInt64, b UInt64) ENGINE = Memory; - -CREATE TABLE table_distr (a Int) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 't_local'); - - - From 2241ea9f33e770d019abfbfedba9e1ae03dd609f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 16 Oct 2020 17:44:59 +0300 Subject: [PATCH 084/174] not sure what's going on... 
--- docker/test/performance-comparison/compare.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 08b18758874..15d23226a0d 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -465,6 +465,8 @@ unset IFS # The comparison script might be bound to one NUMA node for better test # stability, and the calculation runs out of memory because of this. Use # all nodes. +numactl --show +numactl --all numactl --show numactl --all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log clickhouse-local --query " From 5207be9b32afcc5e780572a31bf17be1ac573da2 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 17 Oct 2020 12:36:08 +0800 Subject: [PATCH 085/174] ISSUES-15883 try fix test failure --- src/Parsers/MySQL/tests/gtest_table_options_parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index b051f6149bb..656b59fe6f3 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -11,7 +11,7 @@ using namespace DB::MySQLParser; TEST(ParserTableOptions, AllSubpatitionOptions) { - String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf-8 CHECKSUM 1 COLLATE utf8_bin" + String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf8 CHECKSUM 1 COLLATE utf8_bin" " COMMENT 'table option comment' COMPRESSION 'LZ4' CONNECTION 'connect_string' DATA DIRECTORY 'data_directory'" " INDEX DIRECTORY 'index_directory' DELAY_KEY_WRITE 0 ENCRYPTION 'Y' ENGINE INNODB INSERT_METHOD NO KEY_BLOCK_SIZE 3" " MAX_ROWS 1000 MIN_ROWS 0 PACK_KEYS DEFAULT PASSWORD 'password' ROW_FORMAT DYNAMIC STATS_AUTO_RECALC DEFAULT " @@ -23,7 +23,7 @@ TEST(ParserTableOptions, 
AllSubpatitionOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["avg_row_length"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["checksum"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8_bin"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "table option comment"); From d084e05aba63c6039db5524a4bf3ce33aa8f2d94 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 17 Oct 2020 13:15:00 +0800 Subject: [PATCH 086/174] ISSUES-15883 support zero length argument with string type --- src/DataTypes/DataTypeString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 9d563ee836c..141f896cfc2 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -384,7 +384,7 @@ static DataTypePtr create(const ASTPtr & arguments) throw Exception("String data type family mustn't have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64) throw Exception("String data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } From 32c449c34f831c4554c864a51479f9e389276135 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 19 Oct 2020 00:08:26 +0300 Subject: [PATCH 087/174] Add IDatabaseTablesIterator::reset() interface --- src/Databases/DatabaseLazy.cpp | 6 ++++++ src/Databases/DatabaseLazy.h | 1 + 
src/Databases/IDatabase.h | 4 ++++ src/Databases/MySQL/DatabaseMaterializeTablesIterator.h | 5 +++++ 4 files changed, 16 insertions(+) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 0119f17f843..81414902a33 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -329,4 +329,10 @@ const StoragePtr & DatabaseLazyIterator::table() const return current_storage; } +void DatabaseLazyIterator::reset() +{ + if (current_storage) + current_storage.reset(); +} + } diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 13c14863efb..58e5e465eef 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -122,6 +122,7 @@ public: bool isValid() const override; const String & name() const override; const StoragePtr & table() const override; + void reset() override; private: const DatabaseLazy & database; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index b28bd5fd599..9b744259406 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -44,6 +44,8 @@ public: /// (a database with support for lazy tables loading /// - it maintains a list of tables but tables are loaded lazily). virtual const StoragePtr & table() const = 0; + /// Reset reference counter to the StoragePtr. + virtual void reset() = 0; virtual ~IDatabaseTablesIterator() = default; @@ -93,6 +95,8 @@ public: const String & name() const override { return it->first; } const StoragePtr & table() const override { return it->second; } + + void reset() override { it->second.reset(); } }; /// Copies list of dictionaries and iterates through such snapshot. 
diff --git a/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h b/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h index 86a5cbf8206..5a0ec242c2f 100644 --- a/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h +++ b/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h @@ -28,6 +28,11 @@ public: return tables.emplace_back(storage); } + void reset() override + { + tables.clear(); + } + UUID uuid() const override { return nested_iterator->uuid(); } DatabaseMaterializeTablesIterator(DatabaseTablesIteratorPtr nested_iterator_, DatabaseMaterializeMySQL * database_) From 6200783629ab8815d9e7c2dca9113da3b30d3489 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Oct 2020 23:18:02 +0300 Subject: [PATCH 088/174] database_atomic_wait_for_drop_and_detach_synchronously/NO DELAY/SYNC for DROP DATABASE --- src/Interpreters/InterpreterDropQuery.cpp | 8 +++-- src/Interpreters/InterpreterDropQuery.h | 2 +- src/Parsers/ParserDropQuery.cpp | 3 ++ .../01530_drop_database_atomic_sync.reference | 0 .../01530_drop_database_atomic_sync.sql | 34 +++++++++++++++++++ 5 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01530_drop_database_atomic_sync.reference create mode 100644 tests/queries/0_stateless/01530_drop_database_atomic_sync.sql diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index c70431e5238..5cc14f93530 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -52,7 +52,7 @@ BlockIO InterpreterDropQuery::execute() return executeToDictionary(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); } else if (!drop.database.empty()) - return executeToDatabase(drop.database, drop.kind, drop.if_exists); + return executeToDatabase(drop.database, drop.kind, drop.if_exists, drop.no_delay); else throw Exception("Nothing to drop, both names are empty", ErrorCodes::LOGICAL_ERROR); } @@ -223,7 
+223,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, } -BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists) +BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists, bool no_delay) { auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); @@ -252,8 +252,12 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS ASTDropQuery query; query.kind = kind; query.database = database_name; + query.no_delay = no_delay; + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { + /// Reset reference counter of the StoragePtr to allow synchronous drop. + iterator->reset(); query.table = iterator->name(); executeToTable({query.database, query.table}, query); } diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index b54736b5c21..1e0118c49d8 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -29,7 +29,7 @@ private: ASTPtr query_ptr; Context & context; - BlockIO executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists); + BlockIO executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists, bool no_delay); BlockIO executeToTable(const StorageID & table_id, const ASTDropQuery & query); diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp index 31a6250a006..e09642e7b84 100644 --- a/src/Parsers/ParserDropQuery.cpp +++ b/src/Parsers/ParserDropQuery.cpp @@ -46,6 +46,9 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; } + + if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected)) + no_delay = true; } else { diff --git 
a/tests/queries/0_stateless/01530_drop_database_atomic_sync.reference b/tests/queries/0_stateless/01530_drop_database_atomic_sync.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql new file mode 100644 index 00000000000..010b8931448 --- /dev/null +++ b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql @@ -0,0 +1,34 @@ +drop database if exists db_01530_atomic sync; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic sync; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic sync; + + +set database_atomic_wait_for_drop_and_detach_synchronously=1; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + + +set database_atomic_wait_for_drop_and_detach_synchronously=0; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; -- { serverError 253; } +-- TODO: SYSTEM 
FORCE DROP and uncomment the line below to cleanup the data after test +-- (otherwise the test is not retriable...) +-- +-- drop database db_01530_atomic sync; From 38a2dd55ce895da119089ddc28c6f716a182d11a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 19 Oct 2020 17:31:02 +0300 Subject: [PATCH 089/174] fix numactl parameters --- docker/test/performance-comparison/Dockerfile | 2 +- docker/test/performance-comparison/compare.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 535f7de9e29..3b43b68319b 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -44,7 +44,7 @@ COPY * / # downloader script as well. # We could also try to remount it with proper options in Sandbox task. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ["numactl", "--cpunodebind=0", "--localalloc", "/entrypoint.sh"] +CMD ["numactl", "--cpunodebind=0", "--membind=0", "/entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 15d23226a0d..34ddbc37eec 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -466,8 +466,8 @@ unset IFS # stability, and the calculation runs out of memory because of this. Use # all nodes. 
numactl --show -numactl --all numactl --show -numactl --all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log +numactl --cpunodebind=all --membind=all numactl --show +numactl --cpunodebind=all --membind=all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log clickhouse-local --query " -- Join the metric names back to the metric statistics we've calculated, and make From 764f19820b6745cebb7defd47df31fe740260910 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 19 Oct 2020 17:39:52 +0300 Subject: [PATCH 090/174] max threads --- .../config/users.d/perf-comparison-tweaks-users.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index cee7dc3ff16..ce7a6ae094a 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -14,6 +14,9 @@ we might also add time check to perf.py script. 
--> 300 + + + 20 From 8a39b65fa203739734c1151e3336ebf6d122ffa4 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Mon, 19 Oct 2020 19:15:22 +0300 Subject: [PATCH 091/174] fix build and tests --- base/glibc-compatibility/musl/lgammal.c | 29 ++++++++++++++++--- docker/test/fasttest/run.sh | 3 ++ .../0_stateless/01322_ttest_scipy.python | 4 +-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index 534abf41894..3b5d94c5051 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -85,6 +85,20 @@ * */ +#include +#include +#include "libm.h" + + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +double lgamma_r(double x, int *sg); + +long double lgammal_r(long double x, int *sg) +{ + return lgamma_r(x, sg); +} +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 + static const long double pi = 3.14159265358979323846264L, /* lgam(1+x) = 0.5 x + x a(x)/b(x) @@ -187,11 +201,8 @@ w5 = 8.412723297322498080632E-4L, w6 = -1.880801938119376907179E-3L, w7 = 4.885026142432270781165E-3L; -#include -#include -#include "libm.h" -long double __lgammal_r(long double x, int *sg) { +long double lgammal_r(long double x, int *sg) { long double t, y, z, nadj, p, p1, p2, q, r, w; union ldshape u = {x}; uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; @@ -308,6 +319,16 @@ long double __lgammal_r(long double x, int *sg) { r = nadj - r; return r; } +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +// TODO: broken implementation to make things compile +double lgamma_r(double x, int *sg); + +long double lgammal_r(long double x, int *sg) +{ + return lgamma_r(x, sg); +} +#endif + int signgam_lgammal; diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index f12ecbb2c9c..9c89e9ffb28 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -258,6 +258,9 @@ TESTS_TO_SKIP=( # Look at DistributedFilesToInsert, so cannot 
run in parallel. 01460_DistributedFilesToInsert + + # Require python libraries like scipy, pandas and numpy + 01322_ttest_scipy ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/tests/queries/0_stateless/01322_ttest_scipy.python b/tests/queries/0_stateless/01322_ttest_scipy.python index d8255cd8062..7068b6c4d5a 100644 --- a/tests/queries/0_stateless/01322_ttest_scipy.python +++ b/tests/queries/0_stateless/01322_ttest_scipy.python @@ -63,8 +63,8 @@ def test_and_check(name, a, b, t_stat, p_value): "FROM ttest FORMAT TabSeparatedWithNames;") real_t_stat = real['t_stat'][0] real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < 1e-4)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < 1e-4), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + assert(abs(real_t_stat - np.float64(t_stat) < 1e-3)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert(abs(real_p_value - np.float64(p_value)) < 1e-3), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") From 242ec7e56c72c44cd26eacf22eb4a902e8b7c0b2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 20 Oct 2020 15:12:06 +0300 Subject: [PATCH 092/174] make some tests faster --- docker/test/performance-comparison/compare.sh | 1 + tests/performance/constant_column_search.xml | 3 ++- tests/performance/trim_urls.xml | 2 +- tests/performance/website.xml | 6 +++--- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 34ddbc37eec..769a4f8f735 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -1090,6 +1090,7 @@ 
case "$stage" in "restart") numactl --hardware ||: lscpu ||: + sudo dmidecode -t 4 ||: time restart ;& "run_tests") diff --git a/tests/performance/constant_column_search.xml b/tests/performance/constant_column_search.xml index 62baab14e3c..2f90844cb4a 100644 --- a/tests/performance/constant_column_search.xml +++ b/tests/performance/constant_column_search.xml @@ -5,6 +5,7 @@ hits_100m_single + hits_10m_single @@ -36,7 +37,7 @@ - + diff --git a/tests/performance/trim_urls.xml b/tests/performance/trim_urls.xml index 276a12bc570..075984682f9 100644 --- a/tests/performance/trim_urls.xml +++ b/tests/performance/trim_urls.xml @@ -20,5 +20,5 @@ - SELECT count() FROM hits_100m_single WHERE NOT ignore({func}URL)) + SELECT ignore({func}URL)) FROM hits_100m_single LIMIT 50000000 FORMAT Null diff --git a/tests/performance/website.xml b/tests/performance/website.xml index 65b3d79b5f1..2127a71c55c 100644 --- a/tests/performance/website.xml +++ b/tests/performance/website.xml @@ -37,7 +37,7 @@ SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10 SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10 SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10 -SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10 +SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits_10m_single GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10 SELECT count() FROM hits_100m_single WHERE UserID = 12345678901234567890 SELECT count() FROM hits_100m_single WHERE URL LIKE '%metrika%' SELECT SearchPhrase, any(URL), count() AS c FROM hits_100m_single WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10 @@ -52,8 +52,8 @@ SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_single WHERE 
SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10 SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10 SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_single GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10 -SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10 -SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10 +SELECT URL, count() AS c FROM hits_10m_single GROUP BY URL ORDER BY c DESC LIMIT 10 +SELECT 1, URL, count() AS c FROM hits_10m_single GROUP BY 1, URL ORDER BY c DESC LIMIT 10 SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10 SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10 SETTINGS max_threads = 1 SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10 From 439bbd98610983087cdcd324ba9c7cbc9890fef2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 20 Oct 2020 18:30:46 +0300 Subject: [PATCH 093/174] Blind performance fix --- src/Storages/MergeTree/SimpleMergeSelector.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index cbb24d1494e..65d9aaecdab 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -90,6 +90,9 @@ double mapPiecewiseLinearToUnit(double value, double min, double max) /** Is allowed to merge parts in range with 
specific properties. */ +#if defined(__clang__) + ALWAYS_INLINE +#endif bool allow( double sum_size, double max_size, From d7ea9b6d93f95caa2bdd5cd43f6656cc0f8043d3 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 20 Oct 2020 17:57:53 +0200 Subject: [PATCH 094/174] Add setTemporaryStorage to clickhouse-local to make OPTIMIZE work --- programs/local/LocalServer.cpp | 4 ++ .../01527_clickhouse_local_optimize.reference | 16 +++++ .../01527_clickhouse_local_optimize.sh | 60 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 tests/queries/0_stateless/01527_clickhouse_local_optimize.reference create mode 100755 tests/queries/0_stateless/01527_clickhouse_local_optimize.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 41da477152c..bfc7cac7fc1 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -152,6 +152,10 @@ void LocalServer::tryInitPath() path += '/'; context->setPath(path); + + context->setTemporaryStorage(path + "tmp"); + context->setFlagsPath(path + "flags"); + context->setUserFilesPath(""); // user's files are everywhere } diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference new file mode 100644 index 00000000000..ea0ec6fe765 --- /dev/null +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference @@ -0,0 +1,16 @@ +202001_1_1_0 1 2020-01-01 String +202001_4_4_0 1 2020-01-01 String +202002_2_2_0 2 2020-02-02 Another string +202002_5_5_0 2 2020-02-02 Another string +202003_3_3_0 3 2020-03-03 One more string +202003_6_6_0 3 2020-03-03 One more string +202001_1_1_0 4 2020-01-02 String for first partition +202001_4_4_0 4 2020-01-02 String for first partition +202001_1_4_1 1 2020-01-01 String +202001_1_4_1 1 2020-01-01 String +202002_2_5_1 2 2020-02-02 Another string +202002_2_5_1 2 2020-02-02 Another string +202003_3_6_1 3 2020-03-03 One more string 
+202003_3_6_1 3 2020-03-03 One more string +202001_1_4_1 4 2020-01-02 String for first partition +202001_1_4_1 4 2020-01-02 String for first partition diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh new file mode 100755 index 00000000000..e51d53c7524 --- /dev/null +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +WORKING_FOLDER="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" + +rm -rf "${WORKING_FOLDER}" +mkdir -p "${WORKING_FOLDER}/metadata/local/" + +## 1. Imagine we want to process this file: +cat < "${WORKING_FOLDER}/data.csv" +1,2020-01-01,"String" +2,2020-02-02,"Another string" +3,2020-03-03,"One more string" +4,2020-01-02,"String for first partition" +EOF + +## 2. that is the metadata for the table we want to fill +## schema should match the schema of the table from server +## (the easiest way is just to copy it from the server) + +## I've added sleepEachRow(0.5) here just to mimic slow insert +cat < "${WORKING_FOLDER}/metadata/local/test.sql" +ATTACH TABLE local.test (id UInt64, d Date, s String, x MATERIALIZED sleepEachRow(0.5)) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d); +EOF + +## 3a. that is the metadata for the input file we want to read +## it should match the structure of source file + +## use stdin to read from pipe +cat < "${WORKING_FOLDER}/metadata/local/stdin.sql" +ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); +EOF + +## 3b. 
Instead of stdin you can use file path +cat < "${WORKING_FOLDER}/metadata/local/data_csv.sql" +ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER}/data.csv'); +EOF + +## All preparations done, +## the rest is simple: + +# option a (if 3a used) with pipe / reading stdin +cat "${WORKING_FOLDER}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin" -- --path="${WORKING_FOLDER}" + +# option b (if 3b used) 0 with filepath +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv" -- --path="${WORKING_FOLDER}" + +# now you can check what was inserted (i did both options so i have doubled data) +${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}" + +# But you can't do OPTIMIZE (local will die with coredump) :) That would be too good +clickhouse-local --query "OPTIMIZE TABLE local.test FINAL" -- --path="${WORKING_FOLDER}" + +# now you can check what was inserted (i did both options so i have doubled data) +${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}" + +## now you can upload those parts to a server (in detached subfolder) and attach them. 
+rm -rf "${WORKING_FOLDER}" \ No newline at end of file From 8a0435e17829efc554d2c3d04620dff9da6ee805 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 20 Oct 2020 20:47:10 +0300 Subject: [PATCH 095/174] some fixes for split build --- .../0_stateless/00965_logs_level_bugfix.sh | 16 ++++++++-------- .../00965_send_logs_level_concurrent_queries.sh | 4 ++-- ...ickhouse_server_start_with_embedded_config.sh | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/00965_logs_level_bugfix.sh b/tests/queries/0_stateless/00965_logs_level_bugfix.sh index 69fc34e6988..d0126c19eb9 100755 --- a/tests/queries/0_stateless/00965_logs_level_bugfix.sh +++ b/tests/queries/0_stateless/00965_logs_level_bugfix.sh @@ -3,18 +3,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_BINARY client --send_logs_level="trace" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="trace" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 echo "." -$CLICKHOUSE_BINARY client --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug" | head -n 1 echo "." -$CLICKHOUSE_BINARY client --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Information" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Information" | head -n 1 echo "." 
-$CLICKHOUSE_BINARY client --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Error" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Error" | head -n 1 echo "-" -$CLICKHOUSE_BINARY client --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 echo "." -$CLICKHOUSE_BINARY client --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" | head -n 1 echo "." -$CLICKHOUSE_BINARY client --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information" | head -n 1 echo "." -$CLICKHOUSE_BINARY client --send_logs_level="None" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information\|Error" | head -n 1 +${CLICKHOUSE_CLIENT} --send_logs_level="None" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information\|Error" | head -n 1 diff --git a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh index cd654bd6581..5e24e820995 100755 --- a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh +++ b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh @@ -4,8 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh for _ in {1..10}; do - $CLICKHOUSE_BINARY client --send_logs_level="trace" --query="SELECT * from numbers(1000000);" > /dev/null 2> /dev/null & - $CLICKHOUSE_BINARY client --send_logs_level="information" --query="SELECT * from numbers(1000000);" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" & + ${CLICKHOUSE_CLIENT} --send_logs_level="trace" --query="SELECT * from numbers(1000000);" > /dev/null 2> /dev/null & + ${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT * from numbers(1000000);" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" & done wait diff --git a/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh b/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh index 68198ec6e16..945be0a1324 100755 --- a/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh +++ b/tests/queries/0_stateless/01507_clickhouse_server_start_with_embedded_config.sh @@ -10,7 +10,7 @@ echo "Starting clickhouse-server" $PORT -$CLICKHOUSE_BINARY server -- --tcp_port "$CLICKHOUSE_PORT_TCP" > server.log 2>&1 & +$CLICKHOUSE_BINARY-server -- --tcp_port "$CLICKHOUSE_PORT_TCP" > server.log 2>&1 & PID=$! 
function finish { From 7f7e6e809d7e64a1e448eedb8e1f303e4899ae9c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 20 Oct 2020 20:48:55 +0300 Subject: [PATCH 096/174] debug and -Og again --- docker/test/fasttest/run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 2a04d59252b..f4e5befd52c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -147,9 +147,9 @@ CMAKE_LIBS_CONFIG=( "-DUSE_STATIC_LIBRARIES=0" "-DSPLIT_SHARED_LIBRARIES=1" "-DCLICKHOUSE_SPLIT_BINARY=1" -# "-DCMAKE_BUILD_TYPE=Debug" -# "-DCMAKE_C_FLAGS=-Og" -# "-DCMAKE_CXX_FLAGS=-Og" + "-DCMAKE_BUILD_TYPE=Debug" + "-DCMAKE_C_FLAGS_ADD=-Og" + "-DCMAKE_CXX_FLAGS_ADD=-Og" ) # TODO remove this? we don't use ccache anyway. An option would be to download it From 8097c696de7d1d99904a50a36ea4a587a4b04ccf Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 20 Oct 2020 23:05:20 +0300 Subject: [PATCH 097/174] Trying another fix --- src/Storages/MergeTree/SimpleMergeSelector.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 65d9aaecdab..ad8328cb7eb 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -90,9 +90,6 @@ double mapPiecewiseLinearToUnit(double value, double min, double max) /** Is allowed to merge parts in range with specific properties. 
*/ -#if defined(__clang__) - ALWAYS_INLINE -#endif bool allow( double sum_size, double max_size, @@ -104,7 +101,9 @@ bool allow( // std::cerr << "sum_size: " << sum_size << "\n"; /// Map size to 0..1 using logarithmic scale - double size_normalized = mapPiecewiseLinearToUnit(log1p(sum_size), log1p(settings.min_size_to_lower_base), log1p(settings.max_size_to_lower_base)); + /// Use log(1 + x) instead of log1p(x) because our x variables (sum_size and settings) are always integer. + /// Also log1p seems to be slow and significantly affect performance of merges assignment. + double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), log(1 + settings.min_size_to_lower_base), log(1 + settings.max_size_to_lower_base)); // std::cerr << "size_normalized: " << size_normalized << "\n"; From 7e4494e2685aec72a84420b7d6aa15f412c9564a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 17 Oct 2020 01:17:38 +0300 Subject: [PATCH 098/174] Add a test for dictGet in sharding_key after dictionary reload --- ...dist_sharding_key_dictGet_reload.reference | 4 +++ ...01527_dist_sharding_key_dictGet_reload.sql | 26 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.reference create mode 100644 tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.sql diff --git a/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.reference b/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.reference new file mode 100644 index 00000000000..1a3c5705a7e --- /dev/null +++ b/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.reference @@ -0,0 +1,4 @@ +1 +2 +2 +1 diff --git a/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.sql b/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.sql new file mode 100644 index 00000000000..d8f6704b892 --- /dev/null +++ b/tests/queries/0_stateless/01527_dist_sharding_key_dictGet_reload.sql @@ -0,0 
+1,26 @@ +set allow_nondeterministic_optimize_skip_unused_shards=1; +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=2; + +drop database if exists db_01527_ranges; +drop table if exists dist_01527; +drop table if exists data_01527; + +create database db_01527_ranges; + +create table data_01527 engine=Memory() as select toUInt64(number) key from numbers(2); +create table dist_01527 as data_01527 engine=Distributed('test_cluster_two_shards', currentDatabase(), data_01527, dictGetUInt64('db_01527_ranges.dict', 'shard', key)); + +create table db_01527_ranges.data engine=Memory() as select number key, number shard from numbers(100); +create dictionary db_01527_ranges.dict (key UInt64, shard UInt64) primary key key source(clickhouse(host '127.0.0.1' port 9000 table 'data' db 'db_01527_ranges' user 'default' password '')) lifetime(0) layout(hashed()); +system reload dictionary db_01527_ranges.dict; + +select _shard_num from dist_01527 where key=0; +select _shard_num from dist_01527 where key=1; + +drop table db_01527_ranges.data sync; +create table db_01527_ranges.data engine=Memory() as select number key, number+1 shard from numbers(100); +system reload dictionary db_01527_ranges.dict; + +select _shard_num from dist_01527 where key=0; +select _shard_num from dist_01527 where key=1; From 3559e3355c11f32d699e191c9bbd8105fbae4dd8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 17 Oct 2020 00:14:49 +0300 Subject: [PATCH 099/174] Do not cache dictionary for dictGet*/dictHas* There are places where ExpressionActionsPtr is cached (StorageDistributed caching it for sharding_key_expr and optimize_skip_unused_shards), and if the dictionary will be cached within "query" then cached ExpressionActionsPtr will always have first version of the query and the dictionary will not be updated after reload. For example this will fix dictGet in sharding_key (and similar places, i.e. 
when the function context is stored permanently) Fixes: 01527_dist_sharding_key_dictGet_reload --- src/Functions/FunctionsExternalDictionaries.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 4affcdfa7e0..b5cc1f9c820 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -82,13 +82,13 @@ public: std::shared_ptr getDictionary(const String & dictionary_name) { - auto dict = std::atomic_load(&dictionary); - if (dict) - return dict; String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name); - dict = external_loader.getDictionary(resolved_name); - context.checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); - std::atomic_store(&dictionary, dict); + auto dict = external_loader.getDictionary(resolved_name); + if (!access_checked) + { + context.checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); + access_checked = true; + } return dict; } @@ -122,6 +122,8 @@ private: const Context & context; const ExternalDictionariesLoader & external_loader; mutable std::shared_ptr dictionary; + /// Access cannot be not granted, since in this case checkAccess() will throw and access_checked will not be updated. 
+ std::atomic access_checked = false; }; From 9ffa7bd0b6812c7ca379da53133b2d4033bb1d01 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 21 Oct 2020 09:10:40 +0200 Subject: [PATCH 100/174] remove sleep and make tests more deterministic --- .../01527_clickhouse_local_optimize.reference | 24 +++------ .../01527_clickhouse_local_optimize.sh | 49 ++++++++----------- 2 files changed, 28 insertions(+), 45 deletions(-) diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference index ea0ec6fe765..e7315547841 100644 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference @@ -1,16 +1,8 @@ -202001_1_1_0 1 2020-01-01 String -202001_4_4_0 1 2020-01-01 String -202002_2_2_0 2 2020-02-02 Another string -202002_5_5_0 2 2020-02-02 Another string -202003_3_3_0 3 2020-03-03 One more string -202003_6_6_0 3 2020-03-03 One more string -202001_1_1_0 4 2020-01-02 String for first partition -202001_4_4_0 4 2020-01-02 String for first partition -202001_1_4_1 1 2020-01-01 String -202001_1_4_1 1 2020-01-01 String -202002_2_5_1 2 2020-02-02 Another string -202002_2_5_1 2 2020-02-02 Another string -202003_3_6_1 3 2020-03-03 One more string -202003_3_6_1 3 2020-03-03 One more string -202001_1_4_1 4 2020-01-02 String for first partition -202001_1_4_1 4 2020-01-02 String for first partition +1 2020-01-01 String +2 2020-02-02 Another string +3 2020-03-03 One more string +4 2020-01-02 String for first partition +1 2020-01-01 String +2 2020-02-02 Another string +3 2020-03-03 One more string +4 2020-01-02 String for first partition diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index e51d53c7524..13e8c847e71 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ 
b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -3,13 +3,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -WORKING_FOLDER="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" +WORKING_FOLDER_01527="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" -rm -rf "${WORKING_FOLDER}" -mkdir -p "${WORKING_FOLDER}/metadata/local/" +rm -rf "${WORKING_FOLDER_01527}" +mkdir -p "${WORKING_FOLDER_01527}/metadata/local/" + +# OPTIMIZE was crashing due to lack of temporary volume in local +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" + +# Some extra (unrealted) scenarios of clickhouse-local usage. ## 1. Imagine we want to process this file: -cat < "${WORKING_FOLDER}/data.csv" +cat < "${WORKING_FOLDER_01527}/data.csv" 1,2020-01-01,"String" 2,2020-02-02,"Another string" 3,2020-03-03,"One more string" @@ -19,42 +24,28 @@ EOF ## 2. that is the metadata for the table we want to fill ## schema should match the schema of the table from server ## (the easiest way is just to copy it from the server) - -## I've added sleepEachRow(0.5) here just to mimic slow insert -cat < "${WORKING_FOLDER}/metadata/local/test.sql" -ATTACH TABLE local.test (id UInt64, d Date, s String, x MATERIALIZED sleepEachRow(0.5)) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d); +cat < "${WORKING_FOLDER_01527}/metadata/local/test.sql" +ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d); EOF ## 3a. that is the metadata for the input file we want to read ## it should match the structure of source file - ## use stdin to read from pipe -cat < "${WORKING_FOLDER}/metadata/local/stdin.sql" +cat < "${WORKING_FOLDER_01527}/metadata/local/stdin.sql" ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); EOF ## 3b. 
Instead of stdin you can use file path -cat < "${WORKING_FOLDER}/metadata/local/data_csv.sql" -ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER}/data.csv'); +cat < "${WORKING_FOLDER_01527}/metadata/local/data_csv.sql" +ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01527}/data.csv'); EOF -## All preparations done, -## the rest is simple: +## All preparations done, the rest is simple: -# option a (if 3a used) with pipe / reading stdin -cat "${WORKING_FOLDER}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin" -- --path="${WORKING_FOLDER}" +# option a (if 3a used) with pipe / reading stdin (truncate was added for the test) +cat "${WORKING_FOLDER_01527}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}" -# option b (if 3b used) 0 with filepath -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv" -- --path="${WORKING_FOLDER}" +# option b (if 3b used) 0 with filepath (truncate was added for the test) +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}" -# now you can check what was inserted (i did both options so i have doubled data) -${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}" - -# But you can't do OPTIMIZE (local will die with coredump) :) That would be too good -clickhouse-local --query "OPTIMIZE TABLE local.test FINAL" -- --path="${WORKING_FOLDER}" - -# now you can check what was inserted (i did both options so i have doubled data) -${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}" - -## now you can upload those parts 
to a server (in detached subfolder) and attach them. -rm -rf "${WORKING_FOLDER}" \ No newline at end of file +rm -rf "${WORKING_FOLDER_01527}" \ No newline at end of file From b45d42bb9c291e284017d9dd6dba331e36457045 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 21 Oct 2020 11:35:36 +0300 Subject: [PATCH 101/174] Maybe devirtualization can help --- src/Storages/MergeTree/SimpleMergeSelector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 9aeb73a40a8..fe57c40320a 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -6,7 +6,7 @@ namespace DB { -class SimpleMergeSelector : public IMergeSelector +class SimpleMergeSelector final : public IMergeSelector { public: struct Settings From a104c01cd9e0f9a4ef10b2c06fe6958469924159 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 21 Oct 2020 20:17:37 +0800 Subject: [PATCH 102/174] Add mysql binlog file check util --- utils/CMakeLists.txt | 1 + utils/check-mysql-binlog/CMakeLists.txt | 2 + utils/check-mysql-binlog/main.cpp | 162 ++++++++++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 utils/check-mysql-binlog/CMakeLists.txt create mode 100644 utils/check-mysql-binlog/main.cpp diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index b4408a298c3..0987d64abed 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -30,6 +30,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (checksum-for-compressed-block) add_subdirectory (db-generator) add_subdirectory (wal-dump) + add_subdirectory (check-mysql-binlog) endif () if (ENABLE_CODE_QUALITY) diff --git a/utils/check-mysql-binlog/CMakeLists.txt b/utils/check-mysql-binlog/CMakeLists.txt new file mode 100644 index 00000000000..b1a72650ee9 --- /dev/null +++ b/utils/check-mysql-binlog/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(check-mysql-binlog main.cpp) 
+target_link_libraries(check-mysql-binlog PRIVATE dbms boost::program_options) diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp new file mode 100644 index 00000000000..4ec40ac41cc --- /dev/null +++ b/utils/check-mysql-binlog/main.cpp @@ -0,0 +1,162 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( + DB::MySQLReplication::EventHeader & header, DB::ReadBuffer & payload, + std::shared_ptr & last_table_map_event, bool exist_checksum) +{ + DB::MySQLReplication::BinlogEventPtr event; + DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, false); + DB::ReadBufferPtr event_payload = limit_read_buffer; + + if (exist_checksum) + event_payload = std::make_shared(*limit_read_buffer); + + switch (header.type) + { + case DB::MySQLReplication::FORMAT_DESCRIPTION_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::ROTATE_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::QUERY_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + + auto query = std::static_pointer_cast(event); + switch (query->typ) + { + case DB::MySQLReplication::QUERY_EVENT_MULTI_TXN_FLAG: + case DB::MySQLReplication::QUERY_EVENT_XA: + { + event = std::make_shared(std::move(query->header)); + break; + } + default: + break; + } + break; + } + case DB::MySQLReplication::XID_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::TABLE_MAP_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + last_table_map_event = std::static_pointer_cast(event); + break; + } + case 
DB::MySQLReplication::WRITE_ROWS_EVENT_V1: + case DB::MySQLReplication::WRITE_ROWS_EVENT_V2: + { + event = std::make_shared(last_table_map_event, std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::DELETE_ROWS_EVENT_V1: + case DB::MySQLReplication::DELETE_ROWS_EVENT_V2: + { + event = std::make_shared(last_table_map_event, std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1: + case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2: + { + event = std::make_shared(last_table_map_event, std::move(header)); + event->parseEvent(*event_payload); + break; + } + case DB::MySQLReplication::GTID_EVENT: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + break; + } + default: + { + event = std::make_shared(std::move(header)); + event->parseEvent(*event_payload); + break; + } + } + + return event; +} + +static int checkBinLogFile(const std::string & bin_path, bool exist_checksum) +{ + DB::ReadBufferFromFile in(bin_path); + DB::assertString("\xfe\x62\x69\x6e", in); /// magic number + + DB::MySQLReplication::BinlogEventPtr last_event; + std::shared_ptr last_header; + std::shared_ptr table_map; + + try + { + while (!in.eof()) + { + last_header = std::make_shared(); + last_header->parse(in); + last_event = parseSingleEventBody(*last_header, in, table_map, exist_checksum); + } + } + catch (...) + { + std::cerr << "Unable to parse MySQL binlog event. Code: " << DB::getCurrentExceptionCode() << ", Exception message: " + << DB::getCurrentExceptionMessage(false) << std::endl << ", Previous event: " << std::endl; + last_event->dump(std::cerr); + std::cerr << std::endl << ", Event header: " << std::endl; + last_header->dump(std::cerr); + std::cerr << std::endl; + return DB::getCurrentExceptionCode(); + } + + std::cout << "Check passed. " << std::endl << "No exception was thrown." 
<< std::endl << "The last binlog event: " << std::endl; + last_event->dump(std::cout); + std::cout << std::endl; + return 0; +} + + +int main(int argc, char ** argv) +{ + boost::program_options::options_description desc("Allowed options"); + desc.add_options()("help,h", "Produce help message"); + desc.add_options()("disable_checksum", "Disable checksums in binlog files."); + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help") || argc < 2) + { + std::cout << "Usage: " << argv[0] << " mysql_binlog_file" << std::endl; + std::cout << desc << std::endl; + return 1; + } + + return checkBinLogFile(argv[argc - 1], !options.count("disable_checksum")); +} From c10dada3d42ff0526c9ffbaa7c4beeb4467cde73 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 21 Oct 2020 18:06:42 +0300 Subject: [PATCH 103/174] try node 1 --- docker/test/performance-comparison/Dockerfile | 4 ++-- docker/test/performance-comparison/compare.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 3b43b68319b..1b4a75f7f5d 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -37,14 +37,14 @@ RUN apt-get update \ COPY * / -# Bind everything to node 0 early. We have to bind both servers and the tmpfs +# Bind everything to node 1 early. We have to bind both servers and the tmpfs # on which the database is stored. How to do it through Yandex Sandbox API is # unclear, but by default tmpfs uses 'process allocation policy', not sure # which process but hopefully the one that writes to it, so just bind the # downloader script as well. # We could also try to remount it with proper options in Sandbox task. 
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ["numactl", "--cpunodebind=0", "--membind=0", "/entrypoint.sh"] +CMD ["numactl", "--cpunodebind=1", "--membind=1", "/entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 769a4f8f735..8a567d7a11a 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -1090,7 +1090,7 @@ case "$stage" in "restart") numactl --hardware ||: lscpu ||: - sudo dmidecode -t 4 ||: + dmidecode -t 4 ||: time restart ;& "run_tests") From a678f0322999afd4a3c054531220339086f25c6a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 21 Oct 2020 18:21:54 +0300 Subject: [PATCH 104/174] fixup --- docker/test/performance-comparison/Dockerfile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1b4a75f7f5d..1a904cf73c2 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -37,14 +37,15 @@ RUN apt-get update \ COPY * / -# Bind everything to node 1 early. We have to bind both servers and the tmpfs -# on which the database is stored. How to do it through Yandex Sandbox API is -# unclear, but by default tmpfs uses 'process allocation policy', not sure -# which process but hopefully the one that writes to it, so just bind the -# downloader script as well. +# Bind everything to NUMA node #1, if there's more than one. We avoid node #0, +# because it is more susceptible to system interruptions. We have to bind both +# servers and the tmpfs on which the database is stored. 
How to do it through +# Yandex Sandbox API is unclear, but by default tmpfs uses 'process allocation +# policy', not sure which process but hopefully the one that writes to it, so +# just bind the downloader script as well. # We could also try to remount it with proper options in Sandbox task. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ["numactl", "--cpunodebind=1", "--membind=1", "/entrypoint.sh"] +CMD ["bash", "-c", "node=$(numactl --hardware | grep -q 'available: 1 nodes' && echo 0 || echo 1); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison From 154553e4c046c8991ace2529d3cd8ae1cdcd62ed Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 21 Oct 2020 19:29:21 +0300 Subject: [PATCH 105/174] fix one more test The CLICKHOUSE_CLIENT has --send_logs_level option, and boost::program_options prefers the first entry, so we can't override it. Use CLICKHOUSE_CLIENT_BINARY instead, which does not contain options. --- .../0_stateless/00965_logs_level_bugfix.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/00965_logs_level_bugfix.sh b/tests/queries/0_stateless/00965_logs_level_bugfix.sh index d0126c19eb9..0c618cf1bf0 100755 --- a/tests/queries/0_stateless/00965_logs_level_bugfix.sh +++ b/tests/queries/0_stateless/00965_logs_level_bugfix.sh @@ -3,18 +3,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --send_logs_level="trace" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="trace" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 echo "." 
-${CLICKHOUSE_CLIENT} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug" | head -n 1 echo "." -${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Information" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Information" | head -n 1 echo "." -${CLICKHOUSE_CLIENT} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Error" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Error" | head -n 1 echo "-" -${CLICKHOUSE_CLIENT} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="debug" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Trace" | head -n 1 echo "." -${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="information" --query="SELECT 1" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" | head -n 1 echo "." -${CLICKHOUSE_CLIENT} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="error" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information" | head -n 1 echo "." 
-${CLICKHOUSE_CLIENT} --send_logs_level="None" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information\|Error" | head -n 1 +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="None" --query="SELECT throwIf(1)" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace\|Information\|Error" | head -n 1 From 545d187ef7e349d29b9fba5a84d859808ce37f32 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Wed, 21 Oct 2020 19:32:52 +0300 Subject: [PATCH 106/174] Docs for the crash_log table (en) --- docs/en/operations/system-tables/crash_log.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/en/operations/system-tables/crash_log.md diff --git a/docs/en/operations/system-tables/crash_log.md b/docs/en/operations/system-tables/crash_log.md new file mode 100644 index 00000000000..d38ce31584f --- /dev/null +++ b/docs/en/operations/system-tables/crash_log.md @@ -0,0 +1,42 @@ +## system.crash_log {#system-tables_crash_log} + +Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. + +Columns: + +- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event. +- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Array of traces. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of full traces. 
+- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server build. + +**Example** + +Query: + +``` sql +SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; +``` +Result (not full): + +``` text +event_date: 2020-10-14 +event_time: 2020-10-14 15:47:40 +timestamp_ns: 1602679660271312710 +signal: 11 +thread_id: 23624 +query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69 +trace: [188531193,...] +trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] +version: ClickHouse 20.11.1.1 +revision: 54442 +build_id: +``` + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash_log) From 748ff404f94e46917c4231adc08fff59e66bfdc6 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 21 Oct 2020 20:36:01 +0200 Subject: [PATCH 107/174] Attempt to fix the race --- programs/local/LocalServer.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index bfc7cac7fc1..ef96db4e96a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -335,15 +335,19 @@ void LocalServer::processQueries() if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); - context->makeSessionContext(); - context->makeQueryContext(); + /// we can't mutate global context (due to possible races), so we can't reuse it safely as a query context + /// so we need a copy here + auto query_context = Context(context); - context->setUser("default", "", 
Poco::Net::SocketAddress{}); - context->setCurrentQueryId(""); + query_context->makeSessionContext(); + query_context->makeQueryContext(); + + query_context->setUser("default", "", Poco::Net::SocketAddress{}); + query_context->setCurrentQueryId(""); applyCmdSettings(); /// Use the same query_id (and thread group) for all queries - CurrentThread::QueryScope query_scope_holder(*context); + CurrentThread::QueryScope query_scope_holder(*query_context); bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); std::exception_ptr exception; @@ -362,7 +366,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {}); + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *query_context, {}); } catch (...) { From 60aae56266e1a20fd4679d874fb488682fc566e8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 20 Oct 2020 21:12:02 +0300 Subject: [PATCH 108/174] Link dbms w/ atomic to fix undefined reference in unbundled build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will fix undefined reference for __atomic_load under clang10 and unbundled build (libstdc++ shared linkage): clickhouse-server: symbol lookup error: /src/ch/clickhouse/.cmake/src/libclickhouse_disksd.so: undefined symbol: __atomic_load From [1]: "libatomic (GNU) libgcc_s does not provide an implementation of an atomics library. Instead, GCC’s libatomic library can be used to supply these when using libgcc_s. Note Clang does not currently automatically link against libatomic when using libgcc_s. You may need to manually add -latomic to support this configuration when using non-native atomic operations (if you see link errors referring to __atomic_* functions)." 
[1]: https://clang.llvm.org/docs/Toolchain.html --- src/CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0016c51b7f8..085269847e4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -173,14 +173,20 @@ add_object_library(clickhouse_processors_merges Processors/Merges) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) +set (DBMS_COMMON_LIBRARIES) +# libgcc_s does not provide an implementation of an atomics library. Instead, +# GCC’s libatomic library can be used to supply these when using libgcc_s. +if ((NOT USE_LIBCXX) AND COMPILER_CLANG AND OS_LINUX) + list (APPEND DBMS_COMMON_LIBRARIES atomic) +endif() if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE jemalloc libdivide) + target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES}) set (all_modules dbms) else() add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PUBLIC ${all_modules}) + target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES}) target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide) list (APPEND all_modules dbms) # force all split libs to be linked From 67cbb55d63d81f32bc19d8597ef8c4eefaf10a14 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 21 Oct 2020 22:16:13 +0200 Subject: [PATCH 109/174] Fix compilation --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index ef96db4e96a..bb9918d633f 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -337,7 +337,7 @@ void LocalServer::processQueries() /// we can't mutate global context (due to possible 
races), so we can't reuse it safely as a query context /// so we need a copy here - auto query_context = Context(context); + auto query_context = Context(context.get()); query_context->makeSessionContext(); query_context->makeQueryContext(); From 691b28e98bbfa1349876fc04200b9e9f63787b33 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 22 Oct 2020 02:43:02 +0400 Subject: [PATCH 110/174] Add a log message after a storage is added --- src/Access/AccessControlManager.cpp | 4 ++++ src/Access/AccessControlManager.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 56d225f64f4..0913717808a 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -137,6 +137,10 @@ AccessControlManager::AccessControlManager() AccessControlManager::~AccessControlManager() = default; +void AccessControlManager::addStorage(const StoragePtr & new_storage) { + MultipleAccessStorage::addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added storage '{}' of type '{}'", new_storage->getStorageName(), String(new_storage->getStorageType())); +} void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_) { diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index c960b330ee6..dd710614ece 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -149,6 +149,8 @@ public: const ExternalAuthenticators & getExternalAuthenticators() const; private: + void addStorage(const StoragePtr & new_storage); + class ContextAccessCache; class CustomSettingsPrefixes; From f351b528512ceefd453048ff8ff3524b5c1f5e06 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Oct 2020 07:03:17 +0300 Subject: [PATCH 111/174] Update AccessControlManager.cpp --- src/Access/AccessControlManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Access/AccessControlManager.cpp 
b/src/Access/AccessControlManager.cpp index 0913717808a..9bfc54fe69d 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -137,7 +137,8 @@ AccessControlManager::AccessControlManager() AccessControlManager::~AccessControlManager() = default; -void AccessControlManager::addStorage(const StoragePtr & new_storage) { +void AccessControlManager::addStorage(const StoragePtr & new_storage) +{ MultipleAccessStorage::addStorage(new_storage); LOG_DEBUG(getLogger(), "Added storage '{}' of type '{}'", new_storage->getStorageName(), String(new_storage->getStorageType())); } From 111b553ee559cceca35f71bbb15399cc6fc5063f Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 22 Oct 2020 09:37:03 +0200 Subject: [PATCH 112/174] Better --- programs/local/LocalServer.cpp | 86 +++++++++++++++++----------------- programs/local/LocalServer.h | 6 +-- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index bb9918d633f..77deb274c65 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -57,8 +57,8 @@ LocalServer::LocalServer() = default; LocalServer::~LocalServer() { - if (context) - context->shutdown(); /// required for properly exception handling + if (global_context) + global_context->shutdown(); /// required for properly exception handling } @@ -95,9 +95,9 @@ void LocalServer::initialize(Poco::Util::Application & self) } } -void LocalServer::applyCmdSettings() +void LocalServer::applyCmdSettings(Context & context) { - context->applySettingsChanges(cmd_settings.changes()); + context.applySettingsChanges(cmd_settings.changes()); } /// If path is specified and not empty, will try to setup server environment and load existing metadata @@ -151,12 +151,12 @@ void LocalServer::tryInitPath() if (path.back() != '/') path += '/'; - context->setPath(path); + global_context->setPath(path); - context->setTemporaryStorage(path + "tmp"); - 
context->setFlagsPath(path + "flags"); + global_context->setTemporaryStorage(path + "tmp"); + global_context->setFlagsPath(path + "flags"); - context->setUserFilesPath(""); // user's files are everywhere + global_context->setUserFilesPath(""); // user's files are everywhere } @@ -190,9 +190,9 @@ try } shared_context = Context::createShared(); - context = std::make_unique(Context::createGlobal(shared_context.get())); - context->makeGlobalContext(); - context->setApplicationType(Context::ApplicationType::LOCAL); + global_context = std::make_unique(Context::createGlobal(shared_context.get())); + global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::LOCAL); tryInitPath(); std::optional status; @@ -214,32 +214,32 @@ try /// Maybe useless if (config().has("macros")) - context->setMacros(std::make_unique(config(), "macros", log)); + global_context->setMacros(std::make_unique(config(), "macros", log)); /// Skip networking /// Sets external authenticators config (LDAP). - context->setExternalAuthenticatorsConfig(config()); + global_context->setExternalAuthenticatorsConfig(config()); setupUsers(); /// Limit on total number of concurrently executing queries. /// There is no need for concurrent queries, override max_concurrent_queries. - context->getProcessList().setMaxSize(0); + global_context->getProcessList().setMaxSize(0); /// Size of cache for uncompressed blocks. Zero means disabled. size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); if (uncompressed_cache_size) - context->setUncompressedCache(uncompressed_cache_size); + global_context->setUncompressedCache(uncompressed_cache_size); /// Size of cache for marks (index of MergeTree family of tables). It is necessary. /// Specify default value for mark_cache_size explicitly! 
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (mark_cache_size) - context->setMarkCache(mark_cache_size); + global_context->setMarkCache(mark_cache_size); /// Load global settings from default_profile and system_profile. - context->setDefaultProfiles(config()); + global_context->setDefaultProfiles(config()); /** Init dummy default DB * NOTE: We force using isolated default database to avoid conflicts with default database from server environment @@ -247,34 +247,34 @@ try * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. */ std::string default_database = config().getString("default_database", "_local"); - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *context)); - context->setCurrentDatabase(default_database); - applyCmdOptions(); + DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *global_context)); + global_context->setCurrentDatabase(default_database); + applyCmdOptions(*global_context); - String path = context->getPath(); + String path = global_context->getPath(); if (!path.empty()) { /// Lock path directory before read - status.emplace(context->getPath() + "status", StatusFile::write_full_info); + status.emplace(global_context->getPath() + "status", StatusFile::write_full_info); LOG_DEBUG(log, "Loading metadata from {}", path); Poco::File(path + "data/").createDirectories(); Poco::File(path + "metadata/").createDirectories(); - loadMetadataSystem(*context); - attachSystemTables(*context); - loadMetadata(*context); + loadMetadataSystem(*global_context); + attachSystemTables(*global_context); + loadMetadata(*global_context); DatabaseCatalog::instance().loadDatabases(); LOG_DEBUG(log, "Loaded metadata."); } else { - attachSystemTables(*context); + attachSystemTables(*global_context); } processQueries(); - context->shutdown(); - context.reset(); + global_context->shutdown(); + 
global_context.reset(); status.reset(); cleanup(); @@ -327,7 +327,7 @@ void LocalServer::processQueries() String initial_create_query = getInitialCreateTableQuery(); String queries_str = initial_create_query + config().getRawString("query"); - const auto & settings = context->getSettingsRef(); + const auto & settings = global_context->getSettingsRef(); std::vector queries; auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); @@ -335,19 +335,19 @@ void LocalServer::processQueries() if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); - /// we can't mutate global context (due to possible races), so we can't reuse it safely as a query context - /// so we need a copy here - auto query_context = Context(context.get()); + /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) + /// so we can't reuse it safely as a query global_context and need a copy here + auto context = Context(*global_context); - query_context->makeSessionContext(); - query_context->makeQueryContext(); + context.makeSessionContext(); + context.makeQueryContext(); - query_context->setUser("default", "", Poco::Net::SocketAddress{}); - query_context->setCurrentQueryId(""); - applyCmdSettings(); + context.setUser("default", "", Poco::Net::SocketAddress{}); + context.setCurrentQueryId(""); + applyCmdSettings(context); /// Use the same query_id (and thread group) for all queries - CurrentThread::QueryScope query_scope_holder(*query_context); + CurrentThread::QueryScope query_scope_holder(context); bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); std::exception_ptr exception; @@ -366,7 +366,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *query_context, {}); + executeQuery(read_buf, write_buf, /* 
allow_into_outfile = */ true, context, {}); } catch (...) { @@ -431,7 +431,7 @@ void LocalServer::setupUsers() } if (users_config) - context->setUsersConfig(users_config); + global_context->setUsersConfig(users_config); else throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG); } @@ -585,10 +585,10 @@ void LocalServer::init(int argc, char ** argv) argsToConfig(arguments, config(), 100); } -void LocalServer::applyCmdOptions() +void LocalServer::applyCmdOptions(Context & context) { - context->setDefaultFormat(config().getString("output-format", config().getString("format", "TSV"))); - applyCmdSettings(); + context.setDefaultFormat(config().getString("output-format", config().getString("format", "TSV"))); + applyCmdSettings(context); } } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index a8908754369..02778bd86cb 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -36,15 +36,15 @@ private: std::string getInitialCreateTableQuery(); void tryInitPath(); - void applyCmdOptions(); - void applyCmdSettings(); + void applyCmdOptions(Context & context); + void applyCmdSettings(Context & context); void processQueries(); void setupUsers(); void cleanup(); protected: SharedContextHolder shared_context; - std::unique_ptr context; + std::unique_ptr global_context; /// Settings specified via command line args Settings cmd_settings; From 77f66e5a09396286ac8fa5e66ddc4398f27de54e Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 22 Oct 2020 11:02:14 +0200 Subject: [PATCH 113/174] Split test into 2 --- .../01527_clickhouse_local_optimize.reference | 8 -- .../01527_clickhouse_local_optimize.sh | 42 +--------- ...8_clickhouse_local_prepare_parts.reference | 19 +++++ .../01528_clickhouse_local_prepare_parts.sh | 83 +++++++++++++++++++ 4 files changed, 104 insertions(+), 48 deletions(-) create mode 100644 tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.reference create mode 100755 
tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference index e7315547841..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.reference @@ -1,8 +0,0 @@ -1 2020-01-01 String -2 2020-02-02 Another string -3 2020-03-03 One more string -4 2020-01-02 String for first partition -1 2020-01-01 String -2 2020-02-02 Another string -3 2020-03-03 One more string -4 2020-01-02 String for first partition diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index 13e8c847e71..bbbdf9c65d6 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -4,48 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh WORKING_FOLDER_01527="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" - rm -rf "${WORKING_FOLDER_01527}" -mkdir -p "${WORKING_FOLDER_01527}/metadata/local/" +mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local ${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" -# Some extra (unrealted) scenarios of clickhouse-local usage. - -## 1. Imagine we want to process this file: -cat < "${WORKING_FOLDER_01527}/data.csv" -1,2020-01-01,"String" -2,2020-02-02,"Another string" -3,2020-03-03,"One more string" -4,2020-01-02,"String for first partition" -EOF - -## 2. 
that is the metadata for the table we want to fill -## schema should match the schema of the table from server -## (the easiest way is just to copy it from the server) -cat < "${WORKING_FOLDER_01527}/metadata/local/test.sql" -ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d); -EOF - -## 3a. that is the metadata for the input file we want to read -## it should match the structure of source file -## use stdin to read from pipe -cat < "${WORKING_FOLDER_01527}/metadata/local/stdin.sql" -ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); -EOF - -## 3b. Instead of stdin you can use file path -cat < "${WORKING_FOLDER_01527}/metadata/local/data_csv.sql" -ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01527}/data.csv'); -EOF - -## All preparations done, the rest is simple: - -# option a (if 3a used) with pipe / reading stdin (truncate was added for the test) -cat "${WORKING_FOLDER_01527}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}" - -# option b (if 3b used) 0 with filepath (truncate was added for the test) -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}" - -rm -rf "${WORKING_FOLDER_01527}" \ No newline at end of file +rm -rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.reference b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.reference new file mode 100644 index 00000000000..64a56d9b949 --- /dev/null +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.reference @@ -0,0 +1,19 @@ +Option 1. 
Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path +1 2020-01-01 String +2 2020-02-02 Another string +3 2020-03-03 One more string +4 2020-01-02 String for first partition +Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe) +11 2020-01-01 String +12 2020-02-02 Another string +13 2020-03-03 One more string +14 2020-01-02 String for first partition +Option 3. Prepare parts from from table with Engine=File defined via command line, read from stdin (pipe) +21 2020-01-01 String +22 2020-02-02 Another string +23 2020-03-03 One more string +24 2020-01-02 String for first partition +Possibility to run optimize on prepared parts before sending parts to server +202001 1 +202002 1 +202003 1 diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh new file mode 100755 index 00000000000..9c7ad1d9476 --- /dev/null +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +WORKING_FOLDER_01528="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" +rm -rf "${WORKING_FOLDER_01528}" + +mkdir -p "${WORKING_FOLDER_01528}/metadata/local" + +## Checks scenario of preparing parts offline by clickhouse-local + +## that is the metadata for the table we want to fill +## schema should match the schema of the table from server +## (the easiest way is just to copy it from the server) +cat < "${WORKING_FOLDER_01528}/metadata/local/test.sql" +ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d); +EOF + +################# + +echo "Option 1. 
Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path" + +## Source file: +cat < "${WORKING_FOLDER_01528}/data.csv" +1,2020-01-01,"String" +2,2020-02-02,"Another string" +3,2020-03-03,"One more string" +4,2020-01-02,"String for first partition" +EOF + +## metadata written into file +cat < "${WORKING_FOLDER_01528}/metadata/local/data_csv.sql" +ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01528}/data.csv'); +EOF + +## feed the table +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}" + +## check the parts were created +${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}" + +################# + +echo "Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe)" + +cat < "${WORKING_FOLDER_01528}/metadata/local/stdin.sql" +ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); +EOF + +cat < Date: Thu, 22 Oct 2020 15:41:01 +0300 Subject: [PATCH 114/174] Fix some unrelated performance issues in select parts for merge --- src/Disks/StoragePolicy.cpp | 7 +++++ src/Disks/StoragePolicy.h | 3 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++- .../MergeTree/SimpleMergeSelector.cpp | 28 ++++++++++++------- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 8a71f4f7a2f..2215615feda 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices() } } +bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const +{ + for (const auto & volume : volumes) + if (volume->areMergesAvoided()) + return true; + return false; +} StoragePolicySelector::StoragePolicySelector( const Poco::Util::AbstractConfiguration & config, diff --git 
a/src/Disks/StoragePolicy.h b/src/Disks/StoragePolicy.h index f4a4a0070b8..fc45ed3ed06 100644 --- a/src/Disks/StoragePolicy.h +++ b/src/Disks/StoragePolicy.h @@ -88,6 +88,9 @@ public: /// Checks if storage policy can be replaced by another one. void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const; + /// Check if we have any volume with stopped merges + bool hasAnyVolumeWithDisabledMerges() const; + private: Volumes volumes; const String name; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index df42f164e34..b29966751f9 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( IMergeSelector::PartsRanges parts_ranges; StoragePolicyPtr storage_policy = data.getStoragePolicy(); + /// Volumes with stopped merges are extremely rare situation. + /// Check it once and don't check each part (this is bad for performance). + bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges(); const String * prev_partition_id = nullptr; /// Previous part only in boundaries of partition frame @@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( part_info.data = ∂ part_info.ttl_infos = &part->ttl_infos; part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); - part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy); + part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? 
part->shallParticipateInMerges(storage_policy) : true; parts_ranges.back().emplace_back(part_info); diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 335833998c8..1156c17835b 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -92,19 +92,21 @@ double mapPiecewiseLinearToUnit(double value, double min, double max) /** Is allowed to merge parts in range with specific properties. */ bool allow( - double sum_size, - double max_size, - double min_age, - double range_size, - double partition_size, + size_t sum_size, + size_t max_size, + size_t min_age, + size_t range_size, + size_t partition_size, + double min_size_to_lower_base_log, + double max_size_to_lower_base_log, const SimpleMergeSelector::Settings & settings) { // std::cerr << "sum_size: " << sum_size << "\n"; /// Map size to 0..1 using logarithmic scale - /// Use log(1 + x) instead of log1p(x) because our x variables (sum_size and settings) are always integer. + /// Use log(1 + x) instead of log1p(x) because our sum_size is always integer. /// Also log1p seems to be slow and significantly affect performance of merges assignment. 
- double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), log(1 + settings.min_size_to_lower_base), log(1 + settings.max_size_to_lower_base)); + double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log); // std::cerr << "size_normalized: " << size_normalized << "\n"; @@ -143,7 +145,9 @@ void selectWithinPartition( const SimpleMergeSelector::PartsRange & parts, const size_t max_total_size_to_merge, Estimator & estimator, - const SimpleMergeSelector::Settings & settings) + const SimpleMergeSelector::Settings & settings, + double min_size_to_lower_base_log, + double max_size_to_lower_base_log) { size_t parts_count = parts.size(); if (parts_count <= 1) @@ -180,7 +184,7 @@ void selectWithinPartition( if (max_total_size_to_merge && sum_size > max_total_size_to_merge) break; - if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings)) + if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings)) estimator.consider( parts.begin() + begin, parts.begin() + end, @@ -200,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select( { Estimator estimator; + /// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance + const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base); + const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base); + for (const auto & part_range : parts_ranges) - selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings); + selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log); return estimator.getBest(); } From ac7af67ede43e8a1923e1476d91e03c65ce1a16d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 22 Oct 2020 15:52:43 +0300 Subject: [PATCH 115/174] fixup --- 
docker/test/performance-comparison/Dockerfile | 18 ++++++++++-------- docker/test/performance-comparison/compare.sh | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1a904cf73c2..92c3da4d059 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ bash \ curl \ + dmidecode \ g++ \ gdb \ git \ @@ -37,15 +38,16 @@ RUN apt-get update \ COPY * / -# Bind everything to NUMA node #1, if there's more than one. We avoid node #0, -# because it is more susceptible to system interruptions. We have to bind both -# servers and the tmpfs on which the database is stored. How to do it through -# Yandex Sandbox API is unclear, but by default tmpfs uses 'process allocation -# policy', not sure which process but hopefully the one that writes to it, so -# just bind the downloader script as well. -# We could also try to remount it with proper options in Sandbox task. +# Bind everything to one NUMA node, if there's more than one. Theoretically the +# node #0 should be less stable because of system interruptions. We bind +# randomly to node 1 or 0 to gather some statistics on that. We have to bind +# both servers and the tmpfs on which the database is stored. How to do it +# through Yandex Sandbox API is unclear, but by default tmpfs uses +# 'process allocation policy', not sure which process but hopefully the one that +# writes to it, so just bind the downloader script as well. We could also try to +# remount it with proper options in Sandbox task. 
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ["bash", "-c", "node=$(numactl --hardware | grep -q 'available: 1 nodes' && echo 0 || echo 1); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] +CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 8a567d7a11a..258bc0a95f7 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -1088,6 +1088,7 @@ case "$stage" in time configure ;& "restart") + numactl --show ||: numactl --hardware ||: lscpu ||: dmidecode -t 4 ||: From 1d07ece5d1b309c89a8ece2c48bb2247314cfad1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Oct 2020 17:15:56 +0300 Subject: [PATCH 116/174] Fix clang tidy warning --- src/Storages/MergeTree/SimpleMergeSelector.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 1156c17835b..972c6ea6ecb 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -92,11 +92,11 @@ double mapPiecewiseLinearToUnit(double value, double min, double max) /** Is allowed to merge parts in range with specific properties. 
*/ bool allow( - size_t sum_size, - size_t max_size, - size_t min_age, - size_t range_size, - size_t partition_size, + double sum_size, + double max_size, + double min_age, + double range_size, + double partition_size, double min_size_to_lower_base_log, double max_size_to_lower_base_log, const SimpleMergeSelector::Settings & settings) From ccf59c6412772d84def060be399be405756ee740 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 22 Oct 2020 22:53:46 +0800 Subject: [PATCH 117/174] Fix definitely wrong bug --- src/Common/HashTable/TwoLevelStringHashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/HashTable/TwoLevelStringHashMap.h b/src/Common/HashTable/TwoLevelStringHashMap.h index 55d54e51b6a..6bd8f74dbd6 100644 --- a/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/src/Common/HashTable/TwoLevelStringHashMap.h @@ -18,7 +18,7 @@ public: void ALWAYS_INLINE forEachMapped(Func && func) { for (auto i = 0u; i < this->NUM_BUCKETS; ++i) - return this->impls[i].forEachMapped(func); + this->impls[i].forEachMapped(func); } TMapped & ALWAYS_INLINE operator[](const Key & x) From 0275e9c0104768c229a04004499f8e708a49240e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 22 Oct 2020 19:42:31 +0300 Subject: [PATCH 118/174] work around docker weirdness --- docker/test/performance-comparison/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 92c3da4d059..76cadc3ce11 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -47,7 +47,9 @@ COPY * / # writes to it, so just bind the downloader script as well. We could also try to # remount it with proper options in Sandbox task. 
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] +# Double-escaped backslashes are a tribute to the engineering wonder of docker -- +# it gives '/bin/sh: 1: [bash,: not found' otherwise. +CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison From a6439aba4438f1e93472934169d1b7d37379fcee Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 22 Oct 2020 20:47:52 +0400 Subject: [PATCH 119/174] More specific log messages for each access storage type --- src/Access/AccessControlManager.cpp | 20 +++++++++++--------- src/Access/AccessControlManager.h | 2 -- src/Access/LDAPAccessStorage.cpp | 6 ++++++ src/Access/LDAPAccessStorage.h | 2 ++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 9bfc54fe69d..8fc3f2bfc97 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -137,12 +137,6 @@ AccessControlManager::AccessControlManager() AccessControlManager::~AccessControlManager() = default; -void AccessControlManager::addStorage(const StoragePtr & new_storage) -{ - MultipleAccessStorage::addStorage(new_storage); - LOG_DEBUG(getLogger(), "Added storage '{}' of type '{}'", new_storage->getStorageName(), String(new_storage->getStorageType())); -} - void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_) { auto storages = getStoragesPtr(); @@ -168,6 +162,7 @@ void 
AccessControlManager::addUsersConfigStorage(const String & storage_name_, c auto new_storage = std::make_shared(storage_name_, check_setting_name_function); new_storage->setConfig(users_config_); addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } void AccessControlManager::addUsersConfigStorage( @@ -200,6 +195,7 @@ void AccessControlManager::addUsersConfigStorage( auto new_storage = std::make_shared(storage_name_, check_setting_name_function); new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_); addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } void AccessControlManager::reloadUsersConfigs() @@ -243,7 +239,9 @@ void AccessControlManager::addDiskStorage(const String & storage_name_, const St } } } - addStorage(std::make_shared(storage_name_, directory_, readonly_)); + auto new_storage = std::make_shared(storage_name_, directory_, readonly_); + addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } @@ -255,13 +253,17 @@ void AccessControlManager::addMemoryStorage(const String & storage_name_) if (auto memory_storage = typeid_cast>(storage)) return; } - addStorage(std::make_shared(storage_name_)); + auto new_storage= std::make_shared(storage_name_); + addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName()); } void AccessControlManager::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) { - addStorage(std::make_shared(storage_name_, this, config_, 
prefix_)); + auto new_storage = std::make_shared(storage_name_, this, config_, prefix_); + addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}', LDAP server name: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getLDAPServerName()); } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index dd710614ece..c960b330ee6 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -149,8 +149,6 @@ public: const ExternalAuthenticators & getExternalAuthenticators() const; private: - void addStorage(const StoragePtr & new_storage); - class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index cf5e7673e40..b20ef3a39a9 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -29,6 +29,12 @@ LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl } +String LDAPAccessStorage::getLDAPServerName() const +{ + return ldap_server; +} + + void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix) { std::scoped_lock lock(mutex); diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index a845279841c..b1b0001d9bc 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -32,6 +32,8 @@ public: explicit LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix); virtual ~LDAPAccessStorage() override = default; + String getLDAPServerName() const; + public: // IAccessStorage implementations. 
virtual const char * getStorageType() const override; virtual String getStorageParamsJSON() const override; From b77e776db4d7679b6c1023e89923d859f5eff1ca Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 22 Oct 2020 20:30:50 +0300 Subject: [PATCH 120/174] fixes --- docker/test/fasttest/run.sh | 10 ++-------- .../00965_send_logs_level_concurrent_queries.reference | 10 ++++++++++ .../00965_send_logs_level_concurrent_queries.sh | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index ad1ed24d5d7..91fe84a04cd 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -20,6 +20,7 @@ FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}") FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}") FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}") FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}") +PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH" # Export these variables, so that all subsequent invocations of the script # use them, and not try to guess them anew, which leads to weird effects. @@ -28,6 +29,7 @@ export FASTTEST_SOURCE export FASTTEST_BUILD export FASTTEST_DATA export FASTTEST_OUT +export PATH server_pid=none @@ -144,12 +146,6 @@ CMAKE_LIBS_CONFIG=( "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" - "-DUSE_STATIC_LIBRARIES=0" - "-DSPLIT_SHARED_LIBRARIES=1" - "-DCLICKHOUSE_SPLIT_BINARY=1" - "-DCMAKE_BUILD_TYPE=Debug" - "-DCMAKE_C_FLAGS_ADD=-Og" - "-DCMAKE_CXX_FLAGS_ADD=-Og" ) # TODO remove this? we don't use ccache anyway. 
An option would be to download it @@ -342,8 +338,6 @@ case "$stage" in ;& "build") build - PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH" - export PATH ;& "configure") # The `install_log.txt` is also needed for compatibility with old CI task -- diff --git a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.reference b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.reference index e69de29bb2d..432299e9556 100644 --- a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.reference +++ b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.reference @@ -0,0 +1,10 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh index 5e24e820995..34dd1e5c083 100755 --- a/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh +++ b/tests/queries/0_stateless/00965_send_logs_level_concurrent_queries.sh @@ -4,8 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh for _ in {1..10}; do - ${CLICKHOUSE_CLIENT} --send_logs_level="trace" --query="SELECT * from numbers(1000000);" > /dev/null 2> /dev/null & - ${CLICKHOUSE_CLIENT} --send_logs_level="information" --query="SELECT * from numbers(1000000);" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" & + ${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="trace" --query="SELECT * from numbers(1000000);" 2>&1 | awk '{ print $8 }' | grep -q "Trace" && echo "OK" || echo "Fail" & + ${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="information" --query="SELECT * from numbers(1000000);" 2>&1 | awk '{ print $8 }' | grep "Debug\|Trace" & done wait From ef39def7ff14b3be1b6052e487229acbbee98bf3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 22 Oct 2020 20:46:17 +0300 Subject: [PATCH 121/174] Apply suggestions from code review Co-authored-by: filimonov <1549571+filimonov@users.noreply.github.com> --- programs/local/LocalServer.cpp | 2 +- .../0_stateless/01528_clickhouse_local_prepare_parts.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 77deb274c65..2d019f81367 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -336,7 +336,7 @@ void LocalServer::processQueries() throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) - /// so we can't reuse it safely as a query global_context and need a copy here + /// so we can't reuse it safely as a query context and need a copy here auto context = Context(*global_context); context.makeSessionContext(); diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 
9c7ad1d9476..9b09edfe27a 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -WORKING_FOLDER_01528="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize" +WORKING_FOLDER_01528="${CLICKHOUSE_TMP}/01528_clickhouse_local_prepare_parts" rm -rf "${WORKING_FOLDER_01528}" mkdir -p "${WORKING_FOLDER_01528}/metadata/local" @@ -80,4 +80,4 @@ ${CLICKHOUSE_LOCAL} --query "OPTIMIZE TABLE local.test FINAL;" -- --path="${WORK ${CLICKHOUSE_LOCAL} --query "SELECT toYYYYMM(d) m, uniqExact(_part) FROM local.test GROUP BY m ORDER BY m" -- --path="${WORKING_FOLDER_01528}" # cleanup -rm -rf "${WORKING_FOLDER_01528}" \ No newline at end of file +rm -rf "${WORKING_FOLDER_01528}" From b310d0abb4fd17f15d8a85786d53e04bdce5dc8a Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 22 Oct 2020 15:22:17 -0300 Subject: [PATCH 122/174] Fix for incorrect error message max_[table/partition]_size_to_drop can be changed without restart since https://github.com/ClickHouse/ClickHouse/pull/7779 --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9c1f253f820..7f2ada8a426 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1950,7 +1950,7 @@ void Context::checkCanBeDropped(const String & database, const String & table, c << (force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist") << "\n"; ostr << "How to fix this:\n" - << "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config and restart ClickHouse\n" + << "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n" << "2. 
Either create forcing file " << force_file.path() << " and make sure that ClickHouse has write permission for it.\n" << "Example:\nsudo touch '" << force_file.path() << "' && sudo chmod 666 '" << force_file.path() << "'"; From e743d2fd26fbe53baf7f0584405212e8683d5649 Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Thu, 22 Oct 2020 19:14:58 -0400 Subject: [PATCH 123/174] * Fixing tests in the tests/testflows/ldap/external_user_directory/tests/authentications.py suite * Moving TestFlows runs to use classic output format for stdout * Moving to TestFlows 1.6.57 * Updating LDAP test code styling --- docker/test/testflows/runner/Dockerfile | 4 +- .../authentication/tests/authentications.py | 62 ++- .../ldap/authentication/tests/common.py | 2 + .../ldap/authentication/tests/connections.py | 17 +- .../authentication/tests/multiple_servers.py | 5 +- .../authentication/tests/server_config.py | 1 + .../tests/authentications.py | 507 ++++++++++-------- .../external_user_directory/tests/common.py | 37 +- 8 files changed, 381 insertions(+), 254 deletions(-) diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index ed49743319c..9565e39598c 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.48 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.57 docker-compose docker dicttoxml kazoo tzlocal ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce @@ -72,5 +72,5 @@ RUN set -x \ VOLUME /var/lib/docker EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "python3 regression.py --no-color --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"] +CMD 
["sh", "-c", "python3 regression.py --no-color -o classic --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"] diff --git a/tests/testflows/ldap/authentication/tests/authentications.py b/tests/testflows/ldap/authentication/tests/authentications.py index 1b21dce7cc1..a64a37ed686 100644 --- a/tests/testflows/ldap/authentication/tests/authentications.py +++ b/tests/testflows/ldap/authentication/tests/authentications.py @@ -28,6 +28,8 @@ servers = { @TestStep(When) @Name("I login as {username} and execute query") def login_and_execute_query(self, username, password, exitcode=None, message=None, steps=True): + """Execute query as some user. + """ self.context.node.query("SELECT 1", settings=[("user", username), ("password", password)], exitcode=exitcode or 0, @@ -35,7 +37,8 @@ def login_and_execute_query(self, username, password, exitcode=None, message=Non @TestScenario def add_user_to_ldap_and_login(self, server, user=None, ch_user=None, login=None, exitcode=None, message=None, rbac=False): - """Add user to LDAP and ClickHouse and then try to login.""" + """Add user to LDAP and ClickHouse and then try to login. + """ self.context.ldap_node = self.context.cluster.node(server) if ch_user is None: @@ -60,7 +63,8 @@ def add_user_to_ldap_and_login(self, server, user=None, ch_user=None, login=None RQ_SRS_007_LDAP_Authentication_Parallel_ValidAndInvalid("1.0") ) def parallel_login(self, server, user_count=10, timeout=200, rbac=False): - """Check that login of valid and invalid LDAP authenticated users works in parallel.""" + """Check that login of valid and invalid LDAP authenticated users works in parallel. 
+ """ self.context.ldap_node = self.context.cluster.node(server) user = None @@ -114,7 +118,8 @@ def parallel_login(self, server, user_count=10, timeout=200, rbac=False): RQ_SRS_007_LDAP_Authentication_Invalid_DeletedUser("1.0") ) def login_after_user_is_deleted_from_ldap(self, server, rbac=False): - """Check that login fails after user is deleted from LDAP.""" + """Check that login fails after user is deleted from LDAP. + """ self.context.ldap_node = self.context.cluster.node(server) user = None @@ -146,7 +151,8 @@ def login_after_user_is_deleted_from_ldap(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_PasswordChanged("1.0") ) def login_after_user_password_changed_in_ldap(self, server, rbac=False): - """Check that login fails after user password is changed in LDAP.""" + """Check that login fails after user password is changed in LDAP. + """ self.context.ldap_node = self.context.cluster.node(server) user = None @@ -182,7 +188,8 @@ def login_after_user_password_changed_in_ldap(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_UsernameChanged("1.0") ) def login_after_user_cn_changed_in_ldap(self, server, rbac=False): - """Check that login fails after user cn is changed in LDAP.""" + """Check that login fails after user cn is changed in LDAP. + """ self.context.ldap_node = self.context.cluster.node(server) user = None new_user = None @@ -215,7 +222,8 @@ def login_after_user_cn_changed_in_ldap(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_LDAPServerRestart("1.0") ) def login_after_ldap_server_is_restarted(self, server, timeout=60, rbac=False): - """Check that login succeeds after LDAP server is restarted.""" + """Check that login succeeds after LDAP server is restarted. 
+ """ self.context.ldap_node = self.context.cluster.node(server) user = None @@ -250,7 +258,8 @@ def login_after_ldap_server_is_restarted(self, server, timeout=60, rbac=False): RQ_SRS_007_LDAP_Authentication_ClickHouseServerRestart("1.0") ) def login_after_clickhouse_server_is_restarted(self, server, timeout=60, rbac=False): - """Check that login succeeds after ClickHouse server is restarted.""" + """Check that login succeeds after ClickHouse server is restarted. + """ self.context.ldap_node = self.context.cluster.node(server) user = None @@ -285,7 +294,8 @@ def login_after_clickhouse_server_is_restarted(self, server, timeout=60, rbac=Fa RQ_SRS_007_LDAP_Authentication_Password_Empty("1.0") ) def valid_username_with_valid_empty_password(self, server, rbac=False): - """Check that we can't login using valid username that has empty password.""" + """Check that we can't login using valid username that has empty password. + """ user = {"cn": "empty_password", "userpassword": ""} exitcode = 4 message = f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" @@ -298,7 +308,8 @@ def valid_username_with_valid_empty_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Password_Empty("1.0") ) def valid_username_and_invalid_empty_password(self, server, rbac=False): - """Check that we can't login using valid username but invalid empty password.""" + """Check that we can't login using valid username but invalid empty password. + """ username = "user_non_empty_password" user = {"cn": username, "userpassword": username} login = {"password": ""} @@ -313,7 +324,8 @@ def valid_username_and_invalid_empty_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Valid("1.0") ) def valid_username_and_password(self, server, rbac=False): - """Check that we can login using valid username and password.""" + """Check that we can login using valid username and password. 
+ """ username = "valid_username_and_password" user = {"cn": username, "userpassword": username} @@ -326,7 +338,8 @@ def valid_username_and_password(self, server, rbac=False): ) def valid_username_and_password_invalid_server(self, server=None, rbac=False): """Check that we can't login using valid username and valid - password but for a different server.""" + password but for a different server. + """ self.context.ldap_node = self.context.cluster.node("openldap1") user = {"username": "user2", "userpassword": "user2", "server": "openldap1"} @@ -344,7 +357,8 @@ def valid_username_and_password_invalid_server(self, server=None, rbac=False): RQ_SRS_007_LDAP_Configuration_User_Name_Long("1.0") ) def valid_long_username_and_short_password(self, server, rbac=False): - """Check that we can login using valid very long username and short password.""" + """Check that we can login using valid very long username and short password. + """ username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" user = {"cn": username, "userpassword": "long_username"} @@ -355,7 +369,8 @@ def valid_long_username_and_short_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Invalid("1.0") ) def invalid_long_username_and_valid_short_password(self, server, rbac=False): - """Check that we can't login using slightly invalid long username but valid password.""" + """Check that we can't login using slightly invalid long username but valid password. 
+ """ username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" user = {"cn": username, "userpassword": "long_username"} login = {"username": f"{username}?"} @@ -371,7 +386,8 @@ def invalid_long_username_and_valid_short_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Password_Long("1.0") ) def valid_short_username_and_long_password(self, server, rbac=False): - """Check that we can login using valid short username with very long password.""" + """Check that we can login using valid short username with very long password. + """ username = "long_password" user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} add_user_to_ldap_and_login(user=user, server=server, rbac=rbac) @@ -381,7 +397,8 @@ def valid_short_username_and_long_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Invalid("1.0") ) def valid_short_username_and_invalid_long_password(self, server, rbac=False): - """Check that we can't login using valid short username and invalid long password.""" + """Check that we can't login using valid short username and invalid long password. + """ username = "long_password" user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} login = {"password": user["userpassword"] + "1"} @@ -396,7 +413,8 @@ def valid_short_username_and_invalid_long_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Invalid("1.0") ) def valid_username_and_invalid_password(self, server, rbac=False): - """Check that we can't login using valid username and invalid password.""" + """Check that we can't login using valid username and invalid password. 
+ """ username = "valid_username_and_invalid_password" user = {"cn": username, "userpassword": username} login = {"password": user["userpassword"] + "1"} @@ -411,7 +429,8 @@ def valid_username_and_invalid_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Invalid("1.0") ) def invalid_username_and_valid_password(self, server, rbac=False): - """Check that we can't login using slightly invalid username but valid password.""" + """Check that we can't login using slightly invalid username but valid password. + """ username = "invalid_username_and_valid_password" user = {"cn": username, "userpassword": username} login = {"username": user["cn"] + "1"} @@ -428,7 +447,8 @@ def invalid_username_and_valid_password(self, server, rbac=False): RQ_SRS_007_LDAP_Configuration_User_Name_UTF8("1.0") ) def valid_utf8_username_and_ascii_password(self, server, rbac=False): - """Check that we can login using valid utf-8 username with ascii password.""" + """Check that we can login using valid utf-8 username with ascii password. + """ username = "utf8_username_Gãńdåłf_Thê_Gręât" user = {"cn": username, "userpassword": "utf8_username"} @@ -440,7 +460,8 @@ def valid_utf8_username_and_ascii_password(self, server, rbac=False): RQ_SRS_007_LDAP_Authentication_Password_UTF8("1.0") ) def valid_ascii_username_and_utf8_password(self, server, rbac=False): - """Check that we can login using valid ascii username with utf-8 password.""" + """Check that we can login using valid ascii username with utf-8 password. + """ username = "utf8_password" user = {"cn": username, "userpassword": "utf8_password_Gãńdåłf_Thê_Gręât"} @@ -449,7 +470,8 @@ def valid_ascii_username_and_utf8_password(self, server, rbac=False): @TestScenario def empty_username_and_empty_password(self, server=None, rbac=False): """Check that we can login using empty username and empty password as - it will use the default user and that has an empty password.""" + it will use the default user and that has an empty password. 
+ """ login_and_execute_query(username="", password="") @TestOutline(Feature) diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index cf5cfc1d573..4e3d1e16647 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -95,6 +95,8 @@ def add_config(config, timeout=20, restart=False): if exitcode == 0: break time.sleep(1) + if settings.debug: + node.command(f"cat /var/lib/clickhouse/preprocessed_configs/{config.preprocessed_name}") assert exitcode == 0, error() def wait_for_config_to_be_loaded(): diff --git a/tests/testflows/ldap/authentication/tests/connections.py b/tests/testflows/ldap/authentication/tests/connections.py index f16f6c29b0e..dfb920181e1 100644 --- a/tests/testflows/ldap/authentication/tests/connections.py +++ b/tests/testflows/ldap/authentication/tests/connections.py @@ -98,7 +98,8 @@ def starttls_with_custom_port(self): login(servers, *users) def tls_connection(enable_tls, tls_require_cert): - """Try to login using LDAP user authentication over a TLS connection.""" + """Try to login using LDAP user authentication over a TLS connection. + """ servers = { "openldap2": { "host": "openldap2", @@ -152,7 +153,8 @@ def tls(self): RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_Default("1.0") ) def tls_enable_tls_default_yes(self): - """Check that the default value for the `enable_tls` is set to `yes`.""" + """Check that the default value for the `enable_tls` is set to `yes`. + """ servers = { "openldap2": { "host": "openldap2", @@ -171,7 +173,8 @@ def tls_enable_tls_default_yes(self): RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Default("1.0") ) def tls_require_cert_default_demand(self): - """Check that the default value for the `tls_require_cert` is set to `demand`.""" + """Check that the default value for the `tls_require_cert` is set to `demand`. 
+ """ servers = { "openldap2": { "host": "openldap2", @@ -210,7 +213,8 @@ def starttls(self): RQ_SRS_007_LDAP_Configuration_Server_TLSCipherSuite("1.0") ) def tls_cipher_suite(self): - """Check that `tls_cipher_suite` parameter can be used specify allowed cipher suites.""" + """Check that `tls_cipher_suite` parameter can be used specify allowed cipher suites. + """ servers = { "openldap4": { "host": "openldap4", @@ -241,7 +245,8 @@ def tls_cipher_suite(self): ]) def tls_minimum_protocol_version(self, version, exitcode, message): """Check that `tls_minimum_protocol_version` parameter can be used specify - to specify the minimum protocol version of SSL/TLS.""" + to specify the minimum protocol version of SSL/TLS. + """ servers = { "openldap4": { @@ -278,6 +283,8 @@ def tls_minimum_protocol_version(self, version, exitcode, message): @TestFeature @Name("connection protocols") def feature(self, node="clickhouse1"): + """Check different LDAP connection protocols. + """ self.context.node = self.context.cluster.node(node) for scenario in loads(current_module(), Scenario): diff --git a/tests/testflows/ldap/authentication/tests/multiple_servers.py b/tests/testflows/ldap/authentication/tests/multiple_servers.py index 6e906023b0a..c4317187b74 100644 --- a/tests/testflows/ldap/authentication/tests/multiple_servers.py +++ b/tests/testflows/ldap/authentication/tests/multiple_servers.py @@ -14,6 +14,7 @@ def scenario(self, node="clickhouse1"): authenticate users. 
""" self.context.node = self.context.cluster.node(node) + servers = { "openldap1": { "host": "openldap1", @@ -35,4 +36,6 @@ def scenario(self, node="clickhouse1"): {"server": "openldap1", "username": "user1", "password": "user1", "login": True}, {"server": "openldap2", "username": "user2", "password": "user2", "login": True} ] - login(servers, *users) + + with When("I add multiple LDAP servers and users that use different servers and try to login"): + login(servers, *users) diff --git a/tests/testflows/ldap/authentication/tests/server_config.py b/tests/testflows/ldap/authentication/tests/server_config.py index 80f2a496b0e..f62fda0bbf7 100644 --- a/tests/testflows/ldap/authentication/tests/server_config.py +++ b/tests/testflows/ldap/authentication/tests/server_config.py @@ -267,5 +267,6 @@ def feature(self, node="clickhouse1"): """Check that LDAP server configuration. """ self.context.node = self.context.cluster.node(node) + for scenario in loads(current_module(), Scenario): scenario() diff --git a/tests/testflows/ldap/external_user_directory/tests/authentications.py b/tests/testflows/ldap/external_user_directory/tests/authentications.py index bf5a788c4d5..9b216e7dd30 100644 --- a/tests/testflows/ldap/external_user_directory/tests/authentications.py +++ b/tests/testflows/ldap/external_user_directory/tests/authentications.py @@ -28,7 +28,8 @@ servers = { @TestOutline def add_user_to_ldap_and_login(self, server, user=None, ch_user=None, login=None, exitcode=None, message=None): - """Add user to LDAP and ClickHouse and then try to login.""" + """Add user to LDAP and ClickHouse and then try to login. 
+ """ self.context.ldap_node = self.context.cluster.node(server) if ch_user is None: @@ -91,23 +92,25 @@ def parallel_login(self, server, user_count=10, timeout=200): with Given("a group of LDAP users"): users = [{"cn": f"parallel_user{i}", "userpassword": randomword(20)} for i in range(user_count)] - with ldap_users(*users): - tasks = [] - try: - with When("users try to login in parallel", description=""" - * with valid username and password - * with invalid username and valid password - * with valid username and invalid password - """): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with ldap_users(*users): + tasks = [] + try: + with When("users try to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario @Requirements( @@ -124,34 +127,36 @@ def parallel_login_with_the_same_user(self, server, timeout=200): with Given("only one LDAP user"): users = [{"cn": f"parallel_user1", "userpassword": randomword(20)}] - with ldap_users(*users): - tasks = [] - try: - with When("the same user tries to login in parallel", 
description=""" - * with valid username and password - * with invalid username and valid password - * with valid username and invalid password - """): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with ldap_users(*users): + tasks = [] + try: + with When("the same user tries to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario def login_after_ldap_external_user_directory_is_removed(self, server): """Check that ClickHouse stops authenticating LDAP users after LDAP external user directory is removed. 
""" - with When("I attempt to login after LDAP external user directory is added"): + with When("I login after LDAP external user directory is added"): with ldap_external_user_directory(server="openldap2", roles=[], restart=True): login_and_execute_query(username="user2", password="user2") - with When("I attempt to login after LDAP external user directory is removed"): + with And("I attempt to login after LDAP external user directory is removed"): exitcode = 4 message = f"DB::Exception: user2: Authentication failed: password is incorrect or there is no user with such name" login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) @@ -318,29 +323,34 @@ def parallel_login_with_rbac_users(self, server, user_count=10, timeout=200): users = [{"cn": f"parallel_user{i}", "userpassword": randomword(20)} for i in range(user_count)] - with rbac_users(*users): - tasks = [] - try: - with When("I login in parallel"): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with rbac_users(*users): + tasks = [] + try: + with When("I login in parallel"): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario @Requirements( 
RQ_SRS_009_LDAP_ExternalUserDirectory_Users_Authentication_NewUsers("1.0") ) def login_after_user_is_added_to_ldap(self, server): - """Check that user can login as soon as it is added to LDAP.""" + """Check that user can login as soon as it is added to LDAP. + """ user = {"cn": "myuser", "userpassword": "myuser"} - with When(f"I add user to LDAP and try to login"): - add_user_to_ldap_and_login(user=user, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with When(f"I add user to LDAP and try to login"): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -348,29 +358,32 @@ def login_after_user_is_added_to_ldap(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_DeletedUsers("1.0") ) def login_after_user_is_deleted_from_ldap(self, server): - """Check that login fails after user is deleted from LDAP.""" + """Check that login fails after user is deleted from LDAP. 
+ """ self.context.ldap_node = self.context.cluster.node(server) user = None - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": "myuser"} - user = add_user_to_ldap(**user) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I delete this user from LDAP"): - delete_user_from_ldap(user) + with When("I delete this user from LDAP"): + delete_user_from_ldap(user) - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -378,33 +391,36 @@ def login_after_user_is_deleted_from_ldap(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_PasswordChanged("1.0") ) def login_after_user_password_changed_in_ldap(self, server): - """Check that login fails after user password is changed in LDAP.""" + """Check that login fails after user password is changed in LDAP. 
+ """ self.context.ldap_node = self.context.cluster.node(server) user = None - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": "myuser"} - user = add_user_to_ldap(**user) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I change user password in LDAP"): - change_user_password_in_ldap(user, "newpassword") + with When("I change user password in LDAP"): + change_user_password_in_ldap(user, "newpassword") - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) - with And("when I try to login with the new password it should work"): - login_and_execute_query(username=user["cn"], password="newpassword") + with And("when I try to login with the new password it should work"): + login_and_execute_query(username=user["cn"], password="newpassword") - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -412,30 +428,33 @@ def 
login_after_user_password_changed_in_ldap(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_UsernameChanged("1.0") ) def login_after_user_cn_changed_in_ldap(self, server): - """Check that login fails after user cn is changed in LDAP.""" + """Check that login fails after user cn is changed in LDAP. + """ self.context.ldap_node = self.context.cluster.node(server) user = None new_user = None - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": "myuser"} - user = add_user_to_ldap(**user) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I change user password in LDAP"): - new_user = change_user_cn_in_ldap(user, "myuser2") + with When("I change user password in LDAP"): + new_user = change_user_cn_in_ldap(user, "myuser2") - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) - finally: - with Finally("I make sure LDAP user is deleted"): - if new_user is not None: - delete_user_from_ldap(new_user, exitcode=None) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) + finally: + with Finally("I make sure LDAP user is deleted"): + if new_user is not None: + delete_user_from_ldap(new_user, exitcode=None) @TestScenario 
@Requirements( @@ -443,33 +462,36 @@ def login_after_user_cn_changed_in_ldap(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_LDAPServerRestart("1.0") ) def login_after_ldap_server_is_restarted(self, server, timeout=60): - """Check that login succeeds after LDAP server is restarted.""" + """Check that login succeeds after LDAP server is restarted. + """ self.context.ldap_node = self.context.cluster.node(server) user = None - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": getuid()} - user = add_user_to_ldap(**user) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": getuid()} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I restart LDAP server"): - self.context.ldap_node.restart() + with When("I restart LDAP server"): + self.context.ldap_node.restart() - with Then("I try to login until it works", description=f"timeout {timeout} sec"): - started = time.time() - while True: - r = self.context.node.query("SELECT 1", - settings=[("user", user["cn"]), ("password", user["userpassword"])], - no_checks=True) - if r.exitcode == 0: - break - assert time.time() - started < timeout, error(r.output) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("I try to login until it works", description=f"timeout {timeout} sec"): + started = time.time() + while True: + r = self.context.node.query("SELECT 1", + settings=[("user", user["cn"]), ("password", user["userpassword"])], + no_checks=True) + if r.exitcode == 0: + break + assert time.time() - started < timeout, error(r.output) + finally: + with Finally("I make sure LDAP user is 
deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -477,33 +499,36 @@ def login_after_ldap_server_is_restarted(self, server, timeout=60): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_ClickHouseServerRestart("1.0") ) def login_after_clickhouse_server_is_restarted(self, server, timeout=60): - """Check that login succeeds after ClickHouse server is restarted.""" + """Check that login succeeds after ClickHouse server is restarted. + """ self.context.ldap_node = self.context.cluster.node(server) user = None - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": getuid()} - user = add_user_to_ldap(**user) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": getuid()} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I restart ClickHouse server"): - self.context.node.restart() + with When("I restart ClickHouse server"): + self.context.node.restart() - with Then("I try to login until it works", description=f"timeout {timeout} sec"): - started = time.time() - while True: - r = self.context.node.query("SELECT 1", - settings=[("user", user["cn"]), ("password", user["userpassword"])], - no_checks=True) - if r.exitcode == 0: - break - assert time.time() - started < timeout, error(r.output) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("I try to login until it works", description=f"timeout {timeout} sec"): + started = time.time() + while True: + r = self.context.node.query("SELECT 1", + settings=[("user", user["cn"]), ("password", user["userpassword"])], + no_checks=True) + if 
r.exitcode == 0: + break + assert time.time() - started < timeout, error(r.output) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -511,12 +536,15 @@ def login_after_clickhouse_server_is_restarted(self, server, timeout=60): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Password_Empty("1.0") ) def valid_username_with_valid_empty_password(self, server): - """Check that we can't login using valid username that has empty password.""" + """Check that we can't login using valid username that has empty password. + """ user = {"cn": "empty_password", "userpassword": ""} exitcode = 4 message = f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -524,41 +552,50 @@ def valid_username_with_valid_empty_password(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Password_Empty("1.0") ) def valid_username_and_invalid_empty_password(self, server): - """Check that we can't login using valid username but invalid empty password.""" - username = "user_non_empty_password" - user = {"cn": username, "userpassword": username} - login = {"password": ""} + """Check that we can't login using valid username but invalid empty password. 
+ """ + username = "user_non_empty_password" + user = {"cn": username, "userpassword": username} + login = {"password": ""} - exitcode = 4 - message = f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message = f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Valid("1.0") ) def valid_username_and_password(self, server): - """Check that we can login using valid username and password.""" - username = "valid_username_and_password" - user = {"cn": username, "userpassword": username} + """Check that we can login using valid username and password. + """ + username = "valid_username_and_password" + user = {"cn": username, "userpassword": username} - with When(f"I add user {username} to LDAP and try to login"): - add_user_to_ldap_and_login(user=user, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with When(f"I add user {username} to LDAP and try to login"): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Invalid("1.0") ) def valid_username_and_password_invalid_server(self, server=None): - """Check that we can't login using valid username and valid - password but for a different server.""" - self.context.ldap_node = self.context.cluster.node("openldap1") + """Check that we can't login using valid username and valid + password but for a different server. 
+ """ + self.context.ldap_node = self.context.cluster.node("openldap1") - exitcode = 4 - message = f"DB::Exception: user2: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message = f"DB::Exception: user2: Authentication failed: password is incorrect or there is no user with such name" - login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) @TestScenario @Requirements( @@ -566,26 +603,32 @@ def valid_username_and_password_invalid_server(self, server=None): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Username_Long("1.0"), ) def valid_long_username_and_short_password(self, server): - """Check that we can login using valid very long username and short password.""" - username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" - user = {"cn": username, "userpassword": "long_username"} + """Check that we can login using valid very long username and short password. 
+ """ + username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" + user = {"cn": username, "userpassword": "long_username"} - add_user_to_ldap_and_login(user=user, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Invalid("1.0") ) def invalid_long_username_and_valid_short_password(self, server): - """Check that we can't login using slightly invalid long username but valid password.""" - username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" - user = {"cn": username, "userpassword": "long_username"} - login = {"username": f"{username}?"} + """Check that we can't login using slightly invalid long username but valid password. + """ + username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" + user = {"cn": username, "userpassword": "long_username"} + login = {"username": f"{username}?"} - exitcode = 4 - message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -593,55 +636,68 @@ def invalid_long_username_and_valid_short_password(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Password_Long("1.0") ) def 
valid_short_username_and_long_password(self, server): - """Check that we can login using valid short username with very long password.""" - username = "long_password" - user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} - add_user_to_ldap_and_login(user=user, server=server) + """Check that we can login using valid short username with very long password. + """ + username = "long_password" + user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} + + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Invalid("1.0") ) def valid_short_username_and_invalid_long_password(self, server): - """Check that we can't login using valid short username and invalid long password.""" - username = "long_password" - user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} - login = {"password": user["userpassword"] + "1"} + """Check that we can't login using valid short username and invalid long password. 
+ """ + username = "long_password" + user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} + login = {"password": user["userpassword"] + "1"} - exitcode = 4 - message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Invalid("1.0") ) def valid_username_and_invalid_password(self, server): - """Check that we can't login using valid username and invalid password.""" - username = "valid_username_and_invalid_password" - user = {"cn": username, "userpassword": username} - login = {"password": user["userpassword"] + "1"} + """Check that we can't login using valid username and invalid password. 
+ """ + username = "valid_username_and_invalid_password" + user = {"cn": username, "userpassword": username} + login = {"password": user["userpassword"] + "1"} - exitcode = 4 - message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Invalid("1.0") ) def invalid_username_and_valid_password(self, server): - """Check that we can't login using slightly invalid username but valid password.""" - username = "invalid_username_and_valid_password" - user = {"cn": username, "userpassword": username} - login = {"username": user["cn"] + "1"} + """Check that we can't login using slightly invalid username but valid password. 
+ """ + username = "invalid_username_and_valid_password" + user = {"cn": username, "userpassword": username} + login = {"username": user["cn"] + "1"} - exitcode = 4 - message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" + exitcode = 4 + message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -649,11 +705,14 @@ def invalid_username_and_valid_password(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Username_UTF8("1.0") ) def valid_utf8_username_and_ascii_password(self, server): - """Check that we can login using valid utf-8 username with ascii password.""" - username = "utf8_username_Gãńdåłf_Thê_Gręât" - user = {"cn": username, "userpassword": "utf8_username"} + """Check that we can login using valid utf-8 username with ascii password. 
+ """ + username = "utf8_username_Gãńdåłf_Thê_Gręât" + user = {"cn": username, "userpassword": "utf8_username"} - add_user_to_ldap_and_login(user=user, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -661,17 +720,23 @@ def valid_utf8_username_and_ascii_password(self, server): RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Password_UTF8("1.0") ) def valid_ascii_username_and_utf8_password(self, server): - """Check that we can login using valid ascii username with utf-8 password.""" + """Check that we can login using valid ascii username with utf-8 password. + """ username = "utf8_password" user = {"cn": username, "userpassword": "utf8_password_Gãńdåłf_Thê_Gręât"} - add_user_to_ldap_and_login(user=user, server=server) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario def empty_username_and_empty_password(self, server=None): """Check that we can login using empty username and empty password as - it will use the default user and that has an empty password.""" - login_and_execute_query(username="", password="") + it will use the default user and that has an empty password. 
+ """ + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + login_and_execute_query(username="", password="") @TestScenario @Requirements( @@ -698,16 +763,18 @@ def user_lookup_priority(self, server): "ldap": {"username": "ldap", "password": "userldap"} } - with ldap_users(*[{"cn": user["username"], "userpassword": user["password"]} for user in users.values()]): - with rbac_users({"cn": "local", "userpassword": "local"}): - with When("I try to login as 'default' user which is also defined in users.xml it should fail"): - login_and_execute_query(**users["default"], exitcode=exitcode, message=message.format(username="default")) + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with ldap_users(*[{"cn": user["username"], "userpassword": user["password"]} for user in users.values()]): + with rbac_users({"cn": "local", "userpassword": "local"}): + with When("I try to login as 'default' user which is also defined in users.xml it should fail"): + login_and_execute_query(**users["default"], exitcode=exitcode, message=message.format(username="default")) - with When("I try to login as 'local' user which is also defined in local storage it should fail"): - login_and_execute_query(**users["local"], exitcode=exitcode, message=message.format(username="local")) + with When("I try to login as 'local' user which is also defined in local storage it should fail"): + login_and_execute_query(**users["local"], exitcode=exitcode, message=message.format(username="local")) - with When("I try to login as 'ldap' user defined only in LDAP it should work"): - login_and_execute_query(**users["ldap"]) + with When("I try to login as 'ldap' user defined only in LDAP it should work"): + login_and_execute_query(**users["ldap"]) @TestOutline(Feature) @@ -728,7 +795,5 @@ def feature(self, servers=None, server=None, node="clickhouse1"): server = "openldap1" with 
ldap_servers(servers): - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - for scenario in loads(current_module(), Scenario): - Scenario(test=scenario, flags=TE)(server=server) + for scenario in loads(current_module(), Scenario): + Scenario(test=scenario, flags=TE)(server=server) diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index b4a8c9e6640..d6f414e617a 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -70,6 +70,15 @@ def rbac_roles(*roles): with By(f"dropping role {role}", flags=TE): node.query(f"DROP ROLE IF EXISTS {role}") +def verify_ldap_user_exists(server, username, password): + """Check that LDAP user is defined on the LDAP server. + """ + with By("searching LDAP database"): + ldap_node = current().context.cluster.node(server) + r = ldap_node.command( + f"ldapwhoami -H ldap://localhost -D 'cn={username},ou=users,dc=company,dc=com' -w {password}") + assert r.exitcode == 0, error() + def create_ldap_external_user_directory_config_content(server=None, roles=None, **kwargs): """Create LDAP external user directory configuration file content. 
""" @@ -197,8 +206,26 @@ def login(servers, directory_server, *users, config=None): @TestStep(When) @Name("I login as {username} and execute query") -def login_and_execute_query(self, username, password, exitcode=None, message=None, steps=True, timeout=60): - self.context.node.query("SELECT 1", - settings=[("user", username), ("password", password)], - exitcode=exitcode or 0, - message=message, steps=steps, timeout=timeout) +def login_and_execute_query(self, username, password, exitcode=None, message=None, steps=True, timeout=60, poll=False): + if poll: + start_time = time.time() + attempt = 0 + + with By("repeatedly trying to login until successful or timeout"): + while True: + with When(f"attempt #{attempt}"): + r = self.context.node.query("SELECT 1", settings=[("user", username), ("password", password)], + no_checks=True, steps=False, timeout=timeout) + + if r.exitcode == (0 if exitcode is None else exitcode) and (message in r.output if message is not None else True): + break + + if time.time() - start_time > timeout: + fail(f"timeout {timeout} trying to login") + + attempt += 1 + else: + self.context.node.query("SELECT 1", + settings=[("user", username), ("password", password)], + exitcode=(0 if exitcode is None else exitcode), + message=message, steps=steps, timeout=timeout) From 1a562ee32930e4d4d0db34fc8a80eb731b18206e Mon Sep 17 00:00:00 2001 From: feng lv Date: Fri, 23 Oct 2020 02:32:42 +0000 Subject: [PATCH 124/174] fix small document error --- docs/en/sql-reference/syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index fecea3ab30b..70994f3d882 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -87,7 +87,7 @@ In string literals, you need to escape at least `'` and `\`. Single quotes can b ### Compound {#compound} -Arrays are constructed with square brackets `[1, 2, 3]`. 
Nuples are constructed with round brackets `(1, 'Hello, world!', 2)`. +Arrays are constructed with square brackets `[1, 2, 3]`. Tuples are constructed with round brackets `(1, 'Hello, world!', 2)`. Technically these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively. An array must consist of at least one item, and a tuple must have at least two items. There’s a separate case when tuples appear in the `IN` clause of a `SELECT` query. Query results can include tuples, but tuples can’t be saved to a database (except of tables with [Memory](../engines/table-engines/special/memory.md) engine). From 40f67488804e021ea637b4f69c6bea436d3e2253 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Oct 2020 08:59:57 +0300 Subject: [PATCH 125/174] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index c4d74ea6ee6..24705a52a9a 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -77,6 +77,7 @@ toc_title: Adopters | Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | | Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | +| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | | S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | | scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | | Segment | Data 
processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | From 6cd7d46f934e14d697a06762e317db1194f621f4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Oct 2020 09:13:26 +0300 Subject: [PATCH 126/174] Update AccessControlManager.cpp --- src/Access/AccessControlManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 8fc3f2bfc97..a95d65ebb59 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -253,7 +253,7 @@ void AccessControlManager::addMemoryStorage(const String & storage_name_) if (auto memory_storage = typeid_cast>(storage)) return; } - auto new_storage= std::make_shared(storage_name_); + auto new_storage = std::make_shared(storage_name_); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName()); } From 69f2f9838ee7f16407bc64f1ec443cc49dd850d4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 14:16:27 +0300 Subject: [PATCH 127/174] Add ability to specify PRIMARY KEY without ORDER BY --- .../MergeTree/registerStorageMergeTree.cpp | 28 +++-- ...y_key_without_order_by_zookeeper.reference | 23 ++++ ...primary_key_without_order_by_zookeeper.sql | 100 ++++++++++++++++++ 3 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference create mode 100644 tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index f94425a81d3..40c9ac7fe70 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -608,26 +608,36 @@ 
static StoragePtr create(const StorageFactory::Arguments & args) /// single default partition with name "all". metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.context); - if (!args.storage_def->order_by) - throw Exception( - "You must provide an ORDER BY expression in the table definition. " - "If you don't want this table to be sorted, use ORDER BY tuple()", - ErrorCodes::BAD_ARGUMENTS); - /// Get sorting key from engine arguments. + /// Get sorting key from engine arguments. PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. /// /// NOTE: store merging_param_key_arg as additional key column. We do it /// before storage creation. After that storage will just copy this /// column if sorting key will be changed. - metadata.sorting_key = KeyDescription::getSortingKeyFromAST( - args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); + if (args.storage_def->order_by) + { + metadata.sorting_key = KeyDescription::getSortingKeyFromAST( + args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); + } + else if (args.storage_def->primary_key) + { + metadata.sorting_key = KeyDescription::getSortingKeyFromAST( + args.storage_def->primary_key->ptr(), metadata.columns, args.context, merging_param_key_arg); + } + else + { + throw Exception( + "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. 
" + "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()", + ErrorCodes::BAD_ARGUMENTS); + } /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); } - else /// Otherwise we copy it from primary key definition + else /// Otherwise we don't have explicit primary key and copy if from order by { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context); /// and set it's definition_ast to nullptr (so isPrimaryKeyDefined() diff --git a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference new file mode 100644 index 00000000000..d8b69d78e02 --- /dev/null +++ b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference @@ -0,0 +1,23 @@ +CREATE TABLE default.merge_tree_pk\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +1 c 0 +2 e 555 +2 b 0 +CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String,\n `key2` UInt64\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY (key, key2)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +1 c 0 +2 e 555 +2 b 0 +CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String,\n `key2` UInt64\n)\nENGINE 
= ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nORDER BY (key, key2)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql new file mode 100644 index 00000000000..31294d8ebbc --- /dev/null +++ b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql @@ -0,0 +1,100 @@ +DROP TABLE IF EXISTS merge_tree_pk; + +CREATE TABLE merge_tree_pk +( + key UInt64, + value String +) +ENGINE = ReplacingMergeTree() +PRIMARY KEY key; + +SHOW CREATE TABLE merge_tree_pk; + +INSERT INTO merge_tree_pk VALUES (1, 'a'); +INSERT INTO merge_tree_pk VALUES (2, 'b'); + +SELECT * FROM merge_tree_pk ORDER BY key; + +INSERT INTO merge_tree_pk VALUES (1, 'c'); + +DETACH TABLE merge_tree_pk; +ATTACH TABLE merge_tree_pk; + +SELECT * FROM merge_tree_pk FINAL ORDER BY key; + +DROP TABLE IF EXISTS merge_tree_pk; + +DROP TABLE IF EXISTS merge_tree_pk_sql; + +CREATE TABLE merge_tree_pk_sql +( + key UInt64, + value String, + PRIMARY KEY (key) +) +ENGINE = ReplacingMergeTree(); + +SHOW CREATE TABLE merge_tree_pk_sql; + +INSERT INTO merge_tree_pk_sql VALUES (1, 'a'); +INSERT INTO merge_tree_pk_sql VALUES (2, 'b'); + +SELECT * FROM merge_tree_pk_sql ORDER BY key; + +INSERT INTO merge_tree_pk_sql VALUES (1, 'c'); + +DETACH TABLE merge_tree_pk_sql; +ATTACH TABLE merge_tree_pk_sql; + +SELECT * FROM merge_tree_pk_sql FINAL ORDER BY key; + +ALTER TABLE merge_tree_pk_sql ADD COLUMN key2 UInt64, MODIFY ORDER BY (key, key2); + +INSERT INTO merge_tree_pk_sql VALUES (2, 'd', 555); + +INSERT INTO merge_tree_pk_sql VALUES (2, 'e', 555); + +SELECT * FROM merge_tree_pk_sql FINAL ORDER BY key; + +SHOW CREATE TABLE merge_tree_pk_sql; + +DROP TABLE IF EXISTS merge_tree_pk_sql; + +DROP TABLE IF EXISTS replicated_merge_tree_pk_sql; + +CREATE TABLE replicated_merge_tree_pk_sql +( + key UInt64, + value String, 
+ PRIMARY KEY (key) +) +ENGINE = ReplicatedReplacingMergeTree('/clickhouse/test/01532_primary_key_without', 'r1'); + +SHOW CREATE TABLE replicated_merge_tree_pk_sql; + +INSERT INTO replicated_merge_tree_pk_sql VALUES (1, 'a'); +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'b'); + +SELECT * FROM replicated_merge_tree_pk_sql ORDER BY key; + +INSERT INTO replicated_merge_tree_pk_sql VALUES (1, 'c'); + +DETACH TABLE replicated_merge_tree_pk_sql; +ATTACH TABLE replicated_merge_tree_pk_sql; + +SELECT * FROM replicated_merge_tree_pk_sql FINAL ORDER BY key; + +ALTER TABLE replicated_merge_tree_pk_sql ADD COLUMN key2 UInt64, MODIFY ORDER BY (key, key2); + +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'd', 555); + +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'e', 555); + +SELECT * FROM replicated_merge_tree_pk_sql FINAL ORDER BY key; + +DETACH TABLE replicated_merge_tree_pk_sql; +ATTACH TABLE replicated_merge_tree_pk_sql; + +SHOW CREATE TABLE replicated_merge_tree_pk_sql; + +DROP TABLE IF EXISTS replicated_merge_tree_pk_sql; From 59ca0d071c60dd741daa86322e6843d995dbba51 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 14:17:37 +0300 Subject: [PATCH 128/174] Fix comment --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 40c9ac7fe70..e255c66dd70 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -637,7 +637,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); } - else /// Otherwise we don't have explicit primary key and copy if from order by + else /// Otherwise we don't have explicit primary key and copy it from order by { metadata.primary_key = 
KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context); /// and set it's definition_ast to nullptr (so isPrimaryKeyDefined() From f407504a7a7504e0823ec8833d12c71d4c4c0d92 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 23 Oct 2020 19:42:21 +0800 Subject: [PATCH 129/174] try use cmake version for croaring instead of amalgamation.sh --- .gitmodules | 4 + contrib/CMakeLists.txt | 2 +- contrib/croaring | 1 + contrib/croaring-cmake/CMakeLists.txt | 25 + contrib/croaring/CMakeLists.txt | 6 - contrib/croaring/LICENSE | 202 - contrib/croaring/README.txt | 2 - contrib/croaring/roaring.c | 11093 ---------------- contrib/croaring/roaring/roaring.h | 7187 ---------- contrib/croaring/roaring/roaring.hh | 1732 --- docker/test/fasttest/run.sh | 2 +- .../AggregateFunctionGroupBitmapData.h | 3 +- 12 files changed, 34 insertions(+), 20225 deletions(-) create mode 160000 contrib/croaring create mode 100644 contrib/croaring-cmake/CMakeLists.txt delete mode 100644 contrib/croaring/CMakeLists.txt delete mode 100644 contrib/croaring/LICENSE delete mode 100644 contrib/croaring/README.txt delete mode 100644 contrib/croaring/roaring.c delete mode 100644 contrib/croaring/roaring/roaring.h delete mode 100644 contrib/croaring/roaring/roaring.hh diff --git a/.gitmodules b/.gitmodules index ace36122e6e..fdd48fcce01 100644 --- a/.gitmodules +++ b/.gitmodules @@ -186,3 +186,7 @@ path = contrib/cyrus-sasl url = https://github.com/cyrusimap/cyrus-sasl branch = cyrus-sasl-2.1 +[submodule "contrib/croaring"] + path = contrib/croaring + url = https://github.com/RoaringBitmap/CRoaring + branch = v0.2.66 diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 130e4b13c91..7d6b9c0e374 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -20,7 +20,6 @@ add_subdirectory (boost-cmake) add_subdirectory (cctz-cmake) add_subdirectory (consistent-hashing-sumbur) add_subdirectory (consistent-hashing) -add_subdirectory 
(croaring) add_subdirectory (FastMemcpy) add_subdirectory (hyperscan-cmake) add_subdirectory (jemalloc-cmake) @@ -34,6 +33,7 @@ add_subdirectory (ryu-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (poco-cmake) +add_subdirectory (croaring-cmake) # TODO: refactor the contrib libraries below this comment. diff --git a/contrib/croaring b/contrib/croaring new file mode 160000 index 00000000000..5f20740ec0d --- /dev/null +++ b/contrib/croaring @@ -0,0 +1 @@ +Subproject commit 5f20740ec0de5e153e8f4cb2ab91814e8b291a14 diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt new file mode 100644 index 00000000000..3189795347b --- /dev/null +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -0,0 +1,25 @@ +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/croaring) + +set(SRCS + ${LIBRARY_DIR}/src/array_util.c + ${LIBRARY_DIR}/src/bitset_util.c + ${LIBRARY_DIR}/src/containers/array.c + ${LIBRARY_DIR}/src/containers/bitset.c + ${LIBRARY_DIR}/src/containers/containers.c + ${LIBRARY_DIR}/src/containers/convert.c + ${LIBRARY_DIR}/src/containers/mixed_intersection.c + ${LIBRARY_DIR}/src/containers/mixed_union.c + ${LIBRARY_DIR}/src/containers/mixed_equal.c + ${LIBRARY_DIR}/src/containers/mixed_subset.c + ${LIBRARY_DIR}/src/containers/mixed_negation.c + ${LIBRARY_DIR}/src/containers/mixed_xor.c + ${LIBRARY_DIR}/src/containers/mixed_andnot.c + ${LIBRARY_DIR}/src/containers/run.c + ${LIBRARY_DIR}/src/roaring.c + ${LIBRARY_DIR}/src/roaring_priority_queue.c + ${LIBRARY_DIR}/src/roaring_array.c) + +add_library(roaring ${SRCS}) + +target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring) +target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) diff --git a/contrib/croaring/CMakeLists.txt b/contrib/croaring/CMakeLists.txt deleted file mode 100644 index da19911487f..00000000000 --- a/contrib/croaring/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_library(roaring - roaring.c - roaring/roaring.h - 
roaring/roaring.hh) - -target_include_directories (roaring SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/contrib/croaring/LICENSE b/contrib/croaring/LICENSE deleted file mode 100644 index 3265476ea81..00000000000 --- a/contrib/croaring/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2016 The CRoaring authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/contrib/croaring/README.txt b/contrib/croaring/README.txt deleted file mode 100644 index 3daa1c43ed8..00000000000 --- a/contrib/croaring/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -download from https://github.com/RoaringBitmap/CRoaring/archive/v0.2.57.tar.gz -and use ./amalgamation.sh generate diff --git a/contrib/croaring/roaring.c b/contrib/croaring/roaring.c deleted file mode 100644 index 6327db7ade3..00000000000 --- a/contrib/croaring/roaring.c +++ /dev/null @@ -1,11093 +0,0 @@ -/* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! 
*/ -#include "roaring/roaring.h" - -/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ -#ifdef DMALLOC -#include "dmalloc.h" -#endif - -/* begin file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ -#include -#include -#include -#include -#include -#include - -extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey); - -#ifdef USESSE4 -// used by intersect_vector16 -ALIGNED(0x1000) -static const uint8_t shuffle_mask16[] = { - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 
13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 
12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 
8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, 
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 
12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 
0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 
7, 8, 9, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15}; - -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = 
_mm_extract_epi32(res_v, 0); - __m128i sm16 = - _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - C[count] = a; //==b; - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - 
if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - // we handle the degenerate case - if (s_a == 0) return 0; - if (s_b == 0) { - if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); - return (int32_t)s_a; - } - // handle the leading zeroes, it is messy but it allows us to use the fast - // _mm_cmpistrm instrinsic safely - int32_t count = 0; - if ((A[0] == 0) || (B[0] == 0)) { - if ((A[0] == 0) && (B[0] == 0)) { - A++; - s_a--; - B++; - s_b--; - } else if (A[0] == 0) { - C[count++] = 0; - A++; - s_a--; - } else { - B++; - s_b--; - } - } - // at this point, we have two non-empty arrays, made of non-zero - // increasing values. 
- size_t i_a = 0, i_b = 0; - const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path - __m128i v_a, v_b; //, v_bmax; - // we load a vector from A and a vector from B - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - // we have a runningmask which indicates which values from A have been - // spotted in B, these don't get written out. - __m128i runningmask_a_found_in_b = _mm_setzero_si128(); - /**** - * start of the main vectorized loop - *****/ - while (true) { - // afoundinb will contain a mask indicate for each entry in A - // whether it is seen - // in B - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - // we always compare the last values of A and B - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - // Ok. In this code path, we are ready to write our v_a - // because there is no need to read more from B, they will - // all be large values. 
- const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - /*** next few lines are probably expensive *****/ - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - // we advance a - i_a += vectorlength; - if (i_a == st_a) // no more - break; - runningmask_a_found_in_b = _mm_setzero_si128(); - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - // in this code path, the current v_b has become useless - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - // at this point, either we have i_a == st_a, which is the end of the - // vectorized processing, - // or we have i_b == st_b, and we are not done processing the vector... - // so we need to finish it off. - if (i_a < st_a) { // we have unfinished business... 
- uint16_t buffer[8]; // buffer to do a masked load - memset(buffer, 0, 8 * sizeof(uint16_t)); - memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); - v_b = _mm_lddqu_si128((__m128i *)buffer); - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - i_a += vectorlength; - } - // at this point we should have i_a == st_a and i_b == st_b - } - // do the tail using scalar code - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (b < a) { - i_b++; - } else if (a < b) { - C[count] = a; - count++; - i_a++; - } else { //== - i_a++; - i_b++; - } - } - if (i_a < s_a) { - memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); - count += (int32_t)(s_a - i_a); - } - return count; -} - -#endif // USESSE4 - - - -#ifdef USE_OLD_SKEW_INTERSECT -// TODO: given enough experience with the new skew intersect, drop the old one from the code base. - - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. 
*/ -int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l, - uint16_t *buffer) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - buffer[pos++] = val_s; - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } - } - - return (int32_t)pos; -} -#else // USE_OLD_SKEW_INTERSECT - - -/** -* Branchless binary search going after 4 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2, ... -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. -*/ -static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, uint16_t target3, uint16_t target4, - int32_t *index1, int32_t *index2, int32_t *index3, - int32_t *index4) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - const uint16_t *base3 = array; - const uint16_t *base4 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - base3 = (base3[half] < target3) ? &base3[half] : base3; - base4 = (base4[half] < target4) ? 
&base4[half] : base4; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); - *index3 = (int32_t)((*base3 < target3) + base3 - array); - *index4 = (int32_t)((*base4 < target4) + base4 - array); -} - -/** -* Branchless binary search going after 2 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2. -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. -*/ -static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, int32_t *index1, int32_t *index2) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); -} - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. - * Processes the small set in blocks of 4 values calling binarySearch4 - * and binarySearch2. This approach can be slightly superior to a conventional - * galloping search in some instances. 
- */ -int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l, - uint16_t *buffer) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; - while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - uint16_t target3 = small[idx_s + 2]; - uint16_t target4 = small[idx_s + 3]; - binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, - target4, &index1, &index2, &index3, &index4); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) { - buffer[pos++] = target3; - } - if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) { - buffer[pos++] = target4; - } - idx_s += 4; - idx_l += index1; - } - if ((idx_s + 2 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, - &index2); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - idx_s += 2; - idx_l += index1; - } - if ((idx_s < size_s) && (idx_l < size_l)) { - uint16_t val_s = small[idx_s]; - int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); - if (index >= 0) - buffer[pos++] = val_s; - } - return (int32_t)pos; -} - - -#endif //USE_OLD_SKEW_INTERSECT - - -// TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
-int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, - size_t size_s, - const uint16_t *large, - size_t size_l) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - pos++; - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } - } - - return (int32_t)pos; -} - -bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l) { - size_t idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return false; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - return true; - } - } - - return false; -} - -/** - * Generic intersection function. 
- */ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out) { - const uint16_t *initout = out; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return (int32_t)(out - initout); - } - while (*A > *B) { - if (++B == endB) return (int32_t)(out - initout); - } - if (*A == *B) { - *out++ = *A; - if (++A == endA || ++B == endB) return (int32_t)(out - initout); - } else { - goto SKIP_FIRST_COMPARE; - } - } - return (int32_t)(out - initout); // NOTREACHED -} - -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - int32_t answer = 0; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return answer; - } - while (*A > *B) { - if (++B == endB) return answer; - } - if (*A == *B) { - ++answer; - if (++A == endA || ++B == endB) return answer; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return answer; // NOTREACHED -} - - -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return false; - } - while (*A > *B) { - if (++B == endB) return false; - } - if (*A == *B) { - return true; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return false; // NOTREACHED -} - - - -/** - * Generic intersection function. 
- */ -size_t intersection_uint32(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, - uint32_t *out) { - const uint32_t *initout = out; - if (lenA == 0 || lenB == 0) return 0; - const uint32_t *endA = A + lenA; - const uint32_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return (out - initout); - } - while (*A > *B) { - if (++B == endB) return (out - initout); - } - if (*A == *B) { - *out++ = *A; - if (++A == endA || ++B == endB) return (out - initout); - } else { - goto SKIP_FIRST_COMPARE; - } - } - return (out - initout); // NOTREACHED -} - -size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB) { - if (lenA == 0 || lenB == 0) return 0; - size_t card = 0; - const uint32_t *endA = A + lenA; - const uint32_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return card; - } - while (*A > *B) { - if (++B == endB) return card; - } - if (*A == *B) { - card++; - if (++A == endA || ++B == endB) return card; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return card; // NOTREACHED -} - -// can one vectorize the computation of the union? (Update: Yes! See -// union_vector16). 
- -size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - memmove(buffer, set_1, size_1 * sizeof(uint16_t)); - return size_1; - } - if (0 == size_1) { - memmove(buffer, set_2, size_2 * sizeof(uint16_t)); - return size_2; - } - - uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - buffer[pos++] = val_1; - ++idx_1; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - buffer[pos++] = val_2; - ++idx_2; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - buffer[pos++] = val_1; - ++idx_1; - ++idx_2; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t)); - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t)); - pos += n_elems; - } - - return pos; -} - -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out) { - int out_card = 0; - int k1 = 0, k2 = 0; - if (length1 == 0) return 0; - if (length2 == 0) { - if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1); - return length1; - } - uint16_t s1 = a1[k1]; - uint16_t s2 = a2[k2]; - while (true) { - if (s1 < s2) { - a_out[out_card++] = s1; - ++k1; - if (k1 >= length1) { - break; - } - s1 = a1[k1]; - } else if (s1 == s2) { - ++k1; - ++k2; - if (k1 >= length1) { - break; - } - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - return out_card + length1 - k1; - } - s1 = a1[k1]; - s2 = a2[k2]; - } else { // if (val1>val2) - ++k2; - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - 
return out_card + length1 - k1; - } - s2 = a2[k2]; - } - } - return out_card; -} - -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out) { - int32_t pos1 = 0, pos2 = 0, pos_out = 0; - while (pos1 < card_1 && pos2 < card_2) { - const uint16_t v1 = array_1[pos1]; - const uint16_t v2 = array_2[pos2]; - if (v1 == v2) { - ++pos1; - ++pos2; - continue; - } - if (v1 < v2) { - out[pos_out++] = v1; - ++pos1; - } else { - out[pos_out++] = v2; - ++pos2; - } - } - if (pos1 < card_1) { - const size_t n_elems = card_1 - pos1; - memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } else if (pos2 < card_2) { - const size_t n_elems = card_2 - pos2; - memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } - return pos_out; -} - -#ifdef USESSE4 - -/*** - * start of the SIMD 16-bit union code - * - */ - -// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going -// from vecMin all the way to vecMax -// developed originally for merge sort using SIMD instructions. -// Standard merge. 
See, e.g., Inoue and Taura, SIMD- and Cache-Friendly -// Algorithm for Sorting an Array of Structures -static inline void sse_merge(const __m128i *vInput1, - const __m128i *vInput2, // input 1 & 2 - __m128i *vecMin, __m128i *vecMax) { // output - __m128i vecTmp; - vecTmp = _mm_min_epu16(*vInput1, *vInput2); - *vecMax = _mm_max_epu16(*vInput1, *vInput2); - vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); -} - -// used by store_unique, generated by simdunion.py -static uint8_t uniqshuf[] = { - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 
0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 
0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 
0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 
0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 
0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF}; - -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { - __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); - // lots of high latency instructions follow (optimize?) - int M = _mm_movemask_epi8( - _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(newval, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; -} - -// working in-place, this function overwrites the repeated values -// could be avoided? 
-static inline uint32_t unique(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } - } - return pos; -} - -// use with qsort, could be avoided -static int uint16_compare(const void *a, const void *b) { - return (*(uint16_t *)a - *(uint16_t *)b); -} - -// a one-pass SSE union algorithm -uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return (uint32_t)union_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - // we finish the rest off using a scalar algorithm - // could be improved? 
- // - // copy the small end on a tmp buffer - uint32_t len = (uint32_t)(output - initoutput); - uint16_t buffer[16]; - uint32_t leftoversize = store_unique(laststore, vecMax, buffer); - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - return len; -} - -/** - * End of the SIMD 16-bit union code - * - */ - -/** - * Start of SIMD 16-bit XOR code - */ - -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique_xor(__m128i old, __m128i newval, - uint16_t *output) { - __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); - __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); - __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); - __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); - __m128i equalleftoright = _mm_or_si128(equalleft, equalright); - int M = _mm_movemask_epi8( - _mm_packs_epi16(equalleftoright, _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(vecTmp2, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; -} - -// working in-place, this function overwrites the repeated values -// could be avoided? 
Warning: assumes len > 0 -static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } else - pos--; // if it is identical to previous, delete it - } - return pos; -} - -// a one-pass SSE xor algorithm -uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return xor_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - uint16_t buffer[17]; - output += store_unique_xor(laststore, vecMin, output); - - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all - // but the - // last value of vecMin - output += store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all but - // the - // last value of vecMin - output += 
store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - uint32_t len = (uint32_t)(output - initoutput); - - // we finish the rest off using a scalar algorithm - // could be improved? - // conditionally stores the last value of laststore as well as all but the - // last value of vecMax, - // we store to "buffer" - int leftoversize = store_unique_xor(laststore, vecMax, buffer); - uint16_t vec7 = _mm_extract_epi16(vecMax, 7); - uint16_t vec6 = _mm_extract_epi16(vecMax, 6); - if (vec7 != vec6) buffer[leftoversize++] = vec7; - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - if (leftoversize == 0) { // trivial case - memcpy(output, array2 + 8 * pos2, - (length2 - 8 * pos2) * sizeof(uint16_t)); - len += (length2 - 8 * pos2); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - if (leftoversize == 0) { // trivial case - memcpy(output, array1 + 8 * pos1, - (length1 - 8 * pos1) * sizeof(uint16_t)); - len += (length1 - 8 * pos1); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - } - return len; -} - -/** - * End of SIMD 16-bit XOR code - */ - -#endif // USESSE4 - -size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, - size_t size_2, uint32_t *buffer) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - memmove(buffer, set_1, size_1 * sizeof(uint32_t)); - return size_1; - } - if (0 == size_1) { - memmove(buffer, set_2, size_2 * 
sizeof(uint32_t)); - return size_2; - } - - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - buffer[pos++] = val_1; - ++idx_1; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - buffer[pos++] = val_2; - ++idx_2; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - buffer[pos++] = val_1; - ++idx_1; - ++idx_2; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t)); - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t)); - pos += n_elems; - } - - return pos; -} - -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - return size_1; - } - if (0 == size_1) { - return size_2; - } - - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - ++idx_1; - ++pos; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - ++idx_2; - ++pos; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - ++idx_1; - ++idx_2; - ++pos; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - pos += n_elems; - } - return pos; -} - - - -size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer) { -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - // compute union with smallest array first - if (size_1 < size_2) { - return union_vector16(set_1, (uint32_t)size_1, - set_2, 
(uint32_t)size_2, buffer); - } else { - return union_vector16(set_2, (uint32_t)size_2, - set_1, (uint32_t)size_1, buffer); - } -#else - // compute union with smallest array first - if (size_1 < size_2) { - return union_uint16( - set_1, size_1, set_2, size_2, buffer); - } else { - return union_uint16( - set_2, size_2, set_1, size_1, buffer); - } -#endif -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ -#include -#include -#include -#include -#include - - -#ifdef IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif - -#ifdef USEAVX -ALIGNED(32) -static uint32_t vecDecodeTable[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A 
(00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 
(00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A 
(01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 
(10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA 
(10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 
(11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA 
(11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; - -#endif // #ifdef USEAVX - -#ifdef IS_X64 -// same as vecDecodeTable but in 16 bits -ALIGNED(32) -static uint16_t vecDecodeTable_uint16[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 
0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 
0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 
0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 
0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 
0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 
0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; - -#endif - -#ifdef USEAVX - -size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, - size_t outcapacity, uint32_t base) { - uint32_t *out = (uint32_t *)vout; - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); ++i) { - uint64_t w = array[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - 
_mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; - } - } - } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = array[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif // USEAVX - -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - w ^= t; - } - base += 64; - } - return outpos; -} - -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset1[i] & bitset2[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; -} - -#ifdef IS_X64 -/* 
- * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function uses SSE decoding. - */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = bitset[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; - } - } - } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = bitset[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - *out = r + base; - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits 
to "out", values start at "base" (can be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; -} - -#if defined(ASMBITMANIPOPTIMIZATION) - -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)); - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; - - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], 
%[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - } - - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - list++; - } -} - -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; -} - -#else -uint64_t bitset_clear_list(void *bitset, 
uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } -} - -#endif - -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ - -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? 
- card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ -/* - * array.c - * - */ - -#include -#include -#include - -extern inline uint16_t array_container_minimum(const array_container_t *arr); -extern inline uint16_t array_container_maximum(const array_container_t *arr); -extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x); - -extern inline int array_container_rank(const array_container_t *arr, - uint16_t x); -extern inline bool array_container_contains(const array_container_t *arr, - uint16_t pos); -extern int array_container_cardinality(const array_container_t *array); -extern bool array_container_nonzero_cardinality(const array_container_t *array); -extern void array_container_clear(array_container_t *array); -extern int32_t array_container_serialized_size_in_bytes(int32_t card); -extern bool array_container_empty(const array_container_t *array); -extern bool array_container_full(const array_container_t *array); - -/* Create a new array with capacity size. Return NULL in case of failure. 
*/ -array_container_t *array_container_create_given_capacity(int32_t size) { - array_container_t *container; - - if ((container = (array_container_t *)malloc(sizeof(array_container_t))) == - NULL) { - return NULL; - } - - if( size <= 0 ) { // we don't want to rely on malloc(0) - container->array = NULL; - } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) == - NULL) { - free(container); - return NULL; - } - - container->capacity = size; - container->cardinality = 0; - - return container; -} - -/* Create a new array. Return NULL in case of failure. */ -array_container_t *array_container_create() { - return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); -} - -/* Create a new array containing all values in [min,max). */ -array_container_t * array_container_create_range(uint32_t min, uint32_t max) { - array_container_t * answer = array_container_create_given_capacity(max - min + 1); - if(answer == NULL) return answer; - answer->cardinality = 0; - for(uint32_t k = min; k < max; k++) { - answer->array[answer->cardinality++] = k; - } - return answer; -} - -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src) { - array_container_t *newcontainer = - array_container_create_given_capacity(src->capacity); - if (newcontainer == NULL) return NULL; - - newcontainer->cardinality = src->cardinality; - - memcpy(newcontainer->array, src->array, - src->cardinality * sizeof(uint16_t)); - - return newcontainer; -} - -int array_container_shrink_to_fit(array_container_t *src) { - if (src->cardinality == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->cardinality; - src->capacity = src->cardinality; - if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs - free(src->array); - src->array = NULL; - } else { - uint16_t *oldarray = src->array; - src->array = - (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t)); - if (src->array == NULL) 
free(oldarray); // should never happen? - } - return savings; -} - -/* Free memory. */ -void array_container_free(array_container_t *arr) { - if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise - free(arr->array); - arr->array = NULL; // pedantic - } - free(arr); -} - -static inline int32_t grow_capacity(int32_t capacity) { - return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE - : capacity < 64 ? capacity * 2 - : capacity < 1024 ? capacity * 3 / 2 - : capacity * 5 / 4; -} - -static inline int32_t clamp(int32_t val, int32_t min, int32_t max) { - return ((val < min) ? min : (val > max) ? max : val); -} - -void array_container_grow(array_container_t *container, int32_t min, - bool preserve) { - - int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536); - int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); - - container->capacity = new_capacity; - uint16_t *array = container->array; - - if (preserve) { - container->array = - (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t)); - if (container->array == NULL) free(array); - } else { - // Jon Strabala reports that some tools complain otherwise - if (array != NULL) { - free(array); - } - container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t)); - } - - // handle the case where realloc fails - if (container->array == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(container->array != NULL); -} - -/* Copy one container into another. We assume that they are distinct. 
*/ -void array_container_copy(const array_container_t *src, - array_container_t *dst) { - const int32_t cardinality = src->cardinality; - if (cardinality > dst->capacity) { - array_container_grow(dst, cardinality, false); - } - - dst->cardinality = cardinality; - memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); -} - -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step) { - for (uint32_t value = min; value < max; value += step) { - array_container_append(arr, value); - } -} - -/* Computes the union of array1 and array2 and write the result to arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. - */ -void array_container_union(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } - out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1, - array_2->array, card_2, out->array); - -} - -/* Computes the difference of array1 and array2 and write the result - * to array out. - * Array out does not need to be distinct from array_1 - */ -void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - if (out->capacity < array_1->cardinality) - array_container_grow(out, array_1->cardinality, false); -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - out->cardinality = - difference_vector16(array_1->array, array_1->cardinality, - array_2->array, array_2->cardinality, out->array); -#else - out->cardinality = - difference_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif -} - -/* Computes the symmetric difference of array1 and array2 and write the - * result - * to arrayout. 
- * It is assumed that arrayout is distinct from both array1 and array2. - */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } - -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - out->cardinality = - xor_vector16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#else - out->cardinality = - xor_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif -} - -static inline int32_t minimum_int32(int32_t a, int32_t b) { - return (a < b) ? a : b; -} - -/* computes the intersection of array1 and array2 and write the result to - * arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. - * */ -void array_container_intersection(const array_container_t *array1, - const array_container_t *array2, - array_container_t *out) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, - min_card = minimum_int32(card_1, card_2); - const int threshold = 64; // subject to tuning -#ifdef USEAVX - if (out->capacity < min_card) { - array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), - false); - } -#else - if (out->capacity < min_card) { - array_container_grow(out, min_card, false); - } -#endif - - if (card_1 * threshold < card_2) { - out->cardinality = intersect_skewed_uint16( - array1->array, card_1, array2->array, card_2, out->array); - } else if (card_2 * threshold < card_1) { - out->cardinality = intersect_skewed_uint16( - array2->array, card_2, array1->array, card_1, out->array); - } else { -#ifdef USEAVX - out->cardinality = intersect_vector16( - array1->array, card_1, array2->array, card_2, out->array); -#else - out->cardinality = 
intersect_uint16(array1->array, card_1, - array2->array, card_2, out->array); -#endif - } -} - -/* computes the size of the intersection of array1 and array2 - * */ -int array_container_intersection_cardinality(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_cardinality(array1->array, card_1, - array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_cardinality(array2->array, card_2, - array1->array, card_1); - } else { -#ifdef USEAVX - return intersect_vector16_cardinality(array1->array, card_1, - array2->array, card_2); -#else - return intersect_uint16_cardinality(array1->array, card_1, - array2->array, card_2); -#endif - } -} - -bool array_container_intersect(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_nonempty( - array1->array, card_1, array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_nonempty( - array2->array, card_2, array1->array, card_1); - } else { - // we do not bother vectorizing - return intersect_uint16_nonempty(array1->array, card_1, - array2->array, card_2); - } -} - -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2) { - // todo: can any of this be vectorized? 
- int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - src_1->cardinality = intersect_skewed_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } else if (card_2 * threshold < card_1) { - src_1->cardinality = intersect_skewed_uint16( - src_2->array, card_2, src_1->array, card_1, src_1->array); - } else { - src_1->cardinality = intersect_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } -} - -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (int i = 0; i < cont->cardinality; ++i) { - const uint32_t val = base + cont->array[i]; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - return outpos; -} - -void array_container_printf(const array_container_t *v) { - if (v->cardinality == 0) { - printf("{}"); - return; - } - printf("{"); - printf("%d", v->array[0]); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%d", v->array[i]); - } - printf("}"); -} - -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base) { - if (v->cardinality == 0) { - return; - } - printf("%u", v->array[0] + base); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%u", v->array[i] + base); - } -} - -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a) { - // Can SIMD work here? 
- int32_t nr_runs = 0; - int32_t prev = -2; - for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { - if (*p != prev + 1) nr_runs++; - prev = *p; - } - return nr_runs; -} - -int32_t array_container_serialize(const array_container_t *container, char *buf) { - int32_t l, off; - uint16_t cardinality = (uint16_t)container->cardinality; - - memcpy(buf, &cardinality, off = sizeof(cardinality)); - l = sizeof(uint16_t) * container->cardinality; - if (l) memcpy(&buf[off], container->array, l); - - return (off + l); -} - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * The number of bytes written should be - * array_container_size_in_bytes(container). - * - */ -int32_t array_container_write(const array_container_t *container, char *buf) { - memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); - return array_container_size_in_bytes(container); -} - -bool array_container_equals(const array_container_t *container1, - const array_container_t *container2) { - if (container1->cardinality != container2->cardinality) { - return false; - } - // could be vectorized: - for (int32_t i = 0; i < container1->cardinality; ++i) { - if (container1->array[i] != container2->array[i]) return false; - } - return true; -} - -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2) { - if (container1->cardinality > container2->cardinality) { - return false; - } - int i1 = 0, i2 = 0; - while (i1 < container1->cardinality && i2 < container2->cardinality) { - if (container1->array[i1] == container2->array[i2]) { - i1++; - i2++; - } else if (container1->array[i1] > container2->array[i2]) { - i2++; - } else { // container1->array[i1] < container2->array[i2] - return false; - } - } - if (i1 == container1->cardinality) { - return true; - } else { - return false; - } -} - -int32_t array_container_read(int32_t cardinality, array_container_t *container, - const char *buf) { - if 
(container->capacity < cardinality) { - array_container_grow(container, cardinality, false); - } - container->cardinality = cardinality; - memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); - - return array_container_size_in_bytes(container); -} - -uint32_t array_container_serialization_len(const array_container_t *container) { - return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + - (sizeof(uint16_t) * container->cardinality)); -} - -void *array_container_deserialize(const char *buf, size_t buf_len) { - array_container_t *ptr; - - if (buf_len < 2) /* capacity converted to 16 bit */ - return (NULL); - else - buf_len -= 2; - - if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) != - NULL) { - size_t len; - int32_t off; - uint16_t cardinality; - - memcpy(&cardinality, buf, off = sizeof(cardinality)); - - ptr->capacity = ptr->cardinality = (uint32_t)cardinality; - len = sizeof(uint16_t) * ptr->cardinality; - - if (len != buf_len) { - free(ptr); - return (NULL); - } - - if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) * - ptr->capacity)) == NULL) { - free(ptr); - return (NULL); - } - - if (len) memcpy(ptr->array, &buf[off], len); - - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { - if (ptr->array[i] < j) { - free(ptr->array); - free(ptr); - return (NULL); - } else - j = ptr->array[i]; - } - } - - return (ptr); -} - -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(cont->array[i] + base, ptr)) return false; - return true; -} - -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) - return false; - return 
true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ -/* - * bitset.c - * - */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif -#include -#include -#include -#include - - -extern int bitset_container_cardinality(const bitset_container_t *bitset); -extern bool bitset_container_nonzero_cardinality(bitset_container_t *bitset); -extern void bitset_container_set(bitset_container_t *bitset, uint16_t pos); -extern void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos); -extern int32_t bitset_container_serialized_size_in_bytes(); -extern bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); -extern bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos); - -void bitset_container_clear(bitset_container_t *bitset) { - memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = 0; -} - -void bitset_container_set_all(bitset_container_t *bitset) { - memset(bitset->array, INT64_C(-1), - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = (1 << 16); -} - - - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_container_t *bitset_container_create(void) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset_container_clear(bitset); - return bitset; -} - -/* Copy one container into another. We assume that they are distinct. 
*/ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest) { - dest->cardinality = source->cardinality; - memcpy(dest->array, source->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} - -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return; // refuse to crash - if ((64 % step) == 0) { // step divides 64 - uint64_t mask = 0; // construct the repeated mask - for (uint32_t value = (min % step); value < 64; value += step) { - mask |= ((uint64_t)1 << value); - } - uint32_t firstword = min / 64; - uint32_t endword = (max - 1) / 64; - bitset->cardinality = (max - min + step - 1) / step; - if (firstword == endword) { - bitset->array[firstword] |= - mask & (((~UINT64_C(0)) << (min % 64)) & - ((~UINT64_C(0)) >> ((~max + 1) % 64))); - return; - } - bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) - bitset->array[i] = mask; - bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); - } else { - for (uint32_t value = min; value < max; value += step) { - bitset_container_add(bitset, value); - } - } -} - -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset) { - if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise - aligned_free(bitset->array); - bitset->array = NULL; // pedantic - } - free(bitset); -} - -/* duplicate container. 
*/ -bitset_container_t *bitset_container_clone(const bitset_container_t *src) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset->cardinality = src->cardinality; - memcpy(bitset->array, src->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - return bitset; -} - -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end) { - bitset_set_range(bitset->array, begin, end); - bitset->cardinality = - bitset_container_compute_cardinality(bitset); // could be smarter -} - - -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2) { - // could vectorize, but this is probably already quite fast in practice - const uint64_t * __restrict__ array_1 = src_1->array; - const uint64_t * __restrict__ array_2 = src_2->array; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((array_1[i] & array_2[i]) != 0) return true; - } - return false; -} - - -#ifdef USEAVX -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - return (int) avx2_harley_seal_popcount256( - (const __m256i *)bitset->array, - BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); -} -#else - -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - const uint64_t *array = bitset->array; - int32_t sum = 0; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { - sum += hamming(array[i]); - sum += hamming(array[i + 1]); - sum += hamming(array[i + 2]); - sum += hamming(array[i + 3]); - } - 
return sum; -} - -#endif - -#ifdef USEAVX - -#define BITSET_CONTAINER_FN_REPEAT 8 -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif -#define LOOP_SIZE \ - BITSET_CONTAINER_SIZE_IN_WORDS / \ - ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) - -/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the - result to bitsetout */ -// clang-format off -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic) \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \ - const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \ - /* not using the blocking optimization for some reason*/ \ - uint8_t *out = (uint8_t*)dst->array; \ - const int innerloop = 8; \ - for (size_t i = 0; \ - i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ - i+=innerloop) {\ - __m256i A1, A2, AO; \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)out, AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+32), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+64), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+96), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \ - AO = avx_intrinsic(A2, A1); \ - 
_mm256_storeu_si256((__m256i *)(out+128), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+160), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+192), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+224), AO); \ - out+=256; \ - array_1 += 256; \ - array_2 += 256; \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -/* next, a version that updates cardinality*/ \ -int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \ - const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \ - __m256i *out = (__m256i *) dst->array; \ - dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\ - array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ - return dst->cardinality; \ -} \ -/* next, a version that just computes the cardinality*/ \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const __m256i * __restrict__ data1 = (const __m256i *) src_1->array; \ - const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \ - return (int)avx2_harley_seal_popcount256_##opname(data2, \ - data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ -} - - - -#else /* not USEAVX */ - -#define BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic) \ -int bitset_container_##opname(const bitset_container_t *src_1, \ 
- const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - out[i] = word_1; \ - out[i + 1] = word_2; \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - dst->cardinality = sum; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ - out[i] = (array_1[i])opsymbol(array_2[i]); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - return sum; \ -} - -#endif - -// we duplicate the function because other containers use the "or" term, makes API more consistent -BITSET_CONTAINER_FN(or, |, _mm256_or_si256) -BITSET_CONTAINER_FN(union, |, _mm256_or_si256) - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -BITSET_CONTAINER_FN(and, &, _mm256_and_si256) 
-BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256) - -BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256) -BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256) -// clang-format On - - - -int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { -#ifdef USEAVX2FORDECODING - if(cont->cardinality >= 8192)// heuristic - return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); - else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#endif -} - -/* - * Print this container using printf (useful for debugging). - */ -void bitset_container_printf(const bitset_container_t * v) { - printf("{"); - uint32_t base = 0; - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u",base + r); - iamfirst = false; - } else { - printf(",%u",base + r); - } - w ^= t; - } - base += 64; - } - printf("}"); -} - - -/* - * Print this container using printf as a comma-separated list of 32-bit integers starting at base. 
- */ -void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u", r + base); - iamfirst = false; - } else { - printf(",%u",r + base); - } - w ^= t; - } - base += 64; - } -} - - -// TODO: use the fast lower bound, also -int bitset_container_number_of_runs(bitset_container_t *b) { - int num_runs = 0; - uint64_t next_word = b->array[0]; - - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { - uint64_t word = next_word; - next_word = b->array[i+1]; - num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); - } - - uint64_t word = next_word; - num_runs += hamming((~word) & (word << 1)); - if((word & 0x8000000000000000ULL) != 0) - num_runs++; - return num_runs; -} - -int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { - int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; - memcpy(buf, container->array, l); - return(l); -} - - - -int32_t bitset_container_write(const bitset_container_t *container, - char *buf) { - memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - - -int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, - const char *buf) { - container->cardinality = cardinality; - memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - -uint32_t bitset_container_serialization_len() { - return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} - -void* bitset_container_deserialize(const char *buf, size_t buf_len) { - bitset_container_t *ptr; - size_t l = 
sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; - - if(l != buf_len) - return(NULL); - - if((ptr = (bitset_container_t *)malloc(sizeof(bitset_container_t))) != NULL) { - memcpy(ptr, buf, sizeof(bitset_container_t)); - // sizeof(__m256i) == 32 - ptr->array = (uint64_t *) aligned_malloc(32, l); - if (! ptr->array) { - free(ptr); - return NULL; - } - memcpy(ptr->array, buf, l); - ptr->cardinality = bitset_container_compute_cardinality(ptr); - } - - return((void*)ptr); -} - -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(r + base, ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - - -bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality != container2->cardinality) { - return false; - } - } - for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if(container1->array[i] != container2->array[i]) { - return false; - } - } - return true; -} - -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != 
BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality > container2->cardinality) { - return false; - } - } - for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if((container1->array[i] & container2->array[i]) != container1->array[i]) { - return false; - } - } - return true; -} - -bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { - int card = bitset_container_cardinality(container); - if(rank >= *start_rank + card) { - *start_rank += card; - return false; - } - const uint64_t *array = container->array; - int32_t size; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { - size = hamming(array[i]); - if(rank <= *start_rank + size) { - uint64_t w = container->array[i]; - uint16_t base = i*64; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(*start_rank == rank) { - *element = r+base; - return true; - } - w ^= t; - *start_rank += 1; - } - } - else - *start_rank += size; - } - assert(false); - __builtin_unreachable(); -} - - -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_ctzll(w); - return r + i * 64; - } - } - return UINT16_MAX; -} - -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container) { - for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_clzll(w); - return i * 64 + 63 - r; - } - } - return 0; -} - -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - int sum = 0; - uint32_t k = 0; - for (; k + 63 <= x32; k += 64) { - sum += hamming(container->array[k / 
64]); - } - // at this point, we have covered everything up to k, k not included. - // we have that k < x, but not so large that k+63<=x - // k is a power of 64 - int bitsleft = x32 - k + 1;// will be in [0,64) - uint64_t leftoverword = container->array[k / 64];// k / 64 should be within scope - leftoverword = leftoverword & ((UINT64_C(1) << bitsleft) - 1); - sum += hamming(leftoverword); - return sum; -} - -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - uint32_t k = x32 / 64; - uint64_t word = container->array[k]; - const int diff = x32 - k * 64; // in [0,64) - word = (word >> diff) << diff; // a mask is faster, but we don't care - while(word == 0) { - k++; - if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; - word = container->array[k]; - } - return k * 64 + __builtin_ctzll(word); -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ - - -extern inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type); -extern inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type); - -extern const char *get_container_name(uint8_t typecode); - -extern int container_get_cardinality(const void *container, uint8_t typecode); - -extern void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -void container_free(void *container, uint8_t typecode) { - switch (typecode) { - case 
BITSET_CONTAINER_TYPE_CODE: - bitset_container_free((bitset_container_t *)container); - break; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_free((array_container_t *)container); - break; - case RUN_CONTAINER_TYPE_CODE: - run_container_free((run_container_t *)container); - break; - case SHARED_CONTAINER_TYPE_CODE: - shared_container_free((shared_container_t *)container); - break; - default: - assert(false); - __builtin_unreachable(); - } -} - -void container_printf(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf((const bitset_container_t *)container); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf((const array_container_t *)container); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf((const run_container_t *)container); - return; - default: - __builtin_unreachable(); - } -} - -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf_as_uint32_array( - (const bitset_container_t *)container, base); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf_as_uint32_array( - (const array_container_t *)container, base); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf_as_uint32_array( - (const run_container_t *)container, base); - return; - return; - default: - __builtin_unreachable(); - } -} - -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_serialize((const bitset_container_t *)container, - buf)); - case ARRAY_CONTAINER_TYPE_CODE: - return ( - array_container_serialize((const array_container_t 
*)container, buf)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_serialize((const run_container_t *)container, buf)); - default: - assert(0); - __builtin_unreachable(); - return (-1); - } -} - -uint32_t container_serialization_len(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_serialization_len(); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_serialization_len( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_serialization_len( - (const run_container_t *)container); - default: - assert(0); - __builtin_unreachable(); - return (0); - } -} - -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_deserialize(buf, buf_len)); - case ARRAY_CONTAINER_TYPE_CODE: - return (array_container_deserialize(buf, buf_len)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_deserialize(buf, buf_len)); - case SHARED_CONTAINER_TYPE_CODE: - printf("this should never happen.\n"); - assert(0); - __builtin_unreachable(); - return (NULL); - default: - assert(0); - __builtin_unreachable(); - return (NULL); - } -} - -extern bool container_nonzero_cardinality(const void *container, - uint8_t typecode); - -extern void container_free(void *container, uint8_t typecode); - -extern int container_to_uint32_array(uint32_t *output, const void *container, - uint8_t typecode, uint32_t base); - -extern void *container_add(void *container, uint16_t val, uint8_t typecode, - uint8_t *new_typecode); - -extern inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode); - -extern void *container_clone(const void *container, uint8_t typecode); - -extern void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - 
-extern void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write) { - if (copy_on_write) { - shared_container_t *shared_container; - if (*typecode == SHARED_CONTAINER_TYPE_CODE) { - shared_container = (shared_container_t *)container; - shared_container->counter += 1; - return shared_container; - } - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - - if ((shared_container = (shared_container_t *)malloc( - sizeof(shared_container_t))) == NULL) { - return NULL; - } - - shared_container->container = container; - shared_container->typecode = *typecode; - - shared_container->counter = 2; - *typecode = SHARED_CONTAINER_TYPE_CODE; - - return shared_container; - } // copy_on_write - // otherwise, no copy on write... - const void *actualcontainer = - container_unwrap_shared((const void *)container, typecode); - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - return container_clone(actualcontainer, *typecode); -} -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. 
- */ -void *container_clone(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_clone((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_clone((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_clone((const run_container_t *)container); - case SHARED_CONTAINER_TYPE_CODE: - printf("shared containers are not cloneable\n"); - assert(false); - return NULL; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode) { - assert(container->counter > 0); - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container->counter--; - *typecode = container->typecode; - void *answer; - if (container->counter == 0) { - answer = container->container; - container->container = NULL; // paranoid - free(container); - } else { - answer = container_clone(container->container, *typecode); - } - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - return answer; -} - -void shared_container_free(shared_container_t *container) { - assert(container->counter > 0); - container->counter--; - if (container->counter == 0) { - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container_free(container->container, container->typecode); - container->container = NULL; // paranoid - free(container); - } -} - -extern void *container_not(const void *c1, uint8_t type1, uint8_t *result_type); - -extern void *container_not_range(const void *c1, uint8_t type1, - uint32_t range_start, uint32_t range_end, - uint8_t *result_type); - -extern void *container_inot(void *c1, uint8_t type1, uint8_t *result_type); - -extern void *container_inot_range(void *c1, uint8_t type1, uint32_t range_start, - uint32_t range_end, uint8_t *result_type); - -extern void 
*container_range_of_ones(uint32_t range_start, uint32_t range_end, - uint8_t *result_type); - -// where are the correponding things for union and intersection?? -extern void *container_lazy_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_andnot(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ -#include - - -// file contains grubby stuff that must know impl. details of all container -// types. -bitset_container_t *bitset_container_from_array(const array_container_t *a) { - bitset_container_t *ans = bitset_container_create(); - int limit = array_container_cardinality(a); - for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]); - return ans; -} - -bitset_container_t *bitset_container_from_run(const run_container_t *arr) { - int card = run_container_cardinality(arr); - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - rle16_t vl = arr->runs[rlepos]; - bitset_set_lenrange(answer->array, vl.value, vl.length); - } - answer->cardinality = card; - return answer; -} - -array_container_t *array_container_from_run(const run_container_t *arr) { - array_container_t *answer = - array_container_create_given_capacity(run_container_cardinality(arr)); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - int run_start = arr->runs[rlepos].value; - int run_end = run_start + arr->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - return answer; -} - -array_container_t *array_container_from_bitset(const 
bitset_container_t *bits) { - array_container_t *result = - array_container_create_given_capacity(bits->cardinality); - result->cardinality = bits->cardinality; - // sse version ends up being slower here - // (bitset_extract_setbits_sse_uint16) - // because of the sparsity of the data - bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS, - result->array, 0); - return result; -} - -/* assumes that container has adequate space. Run from [s,e] (inclusive) */ -static void add_run(run_container_t *r, int s, int e) { - r->runs[r->n_runs].value = s; - r->runs[r->n_runs].length = e - s; - r->n_runs++; -} - -run_container_t *run_container_from_array(const array_container_t *c) { - int32_t n_runs = array_container_number_of_runs(c); - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - int32_t card = c->cardinality; - if (card == 0) return answer; - for (int i = 0; i < card; ++i) { - const uint16_t cur_val = c->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c->array[i]; - } - // now prev is the last seen value - add_run(answer, run_start, prev); - // assert(run_container_cardinality(answer) == c->cardinality); - return answer; -} - -/** - * Convert the runcontainer to either a Bitmap or an Array Container, depending - * on the cardinality. Frees the container. 
- * Allocates and returns new container, which caller is responsible for freeing - */ - -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype) { - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - uint16_t run_end = run_start + r->runs[rlepos].length; - for (uint16_t run_value = run_start; run_value <= run_end; - ++run_value) { - answer->array[answer->cardinality++] = run_value; - } - } - assert(card == answer->cardinality); - *resulttype = ARRAY_CONTAINER_TYPE_CODE; - run_container_free(r); - return answer; - } - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length); - } - answer->cardinality = card; - *resulttype = BITSET_CONTAINER_TYPE_CODE; - run_container_free(r); - return answer; -} - -/* Converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes responsible to free the new one. */ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after) { - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(c->n_runs); - - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - int32_t card = run_container_cardinality(c); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - int32_t min_size_non_run = - size_as_bitset_container < size_as_array_container - ? 
size_as_bitset_container - : size_as_array_container; - if (size_as_run_container <= min_size_non_run) { // no conversion - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return c; - } - if (card <= DEFAULT_MAX_SIZE) { - // to array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int run_start = c->runs[rlepos].value; - int run_end = run_start + c->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return answer; - } - - // else to bitset - bitset_container_t *answer = bitset_container_create(); - - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int start = c->runs[rlepos].value; - int end = start + c->runs[rlepos].length; - bitset_set_range(answer->array, start, end + 1); - } - answer->cardinality = card; - *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return answer; -} - -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after) { - void *answer = convert_run_to_efficient_container(c, typecode_after); - if (answer != c) run_container_free(c); - return answer; -} - -/* once converted, the original container is disposed here, rather than - in roaring_array -*/ - -// TODO: split into run- array- and bitset- subfunctions for sanity; -// a few function calls won't really matter. 
- -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after) { - if (typecode_original == RUN_CONTAINER_TYPE_CODE) { - void *newc = convert_run_to_efficient_container((run_container_t *)c, - typecode_after); - if (newc != c) { - container_free(c, typecode_original); - } - return newc; - } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { - // it might need to be converted to a run container. - array_container_t *c_qua_array = (array_container_t *)c; - int32_t n_runs = array_container_number_of_runs(c_qua_array); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t card = array_container_cardinality(c_qua_array); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - if (size_as_run_container >= size_as_array_container) { - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return c; - } - // else convert array to run container - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - - assert(card > 0); - for (int i = 0; i < card; ++i) { - uint16_t cur_val = c_qua_array->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c_qua_array->array[i]; - } - assert(run_start >= 0); - // now prev is the last seen value - add_run(answer, run_start, prev); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - array_container_free(c_qua_array); - return answer; - } else if (typecode_original == - BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset - // does bitset need conversion to run? 
- bitset_container_t *c_qua_bitset = (bitset_container_t *)c; - int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - - if (size_as_bitset_container <= size_as_run_container) { - // no conversion needed. - *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return c; - } - // bitset to runcontainer (ported from Java RunContainer( - // BitmapContainer bc, int nbrRuns)) - assert(n_runs > 0); // no empty bitmaps - run_container_t *answer = run_container_create_given_capacity(n_runs); - - int long_ctr = 0; - uint64_t cur_word = c_qua_bitset->array[0]; - int run_count = 0; - while (true) { - while (cur_word == UINT64_C(0) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word = c_qua_bitset->array[++long_ctr]; - - if (cur_word == UINT64_C(0)) { - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - - int local_run_start = __builtin_ctzll(cur_word); - int run_start = local_run_start + 64 * long_ctr; - uint64_t cur_word_with_1s = cur_word | (cur_word - 1); - - int run_end = 0; - while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word_with_1s = c_qua_bitset->array[++long_ctr]; - - if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { - run_end = 64 + long_ctr * 64; // exclusive, I guess - add_run(answer, run_start, run_end - 1); - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - int local_run_end = __builtin_ctzll(~cur_word_with_1s); - run_end = local_run_end + long_ctr * 64; - add_run(answer, run_start, run_end - 1); - run_count++; - cur_word = cur_word_with_1s & (cur_word_with_1s + 1); - } - return answer; - } else { - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -bitset_container_t 
*bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max) { - bitset_container_t *bitset = bitset_container_create(); - int32_t union_cardinality = 0; - for (int32_t i = 0; i < run->n_runs; ++i) { - uint32_t rle_min = run->runs[i].value; - uint32_t rle_max = rle_min + run->runs[i].length; - bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min); - union_cardinality += run->runs[i].length + 1; - } - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ -/* - * mixed_andnot.c. More methods since operation is not symmetric, - * except no "wide" andnot , so no lazy options motivated. - */ - -#include -#include - - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - // follows Java implementation as of June 2016 - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - dst->array[newcard] = key; - newcard += 1 - bitset_container_contains(src_2, key); - } - dst->cardinality = newcard; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ - -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2) { - array_bitset_container_andnot(src_1, src_2, src_1); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not 
initially have a valid container. - * Return true for a bitset result; false for array - */ - -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - // Java did this directly, but we have option of asm or avx - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_1, result); - result->cardinality = - (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality, - src_2->array, (uint64_t)src_2->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; - } - *dst = result; - return true; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, - void **dst) { - *dst = src_1; - src_1->cardinality = - (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality, - src_2->array, (uint64_t)src_2->cardinality); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // follows the Java implementation as of June 2016 - int card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // must be an array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - for (int run_value = rle.value; run_value <= rle.value + rle.length; - ++run_value) { - if (!bitset_container_get(src_2, (uint16_t)run_value)) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - } - *dst = answer; - return false; - } else { // we guess it will be a bitset, though have to check guess when - // done - bitset_container_t *answer = bitset_container_clone(src_2); - - uint32_t last_pos = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - - uint32_t start = rle.value; - uint32_t end = start + rle.length + 1; - bitset_reset_range(answer->array, last_pos, start); - bitset_flip_range(answer->array, start, end); - last_pos = end; - } - bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16)); - - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(answer); - bitset_container_free(answer); - return false; // not bitset - } - *dst = answer; - return true; // bitset - } -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // dummy implementation - bool ans = run_bitset_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - // follows Java implementation - bitset_container_t *result = bitset_container_create(); - - bitset_container_copy(src_1, result); - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); - - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - *dst = src_1; - - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(src_1->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - src_1->cardinality = bitset_container_compute_cardinality(src_1); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* helper. a_out must be a valid array container with adequate capacity. - * Returns the cardinality of the output container. Partly Based on Java - * implementation Util.unsignedDifference. - * - * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper - * to avoid advanceUntil? - */ - -static int run_array_array_subtract(const run_container_t *r, - const array_container_t *a_in, - array_container_t *a_out) { - int out_card = 0; - int32_t in_array_pos = - -1; // since advanceUntil always assumes we start the search AFTER this - - for (int rlepos = 0; rlepos < r->n_runs; rlepos++) { - int32_t start = r->runs[rlepos].value; - int32_t end = start + r->runs[rlepos].length + 1; - - in_array_pos = advanceUntil(a_in->array, in_array_pos, - a_in->cardinality, (uint16_t)start); - - if (in_array_pos >= a_in->cardinality) { // run has no items subtracted - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - } else { - uint16_t next_nonincluded = a_in->array[in_array_pos]; - if (next_nonincluded >= end) { - // another case when run goes unaltered - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - in_array_pos--; // ensure we see this item again if necessary - } else { - for (int32_t i = start; i < end; ++i) - if (i != next_nonincluded) - a_out->array[out_card++] = (uint16_t)i; - else // 0 should 
ensure we don't match - next_nonincluded = - (in_array_pos + 1 >= a_in->cardinality) - ? 0 - : a_in->array[++in_array_pos]; - in_array_pos--; // see again - } - } - } - return out_card; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. - */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // follows the Java impl as of June 2016 - - int card = run_container_cardinality(src_1); - const int arbitrary_threshold = 32; - - if (card <= arbitrary_threshold) { - if (src_2->cardinality == 0) { - *dst = run_container_clone(src_1); - return RUN_CONTAINER_TYPE_CODE; - } - // Java's "lazyandNot.toEfficientContainer" thing - run_container_t *answer = run_container_create_given_capacity( - card + array_container_cardinality(src_2)); - - int rlepos = 0; - int xrlepos = 0; // "x" is src_2 - rle16_t rle = src_1->runs[rlepos]; - int32_t start = rle.value; - int32_t end = start + rle.length + 1; - int32_t xstart = src_2->array[xrlepos]; - - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { - if (end <= xstart) { - // output the first run - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xstart + 1 <= start) { - // exit the second run - xrlepos++; - if (xrlepos < src_2->cardinality) { - xstart = src_2->array[xrlepos]; - } - } else { - if (start < xstart) { - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(xstart - start - 1)}; - } - if (xstart + 1 < end) { - start = xstart + 1; - } else { - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } - } - } - if (rlepos < src_1->n_runs) { - 
answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos, - (src_1->n_runs - rlepos) * sizeof(rle16_t)); - answer->n_runs += (src_1->n_runs - rlepos); - } - } - uint8_t return_type; - *dst = convert_run_to_efficient_container(answer, &return_type); - if (answer != *dst) run_container_free(answer); - return return_type; - } - // else it's a bitmap or array - - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *ac = array_container_create_given_capacity(card); - // nb Java code used a generic iterator-based merge to compute - // difference - ac->cardinality = run_array_array_subtract(src_1, src_2, ac); - *dst = ac; - return ARRAY_CONTAINER_TYPE_CODE; - } - bitset_container_t *ans = bitset_container_from_run(src_1); - bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); - return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // dummy implementation same as June 2016 Java - int ans = run_array_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* dst must be a valid array container, allowed to be src_1 */ - -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - // basically following Java impl as of June 2016 - if (src_1->cardinality > dst->capacity) { - array_container_grow(dst, src_1->cardinality, false); - } - - if (src_2->n_runs == 0) { - memmove(dst->array, src_1->array, - sizeof(uint16_t) * src_1->cardinality); - dst->cardinality = src_1->cardinality; - return; - } - int32_t run_start = src_2->runs[0].value; - int32_t run_end = run_start + src_2->runs[0].length; - int which_run = 0; - - uint16_t val = 0; - int dest_card = 0; - for (int i = 0; i < src_1->cardinality; ++i) { - val = src_1->array[i]; - if (val < run_start) - dst->array[dest_card++] = val; - else if (val <= run_end) { - ; // omitted item - } else { - do { - if (which_run + 1 < src_2->n_runs) { - ++which_run; - run_start = src_2->runs[which_run].value; - run_end = run_start + src_2->runs[which_run].length; - - } else - run_start = run_end = (1 << 16) + 1; - } while (val > run_end); - --i; - } - } - dst->cardinality = dest_card; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2) { - array_run_container_andnot(src_1, src_2, src_1); -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. 
- */ - -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_andnot(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst) { - // following Java impl as of June 2016 (dummy) - int ans = run_run_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* - * dst is a valid array container and may be the same as src_1 - */ - -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst) { - array_container_andnot(src_1, src_2, dst); -} - -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2) { - array_container_andnot(src_1, src_2, src_1); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_andnot(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - int card = bitset_container_andnot(src_1, src_2, src_1); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else { - *dst = src_1; - return true; - } -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ - -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality != container1->cardinality) { - return false; - } - } - int32_t pos = 0; - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = container2->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - uint16_t r = i * 64 + __builtin_ctzll(w); - if (pos >= container1->cardinality) { - return false; - } - if (container1->array[pos] != r) { - return false; - } - ++pos; - w ^= t; - } - } - return (pos == container1->cardinality); -} - 
-bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) != container2->cardinality) - return false; - int32_t pos = 0; - for (int i = 0; i < container1->n_runs; ++i) { - const uint32_t run_start = container1->runs[i].value; - const uint32_t le = container1->runs[i].length; - - if (container2->array[pos] != run_start) { - return false; - } - - if (container2->array[pos + le] != run_start + le) { - return false; - } - - pos += le + 1; - } - return true; -} - -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality != run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card != run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - // todo: this code could be much faster - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ -/* - * mixed_intersection.c - * - */ - - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. 
*/ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; // dst could be src_1 - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - // this branchless approach is much faster... - dst->array[newcard] = key; - newcard += bitset_container_contains(src_2, key); - /** - * we could do it this way instead... - * if (bitset_container_contains(src_2, key)) { - * dst->array[newcard++] = key; - * } - * but if the result is unpredictible, the processor generates - * many mispredicted branches. - * Difference can be huge (from 3 cycles when predictible all the way - * to 16 cycles when unpredictible. - * See - * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c - */ - } - dst->cardinality = newcard; -} - -/* Compute the size of the intersection of src_1 and src_2. */ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2) { - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - newcard += bitset_container_contains(src_2, key); - } - return newcard; -} - - -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2) { - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - if(bitset_container_contains(src_2, key)) return true; - } - return false; -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. 
*/ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - if (run_container_is_full(src_2)) { - if (dst != src_1) array_container_copy(src_1, dst); - return; - } - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - if (src_2->n_runs == 0) { - return; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - dst->cardinality = newcard; - return; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - dst->array[newcard] = arrayval; - newcard++; - arraypos++; - } - } - dst->cardinality = newcard; -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. 
If *dst == src_2, an in-place processing - * is attempted.*/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - if (run_container_is_full(src_1)) { - if (*dst != src_2) *dst = bitset_container_clone(src_2); - return true; - } - int32_t card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // result can only be an array (assuming that we never make a - // RunContainer) - if (card > src_2->cardinality) { - card = src_2->cardinality; - } - array_container_t *answer = array_container_create_given_capacity(card); - *dst = answer; - if (*dst == NULL) { - return false; - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - uint32_t endofrun = (uint32_t)rle.value + rle.length; - for (uint32_t runValue = rle.value; runValue <= endofrun; - ++runValue) { - answer->array[answer->cardinality] = (uint16_t)runValue; - answer->cardinality += - bitset_container_contains(src_2, runValue); - } - } - return false; - } - if (*dst == src_2) { // we attempt in-place - bitset_container_t *answer = (bitset_container_t *)*dst; - uint32_t start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(src_2->array, start, end); - - start = end + rle.length + 1; - } - bitset_reset_range(src_2->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - if (src_2->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(src_2); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } else { // no inplace - // we expect the answer to be a bitmap (if we are lucky) - bitset_container_t *answer = bitset_container_clone(src_2); - - *dst = answer; - if (answer == NULL) { - return true; - } - uint32_t 
start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(answer->array, start, end); - start = end + rle.length + 1; - } - bitset_reset_range(answer->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(answer); - bitset_container_free((bitset_container_t *)*dst); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } -} - -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return src_1->cardinality; - } - if (src_2->n_runs == 0) { - return 0; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return newcard; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - newcard++; - arraypos++; - } - } - return newcard; -} - -/* Compute the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality( - const run_container_t *src_1, const bitset_container_t *src_2) { - if (run_container_is_full(src_1)) { - return bitset_container_cardinality(src_2); - } - int answer = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - answer += - bitset_lenrange_cardinality(src_2->array, 
rle.value, rle.length); - } - return answer; -} - - -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2) { - if( run_container_is_full(src_2) ) { - return !array_container_empty(src_1); - } - if (src_2->n_runs == 0) { - return false; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return false; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - return true; - } - } - return false; -} - -/* Compute the intersection between src_1 and src_2 - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2) { - if( run_container_is_full(src_1) ) { - return !bitset_container_empty(src_2); - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true; - } - return false; -} - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. 
- */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = bitset_container_create(); - if (*dst != NULL) { - bitset_container_and_nocard(src_1, src_2, - (bitset_container_t *)*dst); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - } - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset -} - -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = src_1; - bitset_container_and_nocard(src_1, src_2, src_1); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ -/* - * mixed_negation.c - * - */ - -#include -#include - - -// TODO: make simplified and optimized negation code across -// the full range. 
- -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap -' * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst) { - uint64_t card = UINT64_C(1 << 16); - bitset_container_set_all(dst); - - dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array, - (uint64_t)src->cardinality); -} - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst) { - return bitset_container_negation_range(src, 0, (1 << 16), dst); -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) { - return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. 
- */ -int run_container_negation(const run_container_t *src, void **dst) { - return run_container_negation_range(src, 0, (1 << 16), dst); -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst) { - return run_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. - */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst) { - /* close port of the Java implementation */ - if (range_start >= range_end) { - *dst = array_container_clone(src); - return false; - } - - int32_t start_index = - binarySearch(src->array, src->cardinality, (uint16_t)range_start); - if (start_index < 0) start_index = -start_index - 1; - - int32_t last_index = - binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); - if (last_index < 0) last_index = -last_index - 2; - - const int32_t current_values_in_range = last_index - start_index + 1; - const int32_t span_to_be_flipped = range_end - range_start; - const int32_t new_values_in_range = - span_to_be_flipped - current_values_in_range; - const int32_t cardinality_change = - new_values_in_range - current_values_in_range; - const int32_t new_cardinality = src->cardinality + cardinality_change; - - if (new_cardinality > DEFAULT_MAX_SIZE) { - bitset_container_t *temp = bitset_container_from_array(src); - bitset_flip_range(temp->array, (uint32_t)range_start, - (uint32_t)range_end); - temp->cardinality = new_cardinality; - *dst = temp; - return true; - } - - array_container_t *arr = 
- array_container_create_given_capacity(new_cardinality); - *dst = (void *)arr; - if(new_cardinality == 0) { - arr->cardinality = new_cardinality; - return false; // we are done. - } - // copy stuff before the active area - memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); - - // work on the range - int32_t out_pos = start_index, in_pos = start_index; - int32_t val_in_range = range_start; - for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { - if ((uint16_t)val_in_range != src->array[in_pos]) { - arr->array[out_pos++] = (uint16_t)val_in_range; - } else { - ++in_pos; - } - } - for (; val_in_range < range_end; ++val_in_range) - arr->array[out_pos++] = (uint16_t)val_in_range; - - // content after the active range - memcpy(arr->array + out_pos, src->array + (last_index + 1), - (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); - arr->cardinality = new_cardinality; - return false; -} - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. - */ - -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst) { - bool ans = array_container_negation_range(src, range_start, range_end, dst); - // TODO : try a real inplace version - array_container_free(src); - return ans; -} - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst) { - // TODO maybe consider density-based estimate - // and sometimes build result directly as array, with - // conversion back to bitset if wrong. 
Or determine - // actual result cardinality, then go directly for the known final cont. - - // keep computation using bitsets as long as possible. - bitset_container_t *t = bitset_container_clone(src); - bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end); - t->cardinality = bitset_container_compute_cardinality(t); - - if (t->cardinality > DEFAULT_MAX_SIZE) { - *dst = t; - return true; - } else { - *dst = array_container_from_bitset(t); - bitset_container_free(t); - return false; - } -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst) { - bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end); - src->cardinality = bitset_container_compute_cardinality(src); - if (src->cardinality > DEFAULT_MAX_SIZE) { - *dst = src; - return true; - } - *dst = array_container_from_bitset(src); - bitset_container_free(src); - return false; -} - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. 
- */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst) { - uint8_t return_typecode; - - // follows the Java implementation - if (range_end <= range_start) { - *dst = run_container_clone(src); - return RUN_CONTAINER_TYPE_CODE; - } - - run_container_t *ans = run_container_create_given_capacity( - src->n_runs + 1); // src->n_runs + 1); - int k = 0; - for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { - ans->runs[k] = src->runs[k]; - ans->n_runs++; - } - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < src->n_runs; ++k) { - run_container_smart_append_exclusive(ans, src->runs[k].value, - src->runs[k].length); - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); - - return return_typecode; -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst) { - uint8_t return_typecode; - - if (range_end <= range_start) { - *dst = src; - return RUN_CONTAINER_TYPE_CODE; - } - - // TODO: efficient special case when range is 0 to 65535 inclusive - - if (src->capacity == src->n_runs) { - // no excess room. 
More checking to see if result can fit - bool last_val_before_range = false; - bool first_val_in_range = false; - bool last_val_in_range = false; - bool first_val_past_range = false; - - if (range_start > 0) - last_val_before_range = - run_container_contains(src, (uint16_t)(range_start - 1)); - first_val_in_range = run_container_contains(src, (uint16_t)range_start); - - if (last_val_before_range == first_val_in_range) { - last_val_in_range = - run_container_contains(src, (uint16_t)(range_end - 1)); - if (range_end != 0x10000) - first_val_past_range = - run_container_contains(src, (uint16_t)range_end); - - if (last_val_in_range == - first_val_past_range) { // no space for inplace - int ans = run_container_negation_range(src, range_start, - range_end, dst); - run_container_free(src); - return ans; - } - } - } - // all other cases: result will fit - - run_container_t *ans = src; - int my_nbr_runs = src->n_runs; - - ans->n_runs = 0; - int k = 0; - for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { - // ans->runs[k] = src->runs[k]; (would be self-copy) - ans->n_runs++; - } - - // as with Java implementation, use locals to give self a buffer of depth 1 - rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0}; - rle16_t next = buffered; - if (k < my_nbr_runs) buffered = src->runs[k]; - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < my_nbr_runs; ++k) { - if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; - - run_container_smart_append_exclusive(ans, buffered.value, - buffered.length); - buffered = next; - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); - - return return_typecode; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ - -bool 
array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < container1->cardinality) { - return false; - } - } - for (int i = 0; i < container1->cardinality; ++i) { - if (!bitset_container_contains(container2, container1->array[i])) { - return false; - } - } - return true; -} - -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) > container2->cardinality) - return false; - int32_t start_pos = -1, stop_pos = -1; - for (int i = 0; i < container1->n_runs; ++i) { - int32_t start = container1->runs[i].value; - int32_t stop = start + container1->runs[i].length; - start_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, start); - stop_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, stop); - if (start_pos == container2->cardinality) { - return false; - } else if (stop_pos - start_pos != stop - start || - container2->array[start_pos] != start || - container2->array[stop_pos] != stop) { - return false; - } - } - return true; -} - -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2) { - if (container1->cardinality > run_container_cardinality(container2)) - return false; - int i_array = 0, i_run = 0; - while (i_array < container1->cardinality && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - if (container1->array[i_array] < start) { - return false; - } else if (container1->array[i_array] > stop) { - i_run++; - } else { // the value of the array is in the run - i_array++; - } - } - if (i_array == container1->cardinality) { - return true; - } else { - return false; - } -} - -bool run_container_is_subset_bitset(const run_container_t* 
container1, - const bitset_container_t* container2) { - // todo: this code could be much faster - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card < run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; -} - -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2) { - // todo: this code could be much faster - if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container1->cardinality > run_container_cardinality(container2)) { - return false; - } - } - int32_t i_bitset = 0, i_run = 0; - while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && - i_run < container2->n_runs) { - uint64_t w = container1->array[i_bitset]; - while (w != 0 && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - uint64_t t = w & (~w + 1); - uint16_t r = i_bitset * 64 + __builtin_ctzll(w); - if (r < start) { - return false; - } else if (r > stop) { - i_run++; - continue; - } else { - w ^= t; - } - } - if (w == 0) { - i_bitset++; - } else { - return false; - } - } - if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { - // terminated iterating on the run containers, check that rest of bitset - // is empty - for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { - if (container1->array[i_bitset] != 0) { - return false; - } - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ 
-/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ -/* - * mixed_union.c - * - */ - -#include -#include - - -/* Compute the union of src_1 and src_2 and write the result to - * dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - dst->cardinality = (int32_t)bitset_set_list_withcard( - dst->array, dst->cardinality, src_1->array, src_1->cardinality); -} - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_set_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = bitset_container_compute_cardinality(dst); -} - -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - 
-// why do we leave the result as a run container?? -void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - if (run_container_is_full(src_2)) { - run_container_copy(src_2, dst); - return; - } - // TODO: see whether the "2*" is spurious - run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t previousrle; - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(dst, src_2->runs[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(dst, src_1->array[arraypos]); - arraypos++; - } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } - } -} - -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return; - } - const int32_t maxoutput = src_1->cardinality + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_2->n_runs; - if (src_2->capacity < neededcapacity) - run_container_grow(src_2, neededcapacity, true); - memmove(src_2->runs + maxoutput, src_2->runs, - src_2->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc2 = src_2->runs + maxoutput; - int32_t rlepos = 0; - int32_t arraypos = 0; - int src2nruns = src_2->n_runs; - src_2->n_runs = 0; - - rle16_t previousrle; - - if (inputsrc2[rlepos].value <= 
src_1->array[arraypos]) { - previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(src_2, src_1->array[arraypos]); - arraypos++; - } - - while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src2nruns) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } - } -} - -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- if(src_1->capacity < ourbitset->cardinality) { - array_container_grow(src_1, ourbitset->cardinality, false); - } - - bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS, - src_1->array, 0); - src_1->cardinality = ourbitset->cardinality; - *dst = src_1; - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - - -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} - - -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - 
src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ -/* - * mixed_xor.c - */ - -#include -#include - - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). - * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_2, result); - result->cardinality = (int32_t)bitset_flip_list_withcard( - result->array, result->cardinality, src_1->array, src_1->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
- */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_flip_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); - - bitset_container_copy(src_2, result); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); - - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(dst->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -/* dst does not indicate a valid container initially. 
Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst) { - // semi following Java XOR implementation as of May 2016 - // the C OR implementation works quite differently and can return a run - // container - // TODO could optimize for full run containers. - - // use of lazy following Java impl. - const int arbitrary_threshold = 32; - if (src_1->cardinality < arbitrary_threshold) { - run_container_t *ans = run_container_create(); - array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. - uint8_t typecode_after; - *dst = - convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; - } - - int card = run_container_cardinality(src_2); - if (card <= DEFAULT_MAX_SIZE) { - // Java implementation works with the array, xoring the run elements via - // iterator - array_container_t *temp = array_container_from_run(src_2); - bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); - array_container_free(temp); - return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - } else { // guess that it will end up as a bitset - bitset_container_t *result = bitset_container_from_run(src_2); - bool is_bitset = bitset_array_container_ixor(result, src_1, dst); - // any necessary type conversion has been done by the ixor - int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); - return retval; - } -} - -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. 
- */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); - int32_t rlepos = 0; - int32_t arraypos = 0; - dst->n_runs = 0; - - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } else { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], - 0); - arraypos++; - } - } - while (arraypos < src_1->cardinality) { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); - arraypos++; - } - while (rlepos < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_xor(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; -} - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. 
- * Could adopt the mixed_union.c approach instead (ie, using - * smart_append_exclusive) - * - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = - src_1->cardinality + src_2->cardinality; // upper bound - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - - return returnval; -} - -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - // upper bound, but probably poor estimate for xor - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset (maybe, for XOR??) - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_xor(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - *dst = src_1; - src_1->cardinality = (uint32_t)bitset_flip_list_withcard( - src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* a bunch of in-place, some of which may not *really* be inplace. - * TODO: write actual inplace routine if efficiency warrants it - * Anything inplace with a bitset is a good candidate - */ - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = bitset_bitset_container_xor(src_1, src_2, dst); - bitset_container_free(src_1); - return ans; -} - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = array_bitset_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_2, src_1, dst); - bitset_container_free(src_1); - return ans; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_2, src_1, dst); - run_container_free(src_1); - return ans; -} - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - bool ans = array_array_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst) { - int ans = run_run_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ -#include -#include - - -extern inline uint16_t run_container_minimum(const run_container_t *run); -extern inline uint16_t run_container_maximum(const run_container_t *run); -extern inline int32_t interleavedBinarySearch(const rle16_t *array, - int32_t lenarray, 
uint16_t ikey); -extern inline bool run_container_contains(const run_container_t *run, - uint16_t pos); -extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); -extern bool run_container_is_full(const run_container_t *run); -extern bool run_container_nonzero_cardinality(const run_container_t *r); -extern void run_container_clear(run_container_t *run); -extern int32_t run_container_serialized_size_in_bytes(int32_t num_runs); -extern run_container_t *run_container_create_range(uint32_t start, - uint32_t stop); - -bool run_container_add(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return false; // already there - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return false; // already there - if (offset == le + 1) { - // we may need to fuse - if (index + 1 < run->n_runs) { - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index].length = run->runs[index + 1].value + - run->runs[index + 1].length - - run->runs[index].value; - recoverRoomAtIndex(run, (uint16_t)(index + 1)); - return true; - } - } - run->runs[index].length++; - return true; - } - if (index + 1 < run->n_runs) { - // we may need to fuse - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index + 1].value = pos; - run->runs[index + 1].length = run->runs[index + 1].length + 1; - return true; - } - } - } - if (index == -1) { - // we may need to extend the first run - if (0 < run->n_runs) { - if (run->runs[0].value == pos + 1) { - run->runs[0].length++; - run->runs[0].value--; - return true; - } - } - } - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = pos; - run->runs[index + 1].length = 0; - return true; -} - -/* Create a new run 
container. Return NULL in case of failure. */ -run_container_t *run_container_create_given_capacity(int32_t size) { - run_container_t *run; - /* Allocate the run container itself. */ - if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) { - return NULL; - } - if (size <= 0 ) { // we don't want to rely on malloc(0) - run->runs = NULL; - } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) { - free(run); - return NULL; - } - run->capacity = size; - run->n_runs = 0; - return run; -} - -int run_container_shrink_to_fit(run_container_t *src) { - if (src->n_runs == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->n_runs; - src->capacity = src->n_runs; - rle16_t *oldruns = src->runs; - src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t)); - if (src->runs == NULL) free(oldruns); // should never happen? - return savings; -} -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void) { - return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); -} - -run_container_t *run_container_clone(const run_container_t *src) { - run_container_t *run = run_container_create_given_capacity(src->capacity); - if (run == NULL) return NULL; - run->capacity = src->capacity; - run->n_runs = src->n_runs; - memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); - return run; -} - -/* Free memory. */ -void run_container_free(run_container_t *run) { - if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise - free(run->runs); - run->runs = NULL; // pedantic - } - free(run); -} - -void run_container_grow(run_container_t *run, int32_t min, bool copy) { - int32_t newCapacity = - (run->capacity == 0) - ? RUN_DEFAULT_INIT_SIZE - : run->capacity < 64 ? run->capacity * 2 - : run->capacity < 1024 ? 
run->capacity * 3 / 2 - : run->capacity * 5 / 4; - if (newCapacity < min) newCapacity = min; - run->capacity = newCapacity; - assert(run->capacity >= min); - if (copy) { - rle16_t *oldruns = run->runs; - run->runs = - (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t)); - if (run->runs == NULL) free(oldruns); - } else { - // Jon Strabala reports that some tools complain otherwise - if (run->runs != NULL) { - free(run->runs); - } - run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t)); - } - // handle the case where realloc fails - if (run->runs == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(run->runs != NULL); -} - -/* copy one container into another */ -void run_container_copy(const run_container_t *src, run_container_t *dst) { - const int32_t n_runs = src->n_runs; - if (src->n_runs > dst->capacity) { - run_container_grow(dst, n_runs, false); - } - dst->n_runs = n_runs; - memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); -} - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_1, dst); - return; - } - if (if2) { - run_container_copy(src_2, dst); - return; - } - } - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(dst, src_1->runs[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); - xrlepos++; - } - - while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { - rle16_t newrl; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = src_1->runs[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(dst, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < src_1->n_runs) { - run_container_append(dst, src_1->runs[rlepos], &previousrle); - rlepos++; - } -} - -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' - */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return; - } - if (if2) { - run_container_copy(src_2, src_1); - return; - } - } - // we move the data to the end of the 
current array - const int32_t maxoutput = src_1->n_runs + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_1->n_runs; - if (src_1->capacity < neededcapacity) - run_container_grow(src_1, neededcapacity, true); - memmove(src_1->runs + maxoutput, src_1->runs, - src_1->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc1 = src_1->runs + maxoutput; - const int32_t input1nruns = src_1->n_runs; - src_1->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { - rle16_t newrl; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = inputsrc1[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(src_1, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(src_1, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < input1nruns) { - run_container_append(src_1, inputsrc1[rlepos], &previousrle); - rlepos++; - } -} - -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // don't bother to convert xor with full range into negation - // since negation is implemented similarly - - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - - int32_t pos1 = 0; - int32_t pos2 = 0; - dst->n_runs = 0; - - while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { - if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } else { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } - } - while (pos1 < src_1->n_runs) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } - - while (pos2 < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. 
*/ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_2, dst); - return; - } - if (if2) { - run_container_copy(src_1, dst); - return; - } - } - // TODO: this could be a lot more efficient, could use SIMD optimizations - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - dst->runs[dst->n_runs].value = (uint16_t)lateststart; - dst->runs[dst->n_runs].length = - (uint16_t)(earliestend - lateststart - 1); - dst->n_runs++; - } - } -} - -/* Compute the size of the intersection of src_1 and src_2 . */ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return run_container_cardinality(src_2); - } - if (if2) { - return run_container_cardinality(src_1); - } - } - int answer = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // 
they overlap - const int32_t lateststart = start > xstart ? start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - answer += earliestend - lateststart; - } - } - return answer; -} - -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return !run_container_empty(src_2); - } - if (if2) { - return !run_container_empty(src_1); - } - } - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - return true; - } - } - return false; -} - - -/* Compute the difference of src_1 and src_2 and write the result to - * dst. 
It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // following Java implementation as of June 2016 - - if (dst->capacity < src_1->n_runs + src_2->n_runs) - run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); - - dst->n_runs = 0; - - int rlepos1 = 0; - int rlepos2 = 0; - int32_t start = src_1->runs[rlepos1].value; - int32_t end = start + src_1->runs[rlepos1].length + 1; - int32_t start2 = src_2->runs[rlepos2].value; - int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; - - while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { - if (end <= start2) { - // output the first run - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } else if (end2 <= start) { - // exit the second run - rlepos2++; - if (rlepos2 < src_2->n_runs) { - start2 = src_2->runs[rlepos2].value; - end2 = start2 + src_2->runs[rlepos2].length + 1; - } - } else { - if (start < start2) { - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(start2 - start - 1)}; - } - if (end2 < end) { - start = end2; - } else { - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } - } - } - if (rlepos1 < src_1->n_runs) { - dst->runs[dst->n_runs++] = (rle16_t){ - .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, - sizeof(rle16_t) * (src_1->n_runs - rlepos1)); - dst->n_runs += src_1->n_runs - rlepos1; - } - } -} - -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t 
*out = (uint32_t *)vout; - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - } - return outpos; -} - -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *cont) { - for (int i = 0; i < cont->n_runs; ++i) { - uint16_t run_start = cont->runs[i].value; - uint16_t le = cont->runs[i].length; - printf("[%d,%d]", run_start, run_start + le); - } -} - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *cont, - uint32_t base) { - if (cont->n_runs == 0) return; - { - uint32_t run_start = base + cont->runs[0].value; - uint16_t le = cont->runs[0].length; - printf("%u", run_start); - for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); - } - for (int32_t i = 1; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); - } -} - -int32_t run_container_serialize(const run_container_t *container, char *buf) { - int32_t l, off; - - memcpy(buf, &container->n_runs, off = sizeof(container->n_runs)); - memcpy(&buf[off], &container->capacity, sizeof(container->capacity)); - off += sizeof(container->capacity); - - l = sizeof(rle16_t) * container->n_runs; - memcpy(&buf[off], container->runs, l); - return (off + l); -} - -int32_t run_container_write(const run_container_t *container, char *buf) { - memcpy(buf, &container->n_runs, sizeof(uint16_t)); - memcpy(buf + sizeof(uint16_t), container->runs, - container->n_runs * sizeof(rle16_t)); - return run_container_size_in_bytes(container); -} - -int32_t 
run_container_read(int32_t cardinality, run_container_t *container, - const char *buf) { - (void)cardinality; - memcpy(&container->n_runs, buf, sizeof(uint16_t)); - if (container->n_runs > container->capacity) - run_container_grow(container, container->n_runs, false); - if(container->n_runs > 0) { - memcpy(container->runs, buf + sizeof(uint16_t), - container->n_runs * sizeof(rle16_t)); - } - return run_container_size_in_bytes(container); -} - -uint32_t run_container_serialization_len(const run_container_t *container) { - return (sizeof(container->n_runs) + sizeof(container->capacity) + - sizeof(rle16_t) * container->n_runs); -} - -void *run_container_deserialize(const char *buf, size_t buf_len) { - run_container_t *ptr; - - if (buf_len < 8 /* n_runs + capacity */) - return (NULL); - else - buf_len -= 8; - - if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) { - size_t len; - int32_t off; - - memcpy(&ptr->n_runs, buf, off = 4); - memcpy(&ptr->capacity, &buf[off], 4); - off += 4; - - len = sizeof(rle16_t) * ptr->n_runs; - - if (len != buf_len) { - free(ptr); - return (NULL); - } - - if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) { - free(ptr); - return (NULL); - } - - memcpy(ptr->runs, &buf[off], len); - - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { - if (ptr->runs[i].value < j) { - free(ptr->runs); - free(ptr); - return (NULL); - } else - j = ptr->runs[i].value; - } - } - - return (ptr); -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(run_start + j, ptr)) return false; - } - return true; -} - -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void 
*ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) - return false; - } - return true; -} - -bool run_container_equals(const run_container_t *container1, - const run_container_t *container2) { - if (container1->n_runs != container2->n_runs) { - return false; - } - for (int32_t i = 0; i < container1->n_runs; ++i) { - if ((container1->runs[i].value != container2->runs[i].value) || - (container1->runs[i].length != container2->runs[i].length)) - return false; - } - return true; -} - -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2) { - int i1 = 0, i2 = 0; - while (i1 < container1->n_runs && i2 < container2->n_runs) { - int start1 = container1->runs[i1].value; - int stop1 = start1 + container1->runs[i1].length; - int start2 = container2->runs[i2].value; - int stop2 = start2 + container2->runs[i2].length; - if (start1 < start2) { - return false; - } else { // start1 >= start2 - if (stop1 < stop2) { - i1++; - } else if (stop1 == stop2) { - i1++; - i2++; - } else { // stop1 > stop2 - i2++; - } - } - } - if (i1 == container1->n_runs) { - return true; - } else { - return false; - } -} - -// TODO: write smart_append_exclusive version to match the overloaded 1 param -// Java version (or is it even used?) - -// follows the Java implementation closely -// length is the rle-value. Ie, run [10,12) uses a length value 1. -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length) { - int old_end; - rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; - rle16_t *appended_last_run = src->runs + src->n_runs; - - if (!src->n_runs || - (start > (old_end = last_run->value + last_run->length + 1))) { - *appended_last_run = (rle16_t){.value = start, .length = length}; - src->n_runs++; - return; - } - if (old_end == start) { - // we merge - last_run->length += (length + 1); - return; - } - int new_end = start + length + 1; - - if (start == last_run->value) { - // wipe out previous - if (new_end < old_end) { - *last_run = (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - return; - } else if (new_end > old_end) { - *last_run = (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - return; - } else { - src->n_runs--; - return; - } - } - last_run->length = start - last_run->value - 1; - if (new_end < old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - src->n_runs++; - } else if (new_end > old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - src->n_runs++; - } -} - -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - for (int i = 0; i < container->n_runs; i++) { - uint16_t length = container->runs[i].length; - if (rank <= *start_rank + length) { - uint16_t value = container->runs[i].value; - *element = value + rank - (*start_rank); - return true; - } else - *start_rank += length + 1; - } - return false; -} - -int run_container_rank(const run_container_t *container, uint16_t x) { - int sum = 0; - uint32_t x32 = x; - for (int i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - startpoint) + 1; - } else { - sum += 
length + 1; - } - } - return sum; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ -#include -#include -#include -#include -#include -#include - -extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, - uint32_t val); - -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *typecode, - int *index) { - uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, typecode); - uint8_t newtypecode = *typecode; - void *container2 = - container_add(container, val & 0xFFFF, *typecode, &newtypecode); - *index = i; - if (container2 != container) { - container_free(container, *typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - *typecode = newtypecode; - return container2; - } else { - return container; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, *typecode); - *index = -i - 1; - return container; - } -} - -roaring_bitmap_t *roaring_bitmap_create() { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init(&ans->high_low_container); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = false; - return ans; -} - -roaring_bitmap_t 
*roaring_bitmap_create_with_capacity(uint32_t cap) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = false; - return ans; -} - -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - void *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; - uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); - } - prev = val; - } -} - -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; -} - -roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - va_list ap; - va_start(ap, n_args); - for (size_t i = 1; i <= n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); - } - va_end(ap); - return answer; -} - -static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { - return (a < b) ? a : b; -} - -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? a : b; -} - -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if(max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - for (uint32_t value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; - } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - void *container = container_from_range(&type, container_min, - container_max, (uint16_t)step); - ra_append(&answer->high_low_container, key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; -} - -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = count_greater(ra->high_low_container.keys, - ra->high_low_container.size, - max_key); - int32_t prefix_length = 
count_less(ra->high_low_container.keys, - ra->high_low_container.size - suffix_length, - min_key); - int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length; - - if (num_required_containers > common_length) { - ra_shift_tail(&ra->high_low_container, suffix_length, - num_required_containers - common_length); - } - - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->high_low_container.size - suffix_length - 1; - for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; - void* new_container; - uint8_t new_type; - - if (src >= 0 && ra->high_low_container.keys[src] == key) { - ra_unshare_container_at_index(&ra->high_low_container, src); - new_container = container_add_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max+1, 1); - } - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - key, new_container, new_type); - dst--; - } -} - -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key); - int32_t dst = src; - while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { - uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? 
(max & 0xffff) : 0xffff; - ra_unshare_container_at_index(&ra->high_low_container, src); - void *new_container; - uint8_t new_type; - new_container = container_remove_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, - &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - if (new_container) { - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - ra->high_low_container.keys[src], - new_container, new_type); - dst++; - } - src++; - } - if (src > dst) { - ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src); - } -} - -void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); -void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); - -void roaring_bitmap_printf(const roaring_bitmap_t *ra) { - printf("{"); - for (int i = 0; i < ra->high_low_container.size; ++i) { - container_printf_as_uint32_array( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16); - if (i + 1 < ra->high_low_container.size) printf(","); - } - printf("}"); -} - -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { - printf("{"); - for (int i = 0; i < ra->high_low_container.size; ++i) { - printf("%d: %s (%d)", ra->high_low_container.keys[i], - get_full_container_name(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]), - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i])); - if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { - printf( - "(shared count = %" PRIu32 " )", - ((shared_container_t *)(ra->high_low_container.containers[i])) - ->counter); - } - - if (i + 1 < ra->high_low_container.size) printf(", "); - } - 
printf("}"); -} - -typedef struct min_max_sum_s { - uint32_t min; - uint32_t max; - uint64_t sum; -} min_max_sum_t; - -static bool min_max_sum_fnc(uint32_t value, void *param) { - min_max_sum_t *mms = (min_max_sum_t *)param; - if (value > mms->max) mms->max = value; - if (value < mms->min) mms->min = value; - mms->sum += value; - return true; // we always process all data points -} - -/** -* (For advanced users.) -* Collect statistics about the bitmap -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, - roaring_statistics_t *stat) { - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->high_low_container.size; - stat->cardinality = roaring_bitmap_get_cardinality(ra); - min_max_sum_t mms; - mms.min = UINT32_C(0xFFFFFFFF); - mms.max = UINT32_C(0); - mms.sum = 0; - roaring_iterate(ra, &min_max_sum_fnc, &mms); - stat->min_value = mms.min; - stat->max_value = mms.max; - stat->sum_value = mms.sum; - - for (int i = 0; i < ra->high_low_container.size; ++i) { - uint8_t truetype = - get_container_type(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t card = - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - switch (truetype) { - case BITSET_CONTAINER_TYPE_CODE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE_CODE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE_CODE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - __builtin_unreachable(); - } - } -} - -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - 
roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container, - r->copy_on_write); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = r->copy_on_write; - return ans; -} - -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src) { - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - src->copy_on_write); -} - -void roaring_bitmap_free(roaring_bitmap_t *r) { - ra_clear(&r->high_low_container); - free(r); -} - -void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); -} - -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - } -} - -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); 
- - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); - - result = oldCardinality != newCardinality; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; - } - - return result; -} - -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_get_cardinality(container2, newtypecode) != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - } -} - -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - 
ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - - const int newCardinality = - container_get_cardinality(container2, newtypecode); - - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - - result = oldCardinality != newCardinality; - } - return result; -} - -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; - } - int32_t pos = -1; // position of the container used in the previous iteration - for (size_t i = 0; i < n_args; i++) { - uint16_t key = (uint16_t)(vals[i] >> 16); - if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); - } - if (pos >= 0) { - uint8_t new_typecode; - void *new_container; - new_container = container_remove(r->high_low_container.containers[pos], - vals[i] & 0xffff, - r->high_low_container.typecodes[pos], - &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); - } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; - } - } - } -} - -// there 
should be some SIMD optimizations possible here -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_and(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free( - c, container_result_type); // otherwise:memory leak! - } - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -/** - * Compute the union of 'number' bitmaps. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -/** - * Compute the xor of 'number' bitmaps. - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -// inplace and (modifies its first argument). -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); - - // any skipped-over or newly emptied containers in x1 - // have to be freed. 
- while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t typecode1, typecode2, typecode_result; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &typecode1); - c1 = get_writable_copy_if_shared(c1, &typecode1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &typecode2); - void *c = - container_iand(c1, typecode1, c2, typecode2, &typecode_result); - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, typecode1); - } - if (container_nonzero_cardinality(c, typecode_result)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - typecode_result); - intersection_size++; - } else { - container_free(c, typecode_result); - } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - - // if we ended early because x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; - } - - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); -} - -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && 
x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_or(c1, container_type_1, c2, container_type_2, - &container_result_type); - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - // c1 = container_clone(c1, container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // c2 = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 
== length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace or (modifies its first argument). -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = - container_ior(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - 
&container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - // void *c2_clone = container_clone(c2, container_type_2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - 
s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace xor (modifies its first argument). - -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. 
- - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_ixor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t 
container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - empty_bitmap->copy_on_write = x1->copy_on_write && x2->copy_on_write; - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_andnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - x1->copy_on_write); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). 
Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - } - } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace andnot (modifies its first argument). - -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_clear(x1); - return; - } - - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_iandnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - if (pos1 != intersection_size) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, - pos1, &container_type_1); - - 
ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c1, - container_type_1); - } - intersection_size++; - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - - if (pos1 < length1) { - // all containers between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. - if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); - } - ra_downsize(&x1->high_low_container, intersection_size); -} - -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { - uint64_t card = 0; - for (int i = 0; i < ra->high_low_container.size; ++i) - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - return card; -} - -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, - uint64_t range_end) { - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); - } - if (range_start >= range_end) { - return 0; - } - range_end--; // make range_end inclusive - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - int minhb = range_start >> 16; - int maxhb = range_end >> 16; - - uint64_t card = 0; - - int i = ra_get_index(&ra->high_low_container, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 
0xffff); - } else { - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } - - for (; i < ra->high_low_container.size; i++) { - uint16_t key = ra->high_low_container.keys[i]; - if (key < maxhb) { - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 0xffff); - break; - } else { - break; - } - } - - return card; -} - - -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { - return ra->high_low_container.size == 0; -} - -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { - ra_to_uint32_array(&ra->high_low_container, ans); -} - -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { - return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans); -} - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. -*/ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - ra_unshare_container_at_index( - &r->high_low_container, i); // TODO: this introduces extra cloning! 
- void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - void *c1 = convert_run_optimize(c, typecode_original, &typecode_after); - if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - return answer; -} - -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - answer += container_shrink_to_fit(c, typecode_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; -} - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - if (get_container_type(c, typecode_original) == - RUN_CONTAINER_TYPE_CODE) { - answer = true; - if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { - run_container_t *truec = - (run_container_t *)((shared_container_t *)c)->container; - int32_t card = run_container_cardinality(truec); - void *c1 = convert_to_bitset_or_array_container( - truec, card, &typecode_after); - shared_container_free((shared_container_t *)c); - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - - } else { - int32_t card = run_container_cardinality((run_container_t *)c); - void *c1 = convert_to_bitset_or_array_container( - (run_container_t *)c, card, &typecode_after); - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - } - } - return answer; -} - -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { - size_t 
portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t cardinality = roaring_bitmap_get_cardinality(ra); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(ra, buf + 1) + 1; - } else { - buf[0] = SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - ra, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; - } -} - -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + - sizeof(uint32_t); - return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; -} - -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { - return ra_portable_size_in_bytes(&ra->high_low_container); -} - - -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); - if(is_ok) assert(bytesread <= maxbytes); - ans->copy_on_write = false; - if (!is_ok) { - free(ans); - return NULL; - } - return ans; -} - -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); -} - - -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); -} - - -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, - char *buf) { - return ra_portable_serialize(&ra->high_low_container, buf); -} - -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) 
{ - const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); - } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); -} - -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, - void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, - iterator, ptr)) { - return false; - } - return true; -} - -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate64( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; - } - return true; -} - -/**** -* begin roaring_uint32_iterator_t -*****/ - -static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - if (newit->container_index >= - newit->parent->high_low_container.size) { // otherwise nothing - newit->current_value = UINT32_MAX; - return (newit->has_value = false); - } - // assume not empty - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. 
- newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - uint32_t wordindex; - uint64_t word; // used for bitsets - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - wordindex = 0; - while ((word = ((const bitset_container_t *)(newit->container)) - ->array[wordindex]) == 0) - wordindex++; // advance - // here "word" is non-zero - newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[0]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[0].value); - newit->in_run_index = - newit->current_value + - (((const run_container_t *)(newit->container))->runs[0].length); - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; -} - -// prerequesite: the value should be in range of the container -static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { - uint16_t lb = val & 0xFFFF; - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - // assume it is found - newit->has_value = true; - newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb); - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[newit->in_container_index]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb); - if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { - newit->current_value = val; - } else { - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[newit->run_index].value); - } - newit->in_run_index = - (newit->highbits | (((const run_container_t *)(newit->container))->runs[newit->run_index].value)) + - (((const run_container_t *)(newit->container))->runs[newit->run_index].length); - - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; -} - -void roaring_init_iterator(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit) { - newit->parent = ra; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); -} - -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_init_iterator(ra, newit); - return newit; -} - -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; -} - -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(& it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if(lowvalue < lb ) { - it->container_index = i+1; // will have to load first value of next container - } else {// the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; - } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i-1; - } - it->has_value = loadfirstvalue(it); - return it->has_value; -} - - -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - 
it->in_container_index++; - wordindex = it->in_container_index / 64; - if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex] & - (UINT64_MAX << (it->in_container_index % 64)); - // next part could be optimized/simplified - while ((word == 0) && - (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { - wordindex++; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex]; - } - if (word != 0) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - it->in_container_index++; - if (it->in_container_index < - ((const array_container_t *)(it->container))->cardinality) { - it->current_value = it->highbits | - ((const array_container_t *)(it->container)) - ->array[it->in_container_index]; - return true; - } - break; - case RUN_CONTAINER_TYPE_CODE: - if(it->current_value == UINT32_MAX) { - return (it->has_value = false); // without this, we risk an overflow to zero - } - it->current_value++; - if (it->current_value <= it->in_run_index) { - return (it->has_value = true); - } - it->run_index++; - if (it->run_index < - ((const run_container_t *)(it->container))->n_runs) { - it->current_value = - it->highbits | (((const run_container_t *)(it->container)) - ->runs[it->run_index] - .value); - it->in_run_index = it->current_value + - ((const run_container_t *)(it->container)) - ->runs[it->run_index] - .length; - return (it->has_value = true); - } - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - // moving to next container - it->container_index++; - return (it->has_value = loadfirstvalue(it)); -} - -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { - uint32_t ret = 0; - uint32_t num_values; - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - const array_container_t* acont; //TODO remove - const run_container_t* rcont; //TODO remove - const bitset_container_t* bcont; //TODO remove - - while (it->has_value && ret < count) { - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bcont = (const bitset_container_t*)(it->container); - wordindex = it->in_container_index / 64; - word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); - do { - while (word != 0 && ret < count) { - buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); - word = word & (word - 1); - buf++; - ret++; - } - while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { - wordindex++; - word = bcont->array[wordindex]; - } - } while (word != 0 && ret < count); - it->has_value = (word != 0); - if (it->has_value) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - acont = (const array_container_t *)(it->container); - num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->highbits | acont->array[it->in_container_index + i]; - } - buf += num_values; - ret += num_values; - it->in_container_index += num_values; - it->has_value = (it->in_container_index < acont->cardinality); - if (it->has_value) { - it->current_value = it->highbits | acont->array[it->in_container_index]; - } - break; - case RUN_CONTAINER_TYPE_CODE: - rcont = (const run_container_t*)(it->container); - //"in_run_index" name is misleading, read 
it as "max_value_in_current_run" - do { - num_values = minimum_uint32(it->in_run_index - it->current_value + 1, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->current_value + i; - } - it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 - buf += num_values; - ret += num_values; - - if (it->current_value > it->in_run_index || it->current_value == 0) { - it->run_index++; - if (it->run_index < rcont->n_runs) { - it->current_value = it->highbits | rcont->runs[it->run_index].value; - it->in_run_index = it->current_value + rcont->runs[it->run_index].length; - } else { - it->has_value = false; - } - } - } while ((ret < count) && it->has_value); - break; - default: - assert(false); - } - if (it->has_value) { - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); - } - return ret; -} - - - -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } - -/**** -* end of roaring_uint32_iterator_t -*****/ - -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - if (ra1->high_low_container.size != ra2->high_low_container.size) { - return false; - } - for (int i = 0; i < ra1->high_low_container.size; ++i) { - if (ra1->high_low_container.keys[i] != - ra2->high_low_container.keys[i]) { - return false; - } - } - for (int i = 0; i < ra1->high_low_container.size; ++i) { - bool areequal = container_equals(ra1->high_low_container.containers[i], - ra1->high_low_container.typecodes[i], - ra2->high_low_container.containers[i], - ra2->high_low_container.typecodes[i]); - if (!areequal) { - return false; - } - } - return true; -} - -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - const int length1 = ra1->high_low_container.size, - length2 = ra2->high_low_container.size; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = 
ra_get_key_at_index(&ra1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2, - &container_type_2); - bool subset = - container_is_subset(c1, container_type_1, c2, container_type_2); - if (!subset) return false; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - return false; - } else { // s1 > s2 - pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2); - } - } - if (pos1 == length1) - return true; - else - return false; -} - -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - 
flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - 
container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return roaring_bitmap_copy(x1); - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - roaring_bitmap_t *ans = roaring_bitmap_create(); - ans->copy_on_write = x1->copy_on_write; - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; - - ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, x1->copy_on_write); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } - - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb); - } - - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, x1->copy_on_write); - return ans; -} - -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); - - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } - - if (lb_end != 0xFFFF) --hb_end; - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } -} - -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c; - if (bitsetconversion && (get_container_type(c1, container_type_1) != - BITSET_CONTAINER_TYPE_CODE) && - (get_container_type(c2, container_type_2) != - BITSET_CONTAINER_TYPE_CODE)) { - void *newc1 = - container_mutable_unwrap_shared(c1, &container_type_1); - newc1 = container_to_bitset(newc1, container_type_1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - c = container_lazy_ior(newc1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != newc1) { // should not happen - container_free(newc1, container_type_1); - } - } else { - c = container_lazy_or(c1, container_type_1, c2, - container_type_2, &container_result_type); - } - // since we assume that the initial 
containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int 
pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, container_type_1) == - BITSET_CONTAINER_TYPE_CODE)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - } else { - // convert to bitset - void *oldc1 = c1; - uint8_t oldt1 = container_type_1; - c1 = container_mutable_unwrap_shared(c1, &container_type_1); - c1 = container_to_bitset(c1, container_type_1); - container_free(oldc1, oldt1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - } - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - 
ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); 
- if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_ixor(c1, container_type_1, c2, container_type_2, - 
&container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { - for (int i = 0; i < ra->high_low_container.size; ++i) { - const uint8_t original_typecode = ra->high_low_container.typecodes[i]; - void *container = ra->high_low_container.containers[i]; - uint8_t new_typecode = original_typecode; - void *newcontainer = - container_repair_after_lazy(container, &new_typecode); - ra->high_low_container.containers[i] = newcontainer; - ra->high_low_container.typecodes[i] = new_typecode; - } -} - - - -/** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. 
-*/ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } - } - return size; -} - -/** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. -*/ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = bm->high_low_container.containers[0]; - uint8_t typecode = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(container, typecode); - return lowvalue | (key << 16); - } - return UINT32_MAX; -} - -/** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. 
-*/ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); - } - return 0; -} - -bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, - uint32_t *element) { - void *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; - } - - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (key << 16); - return true; - } else - return false; -} - -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, - &container_type_2); - if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = 
ra_advance_until(& x2->high_low_container, s1, pos2); - } - } - return answer; -} - - -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - answer += container_and_cardinality(c1, container_type_1, c2, - container_type_2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); -} - -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; -} - -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; -} - -uint64_t 
roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - 2 * inter; -} - - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) is present - */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - if (range_start >= range_end) return true; // empty range are always contained! - if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - ie = (ie < 0 ? 
-ie - 1 : ie); - if ((is < 0) || ((ie - is) != span)) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; - uint8_t typecode; - void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); - if (hb_rs == hb_re) { - return container_contains_range(container, lb_rs, lb_re, typecode); - } - if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { - return false; - } - assert(ie < hlc_sz); // would indicate an algorithmic bug - container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); - if (!container_contains_range(container, 0, lb_re, typecode)) { - return false; - } - for (int32_t i = is + 1; i < ie; ++i) { - container = ra_get_container_at_index(&r->high_low_container, i, &typecode); - if (!container_is_full(container, typecode) ) { - return false; - } - } - return true; -} - - -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - return (roaring_bitmap_get_cardinality(ra2) > - roaring_bitmap_get_cardinality(ra1) && - roaring_bitmap_is_subset(ra1, ra2)); -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ -#include -#include -#include -#include -#include -#include - - -// Convention: [0,ra->size) all elements are initialized -// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - -extern inline int32_t ra_get_size(const roaring_array_t *ra); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); -extern inline void *ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, uint8_t *typecode); -extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); -extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, - uint16_t key, void *c, - uint8_t typecode); -extern inline void 
ra_set_container_at_index(const roaring_array_t *ra, - int32_t i, void *c, - uint8_t typecode); - -#define INITIAL_CAPACITY 4 - -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // because we combine the allocations, it is not possible to use realloc - /*ra->keys = - (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); -ra->containers = - (void **)realloc(ra->containers, sizeof(void *) * new_capacity); -ra->typecodes = - (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); -if (!ra->keys || !ra->containers || !ra->typecodes) { - free(ra->keys); - free(ra->containers); - free(ra->typecodes); - return false; -}*/ - - if ( new_capacity == 0 ) { - free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = - new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - void *bigalloc = malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - void **newcontainers = (void **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if(ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); - } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - free(oldbigalloc); - return true; -} - -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; - - new_ra->allocation_size = cap; - new_ra->size = 0; - if(cap > 0) { - void *bigalloc = - malloc(cap * (sizeof(uint16_t) + 
sizeof(void *) + sizeof(uint8_t))); - if( bigalloc == NULL ) return false; - new_ra->containers = (void **)bigalloc; - new_ra->keys = (uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - } - return true; -} - -int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; -} - -bool ra_init(roaring_array_t *t) { - return ra_init_with_capacity(t, INITIAL_CAPACITY); -} - -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - if (!ra_init_with_capacity(dest, source->size)) return false; - dest->size = source->size; - dest->allocation_size = source->size; - if(dest->size > 0) { - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - } - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - if(dest->size > 0) { - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - } else { - if(dest->size > 0) { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; -} - -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } - } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; -} - -void ra_clear_containers(roaring_array_t *ra) { - for (int32_t i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); - } -} - -void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); -} - -void ra_clear_without_containers(roaring_array_t *ra) { - free(ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; -} - -void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); -} - -bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - assert(desired_size <= MAX_CONTAINERS); - if (desired_size > ra->allocation_size) { - int32_t new_capacity = - (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; - if (new_capacity > MAX_CONTAINERS) { - new_capacity = MAX_CONTAINERS; - } - - return realloc_array(ra, new_capacity); - } - return true; -} - -void ra_append(roaring_array_t *ra, uint16_t key, void *container, - uint8_t typecode) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - ra->keys[pos] = key; - ra->containers[pos] = container; - ra->typecodes[pos] = typecode; - ra->size++; -} - -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - // old contents is junk not needing freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; - } - ra->size++; -} - -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - for (int32_t i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, i, copy_on_write); - } -} - -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - 
ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); -} - -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; - } -} - -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -extern void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode); - -void *ra_get_writable_container(roaring_array_t *ra, uint16_t x, - uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - 
return get_writable_copy_if_shared(ra->containers[i], typecode); -} - -void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - assert(i < ra->size); - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); -} - -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; -} - -extern int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - -extern int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); - -// everything skipped over is freed -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size && ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; - } - return pos; -} - -void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode) { - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(void *) * (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = container; - ra->typecodes[i] = typecode; - ra->size++; -} - -// note: Java routine set things to 0, enabling GC. -// Java called it "resize" but it was always used to downsize. -// Allowing upsize would break the conventions about -// valid containers below ra->size. 
- -void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; -} - -void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(void *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; -} - -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); -} - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. In use it should be followed by a call to -// downsize. -// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); - - const int range = end - begin; - - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere - - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(void *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); -} - -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); - } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), - sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(void *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} - - -size_t ra_size_in_bytes(roaring_array_t *ra) { - size_t 
cardinality = 0; - size_t tot_len = - 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) + - ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - for (int32_t i = 0; i < ra->size; i++) { - tot_len += - (container_serialization_len(ra->containers[i], ra->typecodes[i]) + - sizeof(uint16_t)); - cardinality += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } - - if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) { - return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t); - } - return tot_len; -} - -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - for (int32_t i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; - } -} - -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; - - size_t t_limit = 0; - - bool first = false; - size_t first_skip = 0; - - uint32_t *t_ans = NULL; - size_t cur_len = 0; - - for (int i = 0; i < ra->size; ++i) { - - const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - t_limit = ((const bitset_container_t *)container)->cardinality; - break; - case ARRAY_CONTAINER_TYPE_CODE: - t_limit = ((const array_container_t *)container)->cardinality; - break; - case RUN_CONTAINER_TYPE_CODE: - t_limit = run_container_cardinality((const run_container_t *)container); - break; - } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ - if (!first){ - //first_skip = t_limit - (ctr + t_limit - offset); - first_skip = offset - ctr; - first = true; - t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); - if(t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; - cur_len = first_skip + limit; 
- } - if (dtr + t_limit > cur_len){ - uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); - if(append_ans == NULL) { - if(t_ans != NULL) free(t_ans); - return false; - } - memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; - } - ctr += t_limit; - if (dtr-first_skip >= limit) break; - } - if(t_ans != NULL) { - memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); - free(t_ans); - } - return true; -} - -bool ra_has_run_container(const roaring_array_t *ra) { - for (int32_t k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE_CODE) - return true; - } - return false; -} - -uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; - } - return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; - } -} - -size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); - - for (int32_t k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], 
ra->typecodes[k]); - } - return count; -} - -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - for (int32_t i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE_CODE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } - } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; - } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; - - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += sizeof(ra->size); - - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - for (int32_t k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)( - container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - for (int32_t k = 0; k < ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - } - for (int32_t k = 0; k < ra->size; ++k) { - buf += 
container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; -} - -// Quickly checks whether there is a serialized bitmap at the pointer, -// not exceeding size "maxbytes" in bytes. This function does not allocate -// memory dynamically. -// -// This function returns 0 if and only if no valid bitmap is found. -// Otherwise, it returns how many bytes are occupied. -// -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t);// for cookie - if(bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if(bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - return 0; // logically impossible - } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if(bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; - } - bytestotal += size * 2 * sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if(bytestotal > maxbytes) return 0; - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if 
(isbitmap) { - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * sizeof(rle16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } - } - return bytestotal; -} - - -// this function populates answer from the content of buf (reading up to maxbytes bytes). -// The function returns false if a properly serialized bitmap cannot be found. -// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. -bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { - *readbytes = sizeof(int32_t);// for cookie - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); - return false; - } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - fprintf(stderr, "I failed to find one of the right cookies. 
Found %" PRIu32 "\n", - cookie); - return false; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); - return false; - } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", - size); - return false; // logically impossible - } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); - return false; - } - bitmapOfRunContainers = buf; - buf += s; - } - uint16_t *keyscards = (uint16_t *)buf; - - *readbytes += size * 2 * sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); - return false; - } - buf += size * 2 * sizeof(uint16_t); - - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n"); - return false; - } - - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); - answer->keys[k] = tmp; - } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Ran out of bytes while reading offsets.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - - // skipping the offsets - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Running out of bytes while reading a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - - run_container_t *c = run_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Running out of bytes while reading an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ - -struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; -}; - -typedef struct roaring_pq_element_s roaring_pq_element_t; - -struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; -}; - -typedef struct roaring_pq_s roaring_pq_t; - -static inline bool 
compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; -} - -static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; - } - pq->elements[i] = *t; -} - -static void pq_free(roaring_pq_t *pq) { - free(pq->elements); - pq->elements = NULL; // paranoid - free(pq); -} - -static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; - } - } - if (!compare(&bestc, &ai)) { - break; - } - pq->elements[i] = bestc; - i = l; - } - pq->elements[i] = ai; -} - -static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t)); - answer->elements = - (roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length); - answer->size = length; - for (uint32_t i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); - } - for (int32_t i = (length >> 1); i >= 0; i--) { - percolate_down(answer, i); - } - return answer; -} - -static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; -} - -// this function consumes 
and frees the inputs -static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = ra_get_size(&x2->high_low_container); - if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; - } - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. - ra_unshare_container_at_index(&x1->high_low_container, pos1); - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); - ra_unshare_container_at_index(&x2->high_low_container, pos2); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); - void *c; - - if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && - (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { - c = container_lazy_ior(c2, container_type_2, c1, - container_type_1, - &container_result_type); - container_free(c1, container_type_1); - if (c != c2) { - container_free(c2, container_type_2); - } - } else { - c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - container_free(c2, container_type_2); - if (c != c1) { - container_free(c1, container_type_1); - } - } - // since we assume that the initial containers are non-empty, the - // result here - // can only be 
non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); - } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - free(x1); - free(x2); - return answer; -} - -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); - - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); - - pq_add(pq, &x1); - } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; - - pq_add(pq, &newelement); - } - } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ diff --git a/contrib/croaring/roaring/roaring.h b/contrib/croaring/roaring/roaring.h deleted file mode 100644 index 53413b2a06d..00000000000 --- a/contrib/croaring/roaring/roaring.h +++ /dev/null @@ -1,7187 +0,0 @@ -/* auto-generated on Tue Dec 18 
09:42:59 CST 2018. Do not edit! */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_version.h */ -// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand -#ifndef ROARING_INCLUDE_ROARING_VERSION -#define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION = 0.2.57, -enum { - ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 57 -}; -#endif // ROARING_INCLUDE_ROARING_VERSION -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_version.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/portability.h */ -/* - * portability.h - * - */ - - -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wcast-align" -#pragma clang diagnostic ignored "-Wcast-qual" -#pragma clang diagnostic ignored "-Wundef" -#endif - -#ifndef INCLUDE_PORTABILITY_H_ -#define INCLUDE_PORTABILITY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -//#ifndef __STDC_FORMAT_MACROS -//#define __STDC_FORMAT_MACROS 1 -//#endif - -#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) -#define _POSIX_C_SOURCE 200809L -#endif -#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) -#define _XOPEN_SOURCE 700 -#endif - -#include -#include -#include // will provide posix_memalign with _POSIX_C_SOURCE as defined above -#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) -#include // this should never be needed but there are some reports that it is needed. -#endif - - -#if defined(_MSC_VER) && !defined(__clang__) && !defined(_WIN64) -#pragma message( \ - "You appear to be attempting a 32-bit build under Visual Studio. 
We recommend a 64-bit build instead.") -#endif - -#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 -#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. -#endif - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -#ifndef DISABLE_X64 // some users may want to compile as if they did not have - // an x64 processor - -/////////////////////// -/// We support X64 hardware in the following manner: -/// -/// if IS_X64 is defined then we have at least SSE and SSE2 -/// (All Intel processors sold in the recent past have at least SSE and SSE2 support, -/// going back to the Pentium 4.) -/// -/// if USESSE4 is defined then we assume at least SSE4.2, SSE4.1, -/// SSSE3, SSE3... + IS_X64 -/// if USEAVX is defined, then we assume AVX2, AVX + USESSE4 -/// -/// So if you have hardware that supports AVX but not AVX2, then "USEAVX" -/// won't be enabled. -/// If you have hardware that supports SSE4.1, but not SSE4.2, then USESSE4 -/// won't be defined. 
-////////////////////// - -// unless DISABLEAVX was defined, if we have __AVX2__, we enable AVX -#if (!defined(USEAVX)) && (!defined(DISABLEAVX)) && (defined(__AVX2__)) -#define USEAVX -#endif - -// if we have __SSE4_2__, we enable SSE4 -#if (defined(__POPCNT__)) && (defined(__SSE4_2__)) -#define USESSE4 -#endif - -#if defined(USEAVX) || defined(__x86_64__) || defined(_M_X64) -// we have an x64 processor -#define IS_X64 -// we include the intrinsic header -#ifndef _MSC_VER -/* Non-Microsoft C/C++-compatible compiler */ -#include // on some recent GCC, this will declare posix_memalign -#endif -#endif - -#ifndef _MSC_VER -/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline - * assembly */ -#define ROARING_INLINE_ASM -#endif - -#ifdef USEAVX -#define USESSE4 // if we have AVX, then we have SSE4 -#define USE_BMI // we assume that AVX2 and BMI go hand and hand -#define USEAVX2FORDECODING // optimization -// vector operations should work on not just AVX -#define ROARING_VECTOR_OPERATIONS_ENABLED // vector unions (optimization) -#endif - -#endif // DISABLE_X64 - -#ifdef _MSC_VER -/* Microsoft C/C++-compatible compiler */ -#include - -#ifndef __clang__ // if one compiles with MSVC *with* clang, then these - // intrinsics are defined!!! -// sadly there is no way to check whether we are missing these intrinsics -// specifically. - -/* wrappers for Visual Studio built-ins that look like gcc built-ins */ -/* result might be undefined when input_num is zero */ -static inline int __builtin_ctzll(unsigned long long input_num) { - unsigned long index; -#ifdef _WIN64 // highly recommended!!! 
- _BitScanForward64(&index, input_num); -#else // if we must support 32-bit Windows - if ((uint32_t)input_num != 0) { - _BitScanForward(&index, (uint32_t)input_num); - } else { - _BitScanForward(&index, (uint32_t)(input_num >> 32)); - index += 32; - } -#endif - return index; -} - -/* result might be undefined when input_num is zero */ -static inline int __builtin_clzll(unsigned long long input_num) { - unsigned long index; -#ifdef _WIN64 // highly recommended!!! - _BitScanReverse64(&index, input_num); -#else // if we must support 32-bit Windows - if (input_num > 0xFFFFFFFF) { - _BitScanReverse(&index, (uint32_t)(input_num >> 32)); - index += 32; - } else { - _BitScanReverse(&index, (uint32_t)(input_num)); - } -#endif - return 63 - index; -} - -/* result might be undefined when input_num is zero */ -#ifdef USESSE4 -/* POPCNT support was added to processors around the release of SSE4.2 */ -/* USESSE4 flag guarantees POPCNT support */ -static inline int __builtin_popcountll(unsigned long long input_num) { -#ifdef _WIN64 // highly recommended!!! - return (int)__popcnt64(input_num); -#else // if we must support 32-bit Windows - return (int)(__popcnt((uint32_t)input_num) + - __popcnt((uint32_t)(input_num >> 32))); -#endif -} -#else -/* software implementation avoids POPCNT */ -static inline int __builtin_popcountll(unsigned long long input_num) { - const uint64_t m1 = 0x5555555555555555; //binary: 0101... - const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. - const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... - const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... 
- - input_num -= (input_num >> 1) & m1; - input_num = (input_num & m2) + ((input_num >> 2) & m2); - input_num = (input_num + (input_num >> 4)) & m4; - return (input_num * h01) >> 56; -} -#endif - -/* Use #define so this is effective even under /Ob0 (no inline) */ -#define __builtin_unreachable() __assume(0) -#endif - -#endif - -// without the following, we get lots of warnings about posix_memalign -#ifndef __cplusplus -extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); -#endif //__cplusplus // C++ does not have a well defined signature - -// portable version of posix_memalign -static inline void *aligned_malloc(size_t alignment, size_t size) { - void *p; -#ifdef _MSC_VER - p = _aligned_malloc(size, alignment); -#elif defined(__MINGW32__) || defined(__MINGW64__) - p = __mingw_aligned_malloc(size, alignment); -#else - // somehow, if this is used before including "x86intrin.h", it creates an - // implicit defined warning. - if (posix_memalign(&p, alignment, size) != 0) return NULL; -#endif - return p; -} - -static inline void aligned_free(void *memblock) { -#ifdef _MSC_VER - _aligned_free(memblock); -#elif defined(__MINGW32__) || defined(__MINGW64__) - __mingw_aligned_free(memblock); -#else - free(memblock); -#endif -} - -#if defined(_MSC_VER) -#define ALIGNED(x) __declspec(align(x)) -#else -#if defined(__GNUC__) -#define ALIGNED(x) __attribute__((aligned(x))) -#endif -#endif - -#ifdef __GNUC__ -#define WARN_UNUSED __attribute__((warn_unused_result)) -#else -#define WARN_UNUSED -#endif - -#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) - -static inline int hamming(uint64_t x) { -#ifdef USESSE4 - return (int) _mm_popcnt_u64(x); -#else - // won't work under visual studio, but hopeful we have _mm_popcnt_u64 in - // many cases - return __builtin_popcountll(x); -#endif -} - -#ifndef UINT64_C -#define UINT64_C(c) (c##ULL) -#endif - -#ifndef UINT32_C -#define UINT32_C(c) (c##UL) -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* 
INCLUDE_PORTABILITY_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/portability.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/perfparameters.h */ -#ifndef PERFPARAMETERS_H_ -#define PERFPARAMETERS_H_ - -#include - -/** -During lazy computations, we can transform array containers into bitset -containers as -long as we can expect them to have ARRAY_LAZY_LOWERBOUND values. -*/ -enum { ARRAY_LAZY_LOWERBOUND = 1024 }; - -/* default initial size of a run container - setting it to zero delays the malloc.*/ -enum { RUN_DEFAULT_INIT_SIZE = 0 }; - -/* default initial size of an array container - setting it to zero delays the malloc */ -enum { ARRAY_DEFAULT_INIT_SIZE = 0 }; - -/* automatic bitset conversion during lazy or */ -#ifndef LAZY_OR_BITSET_CONVERSION -#define LAZY_OR_BITSET_CONVERSION true -#endif - -/* automatically attempt to convert a bitset to a full run during lazy - * evaluation */ -#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL -#define LAZY_OR_BITSET_CONVERSION_TO_FULL true -#endif - -/* automatically attempt to convert a bitset to a full run */ -#ifndef OR_BITSET_CONVERSION_TO_FULL -#define OR_BITSET_CONVERSION_TO_FULL true -#endif - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/perfparameters.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/array_util.h */ -#ifndef ARRAY_UTIL_H -#define ARRAY_UTIL_H - -#include // for size_t -#include - - -/* - * Good old binary search. - * Assumes that array is sorted, has logarithmic complexity. - * if the result is x, then: - * if ( x>0 ) you have array[x] = ikey - * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) - * keys the array sorted. 
- */ -inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex]; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - -/** - * Galloping search - * Assumes that array is sorted, has logarithmic complexity. - * if the result is x, then if x = length, you have that all values in array between pos and length - * are smaller than min. - * otherwise returns the first index x such that array[x] >= min. - */ -static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, - int32_t length, uint16_t min) { - int32_t lower = pos + 1; - - if ((lower >= length) || (array[lower] >= min)) { - return lower; - } - - int32_t spansize = 1; - - while ((lower + spansize < length) && (array[lower + spansize] < min)) { - spansize <<= 1; - } - int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; - - if (array[upper] == min) { - return upper; - } - if (array[upper] < min) { - // means - // array - // has no - // item - // >= min - // pos = array.length; - return length; - } - - // we know that the next-smallest span was too small - lower += (spansize >> 1); - - int32_t mid = 0; - while (lower + 1 != upper) { - mid = (lower + upper) >> 1; - if (array[mid] == min) { - return mid; - } else if (array[mid] < min) { - lower = mid; - } else { - upper = mid; - } - } - return upper; -} - -/** - * Returns number of elements which are less then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_less(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - return pos >= 0 ? 
pos : -(pos+1); -} - -/** - * Returns number of elements which are greater then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - if (pos >= 0) { - return lenarray - (pos+1); - } else { - return lenarray - (-pos-1); - } -} - -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - * - * C should have capacity greater than the minimum of s_1 and s_b + 8 - * where 8 is sizeof(__m128i)/sizeof(uint16_t). - */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C); - -/** - * Compute the cardinality of the intersection using SSE4 instructions - */ -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b); - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. */ -int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l, - uint16_t *buffer); - -/* Computes the size of the intersection between one small and one large set of - * uint16_t. */ -int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, - size_t size_s, - const uint16_t *largearray, - size_t size_l); - - -/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ -bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l); -/** - * Generic intersection function. 
- */ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out); -/** - * Compute the size of the intersection (generic). - */ -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); - -/** - * Checking whether the size of the intersection is non-zero. - */ -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); -/** - * Generic union function. - */ -size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); - -/** - * Generic XOR function. - */ -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out); - -/** - * Generic difference function (ANDNOT). - */ -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out); - -/** - * Generic intersection function. - */ -size_t intersection_uint32(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, uint32_t *out); - -/** - * Generic intersection function, returns just the cardinality. - */ -size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB); - -/** - * Generic union function. - */ -size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, - size_t size_2, uint32_t *buffer); - -/** - * A fast SSE-based union function. - */ -uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, - const uint16_t *__restrict__ set_2, uint32_t size_2, - uint16_t *__restrict__ buffer); -/** - * A fast SSE-based XOR function. - */ -uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output); - -/** - * A fast SSE-based difference function. 
- */ -int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C); - -/** - * Generic union function, returns just the cardinality. - */ -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2); - -/** -* combines union_uint16 and union_vector16 optimally -*/ -size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); - - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/array_util.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_types.h */ -/* - Typedefs used by various components -*/ - -#ifndef ROARING_TYPES_H -#define ROARING_TYPES_H - -typedef bool (*roaring_iterator)(uint32_t value, void *param); -typedef bool (*roaring_iterator64)(uint64_t value, void *param); - -/** -* (For advanced users.) -* The roaring_statistics_t can be used to collect detailed statistics about -* the composition of a roaring bitmap. 
-*/ -typedef struct roaring_statistics_s { - uint32_t n_containers; /* number of containers */ - - uint32_t n_array_containers; /* number of array containers */ - uint32_t n_run_containers; /* number of run containers */ - uint32_t n_bitset_containers; /* number of bitmap containers */ - - uint32_t - n_values_array_containers; /* number of values in array containers */ - uint32_t n_values_run_containers; /* number of values in run containers */ - uint32_t - n_values_bitset_containers; /* number of values in bitmap containers */ - - uint32_t n_bytes_array_containers; /* number of allocated bytes in array - containers */ - uint32_t n_bytes_run_containers; /* number of allocated bytes in run - containers */ - uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap - containers */ - - uint32_t - max_value; /* the maximal value, undefined if cardinality is zero */ - uint32_t - min_value; /* the minimal value, undefined if cardinality is zero */ - uint64_t sum_value; /* the sum of all values (could be used to compute - average) */ - - uint64_t cardinality; /* total number of values stored in the bitmap */ - - // and n_values_arrays, n_values_rle, n_values_bitmap -} roaring_statistics_t; - -#endif /* ROARING_TYPES_H */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_types.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/utilasm.h */ -/* - * utilasm.h - * - */ - -#ifndef INCLUDE_UTILASM_H_ -#define INCLUDE_UTILASM_H_ - - -#if defined(USE_BMI) & defined(ROARING_INLINE_ASM) -#define ASMBITMANIPOPTIMIZATION // optimization flag - -#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ - __asm volatile("shrx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) - -#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \ - __asm volatile("shrx %1, %0, %0" \ - : "+r"(srcReg) \ - : /* read/write */ \ - "r"(bitsReg) /* read only */ \ - ) - -#define ASM_SHIFT_LEFT(srcReg, 
bitsReg, destReg) \ - __asm volatile("shlx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) -// set bit at position testBit within testByte to 1 and -// copy cmovDst to cmovSrc if that bit was previously clear -#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \ - __asm volatile( \ - "bts %2, %0\n" \ - "sbb $-1, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \ - __asm volatile( \ - "btr %2, %0\n" \ - "sbb $0, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_BT64(testByte, testBit, count) \ - __asm volatile( \ - "bt %2,%1\n" \ - "sbb %0,%0" /*could use setb */ \ - : "=r"(count) \ - : /* write */ \ - "r"(testByte), /* read only */ \ - "r"(testBit) /* read only */ \ - ) - -#endif // USE_BMI -#endif /* INCLUDE_UTILASM_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/utilasm.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/bitset_util.h */ -#ifndef BITSET_UTIL_H -#define BITSET_UTIL_H - -#include - - -/* - * Set all bits in indexes [begin,end) to true. 
- */ -static inline void bitset_set_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64)); - return; - } - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = ~UINT64_C(0); - bitmap[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); -} - - -/* - * Find the cardinality of the bitset in [begin,begin+lenminusone] - */ -static inline int bitset_lenrange_cardinality(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return hamming(bitmap[firstword] & - ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)); - } - int answer = hamming(bitmap[firstword] & ((~UINT64_C(0)) << (start % 64))); - for (uint32_t i = firstword + 1; i < endword; i++) { - answer += hamming(bitmap[i]); - } - answer += - hamming(bitmap[endword] & - (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)); - return answer; -} - -/* - * Check whether the cardinality of the bitset in [begin,begin+lenminusone] is 0 - */ -static inline bool bitset_lenrange_empty(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return (bitmap[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)) == 0; - } - if(((bitmap[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) return false; - for (uint32_t i = firstword + 1; i < endword; i++) { - if(bitmap[i] != 0) return false; - } - if((bitmap[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) return false; - return true; -} - - -/* - * Set all bits in indexes 
[begin,begin+lenminusone] to true. - */ -static inline void bitset_set_lenrange(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64); - return; - } - uint64_t temp = bitmap[endword]; - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i += 2) - bitmap[i] = bitmap[i + 1] = ~UINT64_C(0); - bitmap[endword] = - temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); -} - -/* - * Flip all the bits in indexes [begin,end). - */ -static inline void bitset_flip_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - bitmap[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword; i < endword; i++) bitmap[i] = ~bitmap[i]; - bitmap[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Set all bits in indexes [begin,end) to false. - */ -static inline void bitset_reset_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64))); - return; - } - bitmap[firstword] &= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = UINT64_C(0); - bitmap[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. 
- * - * This function should only be expected to be faster than - * bitset_extract_setbits - * when the density of the bitset is high. - * - * This function uses AVX2 decoding. - */ -size_t bitset_extract_setbits_avx2(uint64_t *bitset, size_t length, void *vout, - size_t outcapacity, uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function should only be expected to be faster than - *bitset_extract_setbits_uint16 - * when the density of the bitset is high. - * - * This function uses SSE decoding. - */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. 
- */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base); - -/* - * Given two bitsets containing "length" 64-bit words, write out the position - * of all the common set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base); - -/* - * Given a bitset having cardinality card, set all bit values in the list (there - * are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); -/* - * Given a bitset, set all bit values in the list (there - * are length of them). - */ -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length); - -/* - * Given a bitset having cardinality card, unset all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length); - -/* - * Given a bitset having cardinality card, toggle all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ - -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length); - -#ifdef USEAVX -/*** - * BEGIN Harley-Seal popcount functions. 
- */ - -/** - * Compute the population count of a 256-bit word - * This is not especially fast, but it is convenient as part of other functions. - */ -static inline __m256i popcount256(__m256i v) { - const __m256i lookuppos = _mm256_setr_epi8( - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4, - - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4); - const __m256i lookupneg = _mm256_setr_epi8( - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4, - - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4); - const __m256i low_mask = _mm256_set1_epi8(0x0f); - - const __m256i lo = _mm256_and_si256(v, low_mask); - const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); - const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); - const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); - return _mm256_sad_epu8(popcnt1, popcnt2); -} - -/** - * Simple CSA over 256 bits - */ -static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b, - __m256i c) { - const __m256i u = _mm256_xor_si256(a, b); - *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); - *l = _mm256_xor_si256(u, c); -} - -/** - * Fast Harley-Seal AVX population count function - */ -inline static 
uint64_t avx2_harley_seal_popcount256(const __m256i *data, - const uint64_t size) { - __m256i total = _mm256_setzero_si256(); - __m256i ones = _mm256_setzero_si256(); - __m256i twos = _mm256_setzero_si256(); - __m256i fours = _mm256_setzero_si256(); - __m256i eights = _mm256_setzero_si256(); - __m256i sixteens = _mm256_setzero_si256(); - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; - - const uint64_t limit = size - size % 16; - uint64_t i = 0; - - for (; i < limit; i += 16) { - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), - _mm256_lddqu_si256(data + i + 1)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), - _mm256_lddqu_si256(data + i + 3)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), - _mm256_lddqu_si256(data + i + 5)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), - _mm256_lddqu_si256(data + i + 7)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsA, &fours, fours, foursA, foursB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), - _mm256_lddqu_si256(data + i + 9)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), - _mm256_lddqu_si256(data + i + 11)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), - _mm256_lddqu_si256(data + i + 13)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), - _mm256_lddqu_si256(data + i + 15)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsB, &fours, fours, foursA, foursB); - CSA(&sixteens, &eights, eights, eightsA, eightsB); - - total = _mm256_add_epi64(total, popcount256(sixteens)); - } - - total = _mm256_slli_epi64(total, 4); // * 16 - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... 
- total = _mm256_add_epi64(total, popcount256(ones)); - for (; i < size; i++) - total = - _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); - - return (uint64_t)(_mm256_extract_epi64(total, 0)) + - (uint64_t)(_mm256_extract_epi64(total, 1)) + - (uint64_t)(_mm256_extract_epi64(total, 2)) + - (uint64_t)(_mm256_extract_epi64(total, 3)); -} - -#define AVXPOPCNTFNC(opname, avx_intrinsic) \ - static inline uint64_t avx2_harley_seal_popcount256_##opname( \ - const __m256i *data1, const __m256i *data2, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 4)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, 
twosB); \ - CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } 
\ - static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \ - const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \ - __m256i *__restrict__ out, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - _mm256_storeu_si256(out + i + 1, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - _mm256_storeu_si256(out + i + 2, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - _mm256_storeu_si256(out + i + 3, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 4)); \ - _mm256_storeu_si256(out + i + 4, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - _mm256_storeu_si256(out + i + 5, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - _mm256_storeu_si256(out + i + 6, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - _mm256_storeu_si256(out + i + 7, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - 
CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - _mm256_storeu_si256(out + i + 8, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - _mm256_storeu_si256(out + i + 9, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - _mm256_storeu_si256(out + i + 10, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - _mm256_storeu_si256(out + i + 11, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - _mm256_storeu_si256(out + i + 12, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - _mm256_storeu_si256(out + i + 13, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - _mm256_storeu_si256(out + i + 14, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - _mm256_storeu_si256(out + i + 15, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = 
avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } - -AVXPOPCNTFNC(or, _mm256_or_si256) -AVXPOPCNTFNC(union, _mm256_or_si256) -AVXPOPCNTFNC(and, _mm256_and_si256) -AVXPOPCNTFNC(intersection, _mm256_and_si256) -AVXPOPCNTFNC (xor, _mm256_xor_si256) -AVXPOPCNTFNC(andnot, _mm256_andnot_si256) - -/*** - * END Harley-Seal popcount functions. - */ - -#endif // USEAVX - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/bitset_util.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/array.h */ -/* - * array.h - * - */ - -#ifndef INCLUDE_CONTAINERS_ARRAY_H_ -#define INCLUDE_CONTAINERS_ARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - - -/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ -enum { DEFAULT_MAX_SIZE = 4096 }; - -/* struct array_container - sparse representation of a bitmap - * - * @cardinality: number of indices in `array` (and the bitmap) - * @capacity: allocated size of `array` - * @array: sorted list of integers - */ -struct array_container_s { - int32_t cardinality; - int32_t capacity; - uint16_t *array; -}; - -typedef struct array_container_s array_container_t; - -/* Create a new array with default. Return NULL in case of failure. See also - * array_container_create_given_capacity. */ -array_container_t *array_container_create(void); - -/* Create a new array with a specified capacity size. Return NULL in case of - * failure. */ -array_container_t *array_container_create_given_capacity(int32_t size); - -/* Create a new array containing all values in [min,max). 
*/ -array_container_t * array_container_create_range(uint32_t min, uint32_t max); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int array_container_shrink_to_fit(array_container_t *src); - -/* Free memory owned by `array'. */ -void array_container_free(array_container_t *array); - -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src); - -int32_t array_container_serialize(const array_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t array_container_serialization_len(const array_container_t *container); - -void *array_container_deserialize(const char *buf, size_t buf_len); - -/* Get the cardinality of `array'. */ -static inline int array_container_cardinality(const array_container_t *array) { - return array->cardinality; -} - -static inline bool array_container_nonzero_cardinality( - const array_container_t *array) { - return array->cardinality > 0; -} - -/* Copy one container into another. We assume that they are distinct. */ -void array_container_copy(const array_container_t *src, array_container_t *dst); - -/* Add all the values in [min,max) (included) at a distance k*step from min. - The container must have a size less or equal to DEFAULT_MAX_SIZE after this - addition. */ -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step); - -/* Set the cardinality to zero (does not release memory). 
*/ -static inline void array_container_clear(array_container_t *array) { - array->cardinality = 0; -} - -static inline bool array_container_empty(const array_container_t *array) { - return array->cardinality == 0; -} - -/* check whether the cardinality is equal to the capacity (this does not mean -* that it contains 1<<16 elements) */ -static inline bool array_container_full(const array_container_t *array) { - return array->cardinality == array->capacity; -} - - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void array_container_union(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* symmetric difference, see array_container_union */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Computes the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void array_container_intersection(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* Check whether src_1 and src_2 intersect. */ -bool array_container_intersect(const array_container_t *src_1, - const array_container_t *src_2); - - -/* computers the size of the intersection between two arrays. - */ -int array_container_intersection_cardinality(const array_container_t *src_1, - const array_container_t *src_2); - -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). 
- * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base); - -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a); - -/* - * Print this container using printf (useful for debugging). - */ -void array_container_printf(const array_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having cardinality "card". - */ -static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { - return card * 2 + 2; -} - -/** - * Increase capacity to at least min. - * Whether the existing data needs to be copied over depends on the "preserve" - * parameter. If preserve is false, then the new content will be uninitialized, - * otherwise the old content is copied. - */ -void array_container_grow(array_container_t *container, int32_t min, - bool preserve); - -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * array_container_size_in_bytes(container). - * - */ -int32_t array_container_write(const array_container_t *container, char *buf); -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. 
- * The number of bytes read should be array_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t array_container_read(int32_t cardinality, array_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write) - * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known. - * - */ -static inline int32_t array_container_size_in_bytes( - const array_container_t *container) { - return container->cardinality * sizeof(uint16_t); -} - -/** - * Return true if the two arrays have the same content. - */ -bool array_container_equals(const array_container_t *container1, - const array_container_t *container2); - -/** - * Return true if container1 is a subset of container2. - */ -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2); - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool array_container_select(const array_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - int card = array_container_cardinality(container); - if (*start_rank + card <= rank) { - *start_rank += card; - return false; - } else { - *element = container->array[rank - *start_rank]; - return true; - } -} - -/* Computes the difference of array1 and array2 and write the result - * to array out. - * Array out does not need to be distinct from array_1 - */ -void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Append x to the set. Assumes that the value is larger than any preceding - * values. 
*/ -static inline void array_container_append(array_container_t *arr, - uint16_t pos) { - const int32_t capacity = arr->capacity; - - if (array_container_full(arr)) { - array_container_grow(arr, capacity + 1, true); - } - - arr->array[arr->cardinality++] = pos; -} - -/** - * Add value to the set if final cardinality doesn't exceed max_cardinality. - * Return code: - * 1 -- value was added - * 0 -- value was already present - * -1 -- value was not added because cardinality would exceed max_cardinality - */ -static inline int array_container_try_add(array_container_t *arr, uint16_t value, - int32_t max_cardinality) { - const int32_t cardinality = arr->cardinality; - - // best case, we can append. - if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && - cardinality < max_cardinality) { - array_container_append(arr, value); - return 1; - } - - const int32_t loc = binarySearch(arr->array, cardinality, value); - - if (loc >= 0) { - return 0; - } else if (cardinality < max_cardinality) { - if (array_container_full(arr)) { - array_container_grow(arr, arr->capacity + 1, true); - } - const int32_t insert_idx = -loc - 1; - memmove(arr->array + insert_idx + 1, arr->array + insert_idx, - (cardinality - insert_idx) * sizeof(uint16_t)); - arr->array[insert_idx] = value; - arr->cardinality++; - return 1; - } else { - return -1; - } -} - -/* Add value to the set. Returns true if x was not already present. */ -static inline bool array_container_add(array_container_t *arr, uint16_t value) { - return array_container_try_add(arr, value, INT32_MAX) == 1; -} - -/* Remove x from the set. Returns true if x was present. 
*/ -static inline bool array_container_remove(array_container_t *arr, - uint16_t pos) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); - const bool is_present = idx >= 0; - if (is_present) { - memmove(arr->array + idx, arr->array + idx + 1, - (arr->cardinality - idx - 1) * sizeof(uint16_t)); - arr->cardinality--; - } - - return is_present; -} - -/* Check whether x is present. */ -inline bool array_container_contains(const array_container_t *arr, - uint16_t pos) { - // return binarySearch(arr->array, arr->cardinality, pos) >= 0; - // binary search with fallback to linear search for short ranges - int32_t low = 0; - const uint16_t * carr = (const uint16_t *) arr->array; - int32_t high = arr->cardinality - 1; - // while (high - low >= 0) { - while(high >= low + 16) { - int32_t middleIndex = (low + high)>>1; - uint16_t middleValue = carr[middleIndex]; - if (middleValue < pos) { - low = middleIndex + 1; - } else if (middleValue > pos) { - high = middleIndex - 1; - } else { - return true; - } - } - - for (int i=low; i <= high; i++) { - uint16_t v = carr[i]; - if (v == pos) { - return true; - } - if ( v > pos ) return false; - } - return false; - -} - - -//* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
*/ -static inline bool array_container_contains_range(const array_container_t *arr, - uint32_t range_start, uint32_t range_end) { - - const uint16_t rs_included = range_start; - const uint16_t re_included = range_end - 1; - - const uint16_t *carr = (const uint16_t *) arr->array; - - const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); - const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); - - return (start < arr->cardinality) && (end < arr->cardinality) - && (((uint16_t)(end - start)) == re_included - rs_included) - && (carr[start] == rs_included) && (carr[end] == re_included); -} - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t array_container_minimum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[0]; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t array_container_maximum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[arr->cardinality - 1]; -} - -/* Returns the number of values equal or smaller than x */ -inline int array_container_rank(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx + 1; - } else { - return -idx - 1; - } -} - -/* Returns the index of the first value equal or smaller than x, or -1 */ -inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx; - } else { - int32_t candidate = - idx - 1; - if(candidate < arr->cardinality) return candidate; - return -1; - } -} - -/* - * Adds all values in range [min,max] using hint: - * nvals_less is the number of array values less than $min - * nvals_greater is the number of array values greater than $max - */ -static inline 
void array_container_add_range_nvals(array_container_t *array, - uint32_t min, uint32_t max, - int32_t nvals_less, - int32_t nvals_greater) { - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - if (union_cardinality > array->capacity) { - array_container_grow(array, union_cardinality, true); - } - memmove(&(array->array[union_cardinality - nvals_greater]), - &(array->array[array->cardinality - nvals_greater]), - nvals_greater * sizeof(uint16_t)); - for (uint32_t i = 0; i <= max - min; i++) { - array->array[nvals_less + i] = min + i; - } - array->cardinality = union_cardinality; -} - -/** - * Adds all values in range [min,max]. - */ -static inline void array_container_add_range(array_container_t *array, - uint32_t min, uint32_t max) { - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); -} - -/* - * Removes all elements array[pos] .. 
array[pos+count-1] - */ -static inline void array_container_remove_range(array_container_t *array, - uint32_t pos, uint32_t count) { - if (count != 0) { - memmove(&(array->array[pos]), &(array->array[pos+count]), - (array->cardinality - pos - count) * sizeof(uint16_t)); - array->cardinality -= count; - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/array.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/bitset.h */ -/* - * bitset.h - * - */ - -#ifndef INCLUDE_CONTAINERS_BITSET_H_ -#define INCLUDE_CONTAINERS_BITSET_H_ - -#include -#include - -#ifdef USEAVX -#define ALIGN_AVX __attribute__((aligned(sizeof(__m256i)))) -#else -#define ALIGN_AVX -#endif - -enum { - BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64, - BITSET_UNKNOWN_CARDINALITY = -1 -}; - -struct bitset_container_s { - int32_t cardinality; - uint64_t *array; -}; - -typedef struct bitset_container_s bitset_container_t; - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_container_t *bitset_container_create(void); - -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset); - -/* Clear bitset (sets bits to 0). */ -void bitset_container_clear(bitset_container_t *bitset); - -/* Set all bits to 1. */ -void bitset_container_set_all(bitset_container_t *bitset); - -/* Duplicate bitset */ -bitset_container_t *bitset_container_clone(const bitset_container_t *src); - -int32_t bitset_container_serialize(const bitset_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t bitset_container_serialization_len(void); - -void *bitset_container_deserialize(const char *buf, size_t buf_len); - -/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow - * and - * should not be used in performance-sensitive code. Ever. 
*/ -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end); - -#ifdef ASMBITMANIPOPTIMIZATION -/* Set the ith bit. */ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. */ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; - return bitset->cardinality - oldcard; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. 
*/ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; - return oldcard - bitset->cardinality; -} - -/* Get the value of the ith bit. */ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - uint64_t word = bitset->array[pos >> 6]; - const uint64_t p = pos; - ASM_INPLACESHIFT_RIGHT(word, p); - return word & 1; -} - -#else - -/* Set the ith bit. */ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. 
*/ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality += (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. */ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality -= (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Get the value of the ith bit. */ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - const uint64_t word = bitset->array[pos >> 6]; - return (word >> (pos & 63)) & 1; -} - -#endif - -/* -* Check if all bits are set in a range of positions from pos_start (included) to -* pos_end (excluded). 
-*/ -static inline bool bitset_container_get_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - - const uint32_t start = pos_start >> 6; - const uint32_t end = pos_end >> 6; - - const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1); - const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1; - - if (start == end) return ((bitset->array[end] & first & last) == (first & last)); - if ((bitset->array[start] & first) != first) return false; - - if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){ - - return false; - } - - for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ - - if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; - } - - return true; -} - -/* Check whether `bitset' is present in `array'. Calls bitset_container_get. */ -inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos) { - return bitset_container_get(bitset, pos); -} - -/* -* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded) -* is present in `bitset'. Calls bitset_container_get_all. -*/ -static inline bool bitset_container_contains_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - return bitset_container_get_range(bitset, pos_start, pos_end); -} - -/* Get the number of bits set */ -static inline int bitset_container_cardinality( - const bitset_container_t *bitset) { - return bitset->cardinality; -} - - - - -/* Copy one container into another. We assume that they are distinct. */ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest); - -/* Add all the values [min,max) at a distance k*step from min: min, - * min+step,.... */ -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step); - -/* Get the number of bits set (force computation). This does not modify bitset. 
- * To update the cardinality, you should do - * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ -int bitset_container_compute_cardinality(const bitset_container_t *bitset); - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - when the cardinality is unknown, it is computed and stored in the struct */ -static inline bool bitset_container_nonzero_cardinality( - bitset_container_t *bitset) { - // account for laziness - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - // could bail early instead with a nonzero result - bitset->cardinality = bitset_container_compute_cardinality(bitset); - } - return bitset->cardinality > 0; -} - -/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), - * it never modifies the bitset struct. */ -static inline bool bitset_container_empty( - const bitset_container_t *bitset) { - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((bitset->array[i]) != 0) return false; - } - return true; - } - return bitset->cardinality == 0; -} - - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - the bitset is never modified */ -static inline bool bitset_container_const_nonzero_cardinality( - const bitset_container_t *bitset) { - return !bitset_container_empty(bitset); -} - -/* - * Check whether the two bitsets intersect - */ -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_or(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. 
- */ -int bitset_container_or_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. Same as bitset_container_or. */ -int bitset_container_union(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_or_justcard. */ -int bitset_container_union_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not - * update the cardinality. Provided to optimize chained operations. */ -int bitset_container_or_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_and(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_and_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. Same as bitset_container_and. */ -int bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_and_justcard. */ -int bitset_container_intersection_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. 
*/ -int bitset_container_and_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_xor_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_xor_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_andnot_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_andnot_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. 
- * The out pointer should point to enough memory (the cardinality times 32 - * bits). - */ -int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void bitset_container_printf(const bitset_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void bitset_container_printf_as_uint32_array(const bitset_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container. - */ -static inline int32_t bitset_container_serialized_size_in_bytes(void) { - return BITSET_CONTAINER_SIZE_IN_WORDS * 8; -} - -/** - * Return the the number of runs. - */ -int bitset_container_number_of_runs(bitset_container_t *b); - -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * bitset_container_size_in_bytes(container). - */ -int32_t bitset_container_write(const bitset_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t bitset_container_read(int32_t cardinality, - bitset_container_t *container, const char *buf); -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write). 
- * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known or can be computed. - */ -static inline int32_t bitset_container_size_in_bytes( - const bitset_container_t *container) { - (void)container; - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); -} - -/** - * Return true if the two containers have the same content. - */ -bool bitset_container_equals(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -bool bitset_container_select(const bitset_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container); - -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container); - -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x); - -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); -#endif /* INCLUDE_CONTAINERS_BITSET_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/bitset.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/run.h */ -/* - * run.h - * - */ - -#ifndef INCLUDE_CONTAINERS_RUN_H_ -#define INCLUDE_CONTAINERS_RUN_H_ - -#ifdef __cplusplus -extern "C" { -#endif - 
-#include -#include -#include -#include - - -/* struct rle16_s - run length pair - * - * @value: start position of the run - * @length: length of the run is `length + 1` - * - * An RLE pair {v, l} would represent the integers between the interval - * [v, v+l+1], e.g. {3, 2} = [3, 4, 5]. - */ -struct rle16_s { - uint16_t value; - uint16_t length; -}; - -typedef struct rle16_s rle16_t; - -/* struct run_container_s - run container bitmap - * - * @n_runs: number of rle_t pairs in `runs`. - * @capacity: capacity in rle_t pairs `runs` can hold. - * @runs: pairs of rle_t. - * - */ -struct run_container_s { - int32_t n_runs; - int32_t capacity; - rle16_t *runs; -}; - -typedef struct run_container_s run_container_t; - -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void); - -/* Create a new run container with given capacity. Return NULL in case of - * failure. */ -run_container_t *run_container_create_given_capacity(int32_t size); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int run_container_shrink_to_fit(run_container_t *src); - -/* Free memory owned by `run'. */ -void run_container_free(run_container_t *run); - -/* Duplicate container */ -run_container_t *run_container_clone(const run_container_t *src); - -int32_t run_container_serialize(const run_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t run_container_serialization_len(const run_container_t *container); - -void *run_container_deserialize(const char *buf, size_t buf_len); - -/* - * Effectively deletes the value at index index, repacking data. 
- */ -static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { - memmove(run->runs + index, run->runs + (1 + index), - (run->n_runs - index - 1) * sizeof(rle16_t)); - run->n_runs--; -} - -/** - * Good old binary search through rle data - */ -inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex].value; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - -/* - * Returns index of the run which contains $ikey - */ -static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min = array[middleIndex].value; - uint16_t max = array[middleIndex].value + array[middleIndex].length; - if (ikey > max) { - low = middleIndex + 1; - } else if (ikey < min) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - - -/** - * Returns number of runs which can'be be merged with the key because they - * are less than the key. - * Note that [5,6,7,8] can be merged with the key 9 and won't be counted. 
- */ -static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value + UINT32_C(1) < key) { // uint32 arithmetic - low = middleIndex + 1; - } else if (key < min_value) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return low; -} - -static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value < key) { - low = middleIndex + 1; - } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic - high = middleIndex - 1; - } else { - return lenarray - (middleIndex + 1); - } - } - return lenarray - low; -} - -/** - * increase capacity to at least min. Whether the - * existing data needs to be copied over depends on copy. If "copy" is false, - * then the new content will be uninitialized, otherwise a copy is made. - */ -void run_container_grow(run_container_t *run, int32_t min, bool copy); - -/** - * Moves the data so that we can write data at index - */ -static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { - /* This function calls realloc + memmove sequentially to move by one index. - * Potentially copying twice the array. - */ - if (run->n_runs + 1 > run->capacity) - run_container_grow(run, run->n_runs + 1, true); - memmove(run->runs + 1 + index, run->runs + index, - (run->n_runs - index) * sizeof(rle16_t)); - run->n_runs++; -} - -/* Add `pos' to `run'. 
Returns true if `pos' was not present. */ -bool run_container_add(run_container_t *run, uint16_t pos); - -/* Remove `pos' from `run'. Returns true if `pos' was present. */ -static inline bool run_container_remove(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) { - int32_t le = run->runs[index].length; - if (le == 0) { - recoverRoomAtIndex(run, (uint16_t)index); - } else { - run->runs[index].value++; - run->runs[index].length--; - } - return true; - } - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset < le) { - // need to break in two - run->runs[index].length = (uint16_t)(offset - 1); - // need to insert - uint16_t newvalue = pos + 1; - int32_t newlength = le - offset - 1; - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = newvalue; - run->runs[index + 1].length = (uint16_t)newlength; - return true; - - } else if (offset == le) { - run->runs[index].length--; - return true; - } - } - // no match - return false; -} - -/* Check whether `pos' is present in `run'. */ -inline bool run_container_contains(const run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return true; - index = -index - 2; // points to preceding value, possibly -1 - if (index != -1) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return true; - } - return false; -} - -/* -* Check whether all positions in a range of positions from pos_start (included) -* to pos_end (excluded) is present in `run'. 
-*/ -static inline bool run_container_contains_range(const run_container_t *run, - uint32_t pos_start, uint32_t pos_end) { - uint32_t count = 0; - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start); - if (index < 0) { - index = -index - 2; - if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){ - return false; - } - } - for (int32_t i = index; i < run->n_runs; ++i) { - const uint32_t stop = run->runs[i].value + run->runs[i].length; - if (run->runs[i].value >= pos_end) break; - if (stop >= pos_end) { - count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0); - break; - } - const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0; - count += (min < run->runs[i].length) ? min : run->runs[i].length; - } - return count >= (pos_end - pos_start - 1); -} - -#ifdef USEAVX - -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m256i) / sizeof(rle16_t); - if (n_runs > step) { - __m256i total = _mm256_setzero_si256(); - for (; k + step <= n_runs; k += step) { - __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); - __m256i justlengths = _mm256_srli_epi32(ymm1, 16); - total = _mm256_add_epi32(total, justlengths); - } - // a store might be faster than extract? - uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, total); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - } - for (; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} - -#else - -/* Get the cardinality of `run'. Requires an actual computation. 
*/ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} -#endif - -/* Card > 0?, see run_container_empty for the reverse */ -static inline bool run_container_nonzero_cardinality( - const run_container_t *run) { - return run->n_runs > 0; // runs never empty -} - -/* Card == 0?, see run_container_nonzero_cardinality for the reverse */ -static inline bool run_container_empty( - const run_container_t *run) { - return run->n_runs == 0; // runs never empty -} - - - -/* Copy one container into another. We assume that they are distinct. */ -void run_container_copy(const run_container_t *src, run_container_t *dst); - -/* Set the cardinality to zero (does not release memory). */ -static inline void run_container_clear(run_container_t *run) { - run->n_runs = 0; -} - -/** - * Append run described by vl to the run container, possibly merging. - * It is assumed that the run would be inserted at the end of the container, no - * check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * - * This is not a safe function, it is meant for performance: use with care. 
- */ -static inline void run_container_append(run_container_t *run, rle16_t vl, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (vl.value > previousend + 1) { // we add a new one - run->runs[run->n_runs] = vl; - run->n_runs++; - *previousrl = vl; - } else { - uint32_t newend = vl.value + vl.length + UINT32_C(1); - if (newend > previousend) { // we merge - previousrl->length = (uint16_t)(newend - 1 - previousrl->value); - run->runs[run->n_runs - 1] = *previousrl; - } - } -} - -/** - * Like run_container_append but it is assumed that the content of run is empty. - */ -static inline rle16_t run_container_append_first(run_container_t *run, - rle16_t vl) { - run->runs[run->n_runs] = vl; - run->n_runs++; - return vl; -} - -/** - * append a single value given by val to the run container, possibly merging. - * It is assumed that the value would be inserted at the end of the container, - * no check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * This is not a safe function, it is meant for performance: use with care. - */ -static inline void run_container_append_value(run_container_t *run, - uint16_t val, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (val > previousend + 1) { // we add a new one - //*previousrl = (rle16_t){.value = val, .length = 0};// requires C99 - previousrl->value = val; - previousrl->length = 0; - - run->runs[run->n_runs] = *previousrl; - run->n_runs++; - } else if (val == previousend + 1) { // we merge - previousrl->length++; - run->runs[run->n_runs - 1] = *previousrl; - } -} - -/** - * Like run_container_append_value but it is assumed that the content of run is - * empty. 
- */ -static inline rle16_t run_container_append_value_first(run_container_t *run, - uint16_t val) { - // rle16_t newrle = (rle16_t){.value = val, .length = 0};// requires C99 - rle16_t newrle; - newrle.value = val; - newrle.length = 0; - - run->runs[run->n_runs] = newrle; - run->n_runs++; - return newrle; -} - -/* Check whether the container spans the whole chunk (cardinality = 1<<16). - * This check can be done in constant time (inexpensive). */ -static inline bool run_container_is_full(const run_container_t *run) { - rle16_t vl = run->runs[0]; - return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF); -} - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2 . */ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2); - -/* Check whether src_1 and src_2 intersect. */ -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having "num_runs" runs. - */ -static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) { - return sizeof(uint16_t) + - sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries. -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be run_container_size_in_bytes(container). - */ -int32_t run_container_write(const run_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). 
- * The cardinality parameter is provided for consistency with other containers, - * but - * it might be effectively ignored.. - */ -int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see run_container_write). - * This is meant to be compatible with the Java and Go versions of Roaring. - */ -static inline int32_t run_container_size_in_bytes( - const run_container_t *container) { - return run_container_serialized_size_in_bytes(container->n_runs); -} - -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals(const run_container_t *container1, - const run_container_t *container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2); - -/** - * Used in a start-finish scan that appends segments, for XOR and NOT - */ - -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length); - -/** -* The new container consists of a single run [start,stop). -* It is required that stop>start, the caller is responsability for this check. -* It is required that stop <= (1<<16), the caller is responsability for this check. -* The cardinality of the created container is stop - start. -* Returns NULL on failure -*/ -static inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop) { - run_container_t *rc = run_container_create_given_capacity(1); - if (rc) { - rle16_t r; - r.value = (uint16_t)start; - r.length = (uint16_t)(stop - start - 1); - run_container_append_first(rc, r); - } - return rc; -} - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. 
- */ -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ - -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t run_container_minimum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[0].value; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t run_container_maximum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length; -} - -/* Returns the number of values equal or smaller than x */ -int run_container_rank(const run_container_t *arr, uint16_t x); - -/* Returns the index of the first run containing a value at least as large as x, or -1 */ -inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) { - int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x); - if (index >= 0) return index; - index = -index - 2; // points to preceding run, possibly -1 - if (index != -1) { // possible match - int32_t offset = x - arr->runs[index].value; - int32_t le = arr->runs[index].length; - if (offset <= le) return index; - } - index += 1; - if(index < arr->n_runs) { - return index; - } - return -1; -} - -/* - * Add all values in range [min, max] using hint. 
- */ -static inline void run_container_add_range_nruns(run_container_t* run, - uint32_t min, uint32_t max, - int32_t nruns_less, - int32_t nruns_greater) { - int32_t nruns_common = run->n_runs - nruns_less - nruns_greater; - if (nruns_common == 0) { - makeRoomAtIndex(run, nruns_less); - run->runs[nruns_less].value = min; - run->runs[nruns_less].length = max - min; - } else { - uint32_t common_min = run->runs[nruns_less].value; - uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value + - run->runs[nruns_less + nruns_common - 1].length; - uint32_t result_min = (common_min < min) ? common_min : min; - uint32_t result_max = (common_max > max) ? common_max : max; - - run->runs[nruns_less].value = result_min; - run->runs[nruns_less].length = result_max - result_min; - - memmove(&(run->runs[nruns_less + 1]), - &(run->runs[run->n_runs - nruns_greater]), - nruns_greater*sizeof(rle16_t)); - run->n_runs = nruns_less + 1 + nruns_greater; - } -} - -/** - * Add all values in range [min, max] - */ -static inline void run_container_add_range(run_container_t* run, - uint32_t min, uint32_t max) { - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); -} - -/** - * Shifts last $count elements either left (distance < 0) or right (distance > 0) - */ -static inline void run_container_shift_tail(run_container_t* run, - int32_t count, int32_t distance) { - if (distance > 0) { - if (run->capacity < count+distance) { - run_container_grow(run, count+distance, true); - } - } - int32_t srcpos = run->n_runs - count; - int32_t dstpos = srcpos + distance; - memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count); - run->n_runs += distance; -} - -/** - * Remove all elements in range [min, max] - */ -static inline void run_container_remove_range(run_container_t *run, uint32_t min, 
uint32_t max) { - int32_t first = rle16_find_run(run->runs, run->n_runs, min); - int32_t last = rle16_find_run(run->runs, run->n_runs, max); - - if (first >= 0 && min > run->runs[first].value && - max < run->runs[first].value + run->runs[first].length) { - // split this run into two adjacent runs - - // right subinterval - makeRoomAtIndex(run, first+1); - run->runs[first+1].value = max + 1; - run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1); - - // left subinterval - run->runs[first].length = (min - 1) - run->runs[first].value; - - return; - } - - // update left-most partial run - if (first >= 0) { - if (min > run->runs[first].value) { - run->runs[first].length = (min - 1) - run->runs[first].value; - first++; - } - } else { - first = -first-1; - } - - // update right-most run - if (last >= 0) { - uint16_t run_max = run->runs[last].value + run->runs[last].length; - if (run_max > max) { - run->runs[last].value = max + 1; - run->runs[last].length = run_max - (max + 1); - last--; - } - } else { - last = (-last-1) - 1; - } - - // remove intermediate runs - if (first <= last) { - run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1)); - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_RUN_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/run.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/convert.h */ -/* - * convert.h - * - */ - -#ifndef INCLUDE_CONTAINERS_CONVERT_H_ -#define INCLUDE_CONTAINERS_CONVERT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Convert an array into a bitset. The input container is not freed or modified. - */ -bitset_container_t *bitset_container_from_array(const array_container_t *arr); - -/* Convert a run into a bitset. The input container is not freed or modified. */ -bitset_container_t *bitset_container_from_run(const run_container_t *arr); - -/* Convert a run into an array. 
The input container is not freed or modified. */ -array_container_t *array_container_from_run(const run_container_t *arr); - -/* Convert a bitset into an array. The input container is not freed or modified. - */ -array_container_t *array_container_from_bitset(const bitset_container_t *bits); - -/* Convert an array into a run. The input container is not freed or modified. - */ -run_container_t *run_container_from_array(const array_container_t *c); - -/* convert a run into either an array or a bitset - * might free the container */ -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype); - -/* convert containers to and from runcontainers, as is most space efficient. - * The container might be freed. */ -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after); - -/* converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes reponsible to free the new one. */ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after); -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after); - -/** - * Create new bitset container which is a union of run container and - * range [min, max]. Caller is responsible for freeing run container. 
- */ -bitset_container_t *bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max); - - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/convert.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_equal.h */ -/* - * mixed_equal.h - * - */ - -#ifndef CONTAINERS_MIXED_EQUAL_H_ -#define CONTAINERS_MIXED_EQUAL_H_ - - -/** - * Return true if the two containers have the same content. - */ -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2); -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -#endif /* CONTAINERS_MIXED_EQUAL_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_equal.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_subset.h */ -/* - * mixed_subset.h - * - */ - -#ifndef CONTAINERS_MIXED_SUBSET_H_ -#define CONTAINERS_MIXED_SUBSET_H_ - - -/** - * Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2); - -/** -* Return true if container1 is a subset of container2. 
- */ -bool run_container_is_subset_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2); - -#endif /* CONTAINERS_MIXED_SUBSET_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_subset.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_andnot.h */ -/* - * mixed_andnot.h - */ -#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ -#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ - - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ - -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not initially have a valid container. - * Return true for a bitset result; false for array - */ - -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. - */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst must be a valid array container, allowed to be src_1 */ - -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* - * dst is a valid array container and may be the same as src_1 - */ - -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_andnot.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_intersection.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ -#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, array intersection - */ - - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2. */ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2); - - - -/* Checking whether src_1 and src_2 intersect. */ -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. 
If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. - * If *dst == src_2, then an in-place intersection is attempted - **/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2); - -/* Compute the size of the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality(const run_container_t *src_1, - const bitset_container_t *src_2); - - -/* Check that src_1 and src_2 intersect. */ -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2); - -/* Check that src_1 and src_2 intersect. - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Same as bitset_bitset_container_intersection except that if the output is to - * be a - * bitset_container_t, then src_1 is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. 
- * In all cases, the result is in *dst. - */ -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_intersection.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_negation.h */ -/* - * mixed_negation.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_ -#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_ - - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap - * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst); - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst); - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. - * Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. 
In - * case of failure, *dst will be NULL. - */ -int run_container_negation(const run_container_t *src, void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst); - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. - */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. Thus this routine - * may be a wrapper for the non-in-place version - */ -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. 
- */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_negation.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_union.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_ -#define INCLUDE_CONTAINERS_MIXED_UNION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst if it cannot be written to src_1. If the return function is true, - * the result is a bitset_container_t - * otherwise is a array_container_t. When the result is an array_container_t, it - * it either written to src_1 (if *dst is null) or to *dst. - * If the result is a bitset_container_t and *dst is null, then there was a failure. - */ -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Same as array_array_container_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* - * Same as array_array_container_inplace_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. We assume that dst is a - * valid container. The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * src2. 
The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. - * If run_container_is_full(src_1) is true, you must not be calling this - *function. - **/ -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - * If run_container_is_full(src_1) is true, you must not be calling this - * function. - * */ -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_union.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_xor.h */ -/* - * mixed_xor.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_ -#define INCLUDE_CONTAINERS_MIXED_XOR_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. - * (They are not truly in place.) - */ - - - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). 
- * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. 
Creates either - * an array or a bitset container, indicated by return code - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. Creates either - * an array or a bitset container, indicated by return code. - * A bitset container will not have a valid cardinality and the - * container type might not be correct for the actual cardinality - */ - -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. - */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* INPLACE versions (initial implementation may not exploit all inplace - * opportunities (if any...) - */ - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst); - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst); -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_xor.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/containers.h */ -#ifndef CONTAINERS_CONTAINERS_H -#define CONTAINERS_CONTAINERS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - - -// would enum be possible or better? - -/** - * The switch case statements follow - * BITSET_CONTAINER_TYPE_CODE -- ARRAY_CONTAINER_TYPE_CODE -- - * RUN_CONTAINER_TYPE_CODE - * so it makes more sense to number them 1, 2, 3 (in the vague hope that the - * compiler might exploit this ordering). - */ - -#define BITSET_CONTAINER_TYPE_CODE 1 -#define ARRAY_CONTAINER_TYPE_CODE 2 -#define RUN_CONTAINER_TYPE_CODE 3 -#define SHARED_CONTAINER_TYPE_CODE 4 - -// macro for pairing container type codes -#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2)) - -/** - * A shared container is a wrapper around a container - * with reference counting. 
- */ - -struct shared_container_s { - void *container; - uint8_t typecode; - uint32_t counter; // to be managed atomically -}; - -typedef struct shared_container_s shared_container_t; - -/* - * With copy_on_write = true - * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE, - * otherwise, increase the count - * If copy_on_write = false, then clone. - * Return NULL in case of failure. - **/ -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write); - -/* Frees a shared container (actually decrement its counter and only frees when - * the counter falls to zero). */ -void shared_container_free(shared_container_t *container); - -/* extract a copy from the shared container, freeing the shared container if -there is just one instance left, -clone instances when the counter is higher than one -*/ -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode); - -/* access to container underneath */ -inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((const shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((const shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - - -/* access to container underneath */ -inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - -/* access to container underneath and queries its type */ -static inline uint8_t get_container_type(const void *container, uint8_t type) { - if (type == 
SHARED_CONTAINER_TYPE_CODE) { - return ((const shared_container_t *)container)->typecode; - } else { - return type; - } -} - -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. If the container is not shared, then it is - * physically cloned. Sharable containers are not cloneable. - */ -void *container_clone(const void *container, uint8_t typecode); - -/* access to container underneath, cloning it if needed */ -static inline void *get_writable_copy_if_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - return shared_container_extract_copy( - (shared_container_t *)candidate_shared_container, type); - } else { - return candidate_shared_container; - } -} - -/** - * End of shared container code - */ - -static const char *container_names[] = {"bitset", "array", "run", "shared"}; -static const char *shared_container_names[] = { - "bitset (shared)", "array (shared)", "run (shared)"}; - -// no matter what the initial container was, convert it to a bitset -// if a new container is produced, caller responsible for freeing the previous -// one -// container should not be a shared container -static inline void *container_to_bitset(void *container, uint8_t typecode) { - bitset_container_t *result = NULL; - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container; // nothing to do - case ARRAY_CONTAINER_TYPE_CODE: - result = - bitset_container_from_array((array_container_t *)container); - return result; - case RUN_CONTAINER_TYPE_CODE: - result = bitset_container_from_run((run_container_t *)container); - return result; - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container name from the typecode - */ -static inline const char *get_container_name(uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 
container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - return container_names[3]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } -} - -static inline const char *get_full_container_name(const void *container, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - switch (((const shared_container_t *)container)->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return shared_container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return shared_container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return shared_container_names[2]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - break; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - __builtin_unreachable(); - return NULL; -} - -/** - * Get the container cardinality (number of elements), requires a typecode - */ -static inline int container_get_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - - -// returns true if a container is known to be full. 
Note that a lazy bitset -// container -// might be full without us knowing -static inline bool container_is_full(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container) == (1 << 16); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container) == (1 << 16); - case RUN_CONTAINER_TYPE_CODE: - return run_container_is_full((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -static inline int container_shrink_to_fit(void *container, uint8_t typecode) { - container = container_mutable_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 0; // no shrinking possible - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_shrink_to_fit( - (array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_shrink_to_fit((run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - -/** - * make a container with a run of ones - */ -/* initially always use a run container, even if an array might be - * marginally - * smaller */ -static inline void *container_range_of_ones(uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - assert(range_end >= range_start); - uint64_t cardinality = range_end - range_start + 1; - if(cardinality <= 2) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return array_container_create_range(range_start, range_end); - } else { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(range_start, range_end); - } -} - - -/* Create a container with all the values between in [min,max) at a - distance k*step from min. 
*/ -static inline void *container_from_range(uint8_t *type, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return NULL; // being paranoid - if (step == 1) { - return container_range_of_ones(min,max,type); - // Note: the result is not always a run (need to check the cardinality) - //*type = RUN_CONTAINER_TYPE_CODE; - //return run_container_create_range(min, max); - } - int size = (max - min + step - 1) / step; - if (size <= DEFAULT_MAX_SIZE) { // array container - *type = ARRAY_CONTAINER_TYPE_CODE; - array_container_t *array = array_container_create_given_capacity(size); - array_container_add_from_range(array, min, max, step); - assert(array->cardinality == size); - return array; - } else { // bitset container - *type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_create(); - bitset_container_add_from_range(bitset, min, max, step); - assert(bitset->cardinality == size); - return bitset; - } -} - -/** - * "repair" the container after lazy operations. 
- */ -static inline void *container_repair_after_lazy(void *container, - uint8_t *typecode) { - container = get_writable_copy_if_shared( - container, typecode); // TODO: this introduces unnecessary cloning - void *result = NULL; - switch (*typecode) { - case BITSET_CONTAINER_TYPE_CODE: - ((bitset_container_t *)container)->cardinality = - bitset_container_compute_cardinality( - (bitset_container_t *)container); - if (((bitset_container_t *)container)->cardinality <= - DEFAULT_MAX_SIZE) { - result = array_container_from_bitset( - (const bitset_container_t *)container); - bitset_container_free((bitset_container_t *)container); - *typecode = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - return container; - case ARRAY_CONTAINER_TYPE_CODE: - return container; // nothing to do - case RUN_CONTAINER_TYPE_CODE: - return convert_run_to_efficient_container_and_free( - (run_container_t *)container, typecode); - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * container_write(container, buf). 
- * - */ -static inline int32_t container_write(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_write((const bitset_container_t *)container, buf); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_write((const array_container_t *)container, buf); - case RUN_CONTAINER_TYPE_CODE: - return run_container_write((const run_container_t *)container, buf); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container size in bytes under portable serialization (see - * container_write), requires a - * typecode - */ -static inline int32_t container_size_in_bytes(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_size_in_bytes( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_size_in_bytes( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_size_in_bytes((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * print the container (useful for debugging), requires a typecode - */ -void container_printf(const void *container, uint8_t typecode); - -/** - * print the content of the container as a comma-separated list of 32-bit values - * starting at base, requires a typecode - */ -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base); - -/** - * Checks whether a container is not empty, requires a typecode - */ -static inline bool container_nonzero_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 
bitset_container_const_nonzero_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_nonzero_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_nonzero_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Recover memory from a container, requires a typecode - */ -void container_free(void *container, uint8_t typecode); - -/** - * Convert a container to an array of values, requires a typecode as well as a - * "base" (most significant values) - * Returns number of ints added. - */ -static inline int container_to_uint32_array(uint32_t *output, - const void *container, - uint8_t typecode, uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_to_uint32_array( - output, (const bitset_container_t *)container, base); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_to_uint32_array( - output, (const array_container_t *)container, base); - case RUN_CONTAINER_TYPE_CODE: - return run_container_to_uint32_array( - output, (const run_container_t *)container, base); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Add a value to a container, requires a typecode, fills in new_typecode and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_add(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_set((bitset_container_t *)container, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return container; - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *ac = (array_container_t *)container; - if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return ac; - } else { - bitset_container_t* bitset = bitset_container_from_array(ac); - bitset_container_add(bitset, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return bitset; - } - } break; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_add((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Remove a value from a container, requires a typecode, fills in new_typecode - * and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_remove(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - if (bitset_container_remove((bitset_container_t *)container, val)) { - if (bitset_container_cardinality( - (bitset_container_t *)container) <= DEFAULT_MAX_SIZE) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return array_container_from_bitset( - (bitset_container_t *)container); - } - } - *new_typecode = typecode; - return container; - case ARRAY_CONTAINER_TYPE_CODE: - *new_typecode = typecode; - array_container_remove((array_container_t *)container, val); - return container; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_remove((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Check whether a value is in a container, requires a typecode - */ -inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get((const bitset_container_t *)container, - val); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains( - (const array_container_t *)container, val); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains((const run_container_t *)container, - val); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) - * is in a container, requires a typecode - */ -static inline bool container_contains_range(const void *container, 
uint32_t range_start, - uint32_t range_end, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get_range((const bitset_container_t *)container, - range_start, range_end); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains_range((const array_container_t *)container, - range_start, range_end); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains_range((const run_container_t *)container, - range_start, range_end); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) WARN_UNUSED; - -uint32_t container_serialization_len(const void *container, uint8_t typecode); - -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len); - -/** - * Returns true if the two containers have the same content. Note that - * two containers having different types can be "equal" in this sense. - */ -static inline bool container_equals(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_equals((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // java would always return false? 
- return array_container_equal_bitset((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // java would always return false? - return array_container_equal_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c2, - (const array_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_equals((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Returns true if the container c1 is a subset of the container c2. Note that - * c1 can be a subset of c2 even if they have a different type. 
- */ -static inline bool container_is_subset(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_is_subset((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return bitset_container_is_subset_run((const bitset_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_is_subset_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return false; // by construction, size(c1) > size(c2) - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_container_is_subset_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_container_is_subset_run((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_is_subset_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_is_subset((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_is_subset((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -// macro-izations possibilities for generic non-inplace binary-op dispatch - -/** - * Compute 
intersection between two containers, generate a new container (having - * type result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_intersection( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_container_intersection((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection((const array_container_t *)c1, - (const 
bitset_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the size of the intersection between two containers. 
- */ -static inline int container_and_cardinality(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_and_justcard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersection_cardinality( - (const array_container_t *)c1, (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersection_cardinality( - (const run_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c2, (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Check whether two containers intersect. - */ -static inline bool container_intersect(const void *c1, uint8_t type1, const void *c2, - uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_intersect( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersect((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersect((const run_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c2, - (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Compute intersection between two containers, with result in the first - container if possible. If the returned pointer is identical to c1, - then the container has been modified. If the returned pointer is different - from c1, then a new container has been created and the caller is responsible - for freeing it. - The type of the first container may change. Returns the modified - (and possibly new) container. -*/ -static inline void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = - bitset_bitset_container_intersection_inplace( - (bitset_container_t *)c1, (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_container_intersection_inplace((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - // as of January 2016, Java code used non-in-place intersection for - // two runcontainers - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // c1 is a bitmap so no inplace possible - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (array_container_t *)c1); // allowed - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - // will attempt in-place computation - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &c1) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // todo: could be optimized since will never convert to array - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; 
- } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
- */ -static inline void *container_lazy_or(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // we are being lazy - result = convert_run_to_efficient_container( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - 
RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union( - (const array_container_t *)c2, (const run_container_t *)c1, - (run_container_t *)result); // TODO make lazy - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the 
union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); -#ifdef OR_BITSET_CONVERSION_TO_FULL - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - - return c1; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - 
(const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - c1 = convert_run_to_efficient_container((run_container_t *)c1, - result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): -#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL - // if we have two bitsets, we might as well compute the cardinality - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - // it is possible that two bitsets can lead to a full container - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#else - bitset_container_or_nocard((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - *result_type = RUN_CONTAINER_TYPE_CODE; - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return c1; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, 
RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute symmetric difference (xor) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_xor( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_xor( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_xor( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c2, - (const run_container_t *)c1, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute xor between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. - */ -static inline void *container_lazy_xor(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_xor_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - // nothing special done yet. 
- *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t 
*)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_ixor( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_ixor( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_ixor( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_ixor( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_ixor( - (array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = - bitset_run_container_ixor((bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_ixor( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = array_run_container_ixor( - (array_container_t *)c1, (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_ixor( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_xor_nocard((bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - // TODO: other cases being lazy, esp. when we know inplace not likely - // could see the corresponding code for union - default: - // we may have a dirty bitset (without a precomputed cardinality) and - // calling container_ixor on it might be unsafe. - if( (type1 == BITSET_CONTAINER_TYPE_CODE) - && (((const bitset_container_t *)c1)->cardinality == BITSET_UNKNOWN_CARDINALITY)) { - ((bitset_container_t *)c1)->cardinality = bitset_container_compute_cardinality((bitset_container_t *)c1); - } - return container_ixor(c1, type1, c2, type2, result_type); - } -} - -/** - * Compute difference (andnot) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_andnot(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_andnot( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_array_container_andnot((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = - run_run_container_andnot((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_andnot( - (const bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_andnot((const array_container_t *)c1, - (const bitset_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = bitset_run_container_andnot( - (const bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_andnot( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - result = array_container_create(); - array_run_container_andnot((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_andnot( - (const run_container_t *)c1, (const array_container_t *)c2, - &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the andnot between two containers, with result in the first - * container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_iandnot( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_array_container_iandnot((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_iandnot( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_iandnot( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - - array_bitset_container_iandnot((array_container_t *)c1, - (const bitset_container_t *)c2); - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = bitset_run_container_iandnot( - (bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_iandnot( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_run_container_iandnot((array_container_t *)c1, - (const run_container_t *)c2); - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_iandnot( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Visit all values x of the container once, passing (base+x,ptr) - * to iterator. You need to specify a container and its type. - * Returns true if the iteration should continue. - */ -static inline bool container_iterate(const void *container, uint8_t typecode, - uint32_t base, roaring_iterator iterator, - void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate( - (const bitset_container_t *)container, base, iterator, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate((const array_container_t *)container, - base, iterator, ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate((const run_container_t *)container, - base, iterator, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline bool container_iterate64(const void *container, uint8_t typecode, - uint32_t base, - roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate64( - (const bitset_container_t *)container, base, iterator, - high_bits, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate64( - (const array_container_t 
*)container, base, iterator, high_bits, - ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate64((const run_container_t *)container, - base, iterator, high_bits, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline void *container_not(const void *c, uint8_t typ, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation( - (const bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((const array_container_t *)c, - (bitset_container_t *)result); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation((const run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_not_range(const void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range((const bitset_container_t *)c, - range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range((const array_container_t *)c, - range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range( - (const run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation_inplace( - (bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - // will never be inplace - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((array_container_t *)c, - (bitset_container_t *)result); - array_container_free((array_container_t *)c); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation_inplace((run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot_range(void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range_inplace( - (bitset_container_t *)c, range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range_inplace( - (array_container_t *)c, range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range_inplace( - (run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -/** - * If the element of given rank is in this container, supposing that - * the first - * element has rank start_rank, then the function returns true and - * sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool container_select(const void *container, uint8_t typecode, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_select((const bitset_container_t *)container, - start_rank, rank, element); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_select((const array_container_t *)container, - start_rank, rank, element); - case RUN_CONTAINER_TYPE_CODE: - return run_container_select((const run_container_t *)container, - start_rank, rank, element); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t container_maximum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_maximum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_maximum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_maximum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t 
container_minimum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_minimum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_minimum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_minimum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -// number of values smaller or equal to x -static inline int container_rank(const void *container, uint8_t typecode, - uint16_t x) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_rank((const bitset_container_t *)container, x); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_rank((const array_container_t *)container, x); - case RUN_CONTAINER_TYPE_CODE: - return run_container_rank((const run_container_t *)container, x); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -/** - * Add all values in range [min, max] to a given container. - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container. 
- */ -static inline void *container_add_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - // NB: when selecting new container type, we perform only inexpensive checks - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t union_cardinality = 0; - union_cardinality += bitset->cardinality; - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else if (union_cardinality <= DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); - return array; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_from_array(array); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = 
rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - - int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t); - int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - - if (run_size_bytes <= bitset_size_bytes) { - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run_range(run, min, max); - } - } - default: - __builtin_unreachable(); - } -} - -/* - * Removes all elements in range [min, max]. - * Returns one of: - * - NULL if no elements left - * - pointer to the original container - * - pointer to a newly-allocated container (if it is more efficient) - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing the original container. - */ -static inline void *container_remove_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t result_cardinality = bitset->cardinality - - bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (result_cardinality == 0) { - return NULL; - } else if (result_cardinality < DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return array_container_from_bitset(bitset); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = 
count_less(array->array, array->cardinality - nvals_greater, min); - int32_t result_cardinality = nvals_less + nvals_greater; - - if (result_cardinality == 0) { - return NULL; - } else { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_remove_range(array, nvals_less, - array->cardinality - result_cardinality); - return array; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - if (run->n_runs == 0) { - return NULL; - } - if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) { - return NULL; - } - - run_container_remove_range(run, min, max); - - if (run_container_serialized_size_in_bytes(run->n_runs) <= - bitset_container_serialized_size_in_bytes()) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run(run); - } - } - default: - __builtin_unreachable(); - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* CONTAINERS_CONTAINERS_H */ - -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/containers.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_array.h */ -#ifndef INCLUDE_ROARING_ARRAY_H -#define INCLUDE_ROARING_ARRAY_H -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define MAX_CONTAINERS 65536 - -#define SERIALIZATION_ARRAY_UINT32 1 -#define SERIALIZATION_CONTAINER 2 - -enum { - SERIAL_COOKIE_NO_RUNCONTAINER = 12346, - SERIAL_COOKIE = 12347, - NO_OFFSET_THRESHOLD = 4 -}; - -/** - * Roaring arrays are array-based key-value pairs having containers as values - * and 16-bit integer keys. A roaring bitmap might be implemented as such. - */ - -// parallel arrays. Element sizes quite different. -// Alternative is array -// of structs. Which would have better -// cache performance through binary searches? 
- -typedef struct roaring_array_s { - int32_t size; - int32_t allocation_size; - void **containers; - uint16_t *keys; - uint8_t *typecodes; -} roaring_array_t; - -/** - * Create a new roaring array - */ -roaring_array_t *ra_create(void); - -/** - * Initialize an existing roaring array with the specified capacity (in number - * of containers) - */ -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); - -/** - * Initialize with default capacity - */ -bool ra_init(roaring_array_t *t); - -/** - * Copies this roaring array, we assume that dest is not initialized - */ -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/* - * Shrinks the capacity, returns the number of bytes saved. - */ -int ra_shrink_to_fit(roaring_array_t *ra); - -/** - * Copies this roaring array, we assume that dest is initialized - */ -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/** - * Frees the memory used by a roaring array - */ -void ra_clear(roaring_array_t *r); - -/** - * Frees the memory used by a roaring array, but does not free the containers - */ -void ra_clear_without_containers(roaring_array_t *r); - -/** - * Frees just the containers - */ -void ra_clear_containers(roaring_array_t *ra); - -/** - * Get the index corresponding to a 16-bit key - */ -inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { - if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; - return binarySearch(ra->keys, (int32_t)ra->size, x); -} - -/** - * Retrieves the container at index i, filling in the typecode - */ -inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -/** - * Retrieves the key at index i - */ -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); - -/** - * Add a new key-value pair at index i - */ -void 
ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode); - -/** - * Append a new key-value pair - */ -void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode); - -/** - * Append a new key-value pair to ra, cloning (in COW sense) a value from sa - * at index index - */ -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write); - -/** - * Append new key-value pairs to ra, cloning (in COW sense) values from sa - * at indexes - * [start_index, end_index) - */ -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** appends from sa to ra, ending with the greatest key that is - * is less or equal stopping_key - */ -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write); - -/** appends from sa to ra, starting with the smallest key that is - * is strictly greater than before_start - */ - -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write); - -/** - * Move the key-value pairs to ra from sa at indexes - * [start_index, end_index), old array should not be freed - * (use ra_clear_without_containers) - **/ -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index); -/** - * Append new key-value pairs to ra, from sa at indexes - * [start_index, end_index) - */ -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** - * Set the container at the corresponding index using the specified - * typecode. 
- */ -inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i, - void *c, uint8_t typecode) { - assert(i < ra->size); - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -/** - * If needed, increase the capacity of the array so that it can fit k values - * (at - * least); - */ -bool extend_array(roaring_array_t *ra, int32_t k); - -inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } - -static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos) { - return advanceUntil(ra->keys, pos, ra->size, x); -} - -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); - -void ra_downsize(roaring_array_t *ra, int32_t new_length); - -inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, uint16_t key, - void *c, uint8_t typecode) { - assert(i < ra->size); - - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -// write set bits to an array -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); - -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); - -/** - * write a bitmap to a buffer. This is meant to be compatible with - * the - * Java and Go versions. Return the size in bytes of the serialized - * output (which should be ra_portable_size_in_bytes(ra)). - */ -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); - -/** - * read a bitmap from a serialized version. This is meant to be compatible - * with the Java and Go versions. - * maxbytes indicates how many bytes available from buf. - * When the function returns true, roaring_array_t is populated with the data - * and *readbytes indicates how many bytes were read. In all cases, if the function - * returns true, then maxbytes >= *readbytes. 
- */ -bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); - -/** - * Quickly checks whether there is a serialized bitmap at the pointer, - * not exceeding size "maxbytes" in bytes. This function does not allocate - * memory dynamically. - * - * This function returns 0 if and only if no valid bitmap is found. - * Otherwise, it returns how many bytes are occupied by the bitmap data. - */ -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); - -/** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - */ -size_t ra_portable_size_in_bytes(const roaring_array_t *ra); - -/** - * return true if it contains at least one run container. - */ -bool ra_has_run_container(const roaring_array_t *ra); - -/** - * Size of the header when serializing (meant to be compatible - * with Java and Go versions) - */ -uint32_t ra_portable_header_size(const roaring_array_t *ra); - -/** - * If the container at the index i is share, unshare it (creating a local - * copy if needed). - */ -static inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i) { - assert(i < ra->size); - ra->containers[i] = - get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]); -} - -/** - * remove at index i, sliding over all entries after i - */ -void ra_remove_at_index(roaring_array_t *ra, int32_t i); - - -/** -* clears all containers, sets the size at 0 and shrinks the memory usage. -*/ -void ra_reset(roaring_array_t *ra); - -/** - * remove at index i, sliding over all entries after i. Free removed container. 
- */ -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); - -/** - * remove a chunk of indices, sliding over entries after it - */ -// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end); - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. It is followed by a call to resize. -// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin); - -/** - * Shifts rightmost $count containers to the left (distance < 0) or - * to the right (distance > 0). - * Allocates memory if necessary. - * This function doesn't free or create new containers. - * Caller is responsible for that. - */ -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); - -#ifdef __cplusplus -} -#endif - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_array.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/misc/configreport.h */ -/* - * configreport.h - * - */ - -#ifndef INCLUDE_MISC_CONFIGREPORT_H_ -#define INCLUDE_MISC_CONFIGREPORT_H_ - -#include // for size_t -#include -#include - - -#ifdef IS_X64 -// useful for basic info (0) -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) { -#ifdef ROARING_INLINE_ASM - __asm volatile("cpuid" - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) - : "0"(*eax), "2"(*ecx)); -#endif /* not sure what to do when inline assembly is unavailable*/ -} - -// CPUID instruction takes no parameters as CPUID implicitly uses the EAX -// register. 
-// The EAX register should be loaded with a value specifying what information to -// return -static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) { -#ifdef ROARING_INLINE_ASM - __asm__ volatile("cpuid;" // call cpuid instruction - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), - "=d"(*edx) // output equal to "movl %%eax %1" - : "a"(code) // input equal to "movl %1, %%eax" - //:"%eax","%ebx","%ecx","%edx"// clobbered register - ); -#endif /* not sure what to do when inline assembly is unavailable*/ -} - -static inline int computecacheline() { - int eax = 0, ebx = 0, ecx = 0, edx = 0; - cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx); - return ecx & 0xFF; -} - -// this is quite imperfect, but can be handy -static inline const char *guessprocessor() { - unsigned eax = 1, ebx = 0, ecx = 0, edx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - const char *codename; - switch (eax >> 4) { - case 0x506E: - codename = "Skylake"; - break; - case 0x406C: - codename = "CherryTrail"; - break; - case 0x306D: - codename = "Broadwell"; - break; - case 0x306C: - codename = "Haswell"; - break; - case 0x306A: - codename = "IvyBridge"; - break; - case 0x206A: - case 0x206D: - codename = "SandyBridge"; - break; - case 0x2065: - case 0x206C: - case 0x206F: - codename = "Westmere"; - break; - case 0x106E: - case 0x106A: - case 0x206E: - codename = "Nehalem"; - break; - case 0x1067: - case 0x106D: - codename = "Penryn"; - break; - case 0x006F: - case 0x1066: - codename = "Merom"; - break; - case 0x0066: - codename = "Presler"; - break; - case 0x0063: - case 0x0064: - codename = "Prescott"; - break; - case 0x006D: - codename = "Dothan"; - break; - case 0x0366: - codename = "Cedarview"; - break; - case 0x0266: - codename = "Lincroft"; - break; - case 0x016C: - codename = "Pineview"; - break; - default: - codename = "UNKNOWN"; - break; - } - return codename; -} - -static inline void tellmeall() { - printf("Intel processor: %s\t", guessprocessor()); - -#ifdef __VERSION__ - 
printf(" compiler version: %s\t", __VERSION__); -#endif - printf("\tBuild option USEAVX "); -#ifdef USEAVX - printf("enabled\n"); -#else - printf("disabled\n"); -#endif -#ifndef __AVX2__ - printf("AVX2 is NOT available.\n"); -#endif - - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#if __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif - if (computecacheline() != 64) - printf("cache line: %d bytes\n", computecacheline()); -} -#else - -static inline void tellmeall() { - printf("Non-X64 processor\n"); -#ifdef __arm__ - printf("ARM processor detected\n"); -#endif -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#if __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif -} - -#endif - -#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/misc/configreport.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring.h */ -/* -An implementation of Roaring Bitmaps in C. 
-*/ - -#ifndef ROARING_H -#define ROARING_H -#ifdef __cplusplus -extern "C" { -#endif - -#include - -typedef struct roaring_bitmap_s { - roaring_array_t high_low_container; - bool copy_on_write; /* copy_on_write: whether you want to use copy-on-write - (saves memory and avoids - copies but needs more care in a threaded context). - Most users should ignore this flag. - Note: if you do turn this flag to 'true', enabling - COW, then ensure that you do so for all of your bitmaps since - interactions between bitmaps with and without COW is unsafe. */ -} roaring_bitmap_t; - - -void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *typecode, - int *index); -/** - * Creates a new bitmap (initially empty) - */ -roaring_bitmap_t *roaring_bitmap_create(void); - -/** - * Add all the values between min (included) and max (excluded) that are at a - * distance k*step from min. -*/ -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step); - -/** - * Creates a new bitmap (initially empty) with a provided - * container-storage capacity (it is a performance hint). - */ -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); - -/** - * Creates a new bitmap from a pointer of uint32_t integers - */ -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); - -/** - * Describe the inner structure of the bitmap. - */ -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra); - -/** - * Creates a new bitmap from a list of uint32_t integers - */ -roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); - -/** - * Copies a bitmap. This does memory allocation. The caller is responsible for - * memory management. - * - */ -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); - - -/** - * Copies a bitmap from src to dest. It is assumed that the pointer dest - * is to an already allocated bitmap. The content of the dest bitmap is - * freed/deleted. 
- * - * It might be preferable and simpler to call roaring_bitmap_copy except - * that roaring_bitmap_overwrite can save on memory allocations. - * - */ -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src); - -/** - * Print the content of the bitmap. - */ -void roaring_bitmap_printf(const roaring_bitmap_t *ra); - -/** - * Computes the intersection between two bitmaps and returns new bitmap. The - * caller is - * responsible for memory management. - * - */ -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the intersection between two bitmaps. - * - */ -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - - -/** - * Check whether two bitmaps intersect. - * - */ -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto - * distance, - * or the Jaccard similarity coefficient) - * - * The Jaccard index is undefined if both bitmaps are empty. - * - */ -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the union between two bitmaps. - * - */ -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the difference (andnot) between two bitmaps. - * - */ -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the symmetric difference (andnot) between two bitmaps. 
- * - */ -uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version modifies x1, x1 == x2 is allowed - */ -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the union between two bitmaps and returns new bitmap. The caller is - * responsible for memory management. - */ -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_or, modifies x1. TDOO: decide whether x1 == - *x2 ok - * - */ -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Compute the union of 'number' bitmaps. See also roaring_bitmap_or_many_heap. - * Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x); - -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x); - -/** - * Computes the symmetric difference (xor) between two bitmaps - * and returns new bitmap. The caller is responsible for memory management. - */ -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_xor, modifies x1. x1 != x2. - * - */ -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Compute the xor of 'number' bitmaps. - * Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x); - -/** - * Computes the difference (andnot) between two bitmaps - * and returns new bitmap. The caller is responsible for memory management. 
- */ -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_andnot, modifies x1. x1 != x2. - * - */ -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * TODO: consider implementing: - * Compute the xor of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_xor_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - * - * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, - * const roaring_bitmap_t **x); - */ - -/** - * Frees the memory. - */ -void roaring_bitmap_free(roaring_bitmap_t *r); - -/** - * Add value n_args from pointer vals, faster than repeatedly calling - * roaring_bitmap_add - * - */ -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals); - -/** - * Add value x - * - */ -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); - -/** - * Add value x - * Returns true if a new value was added, false if the value was already existing. 
- */ -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); - -/** - * Add all values in range [min, max] - */ -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); - -/** - * Add all values in range [min, max) - */ -inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_add_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); -} - -/** - * Remove value x - * - */ -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); - -/** Remove all values in range [min, max] */ -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); - -/** Remove all values in range [min, max) */ -inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_remove_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); -} - -/** Remove multiple values */ -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals); - -/** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. - */ -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); - -/** - * Check if value x is present - */ -inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. 
- */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; - - uint8_t typecode; - // next call ought to be cheap - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary search - return container_contains(container, val & 0xFFFF, typecode); -} - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) is present - */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); - -/** - * Get the cardinality of the bitmap (number of elements). - */ -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra); - -/** - * Returns number of elements in range [range_start, range_end). - */ -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, uint64_t range_end); - -/** -* Returns true if the bitmap is empty (cardinality is zero). -*/ -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra); - - -/** -* Empties the bitmap -*/ -void roaring_bitmap_clear(roaring_bitmap_t *ra); - -/** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(mybitmap) - * * sizeof(uint32_t)) - */ -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans); - - -/** - * Convert the bitmap to an array from "offset" by "limit". Write the output to "ans". - * so, you can get data in paging. 
- * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(limit) - * * sizeof(uint32_t)) - * Return false in case of failure (e.g., insufficient memory) - */ -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans); - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); - -/** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. -*/ -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); - -/** -* write the bitmap to an output pointer, this output buffer should refer to -* at least roaring_bitmap_size_in_bytes(ra) allocated bytes. -* -* see roaring_bitmap_portable_serialize if you want a format that's compatible -* with Java and Go implementations -* -* this format has the benefit of being sometimes more space efficient than -* roaring_bitmap_portable_serialize -* e.g., when the data is sparse. -* -* Returns how many bytes were written which should be -* roaring_bitmap_size_in_bytes(ra). 
-*/ -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf); - -/** use with roaring_bitmap_serialize -* see roaring_bitmap_portable_deserialize if you want a format that's -* compatible with Java and Go implementations -*/ -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); - -/** - * How many bytes are required to serialize this bitmap (NOT compatible - * with Java and Go versions) - */ -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra); - -/** - * read a bitmap from a serialized version. This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. - * This function is unsafe in the sense that if there is no valid serialized - * bitmap at the pointer, then many bytes could be read, possibly causing a buffer - * overflow. For a safer approach, - * call roaring_bitmap_portable_deserialize_safe. - */ -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); - -/** - * read a bitmap from a serialized version in a safe manner (reading up to maxbytes). - * This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. - */ -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); - -/** - * Check how many bytes would be read (up to maxbytes) at this pointer if there - * is a bitmap, returns zero if there is no valid bitmap. - * This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); - - -/** - * How many bytes are required to serialize this bitmap (meant to be compatible - * with Java and Go versions). 
See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra); - -/** - * write a bitmap to a char buffer. The output buffer should refer to at least - * roaring_bitmap_portable_size_in_bytes(ra) bytes of allocated memory. - * This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * roaring_bitmap_portable_size_in_bytes(ra). See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); - -/** - * Iterate over the bitmap elements. The function iterator is called once for - * all the values with ptr (can be NULL) as the second parameter of each call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), - * and takes (uint32_t,void*) as inputs. - * - * Returns true if the roaring_iterator returned true throughout (so that - * all data points were necessarily visited). - */ -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, - void *ptr); - -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr); - -/** - * Return true if the two bitmaps contain the same elements. - */ -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * Return true if all the elements of ra1 are also in ra2. - */ -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * Return true if all the elements of ra1 are also in ra2 and ra2 is strictly - * greater - * than ra1. - */ -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * (For expert users who seek high performance.) 
- * - * Computes the union between two bitmaps and returns new bitmap. The caller is - * responsible for memory management. - * - * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. - * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. - **/ -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion); - -/** - * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_or, modifies x1 - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. - */ -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion); - -/** - * (For expert users who seek high performance.) - * - * Execute maintenance operations on a bitmap created from - * roaring_bitmap_lazy_or - * or modified with roaring_bitmap_lazy_or_inplace. - */ -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *x1); - -/** - * Computes the symmetric difference between two bitmaps and returns new bitmap. - *The caller is - * responsible for memory management. - * - * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. - * It is safe to repeatedly call roaring_bitmap_lazy_xor_inplace on the result. - * - */ -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_xor, modifies x1. 
x1 != x2 - * - */ -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * compute the negation of the roaring bitmap within a specified - * interval: [range_start, range_end). The number of negated values is - * range_end - range_start. - * Areas outside the range are passed through unchanged. - */ - -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, uint64_t range_end); - -/** - * compute (in place) the negation of the roaring bitmap within a specified - * interval: [range_start, range_end). The number of negated values is - * range_end - range_start. - * Areas outside the range are passed through unchanged. - */ - -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end); - -/** - * If the size of the roaring bitmap is strictly greater than rank, then this - function returns true and set element to the element of given rank. - Otherwise, it returns false. - */ -bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank, - uint32_t *element); -/** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. -*/ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x); - -/** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. -*/ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm); - -/** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. -*/ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm); - -/** -* (For advanced users.) -* Collect statistics about the bitmap, see roaring_types.h for -* a description of roaring_statistics_t -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, - roaring_statistics_t *stat); - -/********************* -* What follows is code use to iterate through values in a roaring bitmap - -roaring_bitmap_t *ra =... 
-roaring_uint32_iterator_t i; -roaring_create_iterator(ra, &i); -while(i.has_value) { - printf("value = %d\n", i.current_value); - roaring_advance_uint32_iterator(&i); -} - -Obviously, if you modify the underlying bitmap, the iterator -becomes invalid. So don't. -*/ - -typedef struct roaring_uint32_iterator_s { - const roaring_bitmap_t *parent; // owner - int32_t container_index; // point to the current container index - int32_t in_container_index; // for bitset and array container, this is out - // index - int32_t run_index; // for run container, this points at the run - uint32_t in_run_index; // within a run, this is our index (points at the - // end of the current run) - - uint32_t current_value; - bool has_value; - - const void - *container; // should be: - // parent->high_low_container.containers[container_index]; - uint8_t typecode; // should be: - // parent->high_low_container.typecodes[container_index]; - uint32_t highbits; // should be: - // parent->high_low_container.keys[container_index]) << - // 16; - -} roaring_uint32_iterator_t; - -/** -* Initialize an iterator object that can be used to iterate through the -* values. If there is a value, then it->has_value is true. -* The first value is in it->current_value. The iterator traverses the values -* in increasing order. -*/ -void roaring_init_iterator(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit); - -/** -* Create an iterator object that can be used to iterate through the -* values. Caller is responsible for calling roaring_free_iterator. -* The iterator is initialized. If there is a value, then it->has_value is true. -* The first value is in it->current_value. The iterator traverses the values -* in increasing order. -* -* This function calls roaring_init_iterator. -*/ -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra); - -/** -* Advance the iterator. If there is a new value, then it->has_value is true. -* The new value is in it->current_value. 
Values are traversed in increasing -* orders. For convenience, returns it->has_value. -*/ -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); - -/** -* Move the iterator to the first value >= val. If there is a such a value, then it->has_value is true. -* The new value is in it->current_value. For convenience, returns it->has_value. -*/ -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) ; -/** -* Creates a copy of an iterator. -* Caller must free it. -*/ -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it); - -/** -* Free memory following roaring_create_iterator -*/ -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); - -/* - * Reads next ${count} values from iterator into user-supplied ${buf}. - * Returns the number of read elements. - * This number can be smaller than ${count}, which means that iterator is drained. - * - * This function satisfies semantics of iteration and can be used together with - * other iterator functions. - * - first value is copied from ${it}->current_value - * - after function returns, iterator is positioned at the next element - */ -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count); - -#ifdef __cplusplus -} -#endif - -#endif - -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring.h */ diff --git a/contrib/croaring/roaring/roaring.hh b/contrib/croaring/roaring/roaring.hh deleted file mode 100644 index 6266fff2758..00000000000 --- a/contrib/croaring/roaring/roaring.hh +++ /dev/null @@ -1,1732 +0,0 @@ -/* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! */ -#include "roaring.h" -/* begin file /opt/bitmap/CRoaring-0.2.57/cpp/roaring.hh */ -/* -A C++ header for Roaring Bitmaps. 
-*/ -#ifndef INCLUDE_ROARING_HH_ -#define INCLUDE_ROARING_HH_ - -#include - -#include -#include -#include -#include - -class RoaringSetBitForwardIterator; - -class Roaring { - public: - /** - * Create an empty bitmap - */ - Roaring() { - bool is_ok = ra_init(&roaring.high_low_container); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - roaring.copy_on_write = false; - } - - /** - * Construct a bitmap from a list of integer values. - */ - Roaring(size_t n, const uint32_t *data) : Roaring() { - roaring_bitmap_add_many(&roaring, n, data); - } - - /** - * Copy constructor - */ - Roaring(const Roaring &r) { - bool is_ok = - ra_copy(&r.roaring.high_low_container, &roaring.high_low_container, - r.roaring.copy_on_write); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - roaring.copy_on_write = r.roaring.copy_on_write; - } - - /** - * Move constructor. The moved object remains valid, i.e. - * all methods can still be called on it. - */ - Roaring(Roaring &&r) { - roaring = std::move(r.roaring); - - // left the moved object in a valid state - bool is_ok = ra_init_with_capacity(&r.roaring.high_low_container, 1); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - } - - /** - * Construct a roaring object from the C struct. - * - * Passing a NULL point is unsafe. - * the pointer to the C struct will be invalid after the call. - */ - Roaring(roaring_bitmap_t *s) { - // steal the interior struct - roaring.high_low_container = s->high_low_container; - roaring.copy_on_write = s->copy_on_write; - // deallocate the old container - free(s); - } - - /** - * Construct a bitmap from a list of integer values. - */ - static Roaring bitmapOf(size_t n, ...) 
{ - Roaring ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.add(va_arg(vl, uint32_t)); - } - va_end(vl); - return ans; - } - - /** - * Add value x - * - */ - void add(uint32_t x) { roaring_bitmap_add(&roaring, x); } - - /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. - */ - bool addChecked(uint32_t x) { - return roaring_bitmap_add_checked(&roaring, x); - } - - /** - * add if all values from x (included) to y (excluded) - */ - void addRange(const uint64_t x, const uint64_t y) { - return roaring_bitmap_add_range(&roaring, x, y); - } - - /** - * Add value n_args from pointer vals - * - */ - void addMany(size_t n_args, const uint32_t *vals) { - roaring_bitmap_add_many(&roaring, n_args, vals); - } - - /** - * Remove value x - * - */ - void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); } - - /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. - */ - bool removeChecked(uint32_t x) { - return roaring_bitmap_remove_checked(&roaring, x); - } - - /** - * Return the largest value (if not empty) - * - */ - uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); } - - /** - * Return the smallest value (if not empty) - * - */ - uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); } - - /** - * Check if value x is present - */ - bool contains(uint32_t x) const { - return roaring_bitmap_contains(&roaring, x); - } - - /** - * Check if all values from x (included) to y (excluded) are present - */ - bool containsRange(const uint64_t x, const uint64_t y) const { - return roaring_bitmap_contains_range(&roaring, x, y); - } - - /** - * Destructor - */ - ~Roaring() { ra_clear(&roaring.high_low_container); } - - /** - * Copies the content of the provided bitmap, and - * discard the current content. 
- */ - Roaring &operator=(const Roaring &r) { - ra_clear(&roaring.high_low_container); - bool is_ok = - ra_copy(&r.roaring.high_low_container, &roaring.high_low_container, - r.roaring.copy_on_write); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in assignment"); - } - roaring.copy_on_write = r.roaring.copy_on_write; - return *this; - } - - /** - * Moves the content of the provided bitmap, and - * discard the current content. - */ - Roaring &operator=(Roaring &&r) { - ra_clear(&roaring.high_low_container); - - roaring = std::move(r.roaring); - bool is_ok = ra_init_with_capacity(&r.roaring.high_low_container, 1); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in assignment"); - } - - return *this; - } - - /** - * Compute the intersection between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring &operator&=(const Roaring &r) { - roaring_bitmap_and_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the difference between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring &operator-=(const Roaring &r) { - roaring_bitmap_andnot_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the union between the current bitmap and the provided bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - * - * See also the fastunion function to aggregate many bitmaps more quickly. - */ - Roaring &operator|=(const Roaring &r) { - roaring_bitmap_or_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. 
- */ - Roaring &operator^=(const Roaring &r) { - roaring_bitmap_xor_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Exchange the content of this bitmap with another. - */ - void swap(Roaring &r) { std::swap(r.roaring, roaring); } - - /** - * Get the cardinality of the bitmap (number of elements). - */ - uint64_t cardinality() const { - return roaring_bitmap_get_cardinality(&roaring); - } - - /** - * Returns true if the bitmap is empty (cardinality is zero). - */ - bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); } - - /** - * Returns true if the bitmap is subset of the other. - */ - bool isSubset(const Roaring &r) const { - return roaring_bitmap_is_subset(&roaring, &r.roaring); - } - - /** - * Returns true if the bitmap is strict subset of the other. - */ - bool isStrictSubset(const Roaring &r) const { - return roaring_bitmap_is_strict_subset(&roaring, &r.roaring); - } - - /** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = new uint32[mybitmap.cardinality()];) - */ - void toUint32Array(uint32_t *ans) const { - roaring_bitmap_to_uint32_array(&roaring, ans); - } - /** - * to int array with pagination - * - */ - void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const { - roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans); - } - - /** - * Return true if the two bitmaps contain the same elements. - */ - bool operator==(const Roaring &r) const { - return roaring_bitmap_equals(&roaring, &r.roaring); - } - - /** - * compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. 
- */ - void flip(uint64_t range_start, uint64_t range_end) { - roaring_bitmap_flip_inplace(&roaring, range_start, range_end); - } - - /** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ - bool removeRunCompression() { - return roaring_bitmap_remove_run_compression(&roaring); - } - - /** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ - bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); } - - /** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. - */ - size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); } - - /** - * Iterate over the bitmap elements. The function iterator is called once for - * all the values with ptr (can be NULL) as the second parameter of each call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), and takes (uint32_t,void*) as inputs. - */ - void iterate(roaring_iterator iterator, void *ptr) const { - roaring_iterate(&roaring, iterator, ptr); - } - - /** - * If the size of the roaring bitmap is strictly greater than rank, then - * this function returns true and set element to the element of given rank. - * Otherwise, it returns false. - */ - bool select(uint32_t rnk, uint32_t *element) const { - return roaring_bitmap_select(&roaring, rnk, element); - } - - /** - * Computes the size of the intersection between two bitmaps. - * - */ - uint64_t and_cardinality(const Roaring &r) const { - return roaring_bitmap_and_cardinality(&roaring, &r.roaring); - } - - /** - * Check whether the two bitmaps intersect. 
- * - */ - bool intersect(const Roaring &r) const { - return roaring_bitmap_intersect(&roaring, &r.roaring); - } - - /** - * Computes the Jaccard index between two bitmaps. (Also known as the - * Tanimoto distance, - * or the Jaccard similarity coefficient) - * - * The Jaccard index is undefined if both bitmaps are empty. - * - */ - double jaccard_index(const Roaring &r) const { - return roaring_bitmap_jaccard_index(&roaring, &r.roaring); - } - - /** - * Computes the size of the union between two bitmaps. - * - */ - uint64_t or_cardinality(const Roaring &r) const { - return roaring_bitmap_or_cardinality(&roaring, &r.roaring); - } - - /** - * Computes the size of the difference (andnot) between two bitmaps. - * - */ - uint64_t andnot_cardinality(const Roaring &r) const { - return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring); - } - - /** - * Computes the size of the symmetric difference (andnot) between two - * bitmaps. - * - */ - uint64_t xor_cardinality(const Roaring &r) const { - return roaring_bitmap_xor_cardinality(&roaring, &r.roaring); - } - - /** - * Returns the number of integers that are smaller or equal to x. - */ - uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); } - - /** - * write a bitmap to a char buffer. This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * getSizeInBytes(). - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). 
- * - * Boost users can serialize bitmaps in this manner: - * - * BOOST_SERIALIZATION_SPLIT_FREE(Roaring) - * namespace boost { - * namespace serialization { - * - * template - * void save(Archive& ar, const Roaring& bitmask, - * const unsigned int version) { - * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes(); - * std::vector buffer(expected_size_in_bytes); - * std::size_t size_in_bytes = bitmask.write(buffer.data()); - * - * ar& size_in_bytes; - * ar& boost::serialization::make_binary_object(buffer.data(), - * size_in_bytes); - * } - * template - * void load(Archive& ar, Roaring& bitmask, - * const unsigned int version) { - * std::size_t size_in_bytes = 0; - * ar& size_in_bytes; - * std::vector buffer(size_in_bytes); - * ar& boost::serialization::make_binary_object(buffer.data(), - * size_in_bytes); - * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes); - *} - *} // namespace serialization - *} // namespace boost - */ - size_t write(char *buf, bool portable = true) const { - if (portable) - return roaring_bitmap_portable_serialize(&roaring, buf); - else - return roaring_bitmap_serialize(&roaring, buf); - } - - /** - * read a bitmap from a serialized version. This is meant to be compatible - * with the Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - * - * This function is unsafe in the sense that if you provide bad data, - * many, many bytes could be read. See also readSafe. - */ - static Roaring read(const char *buf, bool portable = true) { - roaring_bitmap_t * r = portable ? roaring_bitmap_portable_deserialize(buf) : roaring_bitmap_deserialize(buf); - if (r == NULL) { - throw std::runtime_error("failed alloc while reading"); - } - return Roaring(r); - } - /** - * read a bitmap from a serialized version, reading no more than maxbytes bytes. - * This is meant to be compatible with the Java and Go versions. 
- * - */ - static Roaring readSafe(const char *buf, size_t maxbytes) { - roaring_bitmap_t * r = roaring_bitmap_portable_deserialize_safe(buf,maxbytes); - if (r == NULL) { - throw std::runtime_error("failed alloc while reading"); - } - return Roaring(r); - } - /** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - size_t getSizeInBytes(bool portable = true) const { - if (portable) - return roaring_bitmap_portable_size_in_bytes(&roaring); - else - return roaring_bitmap_size_in_bytes(&roaring); - } - - /** - * Computes the intersection between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator&(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in and"); - } - return Roaring(r); - } - - /** - * Computes the difference between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator-(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in andnot"); - } - return Roaring(r); - } - - /** - * Computes the union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator|(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in or"); - } - return Roaring(r); - } - - /** - * Computes the symmetric union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. 
- */ - Roaring operator^(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in xor"); - } - return Roaring(r); - } - - /** - * Whether or not we apply copy and write. - */ - void setCopyOnWrite(bool val) { roaring.copy_on_write = val; } - - /** - * Print the content of the bitmap - */ - void printf() const { roaring_bitmap_printf(&roaring); } - - /** - * Print the content of the bitmap into a string - */ - std::string toString() const { - struct iter_data { - std::string str; - char first_char = '{'; - } outer_iter_data; - if (!isEmpty()) { - iterate( - [](uint32_t value, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(value); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; - } - - /** - * Whether or not copy and write is active. - */ - bool getCopyOnWrite() const { return roaring.copy_on_write; } - - /** - * computes the logical or (union) between "n" bitmaps (referenced by a - * pointer). - */ - static Roaring fastunion(size_t n, const Roaring **inputs) { - const roaring_bitmap_t **x = - (const roaring_bitmap_t **)malloc(n * sizeof(roaring_bitmap_t *)); - if (x == NULL) { - throw std::runtime_error("failed memory alloc in fastunion"); - } - for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring; - - roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x); - if (c_ans == NULL) { - free(x); - throw std::runtime_error("failed memory alloc in fastunion"); - } - Roaring ans(c_ans); - free(x); - return ans; - } - - typedef RoaringSetBitForwardIterator const_iterator; - - /** - * Returns an iterator that can be used to access the position of the - * set bits. 
The running time complexity of a full scan is proportional to - * the - * number - * of set bits: be aware that if you have long strings of 1s, this can be - * very inefficient. - * - * It can be much faster to use the toArray method if you want to - * retrieve the set bits. - */ - const_iterator begin() const; - - /** - * A bogus iterator that can be used together with begin() - * for constructions such as for(auto i = b.begin(); - * i!=b.end(); ++i) {} - */ - const_iterator &end() const; - - roaring_bitmap_t roaring; -}; - -/** - * Used to go through the set bits. Not optimally fast, but convenient. - */ -class RoaringSetBitForwardIterator final { - public: - typedef std::forward_iterator_tag iterator_category; - typedef uint32_t *pointer; - typedef uint32_t &reference_type; - typedef uint32_t value_type; - typedef int32_t difference_type; - typedef RoaringSetBitForwardIterator type_of_iterator; - - /** - * Provides the location of the set bit. - */ - value_type operator*() const { return i.current_value; } - - bool operator<(const type_of_iterator &o) { - if (!i.has_value) return false; - if (!o.i.has_value) return true; - return i.current_value < *o; - } - - bool operator<=(const type_of_iterator &o) { - if (!o.i.has_value) return true; - if (!i.has_value) return false; - return i.current_value <= *o; - } - - bool operator>(const type_of_iterator &o) { - if (!o.i.has_value) return false; - if (!i.has_value) return true; - return i.current_value > *o; - } - - bool operator>=(const type_of_iterator &o) { - if (!i.has_value) return true; - if (!o.i.has_value) return false; - return i.current_value >= *o; - } - - /** - * Move the iterator to the first value >= val. - */ - void equalorlarger(uint32_t val) { - roaring_move_uint32_iterator_equalorlarger(&i,val); - } - - type_of_iterator &operator++() { // ++i, must returned inc. value - roaring_advance_uint32_iterator(&i); - return *this; - } - - type_of_iterator operator++(int) { // i++, must return orig. 
value - RoaringSetBitForwardIterator orig(*this); - roaring_advance_uint32_iterator(&i); - return orig; - } - - bool operator==(const RoaringSetBitForwardIterator &o) const { - return i.current_value == *o && i.has_value == o.i.has_value; - } - - bool operator!=(const RoaringSetBitForwardIterator &o) const { - return i.current_value != *o || i.has_value != o.i.has_value; - } - - RoaringSetBitForwardIterator(const Roaring &parent, - bool exhausted = false) { - if (exhausted) { - i.parent = &parent.roaring; - i.container_index = INT32_MAX; - i.has_value = false; - i.current_value = UINT32_MAX; - } else { - roaring_init_iterator(&parent.roaring, &i); - } - } - - RoaringSetBitForwardIterator &operator=( - const RoaringSetBitForwardIterator &o) = default; - RoaringSetBitForwardIterator &operator=(RoaringSetBitForwardIterator &&o) = - default; - - ~RoaringSetBitForwardIterator() = default; - - RoaringSetBitForwardIterator(const RoaringSetBitForwardIterator &o) - : i(o.i) {} - - roaring_uint32_iterator_t i; -}; - -inline RoaringSetBitForwardIterator Roaring::begin() const { - return RoaringSetBitForwardIterator(*this); -} - -inline RoaringSetBitForwardIterator &Roaring::end() const { - static RoaringSetBitForwardIterator e(*this, true); - return e; -} - -#endif /* INCLUDE_ROARING_HH_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/cpp/roaring.hh */ -/* begin file /opt/bitmap/CRoaring-0.2.57/cpp/roaring64map.hh */ -/* -A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many -32-bit Roaring Bitmaps. -*/ -#ifndef INCLUDE_ROARING_64_MAP_HH_ -#define INCLUDE_ROARING_64_MAP_HH_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -class Roaring64MapSetBitForwardIterator; - -class Roaring64Map { - public: - /** - * Create an empty bitmap - */ - Roaring64Map() = default; - - /** - * Construct a bitmap from a list of 32-bit integer values. 
- */ - Roaring64Map(size_t n, const uint32_t *data) { addMany(n, data); } - - /** - * Construct a bitmap from a list of 64-bit integer values. - */ - Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); } - - /** - * Copy constructor - */ - Roaring64Map(const Roaring64Map &r) = default; - - /** - * Move constructor - */ - Roaring64Map(Roaring64Map &&r) = default; - - /** - * Construct a 64-bit map from a 32-bit one - */ - Roaring64Map(const Roaring &r) { emplaceOrInsert(0, r); } - - /** - * Construct a roaring object from the C struct. - * - * Passing a NULL point is unsafe. - */ - Roaring64Map(roaring_bitmap_t *s) { emplaceOrInsert(0, s); } - - /** - * Construct a bitmap from a list of integer values. - */ - static Roaring64Map bitmapOf(size_t n...) { - Roaring64Map ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.add(va_arg(vl, uint64_t)); - } - va_end(vl); - return ans; - } - - /** - * Add value x - * - */ - void add(uint32_t x) { - roarings[0].add(x); - roarings[0].setCopyOnWrite(copyOnWrite); - } - void add(uint64_t x) { - roarings[highBytes(x)].add(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - } - - /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. 
- */ - bool addChecked(uint32_t x) { - bool result = roarings[0].addChecked(x); - roarings[0].setCopyOnWrite(copyOnWrite); - return result; - } - bool addChecked(uint64_t x) { - bool result = roarings[highBytes(x)].addChecked(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - return result; - } - - /** - * Add value n_args from pointer vals - * - */ - void addMany(size_t n_args, const uint32_t *vals) { - for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[0].add(vals[lcv]); - roarings[0].setCopyOnWrite(copyOnWrite); - } - } - void addMany(size_t n_args, const uint64_t *vals) { - for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); - roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); - } - } - - /** - * Remove value x - * - */ - void remove(uint32_t x) { roarings[0].remove(x); } - void remove(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - roaring_iter->second.remove(lowBytes(x)); - } - - /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. 
- */ - bool removeChecked(uint32_t x) { - return roarings[0].removeChecked(x); - } - bool removeChecked(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - return roaring_iter->second.removeChecked(lowBytes(x)); - return false; - } - - /** - * Return the largest value (if not empty) - * - */ - uint64_t maximum() const { - for (auto roaring_iter = roarings.crbegin(); - roaring_iter != roarings.crend(); ++roaring_iter) { - if (!roaring_iter->second.isEmpty()) { - return uniteBytes(roaring_iter->first, - roaring_iter->second.maximum()); - } - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return (std::numeric_limits::min)(); - } - - /** - * Return the smallest value (if not empty) - * - */ - uint64_t minimum() const { - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roarings.cend(); ++roaring_iter) { - if (!roaring_iter->second.isEmpty()) { - return uniteBytes(roaring_iter->first, - roaring_iter->second.minimum()); - } - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return (std::numeric_limits::max)(); - } - - /** - * Check if value x is present - */ - bool contains(uint32_t x) const { - return roarings.count(0) == 0 ? false : roarings.at(0).contains(x); - } - bool contains(uint64_t x) const { - return roarings.count(highBytes(x)) == 0 - ? false - : roarings.at(highBytes(x)).contains(lowBytes(x)); - } - - /** - * Destructor - */ - ~Roaring64Map() = default; - - /** - * Copies the content of the provided bitmap, and - * discards the current content. - */ - Roaring64Map &operator=(const Roaring64Map &r) { - roarings = r.roarings; - copyOnWrite = r.copyOnWrite; - return *this; - } - - /** - * Moves the content of the provided bitmap, and - * discards the current content. 
- */ - Roaring64Map &operator=(Roaring64Map &&r) { - roarings = std::move(r.roarings); - copyOnWrite = r.copyOnWrite; - return *this; - } - - /** - * Compute the intersection between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring64Map &operator&=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second &= r.roarings.at(map_entry.first); - else - map_entry.second = Roaring(); - } - return *this; - } - - /** - * Compute the difference between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring64Map &operator-=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second -= r.roarings.at(map_entry.first); - } - return *this; - } - - /** - * Compute the union between the current bitmap and the provided bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - * - * See also the fastunion function to aggregate many bitmaps more quickly. - */ - Roaring64Map &operator|=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] |= map_entry.second; - } - return *this; - } - - /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. 
- */ - Roaring64Map &operator^=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] ^= map_entry.second; - } - return *this; - } - - /** - * Exchange the content of this bitmap with another. - */ - void swap(Roaring64Map &r) { roarings.swap(r.roarings); } - - /** - * Get the cardinality of the bitmap (number of elements). - * Throws std::length_error in the special case where the bitmap is full - * (cardinality() == 2^64). Check isFull() before calling to avoid - * exception. - */ - uint64_t cardinality() const { - if (isFull()) { - throw std::length_error( - "bitmap is full, cardinality is 2^64, " - "unable to represent in a 64-bit integer"); - } - return std::accumulate( - roarings.cbegin(), roarings.cend(), (uint64_t)0, - [](uint64_t previous, - const std::pair &map_entry) { - return previous + map_entry.second.cardinality(); - }); - } - - /** - * Returns true if the bitmap is empty (cardinality is zero). - */ - bool isEmpty() const { - return std::all_of(roarings.cbegin(), roarings.cend(), - [](const std::pair &map_entry) { - return map_entry.second.isEmpty(); - }); - } - - /** - * Returns true if the bitmap is full (cardinality is max uint64_t + 1). - */ - bool isFull() const { - // only bother to check if map is fully saturated - // - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return roarings.size() == - ((size_t)(std::numeric_limits::max)()) + 1 - ? 
std::all_of( - roarings.cbegin(), roarings.cend(), - [](const std::pair &roaring_map_entry) { - // roarings within map are saturated if cardinality - // is uint32_t max + 1 - return roaring_map_entry.second.cardinality() == - ((uint64_t) - (std::numeric_limits::max)()) + - 1; - }) - : false; - } - - /** - * Returns true if the bitmap is subset of the other. - */ - bool isSubset(const Roaring64Map &r) const { - for (const auto &map_entry : roarings) { - auto roaring_iter = r.roarings.find(map_entry.first); - if (roaring_iter == roarings.cend()) - return false; - else if (!map_entry.second.isSubset(roaring_iter->second)) - return false; - } - return true; - } - - /** - * Returns true if the bitmap is strict subset of the other. - * Throws std::length_error in the special case where the bitmap is full - * (cardinality() == 2^64). Check isFull() before calling to avoid exception. - */ - bool isStrictSubset(const Roaring64Map &r) const { - return isSubset(r) && cardinality() != r.cardinality(); - } - - /** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = new uint32[mybitmap.cardinality()];) - */ - void toUint64Array(uint64_t *ans) const { - // Annoyingly, VS 2017 marks std::accumulate() as [[nodiscard]] - (void)std::accumulate(roarings.cbegin(), roarings.cend(), ans, - [](uint64_t *previous, - const std::pair &map_entry) { - for (uint32_t low_bits : map_entry.second) - *previous++ = - uniteBytes(map_entry.first, low_bits); - return previous; - }); - } - - /** - * Return true if the two bitmaps contain the same elements. 
- */ - bool operator==(const Roaring64Map &r) const { - // we cannot use operator == on the map because either side may contain - // empty Roaring Bitmaps - auto lhs_iter = roarings.cbegin(); - auto rhs_iter = r.roarings.cbegin(); - do { - // if the left map has reached its end, ensure that the right map - // contains only empty Bitmaps - if (lhs_iter == roarings.cend()) { - while (rhs_iter != r.roarings.cend()) { - if (rhs_iter->second.isEmpty()) { - ++rhs_iter; - continue; - } - return false; - } - return true; - } - // if the left map has an empty bitmap, skip it - if (lhs_iter->second.isEmpty()) { - ++lhs_iter; - continue; - } - - do { - // if the right map has reached its end, ensure that the right - // map contains only empty Bitmaps - if (rhs_iter == r.roarings.cend()) { - while (lhs_iter != roarings.cend()) { - if (lhs_iter->second.isEmpty()) { - ++lhs_iter; - continue; - } - return false; - } - return true; - } - // if the right map has an empty bitmap, skip it - if (rhs_iter->second.isEmpty()) { - ++rhs_iter; - continue; - } - } while (false); - // if neither map has reached its end ensure elements are equal and - // move to the next element in both - } while (lhs_iter++->second == rhs_iter++->second); - return false; - } - - /** - * compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. 
- */ - void flip(uint64_t range_start, uint64_t range_end) { - uint32_t start_high = highBytes(range_start); - uint32_t start_low = lowBytes(range_start); - uint32_t end_high = highBytes(range_end); - uint32_t end_low = lowBytes(range_end); - - if (start_high == end_high) { - roarings[start_high].flip(start_low, end_low); - return; - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - roarings[start_high].flip(start_low, - (std::numeric_limits::max)()); - roarings[start_high++].setCopyOnWrite(copyOnWrite); - - for (; start_high <= highBytes(range_end) - 1; ++start_high) { - roarings[start_high].flip((std::numeric_limits::min)(), - (std::numeric_limits::max)()); - roarings[start_high].setCopyOnWrite(copyOnWrite); - } - - roarings[start_high].flip((std::numeric_limits::min)(), - end_low); - roarings[start_high].setCopyOnWrite(copyOnWrite); - } - - /** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ - bool removeRunCompression() { - return std::accumulate( - roarings.begin(), roarings.end(), false, - [](bool previous, std::pair &map_entry) { - return map_entry.second.removeRunCompression() && previous; - }); - } - - /** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ - bool runOptimize() { - return std::accumulate( - roarings.begin(), roarings.end(), false, - [](bool previous, std::pair &map_entry) { - return map_entry.second.runOptimize() && previous; - }); - } - - /** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. 
- */ - size_t shrinkToFit() { - size_t savedBytes = 0; - auto iter = roarings.begin(); - while (iter != roarings.cend()) { - if (iter->second.isEmpty()) { - // empty Roarings are 84 bytes - savedBytes += 88; - roarings.erase(iter++); - } else { - savedBytes += iter->second.shrinkToFit(); - iter++; - } - } - return savedBytes; - } - - /** - * Iterate over the bitmap elements. The function iterator is called once - * for all the values with ptr (can be NULL) as the second parameter of each - * call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), and takes (uint32_t,void*) as inputs. - */ - void iterate(roaring_iterator64 iterator, void *ptr) const { - std::for_each(roarings.begin(), roarings.cend(), - [=](const std::pair &map_entry) { - roaring_iterate64(&map_entry.second.roaring, iterator, - uint64_t(map_entry.first) << 32, - ptr); - }); - } - - /** - * If the size of the roaring bitmap is strictly greater than rank, then - this - function returns true and set element to the element of given rank. - Otherwise, it returns false. - */ - bool select(uint64_t rnk, uint64_t *element) const { - for (const auto &map_entry : roarings) { - uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality(); - if (rnk < sub_cardinality) { - *element = ((uint64_t)map_entry.first) << 32; - // assuming little endian - return map_entry.second.select((uint32_t)rnk, - ((uint32_t *)element)); - } - rnk -= sub_cardinality; - } - return false; - } - - /** - * Returns the number of integers that are smaller or equal to x. 
- */ - uint64_t rank(uint64_t x) const { - uint64_t result = 0; - auto roaring_destination = roarings.find(highBytes(x)); - if (roaring_destination != roarings.cend()) { - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); - } - result += roaring_destination->second.rank(lowBytes(x)); - return result; - } - roaring_destination = roarings.lower_bound(highBytes(x)); - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); - } - return result; - } - - /** - * write a bitmap to a char buffer. This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * getSizeInBytes(). - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - size_t write(char *buf, bool portable = true) const { - const char *orig = buf; - // push map size - *((uint64_t *)buf) = roarings.size(); - buf += sizeof(uint64_t); - std::for_each( - roarings.cbegin(), roarings.cend(), - [&buf, portable](const std::pair &map_entry) { - // push map key - memcpy(buf, &map_entry.first, - sizeof(uint32_t)); // this is undefined: - // *((uint32_t*)buf) = - // map_entry.first; - buf += sizeof(uint32_t); - // push map value Roaring - buf += map_entry.second.write(buf, portable); - }); - return buf - orig; - } - - /** - * read a bitmap from a serialized version. This is meant to be compatible - * with - * the - * Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - * - * This function is unsafe in the sense that if you provide bad data, - * many bytes could be read, possibly causing a buffer overflow. See also readSafe. 
- */ - static Roaring64Map read(const char *buf, bool portable = true) { - Roaring64Map result; - // get map size - uint64_t map_size = *((uint64_t *)buf); - buf += sizeof(uint64_t); - for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key - uint32_t key; - memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t - // key = *((uint32_t*)buf); - buf += sizeof(uint32_t); - // read map value Roaring - Roaring read = Roaring::read(buf, portable); - result.emplaceOrInsert(key, read); - // forward buffer past the last Roaring Bitmap - buf += read.getSizeInBytes(portable); - } - return result; - } - - /** - * read a bitmap from a serialized version, reading no more than maxbytes bytes. - * This is meant to be compatible with the Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - static Roaring64Map readSafe(const char *buf, size_t maxbytes) { - Roaring64Map result; - // get map size - uint64_t map_size = *((uint64_t *)buf); - buf += sizeof(uint64_t); - for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key - if(maxbytes < sizeof(uint32_t)) { - throw std::runtime_error("ran out of bytes"); - } - uint32_t key; - memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t - // key = *((uint32_t*)buf); - buf += sizeof(uint32_t); - maxbytes -= sizeof(uint32_t); - // read map value Roaring - Roaring read = Roaring::readSafe(buf, maxbytes); - result.emplaceOrInsert(key, read); - // forward buffer past the last Roaring Bitmap - size_t tz = read.getSizeInBytes(true); - buf += tz; - maxbytes -= tz; - } - return result; - } - - /** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). 
- */ - size_t getSizeInBytes(bool portable = true) const { - // start with, respectively, map size and size of keys for each map - // entry - return std::accumulate( - roarings.cbegin(), roarings.cend(), - sizeof(uint64_t) + roarings.size() * sizeof(uint32_t), - [=](size_t previous, - const std::pair &map_entry) { - // add in bytes used by each Roaring - return previous + map_entry.second.getSizeInBytes(portable); - }); - } - - /** - * Computes the intersection between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator&(const Roaring64Map &o) const { - return Roaring64Map(*this) &= o; - } - - /** - * Computes the difference between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator-(const Roaring64Map &o) const { - return Roaring64Map(*this) -= o; - } - - /** - * Computes the union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator|(const Roaring64Map &o) const { - return Roaring64Map(*this) |= o; - } - - /** - * Computes the symmetric union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator^(const Roaring64Map &o) const { - return Roaring64Map(*this) ^= o; - } - - /** - * Whether or not we apply copy and write. 
- */ - void setCopyOnWrite(bool val) { - if (copyOnWrite == val) return; - copyOnWrite = val; - std::for_each(roarings.begin(), roarings.end(), - [=](std::pair &map_entry) { - map_entry.second.setCopyOnWrite(val); - }); - } - - /** - * Print the content of the bitmap - */ - void printf() const { - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - struct iter_data { - uint32_t high_bits; - char first_char = '{'; - } outer_iter_data; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - std::printf("%c%llu", - ((iter_data *)inner_iter_data)->first_char, - (long long unsigned)uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [](const std::pair &map_entry) { - map_entry.second.iterate( - [](uint32_t low_bits, void *high_bits) -> bool { - std::printf(",%llu", - (long long unsigned)uniteBytes( - *(uint32_t *)high_bits, low_bits)); - return true; - }, - (void *)&map_entry.first); - }); - } else - std::printf("{"); - std::printf("}\n"); - } - - /** - * Print the content of the bitmap into a string - */ - std::string toString() const { - struct iter_data { - std::string str; - uint32_t high_bits; - char first_char = '{'; - } outer_iter_data; - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += std::to_string( - uniteBytes(((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, 
- (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [&outer_iter_data]( - const std::pair &map_entry) { - outer_iter_data.high_bits = map_entry.first; - map_entry.second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - return true; - }, - (void *)&outer_iter_data); - }); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; - } - - /** - * Whether or not copy and write is active. - */ - bool getCopyOnWrite() const { return copyOnWrite; } - - /** - * computes the logical or (union) between "n" bitmaps (referenced by a - * pointer). - */ - static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { - Roaring64Map ans; - // not particularly fast - for (size_t lcv = 0; lcv < n; ++lcv) { - ans |= *(inputs[lcv]); - } - return ans; - } - - friend class Roaring64MapSetBitForwardIterator; - typedef Roaring64MapSetBitForwardIterator const_iterator; - - /** - * Returns an iterator that can be used to access the position of the - * set bits. The running time complexity of a full scan is proportional to - * the - * number - * of set bits: be aware that if you have long strings of 1s, this can be - * very inefficient. - * - * It can be much faster to use the toArray method if you want to - * retrieve the set bits. 
- */ - const_iterator begin() const; - - /** - * A bogus iterator that can be used together with begin() - * for constructions such as for(auto i = b.begin(); - * i!=b.end(); ++i) {} - */ - const_iterator end() const; - - private: - std::map roarings; - bool copyOnWrite = false; - static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } - static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } - static uint64_t uniteBytes(const uint32_t highBytes, - const uint32_t lowBytes) { - return (uint64_t(highBytes) << 32) | uint64_t(lowBytes); - } - // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace - // prior to version 4.8 - void emplaceOrInsert(const uint32_t key, const Roaring &value) { -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20130322 - roarings.insert(std::make_pair(key, value)); -#else - roarings.emplace(std::make_pair(key, value)); -#endif - } -}; - -/** - * Used to go through the set bits. Not optimally fast, but convenient. - */ -class Roaring64MapSetBitForwardIterator final { - public: - typedef std::forward_iterator_tag iterator_category; - typedef uint64_t *pointer; - typedef uint64_t &reference_type; - typedef uint64_t value_type; - typedef int64_t difference_type; - typedef Roaring64MapSetBitForwardIterator type_of_iterator; - - /** - * Provides the location of the set bit. 
- */ - value_type operator*() const { - return Roaring64Map::uniteBytes(map_iter->first, i.current_value); - } - - bool operator<(const type_of_iterator &o) { - if (map_iter == map_end) return false; - if (o.map_iter == o.map_end) return true; - return **this < *o; - } - - bool operator<=(const type_of_iterator &o) { - if (o.map_iter == o.map_end) return true; - if (map_iter == map_end) return false; - return **this <= *o; - } - - bool operator>(const type_of_iterator &o) { - if (o.map_iter == o.map_end) return false; - if (map_iter == map_end) return true; - return **this > *o; - } - - bool operator>=(const type_of_iterator &o) { - if (map_iter == map_end) return true; - if (o.map_iter == o.map_end) return false; - return **this >= *o; - } - - type_of_iterator &operator++() { // ++i, must returned inc. value - if (i.has_value == true) roaring_advance_uint32_iterator(&i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return *this; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - return *this; - } - - type_of_iterator operator++(int) { // i++, must return orig. 
value - Roaring64MapSetBitForwardIterator orig(*this); - roaring_advance_uint32_iterator(&i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return orig; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - return orig; - } - - bool operator==(const Roaring64MapSetBitForwardIterator &o) { - if (map_iter == map_end && o.map_iter == o.map_end) return true; - if (o.map_iter == o.map_end) return false; - return **this == *o; - } - - bool operator!=(const Roaring64MapSetBitForwardIterator &o) { - if (map_iter == map_end && o.map_iter == o.map_end) return false; - if (o.map_iter == o.map_end) return true; - return **this != *o; - } - - Roaring64MapSetBitForwardIterator(const Roaring64Map &parent, - bool exhausted = false) - : map_end(parent.roarings.cend()) { - if (exhausted || parent.roarings.empty()) { - map_iter = parent.roarings.cend(); - } else { - map_iter = parent.roarings.cbegin(); - roaring_init_iterator(&map_iter->second.roaring, &i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - } - } - - ~Roaring64MapSetBitForwardIterator() = default; - - Roaring64MapSetBitForwardIterator( - const Roaring64MapSetBitForwardIterator &o) = default; - - private: - std::map::const_iterator map_iter; - std::map::const_iterator map_end; - roaring_uint32_iterator_t i; -}; - -inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const { - return Roaring64MapSetBitForwardIterator(*this); -} - -inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const { - return Roaring64MapSetBitForwardIterator(*this, true); -} - -#endif /* INCLUDE_ROARING_64_MAP_HH_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/cpp/roaring64map.hh */ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index ed1346043bc..c8bbfcf870f 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -125,7 +125,7 @@ function clone_submodules ( cd 
"$FASTTEST_SOURCE" -SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11) +SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring) git submodule sync git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index d457d01f523..20788318b3d 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -12,7 +12,7 @@ // TODO: find out what it is. 
On github, they have proper interface headers like // this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h #include - +void * containerptr_roaring_bitmap_add(roaring_bitmap_t * r, uint32_t val, uint8_t * typecode, int * index); namespace DB { @@ -600,6 +600,7 @@ public: } private: + /// To read and write the DB Buffer directly, migrate code from CRoaring void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args) { From d17a49f127b59348299963fde4fb7368007f4f8f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 23 Oct 2020 15:20:07 +0300 Subject: [PATCH 130/174] Add more context to error messages --- src/Interpreters/InDepthNodeVisitor.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h index 3e0a8e16185..7b537f0daa0 100644 --- a/src/Interpreters/InDepthNodeVisitor.h +++ b/src/Interpreters/InDepthNodeVisitor.h @@ -29,7 +29,15 @@ public: if constexpr (!_top_to_bottom) visitChildren(ast); - Matcher::visit(ast, data); + try + { + Matcher::visit(ast, data); + } + catch (Exception & e) + { + e.addMessage("While processing {}", ast->formatForErrorMessage()); + throw; + } if constexpr (_top_to_bottom) visitChildren(ast); From 337fe7b81fd3bbb7d59ef7af33227858cc3b9252 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 23 Oct 2020 15:53:12 +0300 Subject: [PATCH 131/174] add comment --- src/Interpreters/InterpreterDropQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index a250ab1afd4..e8ad104851f 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -60,6 +60,7 @@ BlockIO InterpreterDropQuery::execute() BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) { + /// NOTE: it does not contain UUID, we will resolve it 
with locked DDLGuard auto table_id = StorageID(query); if (query.temporary || table_id.database_name.empty()) { @@ -88,6 +89,7 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) if (query_ptr->as().is_view && !table->isView()) throw Exception("Table " + table_id.getNameForLogs() + " is not a View", ErrorCodes::LOGICAL_ERROR); + /// Now get UUID, so we can wait for table data to be finally dropped table_id = table->getStorageID(); if (query.kind == ASTDropQuery::Kind::Detach) From a73a908264383a4a9858ced6cd78cf070e40b255 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 16:31:00 +0300 Subject: [PATCH 132/174] Optionally upload clickhouse binary --- docker/test/fasttest/Dockerfile | 1 + docker/test/fasttest/run.sh | 3 +++ 2 files changed, 4 insertions(+) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 6547a98c58b..a701357e025 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -79,6 +79,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' +ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index ed1346043bc..e7584257efc 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -163,6 +163,9 @@ function build ( cd "$FASTTEST_BUILD" time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" +if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then + cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" +fi ccache --show-stats ||: ) } From f0ef7dd1e90c35c6c0c8f941f877059fe32d98fb Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 16:31:45 +0300 Subject: [PATCH 133/174] Change default to zero --- docker/test/fasttest/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fasttest/Dockerfile 
b/docker/test/fasttest/Dockerfile index a701357e025..0fff738e718 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -79,7 +79,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' -ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 +ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=0 COPY run.sh / CMD ["/bin/bash", "/run.sh"] From 84b453f4be7ac7d2dc486ff2c923691a0e936979 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 23 Oct 2020 17:19:02 +0300 Subject: [PATCH 134/174] Update InterpreterDropQuery.cpp --- src/Interpreters/InterpreterDropQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index e8ad104851f..61ae707927a 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -90,7 +90,7 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) throw Exception("Table " + table_id.getNameForLogs() + " is not a View", ErrorCodes::LOGICAL_ERROR); /// Now get UUID, so we can wait for table data to be finally dropped - table_id = table->getStorageID(); + table_id.uuid = database->tryGetTableUUID(table_id.table_name); if (query.kind == ASTDropQuery::Kind::Detach) { From b0a14a41c384f3544405d848bb46be1eb3dc5025 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 23 Oct 2020 17:28:55 +0300 Subject: [PATCH 135/174] fixup --- src/Functions/if.cpp | 72 +++-- src/Interpreters/ExpressionActions.cpp | 2 + .../00921_datetime64_compatibility.reference | 302 +++++++++--------- .../00921_datetime64_compatibility.sh | 2 +- 4 files changed, 194 insertions(+), 184 deletions(-) diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index dd67f922ddf..8f7647a8899 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -154,22 +154,6 @@ struct NumIfImpl, Decimal, Decimal> } }; -template -struct NumIfImpl -{ 
-private: - [[noreturn]] static void throwError() - { - throw Exception("Invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } -public: - template static void vectorVector(Args &&...) { throwError(); } - template static void vectorConstant(Args &&...) { throwError(); } - template static void constantVector(Args &&...) { throwError(); } - template static void constantConstant(Args &&...) { throwError(); } -}; - - class FunctionIf : public FunctionIfBase { public: @@ -205,17 +189,29 @@ private: const IColumn * col_right_untyped = columns[arguments[2]].column.get(); UInt32 scale = decimalScale(columns, arguments); - if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) + if constexpr (std::is_same_v) { - NumIfImpl::vectorVector( - cond_col->getData(), col_left->getData(), col_right_vec->getData(), columns, result, scale); - return true; + const auto & arg_left = columns[arguments[1]]; + const auto & arg_right = columns[arguments[2]]; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Incompatible types of arguments of function {}:" + " '{}' and '{}'", getName(), arg_left.type->getName(), + arg_right.type->getName()); } - else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) + else { - NumIfImpl::vectorConstant( - cond_col->getData(), col_left->getData(), col_right_const->template getValue(), columns, result, scale); - return true; + if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) + { + NumIfImpl::vectorVector( + cond_col->getData(), col_left->getData(), col_right_vec->getData(), columns, result, scale); + return true; + } + else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) + { + NumIfImpl::vectorConstant( + cond_col->getData(), col_left->getData(), col_right_const->template getValue(), columns, result, scale); + return true; + } } return false; @@ -234,17 +230,29 @@ private: const IColumn * col_right_untyped = 
columns[arguments[2]].column.get(); UInt32 scale = decimalScale(columns, arguments); - if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) + if constexpr (std::is_same_v) { - NumIfImpl::constantVector( - cond_col->getData(), col_left->template getValue(), col_right_vec->getData(), columns, result, scale); - return true; + const auto & arg_left = columns[arguments[1]]; + const auto & arg_right = columns[arguments[2]]; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Incompatible types of arguments of function {}:" + " '{}' and '{}'", getName(), arg_left.type->getName(), + arg_right.type->getName()); } - else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) + else { - NumIfImpl::constantConstant( - cond_col->getData(), col_left->template getValue(), col_right_const->template getValue(), columns, result, scale); - return true; + if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) + { + NumIfImpl::constantVector( + cond_col->getData(), col_left->template getValue(), col_right_vec->getData(), columns, result, scale); + return true; + } + else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) + { + NumIfImpl::constantConstant( + cond_col->getData(), col_left->template getValue(), col_right_const->template getValue(), columns, result, scale); + return true; + } } return false; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 1bb2fd8e96b..6432ae90809 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -316,6 +316,7 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, { auto & result = sample_block.getByName(result_name); result.type = result_type; + result.name = result_name; result.column = source.column; } else @@ -1561,6 +1562,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( ColumnWithTypeAndName argument; argument.column = 
child.column; argument.type = child.result_type; + argument.name = child.result_name; if (!argument.column || !isColumnConst(*argument.column)) all_const = false; diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility.reference b/tests/queries/0_stateless/00921_datetime64_compatibility.reference index a42517104b9..398da88e460 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility.reference @@ -1,6 +1,6 @@ SELECT toTimeZone(N, \'UTC\') -Code: 43: Illegal type Date of argument of function toTimeZone. Should be DateTime or DateTime64. +Code: 43 "DateTime('UTC')","2019-09-16 16:20:11" "DateTime64(3, 'UTC')","2019-09-16 16:20:11.234" ------------------------------------------ @@ -36,19 +36,19 @@ SELECT toDayOfWeek(N) ------------------------------------------ SELECT toHour(N) -Code: 43: Illegal type Date of argument for function toHour. +Code: 43 "UInt8",19 "UInt8",19 ------------------------------------------ SELECT toMinute(N) -Code: 43: Illegal type Date of argument for function toMinute. +Code: 43 "UInt8",20 "UInt8",20 ------------------------------------------ SELECT toSecond(N) -Code: 43: Illegal type Date of argument for function toSecond. +Code: 43 "UInt8",11 "UInt8",11 ------------------------------------------ @@ -94,31 +94,31 @@ SELECT toStartOfDay(N) ------------------------------------------ SELECT toStartOfHour(N) -Code: 43: Illegal type Date of argument for function toStartOfHour. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ SELECT toStartOfMinute(N) -Code: 43: Illegal type Date of argument for function toStartOfMinute. 
+Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFiveMinute(N) -Code: 43: Illegal type Date of argument for function toStartOfFiveMinute. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfTenMinutes(N) -Code: 43: Illegal type Date of argument for function toStartOfTenMinutes. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFifteenMinutes(N) -Code: 43: Illegal type Date of argument for function toStartOfFifteenMinutes. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:15:00" "DateTime('Europe/Minsk')","2019-09-16 19:15:00" ------------------------------------------ @@ -139,7 +139,7 @@ SELECT toStartOfInterval(N, INTERVAL 1 day) ------------------------------------------ SELECT toStartOfInterval(N, INTERVAL 15 minute) -Code: 43: Illegal type Date of argument for function toStartOfInterval. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:15:00" "DateTime('Europe/Minsk')","2019-09-16 19:15:00" ------------------------------------------ @@ -160,13 +160,13 @@ SELECT date_trunc(\'day\', N) ------------------------------------------ SELECT date_trunc(\'minute\', N) -Code: 43: Illegal type Date of argument for function date_trunc. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toTime(N) -Code: 43: Illegal type Date of argument for function toTime. 
+Code: 43 "DateTime('Europe/Minsk')","1970-01-02 19:20:11" "DateTime('Europe/Minsk')","1970-01-02 19:20:11" ------------------------------------------ @@ -232,7 +232,7 @@ SELECT toYearWeek(N) ------------------------------------------ SELECT timeSlot(N) -Code: 43: Illegal type Date of argument for function timeSlot. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ @@ -375,15 +375,15 @@ SELECT N - N "Int32",0 "Int32",0 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N + N -Code: 43: Illegal types Date and Date of arguments of function plus. +Code: 43 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function plus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function plus. +Code: 43 ------------------------------------------ SELECT N != N "UInt8",0 @@ -417,47 +417,47 @@ SELECT N >= N ------------------------------------------ SELECT N - DT -Code: 43: Illegal types Date and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 "Int32",0 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT DT - N -Code: 43: Illegal types DateTime('Europe/Minsk') and Date of arguments of function minus. +Code: 43 "Int32",0 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N - D "Int32",0 -Code: 43: Illegal types DateTime('Europe/Minsk') and Date of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and Date of arguments of function minus. 
+Code: 43 ------------------------------------------ SELECT D - N "Int32",0 -Code: 43: Illegal types Date and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types Date and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N - DT64 -Code: 43: Illegal types Date and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT DT64 - N -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and Date of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N != DT "UInt8",1 @@ -726,11 +726,11 @@ SELECT N - toUInt8(1) ------------------------------------------ SELECT toUInt8(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt8(-1) "Date","2019-09-17" @@ -739,11 +739,11 @@ SELECT N - toInt8(-1) ------------------------------------------ SELECT toInt8(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. 
+Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt16(1) "Date","2019-09-15" @@ -752,11 +752,11 @@ SELECT N - toUInt16(1) ------------------------------------------ SELECT toUInt16(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt16(-1) "Date","2019-09-17" @@ -765,11 +765,11 @@ SELECT N - toInt16(-1) ------------------------------------------ SELECT toInt16(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt32(1) "Date","2019-09-15" @@ -778,11 +778,11 @@ SELECT N - toUInt32(1) ------------------------------------------ SELECT toUInt32(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. 
+Code: 43 ------------------------------------------ SELECT N - toInt32(-1) "Date","2019-09-17" @@ -791,11 +791,11 @@ SELECT N - toInt32(-1) ------------------------------------------ SELECT toInt32(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt64(1) "Date","2019-09-15" @@ -804,11 +804,11 @@ SELECT N - toUInt64(1) ------------------------------------------ SELECT toUInt64(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt64(-1) "Date","2019-09-17" @@ -817,585 +817,585 @@ SELECT N - toInt64(-1) ------------------------------------------ SELECT toInt64(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N == toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) == N -Code: 43: Illegal types of arguments (UInt8, Date) of function equals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) == N -Code: 43: Illegal types of arguments (Int8, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) == N -Code: 43: Illegal types of arguments (UInt16, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) == N -Code: 43: Illegal types of arguments (Int16, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) == N -Code: 43: Illegal types of arguments (UInt32, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) == N -Code: 43: Illegal types of arguments (Int32, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function equals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) == N -Code: 43: Illegal types of arguments (UInt64, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) == N -Code: 43: Illegal types of arguments (Int64, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N != toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) != N -Code: 43: Illegal types of arguments (UInt8, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) != N -Code: 43: Illegal types of arguments (Int8, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) != N -Code: 43: Illegal types of arguments (UInt16, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) != N -Code: 43: Illegal types of arguments (Int16, Date) of function notEquals. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) != N -Code: 43: Illegal types of arguments (UInt32, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) != N -Code: 43: Illegal types of arguments (Int32, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) != N -Code: 43: Illegal types of arguments (UInt64, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) != N -Code: 43: Illegal types of arguments (Int64, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) < N -Code: 43: Illegal types of arguments (UInt8, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function less. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) < N -Code: 43: Illegal types of arguments (Int8, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) < N -Code: 43: Illegal types of arguments (UInt16, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) < N -Code: 43: Illegal types of arguments (Int16, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) < N -Code: 43: Illegal types of arguments (UInt32, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) < N -Code: 43: Illegal types of arguments (Int32, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) < N -Code: 43: Illegal types of arguments (UInt64, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function less. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) < N -Code: 43: Illegal types of arguments (Int64, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) <= N -Code: 43: Illegal types of arguments (UInt8, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) <= N -Code: 43: Illegal types of arguments (Int8, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) <= N -Code: 43: Illegal types of arguments (UInt16, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) <= N -Code: 43: Illegal types of arguments (Int16, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) <= N -Code: 43: Illegal types of arguments (UInt32, Date) of function lessOrEquals. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) <= N -Code: 43: Illegal types of arguments (Int32, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) <= N -Code: 43: Illegal types of arguments (UInt64, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) <= N -Code: 43: Illegal types of arguments (Int64, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N > toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) > N -Code: 43: Illegal types of arguments (UInt8, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) > N -Code: 43: Illegal types of arguments (Int8, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function greater. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) > N -Code: 43: Illegal types of arguments (UInt16, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) > N -Code: 43: Illegal types of arguments (Int16, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) > N -Code: 43: Illegal types of arguments (UInt32, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) > N -Code: 43: Illegal types of arguments (Int32, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) > N -Code: 43: Illegal types of arguments (UInt64, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) > N -Code: 43: Illegal types of arguments (Int64, Date) of function greater. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) >= N -Code: 43: Illegal types of arguments (UInt8, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) >= N -Code: 43: Illegal types of arguments (Int8, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) >= N -Code: 43: Illegal types of arguments (UInt16, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) >= N -Code: 43: Illegal types of arguments (Int16, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) >= N -Code: 43: Illegal types of arguments (UInt32, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function greaterOrEquals. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) >= N -Code: 43: Illegal types of arguments (Int32, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) >= N -Code: 43: Illegal types of arguments (UInt64, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) >= N -Code: 43: Illegal types of arguments (Int64, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility.sh b/tests/queries/0_stateless/00921_datetime64_compatibility.sh index 8f5d9081719..3e5de1a552c 100755 --- a/tests/queries/0_stateless/00921_datetime64_compatibility.sh +++ b/tests/queries/0_stateless/00921_datetime64_compatibility.sh @@ -12,4 +12,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) "${CURDIR}"/00921_datetime64_compatibility.python \ | ${CLICKHOUSE_CLIENT} --ignore-error -T -nm --calculate_text_stack_trace 0 --log-level 'error' 2>&1 \ - | sed -Ee 's/Received exception from server .*//g; s/(Code: [0-9]+). 
DB::Exception: Received from .* DB::Exception/\1/g' + | sed 's/Received exception .*//g; s/^\(Code: [0-9]\+\).*$/\1/g' From 77654eeee870d9d7f894d56026b75e40230975d2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 23 Oct 2020 17:35:03 +0300 Subject: [PATCH 136/174] fixup --- src/Functions/if.cpp | 72 ++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 8f7647a8899..dd67f922ddf 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -154,6 +154,22 @@ struct NumIfImpl, Decimal, Decimal> } }; +template +struct NumIfImpl +{ +private: + [[noreturn]] static void throwError() + { + throw Exception("Invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +public: + template static void vectorVector(Args &&...) { throwError(); } + template static void vectorConstant(Args &&...) { throwError(); } + template static void constantVector(Args &&...) { throwError(); } + template static void constantConstant(Args &&...) 
{ throwError(); } +}; + + class FunctionIf : public FunctionIfBase { public: @@ -189,29 +205,17 @@ private: const IColumn * col_right_untyped = columns[arguments[2]].column.get(); UInt32 scale = decimalScale(columns, arguments); - if constexpr (std::is_same_v) + if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) { - const auto & arg_left = columns[arguments[1]]; - const auto & arg_right = columns[arguments[2]]; - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Incompatible types of arguments of function {}:" - " '{}' and '{}'", getName(), arg_left.type->getName(), - arg_right.type->getName()); + NumIfImpl::vectorVector( + cond_col->getData(), col_left->getData(), col_right_vec->getData(), columns, result, scale); + return true; } - else + else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) { - if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) - { - NumIfImpl::vectorVector( - cond_col->getData(), col_left->getData(), col_right_vec->getData(), columns, result, scale); - return true; - } - else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) - { - NumIfImpl::vectorConstant( - cond_col->getData(), col_left->getData(), col_right_const->template getValue(), columns, result, scale); - return true; - } + NumIfImpl::vectorConstant( + cond_col->getData(), col_left->getData(), col_right_const->template getValue(), columns, result, scale); + return true; } return false; @@ -230,29 +234,17 @@ private: const IColumn * col_right_untyped = columns[arguments[2]].column.get(); UInt32 scale = decimalScale(columns, arguments); - if constexpr (std::is_same_v) + if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) { - const auto & arg_left = columns[arguments[1]]; - const auto & arg_right = columns[arguments[2]]; - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Incompatible types of arguments of function {}:" - " '{}' and '{}'", getName(), 
arg_left.type->getName(), - arg_right.type->getName()); + NumIfImpl::constantVector( + cond_col->getData(), col_left->template getValue(), col_right_vec->getData(), columns, result, scale); + return true; } - else + else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) { - if (const auto * col_right_vec = checkAndGetColumn(col_right_untyped)) - { - NumIfImpl::constantVector( - cond_col->getData(), col_left->template getValue(), col_right_vec->getData(), columns, result, scale); - return true; - } - else if (const auto * col_right_const = checkAndGetColumnConst(col_right_untyped)) - { - NumIfImpl::constantConstant( - cond_col->getData(), col_left->template getValue(), col_right_const->template getValue(), columns, result, scale); - return true; - } + NumIfImpl::constantConstant( + cond_col->getData(), col_left->template getValue(), col_right_const->template getValue(), columns, result, scale); + return true; } return false; From 69b16fc8efd49ef4b305fb5d98ba42b51a3d2a90 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 23 Oct 2020 17:39:09 +0300 Subject: [PATCH 137/174] Update tests.md --- docs/en/development/tests.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 8c06c868e66..e6141f7ab02 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -47,6 +47,8 @@ select x; -- { serverError 49 } ``` This test ensures that the server returns an error with code 49 about unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use `clientError` annotation instead. +Do not check for a particular wording of error message, it may change in the future, and the test will needlessly break. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one. 
+ ### Testing a Distributed Query If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is ran in CI in correct configurations, where the server is configured to support distributed queries. From ba0513b0311ab4f0224fcf997f98a56992080066 Mon Sep 17 00:00:00 2001 From: Aleksandr Karo Date: Fri, 23 Oct 2020 17:49:56 +0300 Subject: [PATCH 138/174] Fix broken link in replication docs --- docs/ru/engines/table-engines/mergetree-family/replication.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index 440dcf4a7b5..410cf06c0c5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -14,7 +14,7 @@ Репликация не зависит от шардирования. На каждом шарде репликация работает независимо. -Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../engines/table-engines/mergetree-family/replication.md#query_language_queries_alter)). +Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../sql-reference/statements/alter/index.md#query_language_queries_alter)). 
Запросы `CREATE`, `DROP`, `ATTACH`, `DETACH` и `RENAME` выполняются на одном сервере и не реплицируются: From b8e754d68f893f740c6e8efd962e70f240614b9d Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Fri, 23 Oct 2020 18:11:02 +0300 Subject: [PATCH 139/174] DOCSUP-3121: Update the SHOW DATABASES and SHOW TABLES descriptions (#16115) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update show.md Update descriptions of show databases and show tables. * Update show.md Update 'identical query'. * Update show.md Внес поправки согласно комментариям. * Update show.md Внес поправки и перевел на русский язык. * Update show.md Внес мелкие поправки. Co-authored-by: Dmitriy --- docs/en/sql-reference/statements/show.md | 150 ++++++++++++++++++++-- docs/ru/sql-reference/statements/show.md | 152 ++++++++++++++++++++--- 2 files changed, 275 insertions(+), 27 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index a18e99d7b11..81aca1261de 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -15,12 +15,83 @@ Returns a single `String`-type ‘statement’ column, which contains a single v ## SHOW DATABASES {#show-databases} -``` sql -SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] +Prints a list of all databases. + +```sql +SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] ``` -Prints a list of all databases. -This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
+This statement is identical to the query: + +```sql +SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] +``` + +### Examples {#examples} + +Getting database names, containing the symbols sequence 'de' in their names: + +``` sql +SHOW DATABASES LIKE '%de%' +``` + +Result: + +``` text +┌─name────┐ +│ default │ +└─────────┘ +``` + +Getting database names, containing symbols sequence 'de' in their names, in the case insensitive manner: + +``` sql +SHOW DATABASES ILIKE '%DE%' +``` + +Result: + +``` text +┌─name────┐ +│ default │ +└─────────┘ +``` + +Getting database names, not containing the symbols sequence 'de' in their names: + +``` sql +SHOW DATABASES NOT LIKE '%de%' +``` + +Result: + +``` text +┌─name───────────────────────────┐ +│ _temporary_and_external_tables │ +│ system │ +│ test │ +│ tutorial │ +└────────────────────────────────┘ +``` + +Getting the first two rows from database names: + +``` sql +SHOW DATABASES LIMIT 2 +``` + +Result: + +``` text +┌─name───────────────────────────┐ +│ _temporary_and_external_tables │ +│ default │ +└────────────────────────────────┘ +``` + +### See Also {#see-also} + +- [CREATE DATABASE](https://clickhouse.tech/docs/en/sql-reference/statements/create/database/#query-language-create-database) ## SHOW PROCESSLIST {#show-processlist} @@ -42,33 +113,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" Displays a list of tables. -``` sql -SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE '<pattern>' | WHERE expr] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] +```sql +SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` If the `FROM` clause is not specified, the query returns the list of tables from the current database.
-You can get the same results as the `SHOW TABLES` query in the following way: +This statement is identical to the query: -``` sql -SELECT name FROM system.tables WHERE database = [AND name LIKE ] [LIMIT ] [INTO OUTFILE ] [FORMAT ] +```sql +SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ] ``` -**Example** +### Examples {#examples} -The following query selects the first two rows from the list of tables in the `system` database, whose names contain `co`. +Getting table names, containing the symbols sequence 'user' in their names: ``` sql -SHOW TABLES FROM system LIKE '%co%' LIMIT 2 +SHOW TABLES FROM system LIKE '%user%' ``` +Result: + +``` text +┌─name─────────────┐ +│ user_directories │ +│ users │ +└──────────────────┘ +``` + +Getting table names, containing sequence 'user' in their names, in the case insensitive manner: + +``` sql +SHOW TABLES FROM system ILIKE '%USER%' +``` + +Result: + +``` text +┌─name─────────────┐ +│ user_directories │ +│ users │ +└──────────────────┘ +``` + +Getting table names, not containing the symbol sequence 's' in their names: + +``` sql +SHOW TABLES FROM system NOT LIKE '%s%' +``` + +Result: + +``` text +┌─name─────────┐ +│ metric_log │ +│ metric_log_0 │ +│ metric_log_1 │ +└──────────────┘ +``` + +Getting the first two rows from table names: + +``` sql +SHOW TABLES FROM system LIMIT 2 +``` + +Result: + ``` text ┌─name───────────────────────────┐ │ aggregate_function_combinators │ -│ collations │ +│ asynchronous_metric_log │ └────────────────────────────────┘ ``` +### See Also {#see-also} + +- [Create Tables](https://clickhouse.tech/docs/en/getting-started/tutorial/#create-tables) +- [SHOW CREATE TABLE](https://clickhouse.tech/docs/en/sql-reference/statements/show/#show-create-table) + ## SHOW DICTIONARIES {#show-dictionaries} Displays a list of [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). 
diff --git a/docs/ru/sql-reference/statements/show.md b/docs/ru/sql-reference/statements/show.md index 575742568cb..4b226765632 100644 --- a/docs/ru/sql-reference/statements/show.md +++ b/docs/ru/sql-reference/statements/show.md @@ -10,12 +10,83 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F ## SHOW DATABASES {#show-databases} -``` sql -SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] +Выводит список всех баз данных. + +```sql +SHOW DATABASES [LIKE | ILIKE | NOT LIKE ''] [LIMIT ] [INTO OUTFILE filename] [FORMAT format] ``` -Выводит список всех баз данных. -Запрос полностью аналогичен запросу `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. +Этот запрос идентичен запросу: + +```sql +SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE ''] [LIMIT ] [INTO OUTFILE filename] [FORMAT format] +``` + +### Примеры {#examples} + +Получение списка баз данных, имена которых содержат последовательность символов 'de': + +``` sql +SHOW DATABASES LIKE '%de%' +``` + +Результат: + +``` text +┌─name────┐ +│ default │ +└─────────┘ +``` + +Получение списка баз данных, имена которых содержат последовательность символов 'de' независимо от регистра: + +``` sql +SHOW DATABASES ILIKE '%DE%' +``` + +Результат: + +``` text +┌─name────┐ +│ default │ +└─────────┘ +``` + +Получение списка баз данных, имена которых не содержат последовательность символов 'de': + +``` sql +SHOW DATABASES NOT LIKE '%de%' +``` + +Результат: + +``` text +┌─name───────────────────────────┐ +│ _temporary_and_external_tables │ +│ system │ +│ test │ +│ tutorial │ +└────────────────────────────────┘ +``` + +Получение первых двух строк из списка имен баз данных: + +``` sql +SHOW DATABASES LIMIT 2 +``` + +Результат: + +``` text +┌─name───────────────────────────┐ +│ _temporary_and_external_tables │ +│ default │ +└────────────────────────────────┘ +``` + +### Смотрите также {#see-also} + +- [CREATE 
DATABASE](https://clickhouse.tech/docs/ru/sql-reference/statements/create/database/#query-language-create-database) ## SHOW PROCESSLIST {#show-processlist} @@ -37,33 +108,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" Выводит список таблиц. -``` sql -SHOW [TEMPORARY] TABLES [{FROM | IN} ] [LIKE '' | WHERE expr] [LIMIT ] [INTO OUTFILE ] [FORMAT ] +```sql +SHOW [TEMPORARY] TABLES [{FROM | IN} ] [LIKE | ILIKE | NOT LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ] ``` -Если секция `FROM` не используется, то запрос возвращает список таблиц из текущей базы данных. +Если условие `FROM` не указано, запрос возвращает список таблиц из текущей базы данных. -Результат, идентичный тому, что выдаёт запрос `SHOW TABLES` можно получить также запросом следующего вида: +Этот запрос идентичен запросу: -``` sql -SELECT name FROM system.tables WHERE database = [AND name LIKE ] [LIMIT ] [INTO OUTFILE ] [FORMAT ] +```sql +SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ] ``` -**Пример** +### Примеры {#examples} -Следующий запрос выбирает первые две строки из списка таблиц в базе данных `system`, чьи имена содержат `co`. 
+Получение списка таблиц, имена которых содержат последовательность символов 'user': ``` sql -SHOW TABLES FROM system LIKE '%co%' LIMIT 2 +SHOW TABLES FROM system LIKE '%user%' ``` +Результат: + +``` text +┌─name─────────────┐ +│ user_directories │ +│ users │ +└──────────────────┘ +``` + +Получение списка таблиц, имена которых содержат последовательность символов 'user' без учета регистра: + +``` sql +SHOW TABLES FROM system ILIKE '%USER%' +``` + +Результат: + +``` text +┌─name─────────────┐ +│ user_directories │ +│ users │ +└──────────────────┘ +``` + +Получение списка таблиц, имена которых не содержат символ 's': + +``` sql +SHOW TABLES FROM system NOT LIKE '%s%' +``` + +Результат: + +``` text +┌─name─────────┐ +│ metric_log │ +│ metric_log_0 │ +│ metric_log_1 │ +└──────────────┘ +``` + +Получение первых двух строк из списка таблиц: + +``` sql +SHOW TABLES FROM system LIMIT 2 +``` + +Результат: + ``` text ┌─name───────────────────────────┐ │ aggregate_function_combinators │ -│ collations │ +│ asynchronous_metric_log │ └────────────────────────────────┘ ``` +### Смотрите также {#see-also} + +- [Create Tables](https://clickhouse.tech/docs/ru/getting-started/tutorial/#create-tables) +- [SHOW CREATE TABLE](https://clickhouse.tech/docs/ru/sql-reference/statements/show/#show-create-table) + ## SHOW DICTIONARIES {#show-dictionaries} Выводит список [внешних словарей](../../sql-reference/statements/show.md). 
From f2b506785b5178cc564f77b8631b5e1175d56519 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Oct 2020 18:20:43 +0300 Subject: [PATCH 140/174] Update version_date.tsv after release 20.10.2.20 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8776047c357..f7d6536a890 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v20.10.2.20-stable 2020-10-23 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 v20.8.4.11-lts 2020-10-09 From b28894c43baf3fa77e45011469b0653d83dd3177 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 19:31:51 +0300 Subject: [PATCH 141/174] More compatible way --- .../MergeTree/registerStorageMergeTree.cpp | 30 ++++++++----------- ...y_key_without_order_by_zookeeper.reference | 6 ++-- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index e255c66dd70..c9e2c9ee460 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -608,29 +608,23 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// single default partition with name "all". metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.context); + /// PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. + if (!args.storage_def->order_by && args.storage_def->primary_key) + args.storage_def->order_by = args.storage_def->primary_key; - /// Get sorting key from engine arguments. PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. - /// - /// NOTE: store merging_param_key_arg as additional key column. We do it - /// before storage creation. After that storage will just copy this - /// column if sorting key will be changed. 
- if (args.storage_def->order_by) - { - metadata.sorting_key = KeyDescription::getSortingKeyFromAST( - args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); - } - else if (args.storage_def->primary_key) - { - metadata.sorting_key = KeyDescription::getSortingKeyFromAST( - args.storage_def->primary_key->ptr(), metadata.columns, args.context, merging_param_key_arg); - } - else - { + if (!args.storage_def->order_by) throw Exception( "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()", ErrorCodes::BAD_ARGUMENTS); - } + + /// Get sorting key from engine arguments. + /// + /// NOTE: store merging_param_key_arg as additional key column. We do it + /// before storage creation. After that storage will just copy this + /// column if sorting key will be changed. + metadata.sorting_key = KeyDescription::getSortingKeyFromAST( + args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) diff --git a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference index d8b69d78e02..02d4fe64f8d 100644 --- a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference +++ b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference @@ -1,9 +1,9 @@ -CREATE TABLE default.merge_tree_pk\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.merge_tree_pk\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 1 a 2 b 1 c 2 b -CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = 
ReplacingMergeTree()\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 1 a 2 b 1 c @@ -12,7 +12,7 @@ CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String 2 e 555 2 b 0 CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String,\n `key2` UInt64\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY (key, key2)\nSETTINGS index_granularity = 8192 -CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 1 a 2 b 1 c From bc331a802959900380700ca81656f57bcd735101 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 23 Oct 2020 19:34:41 +0300 Subject: [PATCH 142/174] Better set --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index c9e2c9ee460..91bf105af74 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -610,7 +610,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. 
if (!args.storage_def->order_by && args.storage_def->primary_key) - args.storage_def->order_by = args.storage_def->primary_key; + args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) throw Exception( From 72d7b6117ec023d4abd56b3badcfc783c233c824 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Oct 2020 10:32:49 +0300 Subject: [PATCH 143/174] Use total_memory_tracker when there is no other MemoryTracker object. This should significantly reduce the MemoryTracking drift, test shows that there is 0 drift after query storm (100 queries, via http/tcp/tcp in one session). TL;DR; To track memory, clickhouse creates a memory tracker object for each thread **explicitly**, but until it is created the memory allocations are not accounted. There should not be a lot of allocations w/o memory tracker, since most of the time it is created early enough, but even this may be enough to trigger some problems. Plus sometimes it is not possible to create it, for example some 3rd-party libraries do not allow to do this explicitly: - for example before #15740 allocations from librdkafka threads, - or even worse, poco threads, they don't have any routines to do this. This won't be a problem for `MemoryTracking` metric if the deallocation will be done from the same thread w/o memory tracker (or vice versa), but this is not always true. NOTE, that this will slow down per-thread allocations w/o memory tracker, since before this patch there was no memory tracking for them while now they will be accounted in total_memory_tracker, and for total_memory_tracker max_untracked_memory is always reached. But this should not be significant.
--- programs/server/Server.cpp | 2 +- src/Common/MemoryTracker.cpp | 60 +++++++++++++++++++++++++++-------- src/Common/ThreadStatus.cpp | 17 ++++++++++ src/Common/ThreadStatus.h | 18 +++++++++++ src/Common/TraceCollector.cpp | 16 ++++++++-- 5 files changed, 96 insertions(+), 17 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index da5760acc09..b85cb5e75f2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -258,7 +258,7 @@ int Server::main(const std::vector & /*args*/) Poco::Logger * log = &logger(); UseSSL use_ssl; - ThreadStatus thread_status; + MainThreadStatus::getInstance(); registerFunctions(); registerAggregateFunctions(); diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 87567591ddf..6d0b17270f9 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -13,6 +13,24 @@ #include #include +namespace +{ + +MemoryTracker * getMemoryTracker() +{ + if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) + return thread_memory_tracker; + + /// Once the main thread is initialized, + /// total_memory_tracker is initialized too. + /// And can be used, since MainThreadStatus is required for profiling. + if (DB::MainThreadStatus::get()) + return &total_memory_tracker; + + return nullptr; +} + +} namespace DB { @@ -270,16 +288,24 @@ namespace CurrentMemoryTracker void alloc(Int64 size) { - if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = getMemoryTracker()) { - current_thread->untracked_memory += size; - if (current_thread->untracked_memory > current_thread->untracked_memory_limit) + if (current_thread) { - /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes - /// more. It could be useful to enlarge Exception message in rethrow logic. 
- Int64 tmp = current_thread->untracked_memory; - current_thread->untracked_memory = 0; - memory_tracker->alloc(tmp); + current_thread->untracked_memory += size; + if (current_thread->untracked_memory > current_thread->untracked_memory_limit) + { + /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes + /// more. It could be useful to enlarge Exception message in rethrow logic. + Int64 tmp = current_thread->untracked_memory; + current_thread->untracked_memory = 0; + memory_tracker->alloc(tmp); + } + } + /// total_memory_tracker only, ignore untracked_memory + else + { + memory_tracker->alloc(size); } } } @@ -292,13 +318,21 @@ namespace CurrentMemoryTracker void free(Int64 size) { - if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = getMemoryTracker()) { - current_thread->untracked_memory -= size; - if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) + if (current_thread) { - memory_tracker->free(-current_thread->untracked_memory); - current_thread->untracked_memory = 0; + current_thread->untracked_memory -= size; + if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) + { + memory_tracker->free(-current_thread->untracked_memory); + current_thread->untracked_memory = 0; + } + } + /// total_memory_tracker only, ignore untracked_memory + else + { + memory_tracker->free(size); } } } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ea9e507850c..bac0559fc6b 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes thread_local ThreadStatus * current_thread = nullptr; +thread_local ThreadStatus * main_thread = nullptr; ThreadStatus::ThreadStatus() @@ -115,4 +116,20 @@ void ThreadStatus::onFatalError() fatal_error_callback(); } +ThreadStatus * MainThreadStatus::main_thread = nullptr; +MainThreadStatus & 
MainThreadStatus::getInstance() +{ + static MainThreadStatus thread_status; + return thread_status; +} +MainThreadStatus::MainThreadStatus() + : ThreadStatus() +{ + main_thread = current_thread; +} +MainThreadStatus::~MainThreadStatus() +{ + main_thread = nullptr; +} + } diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index c9476ebc9a5..7d85059f23e 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -215,4 +215,22 @@ private: void setupState(const ThreadGroupStatusPtr & thread_group_); }; +/** + * Creates ThreadStatus for the main thread. + */ +class MainThreadStatus : public ThreadStatus +{ +public: + static MainThreadStatus & getInstance(); + static ThreadStatus * get() { return main_thread; } + static bool isMainThread() { return main_thread == current_thread; } + + ~MainThreadStatus(); + +private: + MainThreadStatus(); + + static ThreadStatus * main_thread; +}; + } diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index 104b747d431..fc5318b11fb 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -66,10 +66,20 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac char buffer[buf_size]; WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer); - StringRef query_id = CurrentThread::getQueryId(); - query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN); + StringRef query_id; + UInt64 thread_id; - auto thread_id = CurrentThread::get().thread_id; + if (CurrentThread::isInitialized()) + { + query_id = CurrentThread::getQueryId(); + query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN); + + thread_id = CurrentThread::get().thread_id; + } + else + { + thread_id = MainThreadStatus::get()->thread_id; + } writeChar(false, out); /// true if requested to stop the collecting thread. 
writeStringBinary(query_id, out); From 114ea9b1eb7d7ba8671a7785baf865433d83775c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Oct 2020 13:51:59 +0300 Subject: [PATCH 144/174] Fix accounting for new/delete from different threads for VariableContext::Thread MemoryTracker assumes that for VariableContext::Thread new/delete may be called from different threads, hence the amount of memory can go negative. However the MemoryTracker is nested, so even if the negative amount is allowed for VariableContext::Thread it is not allowed for anything above it, and hence the MemoryTracking will not be decremented properly. Fix this by passing the initial size to the parent free. This should fix memory drift for HTTP queries. --- src/Common/MemoryTracker.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 6d0b17270f9..380fcb1b2b6 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -210,14 +210,15 @@ void MemoryTracker::free(Int64 size) DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), -size); } + Int64 accounted_size = size; if (level == VariableContext::Thread) { /// Could become negative if memory allocated in this thread is freed in another one - amount.fetch_sub(size, std::memory_order_relaxed); + amount.fetch_sub(accounted_size, std::memory_order_relaxed); } else { - Int64 new_amount = amount.fetch_sub(size, std::memory_order_relaxed) - size; + Int64 new_amount = amount.fetch_sub(accounted_size, std::memory_order_relaxed) - accounted_size; /** Sometimes, query could free some data, that was allocated outside of query context. * Example: cache eviction.
@@ -228,7 +229,7 @@ void MemoryTracker::free(Int64 size) if (unlikely(new_amount < 0)) { amount.fetch_sub(new_amount); - size += new_amount; + accounted_size += new_amount; } } @@ -236,7 +237,7 @@ void MemoryTracker::free(Int64 size) loaded_next->free(size); if (metric != CurrentMetrics::end()) - CurrentMetrics::sub(metric, size); + CurrentMetrics::sub(metric, accounted_size); } From 0cccf3049abe714edc47e7d9f98f9d050369611f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 22 Oct 2020 23:00:51 +0300 Subject: [PATCH 145/174] Fix parent memory tracker during query detaching --- src/Interpreters/ThreadStatusExt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 10d8249bd49..d324307b487 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -301,7 +301,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) memory_tracker.reset(); /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below. - memory_tracker.setParent(nullptr); + memory_tracker.setParent(thread_group->memory_tracker.getParent()); query_id.clear(); query_context = nullptr; From 6c42ad564c7e3013c0d4593e70a42f85b7976f52 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Oct 2020 14:27:34 +0300 Subject: [PATCH 146/174] Add a test for MemoryTracking drift v2: disable query profiling and logging in 01540_MemoryTracking (This should make MemoryTracker drift zero). 
--- docker/test/fasttest/run.sh | 3 + .../01540_MemoryTracking.reference | 4 + .../0_stateless/01540_MemoryTracking.sh | 92 +++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 tests/queries/0_stateless/01540_MemoryTracking.reference create mode 100755 tests/queries/0_stateless/01540_MemoryTracking.sh diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 91fe84a04cd..fc96bb4fe07 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -269,6 +269,9 @@ TESTS_TO_SKIP=( # Look at DistributedFilesToInsert, so cannot run in parallel. 01457_DistributedFilesToInsert + + # Looks at MemoryTracking + 01540_MemoryTracking ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/tests/queries/0_stateless/01540_MemoryTracking.reference b/tests/queries/0_stateless/01540_MemoryTracking.reference new file mode 100644 index 00000000000..f4f1685860b --- /dev/null +++ b/tests/queries/0_stateless/01540_MemoryTracking.reference @@ -0,0 +1,4 @@ +HTTP +TCP_ONE_SESSION +TCP +OK diff --git a/tests/queries/0_stateless/01540_MemoryTracking.sh b/tests/queries/0_stateless/01540_MemoryTracking.sh new file mode 100755 index 00000000000..a47b3038cd7 --- /dev/null +++ b/tests/queries/0_stateless/01540_MemoryTracking.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh +cd "$CURDIR" || exit 1 + +# Regression for MemoryTracker drift. +# +# To make this test indepedendent from the: +# - MemoryTracking syncing with rss (via AsynchronousMetrics.cpp) +# - and various other allocations in parallel +# Instead of measure diff of the MemoryTracking before beginning and after the +# end of all iterations, it measures MemoryTracking after each executed query +# and see how much time it had been changed. 
+# +# TODO: Once #15236 will be merged, add it to the "parallel" group in skip_list.json + +query="SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" + +function get_MemoryTracking() +{ + $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'" +} + +function test_execute_http() +{ + local i=$1 && shift + for _ in $(seq 1 "$i"); do + $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_threads=1&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"$query" | grep -F DB::Exception: + get_MemoryTracking >> 01540_MemoryTracking.memory.log + done +} +function test_execute_tcp() +{ + # NOTE: slow in debug + local i=$1 && shift + for _ in $(seq 1 "$i"); do + ${CLICKHOUSE_CLIENT} --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 -q "$query" >/dev/null + get_MemoryTracking >> 01540_MemoryTracking.memory.log + done +} +function test_execute_tcp_one_session() +{ + local i=$1 && shift + for _ in $(seq 1 "$i"); do + echo "$query;" + echo "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking';" + done | ${CLICKHOUSE_CLIENT} -nm --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 | { + grep ^MemoryTracking | cut -f2 > 01540_MemoryTracking.memory.log + } +} + +# run test and check the MemoryTracking +function run_test() +{ + local iterations=$1 && shift + local test_func=$1 && shift + + # 3 changes to MemoryTracking is minimum, since: + # - this is not that high to not detect inacuracy + # - memory can go like X/X+N due to some background allocations + # - memory can go like X/X+N/X, so at least 2 changes + local changes_allowed=3 + local changes_allowed_auto=$((iterations/6)) + if [[ $changes_allowed_auto -gt $changes_allowed ]]; then + 
# if iterations is large enough, use 6% from them + changes_allowed=$changes_allowed_auto + fi + + rm -f 01540_MemoryTracking.memory.log + $test_func "$iterations" + + local changed + changed="$(awk '{changed+=(prev && prev!=$0); prev=$0; } END {print changed}' 01540_MemoryTracking.memory.log)" + + if [[ $changed -gt $changes_allowed ]]; then + echo "Test $test_func failed. MemoryTracking changed too frequently: $changed (allowed $changes_allowed)" >&2 + fi +} + +# cleanup +trap 'rm -f 01540_MemoryTracking.memory.log' EXIT + +echo 'HTTP' +run_test 100 test_execute_http +echo 'TCP_ONE_SESSION' +run_test 100 test_execute_tcp_one_session +echo 'TCP' +run_test 100 test_execute_tcp +echo 'OK' + +exit 0 From 4d1385ef19d4594954433775f7a3774e78de5787 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Oct 2020 21:11:55 +0300 Subject: [PATCH 147/174] Add total_rows/total_bytes support for Set/Join --- src/Storages/StorageJoin.cpp | 3 +++ src/Storages/StorageJoin.h | 3 +++ src/Storages/StorageSet.cpp | 4 +++- src/Storages/StorageSet.h | 3 +++ ...0753_system_columns_and_system_tables.reference | 6 ++++++ .../00753_system_columns_and_system_tables.sql | 14 ++++++++++++++ 6 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 33c67229cfa..c6d85174e68 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -100,7 +100,10 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr analyzed_join) const void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block, true); } + size_t StorageJoin::getSize() const { return join->getTotalRowCount(); } +std::optional StorageJoin::totalRows() const { return join->getTotalRowCount(); } +std::optional StorageJoin::totalBytes() const { return join->getTotalByteCount(); } void registerStorageJoin(StorageFactory & factory) diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 95037c4d33a..857f3646441 100644 --- 
a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -46,6 +46,9 @@ public: size_t max_block_size, unsigned num_streams) override; + std::optional totalRows() const override; + std::optional totalBytes() const override; + private: Block sample_block; const Names key_names; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index d6d8b9e1449..b7779d2e550 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -151,8 +151,10 @@ StorageSet::StorageSet( void StorageSet::insertBlock(const Block & block) { set->insertFromBlock(block); } void StorageSet::finishInsert() { set->finishInsert(); } -size_t StorageSet::getSize() const { return set->getTotalRowCount(); } +size_t StorageSet::getSize() const { return set->getTotalRowCount(); } +std::optional StorageSet::totalRows() const { return set->getTotalRowCount(); } +std::optional StorageSet::totalBytes() const { return set->getTotalByteCount(); } void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 40d7925de13..98677dcfb15 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -72,6 +72,9 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; + std::optional totalRows() const override; + std::optional totalBytes() const override; + private: SetPtr set; diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference index 4d1fab83cc1..12af231d18c 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference @@ -39,3 +39,9 @@ Check lifetime_bytes/lifetime_rows for Buffer 200 100 200 100 402 201 +Check 
total_bytes/total_rows for Set +2048 50 +2048 100 +Check total_bytes/total_rows for Join +10240 50 +10240 100 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql index 9b9fa04e6b0..862e3693f0e 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql @@ -112,3 +112,17 @@ INSERT INTO check_system_tables SELECT * FROM numbers_mt(101); -- direct block w SELECT lifetime_bytes, lifetime_rows FROM system.tables WHERE name = 'check_system_tables'; DROP TABLE check_system_tables; DROP TABLE check_system_tables_null; + +SELECT 'Check total_bytes/total_rows for Set'; +CREATE TABLE check_system_tables Engine=Set() AS SELECT * FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +DROP TABLE check_system_tables; + +SELECT 'Check total_bytes/total_rows for Join'; +CREATE TABLE check_system_tables Engine=Join(ANY, LEFT, number) AS SELECT * FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +DROP TABLE check_system_tables; From d6f1056951ddd97548bfdbabd7bc5db03784ab56 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 23 Oct 2020 21:53:50 +0300 Subject: [PATCH 148/174] Update arcadia_skip_list.txt --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index c3b5c57d7f2..a965126b9a8 100644 --- 
a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -153,3 +153,4 @@ 01515_force_data_skipping_indices 01526_complex_key_dict_direct_layout 01509_dictionary_preallocate +01530_drop_database_atomic_sync From 3f594ed3ad82458e37b01eee2570cd6a8d1fbe4f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Oct 2020 14:27:34 +0300 Subject: [PATCH 149/174] Add a test for memory drift in user memory tracker (max_memory_usage_for_user) --- docker/test/fasttest/run.sh | 1 + .../01541_max_memory_usage_for_user.reference | 5 ++ .../01541_max_memory_usage_for_user.sh | 57 +++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 tests/queries/0_stateless/01541_max_memory_usage_for_user.reference create mode 100755 tests/queries/0_stateless/01541_max_memory_usage_for_user.sh diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index fc96bb4fe07..746effc4e3f 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -272,6 +272,7 @@ TESTS_TO_SKIP=( # Looks at MemoryTracking 01540_MemoryTracking + 01541_max_memory_usage_for_user ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference b/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference new file mode 100644 index 00000000000..138569f04f7 --- /dev/null +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference @@ -0,0 +1,5 @@ +HTTP +TCP_ONE_SESSION +TCP +OK +KILL sleep diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh new file mode 100755 index 00000000000..7544ecfb026 --- /dev/null +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +# Regression for MemoryTracker drift via HTTP queries. +# +# For this will be used: +# - max_memory_usage_for_user +# - one users' query in background (to avoid reseting max_memory_usage_for_user) + +query="SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" + +function execute_http() +{ + for _ in {1..100}; do + $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_memory_usage_for_user=100Mi&max_threads=1" -d@- <<<"$query" | grep -F DB::Exception: + done +} +function execute_tcp() +{ + # slow in debug, but should trigger the problem in ~10 iterations, so 20 is ok + for _ in {1..20}; do + ${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=100Mi --max_threads=1 -q "$query" | grep -F DB::Exception: + done +} +function execute_tcp_one_session() +{ + for _ in {1..30}; do + echo "$query;" + done | ${CLICKHOUSE_CLIENT} -nm --max_memory_usage_for_user=100Mi --max_threads=1 | grep -F DB::Exception: +} + + +# one users query in background (to avoid reseting max_memory_usage_for_user) +# --max_block_size=1 to make it killable (check the state each 1 second, 1 row) +# (the test takes ~40 seconds in debug build, so 60 seconds is ok) +${CLICKHOUSE_CLIENT} --max_block_size=1 --format Null -q 'SELECT sleepEachRow(1) FROM numbers(60)' & +# trap +sleep_query_pid=$! 
+function cleanup() +{ + echo 'KILL sleep' + # if the timeout will not be enough, it will trigger "No such process" error/message + kill $sleep_query_pid +} +trap cleanup EXIT + +echo 'HTTP' +execute_http +echo 'TCP_ONE_SESSION' +execute_tcp_one_session +echo 'TCP' +execute_tcp +echo 'OK' + +exit 0 From a949293f95e3775a137e1e714619c8a8dfd3b470 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 23 Oct 2020 23:27:38 +0300 Subject: [PATCH 150/174] better --- src/AggregateFunctions/AggregateFunctionStudentTTest.h | 3 +++ src/AggregateFunctions/AggregateFunctionWelchTTest.h | 3 +++ tests/queries/0_stateless/01322_ttest_scipy.python | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h index 2a4ec40e3c1..d260a6be980 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.h +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -237,6 +237,9 @@ public: Float64 p_value = 0.0; std::tie(t_statistic, p_value) = this->data(place).getResult(); + /// Because p-value is a probability. + p_value = std::min(1.0, std::max(0.0, p_value)); + auto & column_tuple = assert_cast(to); auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); auto & column_value = assert_cast &>(column_tuple.getColumn(1)); diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h index b262ebb70af..175e0171606 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.h +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -248,6 +248,9 @@ public: Float64 p_value = 0.0; std::tie(t_statistic, p_value) = this->data(place).getResult(); + /// Because p-value is a probability. 
+ p_value = std::min(1.0, std::max(0.0, p_value)); + auto & column_tuple = assert_cast(to); auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); auto & column_value = assert_cast &>(column_tuple.getColumn(1)); diff --git a/tests/queries/0_stateless/01322_ttest_scipy.python b/tests/queries/0_stateless/01322_ttest_scipy.python index 7068b6c4d5a..66659e2ab71 100644 --- a/tests/queries/0_stateless/01322_ttest_scipy.python +++ b/tests/queries/0_stateless/01322_ttest_scipy.python @@ -63,8 +63,8 @@ def test_and_check(name, a, b, t_stat, p_value): "FROM ttest FORMAT TabSeparatedWithNames;") real_t_stat = real['t_stat'][0] real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < 1e-3)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < 1e-3), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + assert(abs(real_t_stat - np.float64(t_stat) < 1e-2)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") From 13b8dce792dc207ef5809e1e7ff728af9598515d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 00:26:22 +0300 Subject: [PATCH 151/174] Update contributors --- src/Storages/System/StorageSystemContributors.generated.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index eeba4d0d3f9..f251fe5ad37 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -165,6 +165,7 @@ const char * auto_contributors[] { "Dongdong Yang", "DoomzD", "Dr. 
Strange Looker", + "Ekaterina", "Eldar Zaitov", "Elena Baskakova", "Elghazal Ahmed", @@ -283,6 +284,7 @@ const char * auto_contributors[] { "Maks Skorokhod", "Maksim", "Maksim Fedotov", + "Maksim Kita", "Marat IDRISOV", "Marek Vavrusa", "Marek Vavruša", @@ -457,6 +459,7 @@ const char * auto_contributors[] { "Vadim Skipin", "VadimPE", "Valera Ryaboshapko", + "Vasily Kozhukhovskiy", "Vasily Morozov", "Vasily Nemkov", "Vasily Okunev", @@ -588,6 +591,7 @@ const char * auto_contributors[] { "fenglv", "fessmage", "filimonov", + "filipe", "flow", "flynn", "foxxmary", @@ -693,6 +697,7 @@ const char * auto_contributors[] { "sdk2", "serebrserg", "sev7e0", + "sevirov", "sfod", "shangshujie", "shedx", From 6e5b04fad8b4c4137fcb72c6eece783123c3d908 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Oct 2020 01:50:41 +0300 Subject: [PATCH 152/174] Make 01540_MemoryTracking integration 01540_MemoryTracking is failing on CI for the following reasons: - log_queries (fixed, by adding log_queries=0) - profilers (fixed) - but what can't be fixed is metric_log and so on, so we need separate instance with separate configuration (sigh). 
--- docker/test/fasttest/run.sh | 2 - .../test_MemoryTracking/__init__.py | 0 .../configs/no_system_log.xml | 7 ++ tests/integration/test_MemoryTracking/test.py | 92 +++++++++++++++++++ .../01540_MemoryTracking.reference | 4 - .../0_stateless/01540_MemoryTracking.sh | 92 ------------------- 6 files changed, 99 insertions(+), 98 deletions(-) create mode 100644 tests/integration/test_MemoryTracking/__init__.py create mode 100644 tests/integration/test_MemoryTracking/configs/no_system_log.xml create mode 100644 tests/integration/test_MemoryTracking/test.py delete mode 100644 tests/queries/0_stateless/01540_MemoryTracking.reference delete mode 100755 tests/queries/0_stateless/01540_MemoryTracking.sh diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 746effc4e3f..0f4acfeb34f 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -270,8 +270,6 @@ TESTS_TO_SKIP=( # Look at DistributedFilesToInsert, so cannot run in parallel. 01457_DistributedFilesToInsert - # Looks at MemoryTracking - 01540_MemoryTracking 01541_max_memory_usage_for_user ) diff --git a/tests/integration/test_MemoryTracking/__init__.py b/tests/integration/test_MemoryTracking/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml new file mode 100644 index 00000000000..7e235458d39 --- /dev/null +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/integration/test_MemoryTracking/test.py b/tests/integration/test_MemoryTracking/test.py new file mode 100644 index 00000000000..6009e8cc953 --- /dev/null +++ b/tests/integration/test_MemoryTracking/test.py @@ -0,0 +1,92 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import logging +import pytest +from helpers.cluster import 
ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/no_system_log.xml']) + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +query_settings = { + 'max_threads': 1, + 'query_profiler_real_time_period_ns': 0, + 'query_profiler_cpu_time_period_ns': 0, + 'log_queries': 0, +} +sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" + +def query(*args, **kwargs): + if 'settings' not in kwargs: + kwargs['settings'] = query_settings + else: + kwargs['settings'].update(query_settings) + return node.query(*args, **kwargs) +def http_query(*args, **kwargs): + if 'params' not in kwargs: + kwargs['params'] = query_settings + else: + kwargs['params'].update(query_settings) + return node.http_query(*args, **kwargs) + +def get_MemoryTracking(): + return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")) + +def check_memory(memory): + # 3 changes to MemoryTracking is minimum, since: + # - this is not that high to not detect inacuracy + # - memory can go like X/X+N due to some background allocations + # - memory can go like X/X+N/X, so at least 2 changes + changes_allowed = 3 + # if number of samples is large enough, use 10% from them + # (actually most of the time there will be only few changes, it was made 10% to avoid flackiness) + changes_allowed_auto=int(len(memory) * 0.1) + changes_allowed = max(changes_allowed_auto, changes_allowed) + + changed=len(set(memory)) + logging.info('Changes: allowed=%s, actual=%s, sample=%s', + changes_allowed, changed, len(memory)) + assert changed < changes_allowed + +def test_http(): + memory = [] + memory.append(get_MemoryTracking()) + for _ in range(100): + http_query(sample_query) + 
memory.append(get_MemoryTracking()) + check_memory(memory) + +def test_tcp_multiple_sessions(): + memory = [] + memory.append(get_MemoryTracking()) + for _ in range(100): + query(sample_query) + memory.append(get_MemoryTracking()) + check_memory(memory) + +def test_tcp_single_session(): + memory = [] + memory.append(get_MemoryTracking()) + sample_queries = [ + sample_query, + "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'", + ] * 100 + rows = query(';'.join(sample_queries)) + memory = rows.split('\n') + memory = filter(lambda x: x.startswith('MemoryTracking'), memory) + memory = map(lambda x: x.split('\t')[1], memory) + memory = [*memory] + check_memory(memory) diff --git a/tests/queries/0_stateless/01540_MemoryTracking.reference b/tests/queries/0_stateless/01540_MemoryTracking.reference deleted file mode 100644 index f4f1685860b..00000000000 --- a/tests/queries/0_stateless/01540_MemoryTracking.reference +++ /dev/null @@ -1,4 +0,0 @@ -HTTP -TCP_ONE_SESSION -TCP -OK diff --git a/tests/queries/0_stateless/01540_MemoryTracking.sh b/tests/queries/0_stateless/01540_MemoryTracking.sh deleted file mode 100755 index a47b3038cd7..00000000000 --- a/tests/queries/0_stateless/01540_MemoryTracking.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. "$CURDIR"/../shell_config.sh -cd "$CURDIR" || exit 1 - -# Regression for MemoryTracker drift. -# -# To make this test indepedendent from the: -# - MemoryTracking syncing with rss (via AsynchronousMetrics.cpp) -# - and various other allocations in parallel -# Instead of measure diff of the MemoryTracking before beginning and after the -# end of all iterations, it measures MemoryTracking after each executed query -# and see how much time it had been changed. 
-# -# TODO: Once #15236 will be merged, add it to the "parallel" group in skip_list.json - -query="SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" - -function get_MemoryTracking() -{ - $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'" -} - -function test_execute_http() -{ - local i=$1 && shift - for _ in $(seq 1 "$i"); do - $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_threads=1&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"$query" | grep -F DB::Exception: - get_MemoryTracking >> 01540_MemoryTracking.memory.log - done -} -function test_execute_tcp() -{ - # NOTE: slow in debug - local i=$1 && shift - for _ in $(seq 1 "$i"); do - ${CLICKHOUSE_CLIENT} --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 -q "$query" >/dev/null - get_MemoryTracking >> 01540_MemoryTracking.memory.log - done -} -function test_execute_tcp_one_session() -{ - local i=$1 && shift - for _ in $(seq 1 "$i"); do - echo "$query;" - echo "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking';" - done | ${CLICKHOUSE_CLIENT} -nm --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 | { - grep ^MemoryTracking | cut -f2 > 01540_MemoryTracking.memory.log - } -} - -# run test and check the MemoryTracking -function run_test() -{ - local iterations=$1 && shift - local test_func=$1 && shift - - # 3 changes to MemoryTracking is minimum, since: - # - this is not that high to not detect inacuracy - # - memory can go like X/X+N due to some background allocations - # - memory can go like X/X+N/X, so at least 2 changes - local changes_allowed=3 - local changes_allowed_auto=$((iterations/6)) - if [[ $changes_allowed_auto -gt $changes_allowed ]]; then - 
# if iterations is large enough, use 6% from them - changes_allowed=$changes_allowed_auto - fi - - rm -f 01540_MemoryTracking.memory.log - $test_func "$iterations" - - local changed - changed="$(awk '{changed+=(prev && prev!=$0); prev=$0; } END {print changed}' 01540_MemoryTracking.memory.log)" - - if [[ $changed -gt $changes_allowed ]]; then - echo "Test $test_func failed. MemoryTracking changed too frequently: $changed (allowed $changes_allowed)" >&2 - fi -} - -# cleanup -trap 'rm -f 01540_MemoryTracking.memory.log' EXIT - -echo 'HTTP' -run_test 100 test_execute_http -echo 'TCP_ONE_SESSION' -run_test 100 test_execute_tcp_one_session -echo 'TCP' -run_test 100 test_execute_tcp -echo 'OK' - -exit 0 From 96da5f65345a8e3b2043a76dba53596f8f7e0c09 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Oct 2020 02:33:16 +0300 Subject: [PATCH 153/174] Disable syncing MemoryTracking with RSS for test_MemoryTracking --- .../configs/asynchronous_metrics_update_period_s.xml | 4 ++++ tests/integration/test_MemoryTracking/test.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml diff --git a/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml new file mode 100644 index 00000000000..ed131f41ede --- /dev/null +++ b/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml @@ -0,0 +1,4 @@ + + + 86400 + diff --git a/tests/integration/test_MemoryTracking/test.py b/tests/integration/test_MemoryTracking/test.py index 6009e8cc953..a0ad8dc519d 100644 --- a/tests/integration/test_MemoryTracking/test.py +++ b/tests/integration/test_MemoryTracking/test.py @@ -8,7 +8,10 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', 
main_configs=['configs/no_system_log.xml']) +node = cluster.add_instance('node', main_configs=[ + 'configs/no_system_log.xml', + 'configs/asynchronous_metrics_update_period_s.xml', +]) logging.getLogger().setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) From 21315127a0adfade3050c1d4f1662077beed9068 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 24 Oct 2020 10:05:54 +0800 Subject: [PATCH 154/174] remove unused codes in AggregateFunctionGroupBitmapData --- .../AggregateFunctionGroupBitmapData.h | 124 ------------------ 1 file changed, 124 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 20788318b3d..d80e5e81f19 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -12,7 +12,6 @@ // TODO: find out what it is. On github, they have proper interface headers like // this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h #include -void * containerptr_roaring_bitmap_add(roaring_bitmap_t * r, uint32_t val, uint8_t * typecode, int * index); namespace DB { @@ -599,129 +598,6 @@ public: } } -private: - - /// To read and write the DB Buffer directly, migrate code from CRoaring - void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args) - { - void * container = nullptr; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) - return; - uint32_t val; - readBinary(val, db_buf); - container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - ++i; - for (; i < n_args; ++i) - { - readBinary(val, db_buf); - if (((prev ^ val) >> 16) == 0) - { // no need to seek the container, 
it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode); - // rare instance when we need to - if (container2 != container) - { - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode); - typecode = newtypecode; - container = container2; - } - } - else - { - container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - } - prev = val; - } - } - - void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const - { - size_t ctr = 0; - for (Int32 i = 0; i < ra->size; ++i) - { - Int32 num_added = db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], (static_cast(ra->keys[i])) << 16); - ctr += num_added; - } - } - - UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const - { - container = container_unwrap_shared(container, &typecode); - switch (typecode) - { - case BITSET_CONTAINER_TYPE_CODE: - return db_bitset_container_to_uint32_array(db_buf, static_cast(container), base); - case ARRAY_CONTAINER_TYPE_CODE: - return db_array_container_to_uint32_array(db_buf, static_cast(container), base); - case RUN_CONTAINER_TYPE_CODE: - return db_run_container_to_uint32_array(db_buf, static_cast(container), base); - } - return 0; - } - - UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const - { - return static_cast(db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base)); - } - - size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const - { - UInt32 outpos = 0; - for (size_t i = 0; i < length; ++i) - 
{ - UInt64 w = bitset[i]; - while (w != 0) - { - UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT - UInt32 val = r + base; - writePODBinary(val, db_buf); - outpos++; - w ^= t; - } - base += 64; - } - return outpos; - } - - int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const - { - UInt32 outpos = 0; - for (Int32 i = 0; i < cont->cardinality; ++i) - { - const UInt32 val = base + cont->array[i]; - writePODBinary(val, db_buf); - outpos++; - } - return outpos; - } - - int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const - { - UInt32 outpos = 0; - for (Int32 i = 0; i < cont->n_runs; ++i) - { - UInt32 run_start = base + cont->runs[i].value; - UInt16 le = cont->runs[i].length; - for (Int32 j = 0; j <= le; ++j) - { - UInt32 val = run_start + j; - writePODBinary(val, db_buf); - outpos++; - } - } - return outpos; - } }; template From e354108e532b59c98f61ca4e72415543178a9451 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Oct 2020 10:54:02 +0800 Subject: [PATCH 155/174] ISSUES-15883 trigger CI From 74558a4e27d14ff7dc6ae21b5e7dc10cf3f48d06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 06:41:47 +0300 Subject: [PATCH 156/174] Better diagnostics when client has dropped connection --- src/Server/TCPHandler.cpp | 26 +++++++++++++++++++++++--- src/Server/TCPHandler.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 406e29ba4ab..a37f88f9306 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,6 +277,9 @@ void TCPHandler::runImpl() /// Do it before sending end of stream, to have a chance to show log message in client. 
query_scope->logPeakMemoryUsage(); + if (state.is_connection_closed) + break; + sendLogs(); sendEndOfStream(); @@ -444,7 +447,11 @@ bool TCPHandler::readDataNext(const size_t & poll_interval, const int & receive_ /// If client disconnected. if (in->eof()) + { + LOG_INFO(log, "Client has dropped the connection, cancel the query."); + state.is_connection_closed = true; return false; + } /// We accept and process data. And if they are over, then we leave. if (!receivePacket()) @@ -477,9 +484,8 @@ void TCPHandler::readData(const Settings & connection_settings) std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings); sendLogs(); - while (true) - if (!readDataNext(poll_interval, receive_timeout)) - return; + while (readDataNext(poll_interval, receive_timeout)) + ; } @@ -567,6 +573,9 @@ void TCPHandler::processOrdinaryQuery() sendProgress(); } + if (state.is_connection_closed) + return; + sendData({}); } @@ -632,6 +641,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors() sendLogs(); } + if (state.is_connection_closed) + return; + sendData({}); } @@ -1179,6 +1191,14 @@ bool TCPHandler::isQueryCancelled() /// During request execution the only packet that can come from the client is stopping the query. if (static_cast(*in).poll(0)) { + if (in->eof()) + { + LOG_INFO(log, "Client has dropped the connection, cancel the query."); + state.is_cancelled = true; + state.is_connection_closed = true; + return true; + } + UInt64 packet_type = 0; readVarUInt(packet_type, *in); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 12149d9a66f..2f2bf35e59e 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -57,6 +57,7 @@ struct QueryState /// Is request cancelled bool is_cancelled = false; + bool is_connection_closed = false; /// empty or not bool is_empty = true; /// Data was sent. 
From 0748377ab17e719caab311a8480bc23c52768df2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 24 Oct 2020 07:59:08 +0300 Subject: [PATCH 157/174] Revert "Optionally upload clickhouse binary in fast test" --- docker/test/fasttest/Dockerfile | 1 - docker/test/fasttest/run.sh | 3 --- 2 files changed, 4 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 0fff738e718..6547a98c58b 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -79,7 +79,6 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' -ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=0 COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 74fdbeedc08..91fe84a04cd 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -172,9 +172,6 @@ function build ( cd "$FASTTEST_BUILD" time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" -if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then - cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" -fi ccache --show-stats ||: ) } From c3c6ac39e012fd3eba4bc33925fdfbb39be70984 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Oct 2020 09:06:49 +0300 Subject: [PATCH 158/174] Tune TTL of the background query in 01541_max_memory_usage_for_user --- tests/queries/0_stateless/01541_max_memory_usage_for_user.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh index 7544ecfb026..945f202a803 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh @@ -35,7 +35,7 @@ function execute_tcp_one_session() # one users query in background (to avoid reseting max_memory_usage_for_user) # 
--max_block_size=1 to make it killable (check the state each 1 second, 1 row) # (the test takes ~40 seconds in debug build, so 60 seconds is ok) -${CLICKHOUSE_CLIENT} --max_block_size=1 --format Null -q 'SELECT sleepEachRow(1) FROM numbers(60)' & +${CLICKHOUSE_CLIENT} --max_block_size=1 --format Null -q 'SELECT sleepEachRow(1) FROM numbers(600)' & # trap sleep_query_pid=$! function cleanup() From 85c69aad7f3c4fbda047ca2edbead8bae8b0f39a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 09:23:54 +0300 Subject: [PATCH 159/174] Add check for BOM --- utils/check-style/check-style | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index a65ffeb8c67..4983782c00d 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -94,3 +94,8 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' | while read file; do [[ $(head # Check for executable bit on non-executable files find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -q '.' && echo "These files should not be executable." 
+ +# Check for BOM +find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' && echo "Files should not have UTF-8 BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' && echo "Files should not have UTF-16LE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' && echo "Files should not have UTF-16BE BOM" From 97d84dd85549dc7c6f635d86f70bf2bfa1b1fc89 Mon Sep 17 00:00:00 2001 From: Xianda Ke Date: Sat, 24 Oct 2020 17:37:51 +0800 Subject: [PATCH 160/174] minor fix. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit redundant variable, duplicated defination of the variable aad_data compiler error: variable ‘aad_value’ set but not used [-Werror=unused-but-set-variable] --- src/Functions/FunctionsAES.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 6600931118e..10c4a27e509 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -307,11 +307,6 @@ private: } const auto input_value = input_column->getDataAt(r); - auto aad_value = StringRef{}; - if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM && !std::is_same_v>) - { - aad_value = aad_column->getDataAt(r); - } if constexpr (mode != CipherMode::MySQLCompatibility) { From 34b9d15b66c1b468fb2d84f7097076bef8d17a63 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 24 Oct 2020 21:34:54 +0300 Subject: [PATCH 161/174] Update ThreadStatusExt.cpp --- src/Interpreters/ThreadStatusExt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index d324307b487..2166c78ef7c 100644 --- 
a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -300,7 +300,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) performance_counters.setParent(&ProfileEvents::global_counters); memory_tracker.reset(); - /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below. + /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below (will reset to its parent). memory_tracker.setParent(thread_group->memory_tracker.getParent()); query_id.clear(); From 1d170f57457fd575714797615b65f2304746e06c Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Sat, 24 Oct 2020 21:46:10 +0300 Subject: [PATCH 162/174] ASTTableIdentifier Part #1: improve internal representation of ASTIdentifier name (#16149) * Use only |name_parts| as primary name source * Restore legacy logic for table restoration * Fix build * Fix tests * Add pytest server config * Fix tests * Fixes due to review --- programs/client/Client.cpp | 2 +- programs/copier/Internals.cpp | 6 +- src/Compression/CompressionFactory.cpp | 4 +- src/DataTypes/DataTypeFactory.cpp | 2 +- .../getDictionaryConfigurationFromAST.cpp | 4 +- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/AddDefaultDatabaseVisitor.h | 15 +-- src/Interpreters/ArrayJoinedColumnsVisitor.h | 14 +-- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 12 +-- .../ExtractExpressionInfoVisitor.cpp | 2 +- src/Interpreters/IdentifierSemantic.cpp | 31 +++--- src/Interpreters/IdentifierSemantic.h | 10 +- .../JoinToSubqueryTransformVisitor.cpp | 28 +++--- src/Interpreters/JoinedTables.cpp | 8 +- src/Interpreters/QueryNormalizer.cpp | 8 +- src/Interpreters/RenameColumnVisitor.cpp | 5 +- .../RequiredSourceColumnsData.cpp | 4 +- .../RequiredSourceColumnsVisitor.cpp | 7 +- .../TranslateQualifiedNamesVisitor.cpp | 11 ++- .../evaluateConstantExpression.cpp | 4 +- 
src/Interpreters/getClusterName.cpp | 2 +- src/Parsers/ASTColumnsTransformers.cpp | 2 +- src/Parsers/ASTIdentifier.cpp | 99 +++++++++++-------- src/Parsers/ASTIdentifier.h | 43 ++++---- src/Parsers/ExpressionElementParsers.cpp | 12 +-- src/Parsers/ExpressionListParsers.cpp | 14 ++- src/Parsers/MySQL/ASTDeclareConstraint.cpp | 2 +- src/Parsers/MySQL/ASTDeclareIndex.cpp | 18 ++-- src/Parsers/MySQL/ASTDeclarePartition.cpp | 2 +- src/Parsers/MySQL/ASTDeclareReference.cpp | 2 +- src/Parsers/MySQL/ASTDeclareSubPartition.cpp | 2 +- .../MySQL/tests/gtest_column_parser.cpp | 8 +- .../MySQL/tests/gtest_constraint_parser.cpp | 10 +- .../MySQL/tests/gtest_create_parser.cpp | 2 +- .../MySQL/tests/gtest_index_parser.cpp | 12 +-- .../tests/gtest_partition_options_parser.cpp | 38 +++---- .../MySQL/tests/gtest_partition_parser.cpp | 34 +++---- .../MySQL/tests/gtest_reference_parser.cpp | 22 ++--- .../MySQL/tests/gtest_subpartition_parser.cpp | 8 +- .../tests/gtest_table_options_parser.cpp | 24 ++--- src/Parsers/ParserCreateQuery.cpp | 4 +- src/Parsers/ParserDictionary.cpp | 4 +- src/Parsers/ParserSystemQuery.cpp | 4 +- src/Parsers/tests/gtest_dictionary_parser.cpp | 2 +- src/Storages/AlterCommands.cpp | 10 +- src/Storages/MutationCommands.cpp | 8 +- src/Storages/StorageDistributed.cpp | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- tests/queries/server.py | 41 +++++--- utils/db-generator/query_db_generator.cpp | 14 +-- 50 files changed, 322 insertions(+), 304 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 07f2a231afe..3cd584c0e55 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1896,7 +1896,7 @@ private: if (has_vertical_output_suffix) throw Exception("Output format already specified", ErrorCodes::CLIENT_OUTPUT_FORMAT_SPECIFIED); const auto & id = query_with_output->format->as(); - current_format = id.name; + current_format = id.name(); } } diff --git a/programs/copier/Internals.cpp 
b/programs/copier/Internals.cpp index 24b5d616af4..0f607ea5faf 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -168,11 +168,11 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); } -/// Wraps only identifiers with backticks. +/// Wraps only identifiers with backticks. std::string wrapIdentifiersWithBackticks(const ASTPtr & root) { if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name); + return backQuote(identifier->name()); if (auto function = std::dynamic_pointer_cast(root)) return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; @@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) for (size_t i = 0; i < sorting_key_size; ++i) { /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. + /// Each f_i could take one or more parameters. /// We will wrap identifiers with backticks to allow non-standart identifier names. 
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 91b4aa4b8de..fe6a5b2dacd 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -76,7 +76,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr ASTPtr codec_arguments; if (const auto * family_name = inner_codec_ast->as()) { - codec_family_name = family_name->name; + codec_family_name = family_name->name(); codec_arguments = {}; } else if (const auto * ast_func = inner_codec_ast->as()) @@ -207,7 +207,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IData ASTPtr codec_arguments; if (const auto * family_name = inner_codec_ast->as()) { - codec_family_name = family_name->name; + codec_family_name = family_name->name(); codec_arguments = {}; } else if (const auto * ast_func = inner_codec_ast->as()) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 9386f4b39f1..5052a065163 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -43,7 +43,7 @@ DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const if (const auto * ident = ast->as()) { - return get(ident->name, {}); + return get(ident->name(), {}); } if (const auto * lit = ast->as()) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index b1962e48eea..430c1d591dd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -172,7 +172,7 @@ Names getPrimaryKeyColumns(const ASTExpressionList * primary_key) for (size_t index = 0; index != children.size(); ++index) { const ASTIdentifier * key_part = children[index]->as(); - result.push_back(key_part->name); + result.push_back(key_part->name()); } return result; } @@ -367,7 
+367,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments( if (const auto * identifier = pair->second->as(); identifier) { - AutoPtr value(doc->createTextNode(identifier->name)); + AutoPtr value(doc->createTextNode(identifier->name())); current_xml_element->appendChild(value); } else if (const auto * literal = pair->second->as(); literal) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e0e921b003b..96da40e8f6c 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -582,7 +582,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, /// Special check for WITH statement alias. Add alias action to be able to use this alias. if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) - data.addAlias(identifier.name, identifier.alias); + data.addAlias(identifier.name(), identifier.alias); } } diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 9322232c154..bb684c5547a 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -24,11 +24,12 @@ namespace DB class AddDefaultDatabaseVisitor { public: - AddDefaultDatabaseVisitor(const String & database_name_, bool only_replace_current_database_function_ = false, std::ostream * ostr_ = nullptr) - : database_name(database_name_), - only_replace_current_database_function(only_replace_current_database_function_), - visit_depth(0), - ostr(ostr_) + explicit AddDefaultDatabaseVisitor( + const String & database_name_, bool only_replace_current_database_function_ = false, std::ostream * ostr_ = nullptr) + : database_name(database_name_) + , only_replace_current_database_function(only_replace_current_database_function_) + , visit_depth(0) + , ostr(ostr_) {} void visitDDL(ASTPtr & ast) const @@ -105,7 +106,7 @@ private: void visit(const ASTIdentifier & identifier, ASTPtr & ast) const { if 
(!identifier.compound()) - ast = createTableIdentifier(database_name, identifier.name); + ast = createTableIdentifier(database_name, identifier.name()); } void visit(ASTSubquery & subquery, ASTPtr &) const @@ -116,7 +117,7 @@ private: void visit(ASTFunction & function, ASTPtr &) const { bool is_operator_in = false; - for (auto name : {"in", "notIn", "globalIn", "globalNotIn"}) + for (const auto * name : {"in", "notIn", "globalIn", "globalNotIn"}) { if (function.name == name) { diff --git a/src/Interpreters/ArrayJoinedColumnsVisitor.h b/src/Interpreters/ArrayJoinedColumnsVisitor.h index 56832914b80..94f6bdaf138 100644 --- a/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -98,33 +98,33 @@ private: if (!IdentifierSemantic::getColumnName(node)) return; - auto split = Nested::splitName(node.name); /// ParsedParams, Key1 + auto split = Nested::splitName(node.name()); /// ParsedParams, Key1 - if (array_join_alias_to_name.count(node.name)) + if (array_join_alias_to_name.count(node.name())) { /// ARRAY JOIN was written with an array column. Example: SELECT K1 FROM ... ARRAY JOIN ParsedParams.Key1 AS K1 - array_join_result_to_source[node.name] = array_join_alias_to_name[node.name]; /// K1 -> ParsedParams.Key1 + array_join_result_to_source[node.name()] = array_join_alias_to_name[node.name()]; /// K1 -> ParsedParams.Key1 } else if (array_join_alias_to_name.count(split.first) && !split.second.empty()) { /// ARRAY JOIN was written with a nested table. Example: SELECT PP.KEY1 FROM ... ARRAY JOIN ParsedParams AS PP - array_join_result_to_source[node.name] /// PP.Key1 -> ParsedParams.Key1 + array_join_result_to_source[node.name()] /// PP.Key1 -> ParsedParams.Key1 = Nested::concatenateName(array_join_alias_to_name[split.first], split.second); } - else if (array_join_name_to_alias.count(node.name)) + else if (array_join_name_to_alias.count(node.name())) { /** Example: SELECT ParsedParams.Key1 FROM ... 
ARRAY JOIN ParsedParams.Key1 AS PP.Key1. * That is, the query uses the original array, replicated by itself. */ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - array_join_name_to_alias[node.name]] = node.name; + array_join_name_to_alias[node.name()]] = node.name(); } else if (array_join_name_to_alias.count(split.first) && !split.second.empty()) { /** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams AS PP. */ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - Nested::concatenateName(array_join_name_to_alias[split.first], split.second)] = node.name; + Nested::concatenateName(array_join_name_to_alias[split.first], split.second)] = node.name(); } } }; diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index e0fce4854d2..48273e32209 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -144,11 +144,11 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases) { - if (identifier->compound()) + if (identifier->supposedToBeCompound()) return identifier; UInt32 max_attempts = 100; - for (auto it = aliases.find(identifier->name); it != aliases.end();) + for (auto it = aliases.find(identifier->name()); it != aliases.end();) { const ASTIdentifier * parent = identifier; identifier = it->second->as(); @@ -156,12 +156,12 @@ const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifie break; /// not a column alias if (identifier == parent) break; /// alias to itself with the same name: 'a as a' - if (identifier->compound()) + if (identifier->supposedToBeCompound()) break; /// not an alias. 
Break to prevent cycle through short names: 'a as b, t1.b as a' - it = aliases.find(identifier->name); + it = aliases.find(identifier->name()); if (!max_attempts--) - throw Exception("Cannot unroll aliases for '" + identifier->name + "'", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot unroll aliases for '" + identifier->name() + "'", ErrorCodes::LOGICAL_ERROR); } return identifier; @@ -186,7 +186,7 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vectorname; + const String & name = identifier->name(); bool in_left_table = data.left_table.hasColumn(name); bool in_right_table = data.right_table.hasColumn(name); diff --git a/src/Interpreters/ExtractExpressionInfoVisitor.cpp b/src/Interpreters/ExtractExpressionInfoVisitor.cpp index 5f7754d315a..c730f49fe90 100644 --- a/src/Interpreters/ExtractExpressionInfoVisitor.cpp +++ b/src/Interpreters/ExtractExpressionInfoVisitor.cpp @@ -41,7 +41,7 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr const auto & table = data.tables[index]; // TODO: make sure no collision ever happens - if (table.hasColumn(identifier.name)) + if (table.hasColumn(identifier.name())) { data.unique_reference_tables_pos.emplace(index); break; diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 256a3784c77..a1fc533eb7f 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -51,7 +51,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std if ((best_match != ColumnMatch::NoMatch) && same_match) { if (!allow_ambiguous) - throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception("Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); best_match = ColumnMatch::Ambiguous; return {}; } @@ -66,7 +66,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std std::optional 
IdentifierSemantic::getColumnName(const ASTIdentifier & node) { if (!node.semantic->special) - return node.name; + return node.name(); return {}; } @@ -75,14 +75,14 @@ std::optional IdentifierSemantic::getColumnName(const ASTPtr & ast) if (ast) if (const auto * id = ast->as()) if (!id->semantic->special) - return id->name; + return id->name(); return {}; } std::optional IdentifierSemantic::getTableName(const ASTIdentifier & node) { if (node.semantic->special) - return node.name; + return node.name(); return {}; } @@ -91,7 +91,7 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) if (ast) if (const auto * id = ast->as()) if (id->semantic->special) - return id->name; + return id->name(); return {}; } @@ -151,7 +151,7 @@ StorageID IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & iden if (identifier.name_parts.size() == 2) return { identifier.name_parts[0], identifier.name_parts[1], identifier.uuid }; - return { "", identifier.name, identifier.uuid }; + return { "", identifier.name_parts[0], identifier.uuid }; } std::optional IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const String & table_name) @@ -232,16 +232,8 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const Da if (!to_strip) return; - std::vector stripped(identifier.name_parts.begin() + to_strip, identifier.name_parts.end()); - - DB::String new_name; - for (const auto & part : stripped) - { - if (!new_name.empty()) - new_name += '.'; - new_name += part; - } - identifier.name.swap(new_name); + identifier.name_parts = std::vector(identifier.name_parts.begin() + to_strip, identifier.name_parts.end()); + identifier.resetFullName(); } void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) @@ -249,10 +241,11 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat String prefix = db_and_table.getQualifiedNamePrefix(); if (!prefix.empty()) 
{ - String short_name = identifier.shortName(); - identifier.name = prefix + short_name; prefix.resize(prefix.size() - 1); /// crop dot - identifier.name_parts = {prefix, short_name}; + identifier.name_parts = {prefix, identifier.shortName()}; + identifier.resetFullName(); + identifier.semantic->table = prefix; + identifier.semantic->legacy_compound = true; } } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index ca5a923c2ea..80b55ba0537 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -10,10 +10,12 @@ namespace DB struct IdentifierSemanticImpl { - bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT - bool can_be_alias = true; /// if it's a cropped name it could not be an alias - bool covered = false; /// real (compound) name is hidden by an alias (short name) - std::optional membership; /// table position in join + bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT + bool can_be_alias = true; /// if it's a cropped name it could not be an alias + bool covered = false; /// real (compound) name is hidden by an alias (short name) + std::optional membership; /// table position in join + String table = {}; /// store table name for columns just to support legacy logic. 
+ bool legacy_compound = false; /// true if identifier supposed to be comply for legacy |compound()| behavior }; /// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index cdd7ec3ebf9..372bbfbe648 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -117,7 +117,7 @@ private: throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); ASTIdentifier & identifier = child->children[0]->as(); - data.addTableColumns(identifier.name); + data.addTableColumns(identifier.name()); } else data.new_select_expression_list->children.push_back(child); @@ -228,7 +228,7 @@ struct CollectColumnIdentifiersMatcher void addIdentirier(const ASTIdentifier & ident) { for (const auto & aliases : ignored) - if (aliases.count(ident.name)) + if (aliases.count(ident.name())) return; identifiers.push_back(const_cast(&ident)); } @@ -293,7 +293,7 @@ struct CheckAliasDependencyVisitorData void visit(ASTIdentifier & ident, ASTPtr &) { - if (!dependency && aliases.count(ident.name)) + if (!dependency && aliases.count(ident.name())) dependency = &ident; } }; @@ -467,7 +467,7 @@ std::vector normalizeColumnNamesExtractNeeded( for (ASTIdentifier * ident : identifiers) { - bool got_alias = aliases.count(ident->name); + bool got_alias = aliases.count(ident->name()); bool allow_ambiguous = got_alias; /// allow ambiguous column overridden by an alias if (auto table_pos = IdentifierSemantic::chooseTableColumnMatch(*ident, tables, allow_ambiguous)) @@ -475,12 +475,12 @@ std::vector normalizeColumnNamesExtractNeeded( if (!ident->isShort()) { if (got_alias) - throw Exception("Alias clashes with qualified column '" + ident->name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception("Alias clashes with qualified column '" + ident->name() + "'", 
ErrorCodes::AMBIGUOUS_COLUMN_NAME); String short_name = ident->shortName(); String original_long_name; if (public_identifiers.count(ident)) - original_long_name = ident->name; + original_long_name = ident->name(); size_t count = countTablesWithColumn(tables, short_name); @@ -488,7 +488,7 @@ std::vector normalizeColumnNamesExtractNeeded( { const auto & table = tables[*table_pos]; IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column - auto & unique_long_name = ident->name; + const auto & unique_long_name = ident->name(); /// For tables moved into subselects we need unique short names for clashed names if (*table_pos != last_table_pos) @@ -512,7 +512,7 @@ std::vector normalizeColumnNamesExtractNeeded( needed_columns[*table_pos].no_clashes.emplace(ident->shortName()); } else if (!got_alias) - throw Exception("Unknown column name '" + ident->name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown column name '" + ident->name() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); } return needed_columns; @@ -613,12 +613,12 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast { for (auto * ident : on_identifiers) { - auto it = data.aliases.find(ident->name); - if (!on_aliases.count(ident->name) && it != data.aliases.end()) + auto it = data.aliases.find(ident->name()); + if (!on_aliases.count(ident->name()) && it != data.aliases.end()) { auto alias_expression = it->second; alias_pushdown[table_pos].push_back(alias_expression); - on_aliases[ident->name] = alias_expression; + on_aliases[ident->name()] = alias_expression; } } } @@ -638,14 +638,14 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast CheckAliasDependencyVisitor(check).visit(expr.second); if (check.dependency) throw Exception("Cannot rewrite JOINs. 
Alias '" + expr.first + - "' used in ON section depends on another alias '" + check.dependency->name + "'", + "' used in ON section depends on another alias '" + check.dependency->name() + "'", ErrorCodes::NOT_IMPLEMENTED); } /// Check same name in aliases, USING and ON sections. Cannot push down alias to ON through USING cause of name masquerading. for (auto * ident : using_identifiers) - if (on_aliases.count(ident->name)) - throw Exception("Cannot rewrite JOINs. Alias '" + ident->name + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); + if (on_aliases.count(ident->name())) + throw Exception("Cannot rewrite JOINs. Alias '" + ident->name() + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); using_identifiers.clear(); /// Replace pushdowned expressions with aliases names in original expression lists. diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index d38a3fa68dc..c0511122c1e 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -49,7 +49,7 @@ void replaceJoinedTable(const ASTSelectQuery & select_query) if (table_expr.database_and_table_name) { const auto & table_id = table_expr.database_and_table_name->as(); - String expr = "(select * from " + table_id.name + ") as " + table_id.shortName(); + String expr = "(select * from " + table_id.name() + ") as " + table_id.shortName(); // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", // we can't replace aliased tables. @@ -99,7 +99,7 @@ private: match == IdentifierSemantic::ColumnMatch::DbAndTable) { if (rewritten) - throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name + "'", + throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); /// Table has an alias. So we set a new name qualified by table alias. 
IdentifierSemantic::setColumnLongName(identifier, table); @@ -114,10 +114,10 @@ private: bool rewritten = false; for (const auto & table : data) { - if (identifier.name == table.table) + if (identifier.name() == table.table) { if (rewritten) - throw Exception("Failed to rewrite distributed table. Ambiguous column '" + identifier.name + "'", + throw Exception("Failed to rewrite distributed table. Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); identifier.setShortName(table.alias); rewritten = true; diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 07d4888b555..3252626959d 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -73,8 +73,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) return; /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). - auto it_alias = data.aliases.find(node.name); - if (it_alias != data.aliases.end() && current_alias != node.name) + auto it_alias = data.aliases.find(node.name()); + if (it_alias != data.aliases.end() && current_alias != node.name()) { if (!IdentifierSemantic::canBeAlias(node)) return; @@ -89,7 +89,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) String node_alias = ast->tryGetAlias(); if (current_asts.count(alias_node.get()) /// We have loop of multiple aliases - || (node.name == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop + || (node.name() == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); /// Let's replace it with the corresponding tree node. 
@@ -97,7 +97,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) { /// Avoid infinite recursion here auto opt_name = IdentifierSemantic::getColumnName(alias_node); - bool is_cycle = opt_name && *opt_name == node.name; + bool is_cycle = opt_name && *opt_name == node.name(); if (!is_cycle) { diff --git a/src/Interpreters/RenameColumnVisitor.cpp b/src/Interpreters/RenameColumnVisitor.cpp index f94353cf646..22bbfc7a83f 100644 --- a/src/Interpreters/RenameColumnVisitor.cpp +++ b/src/Interpreters/RenameColumnVisitor.cpp @@ -3,10 +3,13 @@ namespace DB { + void RenameColumnData::visit(ASTIdentifier & identifier, ASTPtr &) const { + // TODO(ilezhankin): make proper rename std::optional identifier_column_name = IdentifierSemantic::getColumnName(identifier); if (identifier_column_name && identifier_column_name == column_name) - identifier.name = rename_to; + identifier.setShortName(rename_to); } + } diff --git a/src/Interpreters/RequiredSourceColumnsData.cpp b/src/Interpreters/RequiredSourceColumnsData.cpp index ec9f2ca4817..9118acc38c9 100644 --- a/src/Interpreters/RequiredSourceColumnsData.cpp +++ b/src/Interpreters/RequiredSourceColumnsData.cpp @@ -27,7 +27,7 @@ void RequiredSourceColumnsData::addColumnIdentifier(const ASTIdentifier & node) /// There should be no complex cases after query normalization. Names to aliases: one-to-many. 
String alias = node.tryGetAlias(); - required_names[node.name].addInclusion(alias); + required_names[node.name()].addInclusion(alias); } bool RequiredSourceColumnsData::addArrayJoinAliasIfAny(const IAST & ast) @@ -42,7 +42,7 @@ bool RequiredSourceColumnsData::addArrayJoinAliasIfAny(const IAST & ast) void RequiredSourceColumnsData::addArrayJoinIdentifier(const ASTIdentifier & node) { - array_join_columns.insert(node.name); + array_join_columns.insert(node.name()); } size_t RequiredSourceColumnsData::nameInclusion(const String & name) const diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index e546a40f28d..5a265b59414 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -34,7 +34,7 @@ std::vector RequiredSourceColumnsMatcher::extractNamesFromLambda(const A if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - names.push_back(identifier->name); + names.push_back(identifier->name()); } return names; @@ -132,10 +132,11 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data) { - if (node.name.empty()) + // FIXME(ilezhankin): shouldn't ever encounter + if (node.name().empty()) throw Exception("Expected not empty name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!data.private_aliases.count(node.name)) + if (!data.private_aliases.count(node.name())) data.addColumnIdentifier(node); } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 32d8841d7b4..98ed2166c40 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -104,7 +104,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr 
&, if (data.unknownColumn(table_pos, identifier)) { String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false); - throw Exception("There's no column '" + identifier.name + "' in table '" + table_name + "'", + throw Exception("There's no column '" + identifier.name() + "' in table '" + table_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); } @@ -175,9 +175,12 @@ void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name) { + std::vector parts = {column_name}; + String table_name = table.getQualifiedNamePrefix(false); - auto identifier = std::make_shared(std::vector{table_name, column_name}); - nodes.emplace_back(identifier); + if (!table_name.empty()) parts.insert(parts.begin(), table_name); + + nodes.emplace_back(std::make_shared(std::move(parts))); } /// Replace *, alias.*, database.table.* with a list of columns. @@ -354,7 +357,7 @@ void RestoreQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, D { if (IdentifierSemantic::getMembership(identifier)) { - identifier.restoreCompoundName(); + identifier.restoreTable(); // TODO(ilezhankin): should restore qualified name here - why exactly here? 
if (data.rename) data.changeTable(identifier); } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index e1f53c72801..02ef3426483 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -72,7 +72,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context) { if (const auto * id = node->as()) - return std::make_shared(id->name); + return std::make_shared(id->name()); return evaluateConstantExpressionAsLiteral(node, context); } @@ -113,7 +113,7 @@ namespace const auto & name = name_and_type.name; const auto & type = name_and_type.type; - if (name == identifier->name) + if (name == identifier->name()) { ColumnWithTypeAndName column; Field converted = convertFieldToType(value, *type); diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index 60040ce4cb0..01e45e1d650 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes std::string getClusterName(const IAST & node) { if (const auto * ast_id = node.as()) - return ast_id->name; + return ast_id->name(); if (const auto * ast_lit = node.as()) return ast_lit->value.safeGet(); diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 43d54f07ab8..fee606aec26 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -71,7 +71,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const { for (const auto & except_child : children) { - if (except_child->as().name == id->shortName()) + if (except_child->as().name() == id->shortName()) return true; } } diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 9117be46e51..d980300a22a 100644 --- 
a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -1,10 +1,10 @@ -#include #include -#include + #include #include #include #include +#include namespace DB @@ -16,6 +16,27 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +ASTIdentifier::ASTIdentifier(const String & short_name) + : full_name(short_name), name_parts{short_name}, semantic(std::make_shared()) +{ + assert(!full_name.empty()); +} + +ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special) + : name_parts(name_parts_), semantic(std::make_shared()) +{ + assert(!name_parts.empty()); + for (const auto & part [[maybe_unused]] : name_parts) + assert(!part.empty()); + + semantic->special = special; + semantic->legacy_compound = true; + + if (!special && name_parts.size() >= 2) + semantic->table = name_parts.end()[-2]; + + resetFullName(); +} ASTPtr ASTIdentifier::clone() const { @@ -24,51 +45,29 @@ ASTPtr ASTIdentifier::clone() const return ret; } -std::shared_ptr ASTIdentifier::createSpecial(const String & name, std::vector && name_parts) +bool ASTIdentifier::supposedToBeCompound() const { - auto ret = std::make_shared(name, std::move(name_parts)); - ret->semantic->special = true; - return ret; + return semantic->legacy_compound; } -ASTIdentifier::ASTIdentifier(const String & name_, std::vector && name_parts_) - : name(name_) - , name_parts(name_parts_) - , semantic(std::make_shared()) -{ - if (!name_parts.empty() && name_parts[0].empty()) - name_parts.erase(name_parts.begin()); - - if (name.empty()) - { - if (name_parts.size() == 2) - name = name_parts[0] + '.' 
+ name_parts[1]; - else if (name_parts.size() == 1) - name = name_parts[0]; - } -} - -ASTIdentifier::ASTIdentifier(std::vector && name_parts_) - : ASTIdentifier("", std::move(name_parts_)) -{} - void ASTIdentifier::setShortName(const String & new_name) { - name = new_name; - name_parts.clear(); + assert(!new_name.empty()); + + full_name = new_name; + name_parts = {new_name}; bool special = semantic->special; *semantic = IdentifierSemanticImpl(); semantic->special = special; } -void ASTIdentifier::restoreCompoundName() +const String & ASTIdentifier::name() const { - if (name_parts.empty()) - return; - name = name_parts[0]; - for (size_t i = 1; i < name_parts.size(); ++i) - name += '.' + name_parts[i]; + assert(!name_parts.empty()); + assert(!full_name.empty()); + + return full_name; } void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const @@ -93,20 +92,29 @@ void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, Form } else { - format_element(name); + format_element(shortName()); } } void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const { - writeString(name, ostr); + writeString(name(), ostr); +} + +void ASTIdentifier::restoreTable() +{ + if (!compound()) + { + name_parts.insert(name_parts.begin(), semantic->table); + resetFullName(); + } } void ASTIdentifier::resetTable(const String & database_name, const String & table_name) { auto ast = createTableIdentifier(database_name, table_name); auto & ident = ast->as(); - name.swap(ident.name); + full_name.swap(ident.full_name); name_parts.swap(ident.name_parts); uuid = ident.uuid; } @@ -117,6 +125,13 @@ void ASTIdentifier::updateTreeHashImpl(SipHash & hash_state) const IAST::updateTreeHashImpl(hash_state); } +void ASTIdentifier::resetFullName() +{ + full_name = name_parts[0]; + for (size_t i = 1; i < name_parts.size(); ++i) + full_name += '.' 
+ name_parts[i]; +} + ASTPtr createTableIdentifier(const String & database_name, const String & table_name) { assert(database_name != "_temporary_and_external_tables"); @@ -127,9 +142,9 @@ ASTPtr createTableIdentifier(const StorageID & table_id) { std::shared_ptr res; if (table_id.database_name.empty()) - res = ASTIdentifier::createSpecial(table_id.table_name); + res = std::make_shared(std::vector{table_id.table_name}, true); else - res = ASTIdentifier::createSpecial(table_id.database_name + "." + table_id.table_name, {table_id.database_name, table_id.table_name}); + res = std::make_shared(std::vector{table_id.database_name, table_id.table_name}, true); res->uuid = table_id.uuid; return res; } @@ -156,7 +171,7 @@ bool tryGetIdentifierNameInto(const IAST * ast, String & name) { if (const auto * node = ast->as()) { - name = node->name; + name = node->name(); return true; } } @@ -180,7 +195,7 @@ StorageID getTableIdentifier(const ASTPtr & ast) if (identifier.name_parts.size() == 2) return { identifier.name_parts[0], identifier.name_parts[1], identifier.uuid }; - return { "", identifier.name, identifier.uuid }; + return { "", identifier.name_parts[0], identifier.uuid }; } } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index 5c06fa7fa38..59f698eab1c 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -18,59 +18,54 @@ struct StorageID; class ASTIdentifier : public ASTWithAlias { public: - /// The composite identifier will have a concatenated name (of the form a.b.c), - /// and individual components will be available inside the name_parts. - String name; UUID uuid = UUIDHelpers::Nil; - ASTIdentifier(const String & name_, std::vector && name_parts_ = {}); - ASTIdentifier(std::vector && name_parts_); + explicit ASTIdentifier(const String & short_name); + explicit ASTIdentifier(std::vector && name_parts, bool special = false); /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "Identifier" + (delim + name); } + String getID(char delim) const override { return "Identifier" + (delim + name()); } ASTPtr clone() const override; - void collectIdentifierNames(IdentifierNameSet & set) const override - { - set.insert(name); - } + void collectIdentifierNames(IdentifierNameSet & set) const override { set.insert(name()); } - bool compound() const { return !name_parts.empty(); } - bool isShort() const { return name_parts.empty() || name == name_parts.back(); } + bool compound() const { return name_parts.size() > 1; } + bool isShort() const { return name_parts.size() == 1; } + bool supposedToBeCompound() const; // TODO(ilezhankin): get rid of this void setShortName(const String & new_name); - /// Restore name field from name_parts in case it was cropped by analyzer but we need a full form for future (re)analyze. - void restoreCompoundName(); + /// The composite identifier will have a concatenated name (of the form a.b.c), + /// and individual components will be available inside the name_parts. + const String & shortName() const { return name_parts.back(); } + const String & name() const; - const String & shortName() const - { - if (!name_parts.empty()) - return name_parts.back(); - return name; - } + void restoreTable(); // TODO(ilezhankin): get rid of this - void resetTable(const String & database_name, const String & table_name); + // FIXME: used only when it's needed to rewrite distributed table name to real remote table name. 
+ void resetTable(const String & database_name, const String & table_name); // TODO(ilezhankin): get rid of this void updateTreeHashImpl(SipHash & hash_state) const override; protected: + String full_name; + std::vector name_parts; + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; private: using ASTWithAlias::children; /// ASTIdentifier is child free - std::vector name_parts; std::shared_ptr semantic; /// pimpl - static std::shared_ptr createSpecial(const String & name, std::vector && name_parts = {}); - friend struct IdentifierSemantic; friend ASTPtr createTableIdentifier(const StorageID & table_id); friend void setIdentifierSpecial(ASTPtr & ast); friend StorageID getTableIdentifier(const ASTPtr & ast); + + void resetFullName(); }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index b26e73287d0..3c45bd005a9 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -184,16 +184,10 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex .parse(pos, id_list, expected)) return false; - String name; std::vector parts; const auto & list = id_list->as(); for (const auto & child : list.children) - { - if (!name.empty()) - name += '.'; parts.emplace_back(getIdentifierName(child)); - name += parts.back(); - } ParserKeyword s_uuid("UUID"); UUID uuid = UUIDHelpers::Nil; @@ -207,9 +201,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex uuid = parseFromString(ast_uuid->as()->value.get()); } - if (parts.size() == 1) - parts.clear(); - node = std::make_shared(name, std::move(parts)); + node = std::make_shared(std::move(parts)); node->as()->uuid = uuid; return true; @@ -1651,7 +1643,7 @@ bool ParserFunctionWithKeyValueArguments::parseImpl(Pos & pos, ASTPtr & node, Ex } auto function = 
std::make_shared(left_bracket_found); - function->name = Poco::toLower(typeid_cast(*identifier.get()).name); + function->name = Poco::toLower(identifier->as()->name()); function->elements = expr_list_args; function->children.push_back(function->elements); node = function; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index d6678bb9a78..ad03d949174 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1,13 +1,11 @@ -#include +#include + #include #include -#include -#include -#include -#include #include - -#include +#include +#include +#include #include @@ -750,7 +748,7 @@ bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } auto pair = std::make_shared(with_brackets); - pair->first = Poco::toLower(typeid_cast(*identifier.get()).name); + pair->first = Poco::toLower(identifier->as()->name()); pair->set(pair->second, value); node = pair; return true; diff --git a/src/Parsers/MySQL/ASTDeclareConstraint.cpp b/src/Parsers/MySQL/ASTDeclareConstraint.cpp index 0f447fb3b40..96184dfc89d 100644 --- a/src/Parsers/MySQL/ASTDeclareConstraint.cpp +++ b/src/Parsers/MySQL/ASTDeclareConstraint.cpp @@ -63,7 +63,7 @@ bool ParserDeclareConstraint::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec declare_constraint->check_expression = index_check_expression; if (constraint_symbol) - declare_constraint->constraint_name = constraint_symbol->as()->name; + declare_constraint->constraint_name = constraint_symbol->as()->name(); node = declare_constraint; return true; diff --git a/src/Parsers/MySQL/ASTDeclareIndex.cpp b/src/Parsers/MySQL/ASTDeclareIndex.cpp index 8e6e9d43793..c5b4686e976 100644 --- a/src/Parsers/MySQL/ASTDeclareIndex.cpp +++ b/src/Parsers/MySQL/ASTDeclareIndex.cpp @@ -73,7 +73,7 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "SPATIAL"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = 
temp_node->as()->name; + index_name = temp_node->as()->name(); } else if (ParserKeyword("FULLTEXT").ignore(pos, expected)) { @@ -82,7 +82,7 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "FULLTEXT"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); } else { @@ -94,14 +94,14 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "KEY_BTREE"; /// default index type if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); if (ParserKeyword("USING").ignore(pos, expected)) { if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = "KEY_" + temp_node->as()->name; + index_type = "KEY_" + temp_node->as()->name(); } } @@ -122,7 +122,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); } } @@ -132,7 +132,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde ParserKeyword("INDEX").ignore(pos, expected); if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; /// reset index_name + index_name = temp_node->as()->name(); /// reset index_name index_type = "UNIQUE_BTREE"; /// default btree index_type if (ParserKeyword("USING").ignore(pos, expected)) @@ -140,7 +140,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = "UNIQUE_" + temp_node->as()->name; + index_type = "UNIQUE_" + temp_node->as()->name(); } } else if (ParserKeyword("PRIMARY KEY").ignore(pos, expected)) @@ -151,14 +151,14 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if 
(!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = "PRIMARY_KEY_" + temp_node->as()->name; + index_type = "PRIMARY_KEY_" + temp_node->as()->name(); } } else if (ParserKeyword("FOREIGN KEY").ignore(pos, expected)) { index_type = "FOREIGN"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; /// reset index_name + index_name = temp_node->as()->name(); /// reset index_name } return true; diff --git a/src/Parsers/MySQL/ASTDeclarePartition.cpp b/src/Parsers/MySQL/ASTDeclarePartition.cpp index 8e1d27778b5..76f864fcc44 100644 --- a/src/Parsers/MySQL/ASTDeclarePartition.cpp +++ b/src/Parsers/MySQL/ASTDeclarePartition.cpp @@ -107,7 +107,7 @@ bool ParserDeclarePartition::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect partition_declare->less_than = less_than; partition_declare->in_expression = in_expression; partition_declare->subpartitions = subpartitions; - partition_declare->partition_name = partition_name->as()->name; + partition_declare->partition_name = partition_name->as()->name(); if (options) { diff --git a/src/Parsers/MySQL/ASTDeclareReference.cpp b/src/Parsers/MySQL/ASTDeclareReference.cpp index 434b9561eda..862d35e2b76 100644 --- a/src/Parsers/MySQL/ASTDeclareReference.cpp +++ b/src/Parsers/MySQL/ASTDeclareReference.cpp @@ -95,7 +95,7 @@ bool ParserDeclareReference::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect declare_reference->on_delete_option = delete_option; declare_reference->on_update_option = update_option; declare_reference->reference_expression = expression; - declare_reference->reference_table_name = table_name->as()->name; + declare_reference->reference_table_name = table_name->as()->name(); node = declare_reference; return true; diff --git a/src/Parsers/MySQL/ASTDeclareSubPartition.cpp b/src/Parsers/MySQL/ASTDeclareSubPartition.cpp index 1b2d9c081e6..d77fba271c4 100644 --- a/src/Parsers/MySQL/ASTDeclareSubPartition.cpp +++ b/src/Parsers/MySQL/ASTDeclareSubPartition.cpp @@ -41,7 
+41,7 @@ bool ParserDeclareSubPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto subpartition_declare = std::make_shared(); subpartition_declare->options = options; - subpartition_declare->logical_name = logical_name->as()->name; + subpartition_declare->logical_name = logical_name->as()->name(); if (options) { diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index ef6371f71d9..de4c64be817 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -29,9 +29,9 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); - EXPECT_EQ(declare_options->changes["column_format"]->as()->name, "FIXED"); - EXPECT_EQ(declare_options->changes["storage"]->as()->name, "MEMORY"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf-8"); + EXPECT_EQ(declare_options->changes["column_format"]->as()->name(), "FIXED"); + EXPECT_EQ(declare_options->changes["storage"]->as()->name(), "MEMORY"); EXPECT_TRUE(declare_options->changes["reference"]->as()); EXPECT_TRUE(declare_options->changes["constraint"]->as()); } @@ -52,7 +52,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf-8"); EXPECT_EQ(declare_options->changes["generated"]->as()->value.safeGet(), 1); 
EXPECT_EQ(declare_options->changes["is_stored"]->as()->value.safeGet(), 1); EXPECT_TRUE(declare_options->changes["reference"]->as()); diff --git a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp index de885bf36c8..9c9124c9f58 100644 --- a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp @@ -18,7 +18,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_01->as()->constraint_name, "symbol_name"); auto * check_expression_01 = ast_constraint_01->as()->check_expression->as(); EXPECT_EQ(check_expression_01->name, "equals"); - EXPECT_EQ(check_expression_01->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_01->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_01->arguments->children[1]->as()->value.safeGet(), 1); String constraint_02 = "CONSTRAINT CHECK col_01 = 1"; @@ -26,7 +26,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_02->as()->constraint_name, ""); auto * check_expression_02 = ast_constraint_02->as()->check_expression->as(); EXPECT_EQ(check_expression_02->name, "equals"); - EXPECT_EQ(check_expression_02->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_02->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_02->arguments->children[1]->as()->value.safeGet(), 1); String constraint_03 = "CHECK col_01 = 1"; @@ -34,7 +34,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_03->as()->constraint_name, ""); auto * check_expression_03 = ast_constraint_03->as()->check_expression->as(); EXPECT_EQ(check_expression_03->name, "equals"); - EXPECT_EQ(check_expression_03->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_03->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_03->arguments->children[1]->as()->value.safeGet(), 1); String constraint_04 = 
"CONSTRAINT CHECK col_01 = 1 ENFORCED"; @@ -43,7 +43,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_04->as()->constraint_name, ""); auto * check_expression_04 = ast_constraint_04->as()->check_expression->as(); EXPECT_EQ(check_expression_04->name, "equals"); - EXPECT_EQ(check_expression_04->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_04->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_04->arguments->children[1]->as()->value.safeGet(), 1); String constraint_05 = "CONSTRAINT CHECK col_01 = 1 NOT ENFORCED"; @@ -52,6 +52,6 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_05->as()->constraint_name, ""); auto * check_expression_05 = ast_constraint_05->as()->check_expression->as(); EXPECT_EQ(check_expression_05->name, "equals"); - EXPECT_EQ(check_expression_05->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_05->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_05->arguments->children[1]->as()->value.safeGet(), 1); } diff --git a/src/Parsers/MySQL/tests/gtest_create_parser.cpp b/src/Parsers/MySQL/tests/gtest_create_parser.cpp index 92c0070aa88..1aaba8d67e4 100644 --- a/src/Parsers/MySQL/tests/gtest_create_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_create_parser.cpp @@ -28,7 +28,7 @@ TEST(CreateTableParser, SimpleCreate) EXPECT_EQ(ast->as()->columns_list->as()->columns->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->indices->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->constraints->children.size(), 1); - EXPECT_EQ(ast->as()->table_options->as()->changes["engine"]->as()->name, "INNODB"); + EXPECT_EQ(ast->as()->table_options->as()->changes["engine"]->as()->name(), "INNODB"); EXPECT_TRUE(ast->as()->partition_options->as()); } diff --git a/src/Parsers/MySQL/tests/gtest_index_parser.cpp b/src/Parsers/MySQL/tests/gtest_index_parser.cpp index 02b3b10acff..a8be6787b2c 100644 --- 
a/src/Parsers/MySQL/tests/gtest_index_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_index_parser.cpp @@ -17,13 +17,13 @@ TEST(ParserIndex, AllIndexOptions) ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); ASTDeclareIndex * declare_index = ast->as(); - EXPECT_EQ(declare_index->index_columns->children[0]->as()->name, "col_01"); + EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->name, "col_02"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->arguments->children[0]->as()->value.safeGet(), 100); - EXPECT_EQ(declare_index->index_columns->children[2]->as()->name, "col_03"); + EXPECT_EQ(declare_index->index_columns->children[2]->as()->name(), "col_03"); ASTDeclareOptions * declare_options = declare_index->index_options->as(); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["index_type"]->as()->name, "HASH"); + EXPECT_EQ(declare_options->changes["index_type"]->as()->name(), "HASH"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "index comment"); EXPECT_EQ(declare_options->changes["visible"]->as()->value.safeGet(), 1); } @@ -36,12 +36,12 @@ TEST(ParserIndex, OptionalIndexOptions) ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); ASTDeclareIndex * declare_index = ast->as(); - EXPECT_EQ(declare_index->index_columns->children[0]->as()->name, "col_01"); + EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->name, "col_02"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->arguments->children[0]->as()->value.safeGet(), 100); - EXPECT_EQ(declare_index->index_columns->children[2]->as()->name, "col_03"); + EXPECT_EQ(declare_index->index_columns->children[2]->as()->name(), "col_03"); ASTDeclareOptions * declare_options = 
declare_index->index_options->as(); - EXPECT_EQ(declare_options->changes["index_type"]->as()->name, "HASH"); + EXPECT_EQ(declare_options->changes["index_type"]->as()->name(), "HASH"); EXPECT_EQ(declare_options->changes["visible"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); } diff --git a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp index 1651efcb966..01b757e5891 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp @@ -18,14 +18,14 @@ TEST(ParserPartitionOptions, HashPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "hash"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_hash_partition = "PARTITION BY LINEAR HASH(col_01)"; ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_hash"); - EXPECT_EQ(declare_partition_options_02->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_02->partition_expression->as()->name(), "col_01"); } TEST(ParserPartitionOptions, KeyPatitionOptions) @@ -37,7 +37,7 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "key"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); 
String linear_key_partition = "PARTITION BY LINEAR KEY(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0); @@ -45,15 +45,15 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_key"); ASTPtr columns_list = declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); String key_partition_with_algorithm = "PARTITION BY KEY ALGORITHM=1 (col_01)"; ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0); ASTDeclarePartitionOptions * declare_partition_options_03 = ast_03->as(); EXPECT_EQ(declare_partition_options_03->partition_type, "key_1"); - EXPECT_EQ(declare_partition_options_03->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_03->partition_expression->as()->name(), "col_01"); } TEST(ParserPartitionOptions, RangePatitionOptions) @@ -65,7 +65,7 @@ TEST(ParserPartitionOptions, RangePatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "range"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY RANGE COLUMNS(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + 
range_columns_partition.size(), "", 0, 0); @@ -73,8 +73,8 @@ TEST(ParserPartitionOptions, RangePatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "range"); ASTPtr columns_list = declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); } TEST(ParserPartitionOptions, ListPatitionOptions) @@ -86,7 +86,7 @@ TEST(ParserPartitionOptions, ListPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "list"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY LIST COLUMNS(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); @@ -94,8 +94,8 @@ TEST(ParserPartitionOptions, ListPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "list"); ASTPtr columns_list = declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); } TEST(ParserPartitionOptions, PatitionNumberOptions) @@ -107,7 +107,7 @@ TEST(ParserPartitionOptions, PatitionNumberOptions) ASTDeclarePartitionOptions * declare_partition_options = 
ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 2); } @@ -120,10 +120,10 @@ TEST(ParserPartitionOptions, PatitionWithSubpartitionOptions) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); } @@ -138,10 +138,10 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartition) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); 
EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[0]->as()); } @@ -157,10 +157,10 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartitions) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[0]->as()); EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[1]->as()); diff --git a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp index 48e8a9f53c6..458c7acd553 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp @@ -22,13 +22,13 @@ TEST(ParserPartition, AllPatitionOptions) ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); ASTDeclareOptions * declare_options = declare_partition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "partition comment"); EXPECT_EQ(declare_options->changes["data_directory"]->as()->value.safeGet(), "data_directory"); EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), 
"index_directory"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserPartition, OptionalPatitionOptions) @@ -40,10 +40,10 @@ TEST(ParserPartition, OptionalPatitionOptions) ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); ASTDeclareOptions * declare_options = declare_partition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserPartition, PatitionOptionsWithLessThan) @@ -56,16 +56,16 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); EXPECT_EQ(declare_partition_01->less_than->as()->value.safeGet(), 1991); ASTDeclareOptions * declare_options_01 = declare_partition_01->options->as(); - EXPECT_EQ(declare_options_01->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES LESS THAN MAXVALUE STORAGE engine = engine_name"; ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); - 
EXPECT_EQ(declare_partition_02->less_than->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_02->less_than->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_02 = declare_partition_02->options->as(); - EXPECT_EQ(declare_options_02->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); String partition_03 = "PARTITION partition_03 VALUES LESS THAN (50, MAXVALUE) STORAGE engine = engine_name"; ASTPtr ast_partition_03 = parseQuery(p_partition, partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0); @@ -74,9 +74,9 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_partition_03->partition_name, "partition_03"); ASTPtr declare_partition_03_argument = declare_partition_03->less_than->as()->arguments; EXPECT_EQ(declare_partition_03_argument->children[0]->as()->value.safeGet(), 50); - EXPECT_EQ(declare_partition_03_argument->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_03_argument->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_03 = declare_partition_03->options->as(); - EXPECT_EQ(declare_options_03->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_03->changes["engine"]->as()->name(), "engine_name"); String partition_04 = "PARTITION partition_04 VALUES LESS THAN (MAXVALUE, MAXVALUE) STORAGE engine = engine_name"; ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0); @@ -84,10 +84,10 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) ASTDeclarePartition * declare_partition_04 = ast_partition_04->as(); EXPECT_EQ(declare_partition_04->partition_name, "partition_04"); ASTPtr declare_partition_04_argument = declare_partition_04->less_than->as()->arguments; - EXPECT_EQ(declare_partition_04_argument->children[0]->as()->name, "MAXVALUE"); - 
EXPECT_EQ(declare_partition_04_argument->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_04_argument->children[0]->as()->name(), "MAXVALUE"); + EXPECT_EQ(declare_partition_04_argument->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_04 = declare_partition_04->options->as(); - EXPECT_EQ(declare_options_04->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_04->changes["engine"]->as()->name(), "engine_name"); } TEST(ParserPartition, PatitionOptionsWithInExpression) @@ -101,9 +101,9 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) ASTPtr declare_partition_01_argument = declare_partition_01->in_expression->as()->arguments; EXPECT_TRUE(declare_partition_01_argument->children[0]->as()->value.isNull()); EXPECT_EQ(declare_partition_01_argument->children[1]->as()->value.safeGet(), 1991); - EXPECT_EQ(declare_partition_01_argument->children[2]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_01_argument->children[2]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_01 = declare_partition_01->options->as(); - EXPECT_EQ(declare_options_01->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES IN ((NULL, 1991), (1991, NULL), (MAXVALUE, MAXVALUE)) STORAGE engine = engine_name"; ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); @@ -121,11 +121,11 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) EXPECT_TRUE(argument_02->as()->value.safeGet()[1].isNull()); ASTPtr argument_03 = declare_partition_02_argument->children[2]->as()->arguments; - EXPECT_EQ(argument_03->as()->children[0]->as()->name, "MAXVALUE"); - EXPECT_EQ(argument_03->as()->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(argument_03->as()->children[0]->as()->name(), "MAXVALUE"); + 
EXPECT_EQ(argument_03->as()->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_02 = declare_partition_02->options->as(); - EXPECT_EQ(declare_options_02->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); } TEST(ParserPartition, PatitionOptionsWithSubpartitions) diff --git a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp index 694558b9cc3..7447f16fc7c 100644 --- a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp @@ -14,14 +14,14 @@ TEST(ParserReference, SimpleReference) String reference_01 = "REFERENCES table_name (ref_col_01)"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); String reference_02 = "REFERENCES table_name (ref_col_01, ref_col_02)"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); ASTPtr arguments = ast_reference_02->as()->reference_expression->as()->arguments; - EXPECT_EQ(arguments->children[0]->as()->name, "ref_col_01"); - EXPECT_EQ(arguments->children[1]->as()->name, "ref_col_02"); + EXPECT_EQ(arguments->children[0]->as()->name(), "ref_col_01"); + EXPECT_EQ(arguments->children[1]->as()->name(), "ref_col_02"); } TEST(ParserReference, ReferenceDifferenceKind) @@ -30,19 +30,19 @@ TEST(ParserReference, ReferenceDifferenceKind) String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), 
reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH PARTIAL"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_PARTIAL); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH SIMPLE"; ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_SIMPLE); } @@ -52,7 +52,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE RESTRICT ON UPDATE RESTRICT"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, 
ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_01->as()->on_delete_option, ASTDeclareReference::RESTRICT); EXPECT_EQ(ast_reference_01->as()->on_update_option, ASTDeclareReference::RESTRICT); @@ -60,7 +60,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_02 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_02->as()->on_delete_option, ASTDeclareReference::CASCADE); EXPECT_EQ(ast_reference_02->as()->on_update_option, ASTDeclareReference::CASCADE); @@ -68,7 +68,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_03 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE SET NULL ON UPDATE SET NULL"; ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_03->as()->on_delete_option, ASTDeclareReference::SET_NULL); EXPECT_EQ(ast_reference_03->as()->on_update_option, ASTDeclareReference::SET_NULL); @@ -76,7 +76,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_04 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE NO ACTION ON DELETE NO ACTION"; ASTPtr ast_reference_04 = parseQuery(p_reference, 
reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0); EXPECT_EQ(ast_reference_04->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_04->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_04->as()->on_delete_option, ASTDeclareReference::NO_ACTION); EXPECT_EQ(ast_reference_04->as()->on_update_option, ASTDeclareReference::NO_ACTION); @@ -84,7 +84,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_05 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE SET DEFAULT ON DELETE SET DEFAULT"; ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0); EXPECT_EQ(ast_reference_05->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_05->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_05->as()->on_delete_option, ASTDeclareReference::SET_DEFAULT); EXPECT_EQ(ast_reference_05->as()->on_update_option, ASTDeclareReference::SET_DEFAULT); diff --git a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp index 5c1cf3710ab..b375f73c55c 100644 --- a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp @@ -19,13 +19,13 @@ TEST(ParserSubpartition, AllSubpatitionOptions) ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); ASTDeclareOptions * declare_options = declare_subpartition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + 
EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "subpartition comment"); EXPECT_EQ(declare_options->changes["data_directory"]->as()->value.safeGet(), "data_directory"); EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), "index_directory"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserSubpartition, OptionalSubpatitionOptions) @@ -37,9 +37,9 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); ASTDeclareOptions * declare_options = declare_subpartition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index b051f6149bb..b252ee51ace 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -23,9 +23,9 @@ TEST(ParserTableOptions, AllSubpatitionOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); 
EXPECT_EQ(declare_options->changes["avg_row_length"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["character_set"]->as()->name(), "utf-8"); EXPECT_EQ(declare_options->changes["checksum"]->as()->value.safeGet(), 1); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8_bin"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf8_bin"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "table option comment"); EXPECT_EQ(declare_options->changes["compression"]->as()->value.safeGet(), "LZ4"); EXPECT_EQ(declare_options->changes["connection"]->as()->value.safeGet(), "connect_string"); @@ -33,23 +33,23 @@ TEST(ParserTableOptions, AllSubpatitionOptions) EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), "index_directory"); EXPECT_EQ(declare_options->changes["delay_key_write"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["encryption"]->as()->value.safeGet(), "Y"); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "INNODB"); - EXPECT_EQ(declare_options->changes["insert_method"]->as()->name, "NO"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "INNODB"); + EXPECT_EQ(declare_options->changes["insert_method"]->as()->name(), "NO"); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); - EXPECT_EQ(declare_options->changes["pack_keys"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["pack_keys"]->as()->name(), "DEFAULT"); EXPECT_EQ(declare_options->changes["password"]->as()->value.safeGet(), "password"); - EXPECT_EQ(declare_options->changes["row_format"]->as()->name, "DYNAMIC"); - EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name, "DEFAULT"); - 
EXPECT_EQ(declare_options->changes["stats_persistent"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["row_format"]->as()->name(), "DYNAMIC"); + EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name(), "DEFAULT"); + EXPECT_EQ(declare_options->changes["stats_persistent"]->as()->name(), "DEFAULT"); EXPECT_EQ(declare_options->changes["stats_sample_pages"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "tablespace_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "tablespace_name"); ASTPtr arguments = declare_options->changes["union"]->as()->arguments; - EXPECT_EQ(arguments->children[0]->as()->name, "table_01"); - EXPECT_EQ(arguments->children[1]->as()->name, "table_02"); + EXPECT_EQ(arguments->children[0]->as()->name(), "table_01"); + EXPECT_EQ(arguments->children[1]->as()->name(), "table_02"); } TEST(ParserTableOptions, OptionalTableOptions) @@ -60,5 +60,5 @@ TEST(ParserTableOptions, OptionalTableOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); - EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name(), "DEFAULT"); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1afdfac0461..6416e08d93b 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -114,7 +114,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; auto index = std::make_shared(); - index->name = name->as().name; + index->name = name->as().name(); index->granularity = granularity->as().value.safeGet(); index->set(index->expr, expr); index->set(index->type, type); @@ -143,7 +143,7 @@ bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; auto constraint = 
std::make_shared(); - constraint->name = name->as().name; + constraint->name = name->as().name(); constraint->set(constraint->expr, expr); node = constraint; diff --git a/src/Parsers/ParserDictionary.cpp b/src/Parsers/ParserDictionary.cpp index d69e4b02aed..77cd480d595 100644 --- a/src/Parsers/ParserDictionary.cpp +++ b/src/Parsers/ParserDictionary.cpp @@ -95,9 +95,9 @@ bool ParserDictionaryRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; if (pair.first == "min") - res->min_attr_name = identifier->name; + res->min_attr_name = identifier->name(); else if (pair.first == "max") - res->max_attr_name = identifier->name; + res->max_attr_name = identifier->name(); else return false; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 296f4187e3a..020b7993c2d 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -137,7 +137,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & { ASTPtr ast; if (ParserIdentifier{}.parse(pos, ast, expected)) - storage_policy_str = ast->as().name; + storage_policy_str = ast->as().name(); else return false; @@ -145,7 +145,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; if (ParserIdentifier{}.parse(pos, ast, expected)) - volume_str = ast->as().name; + volume_str = ast->as().name(); else return false; } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index 6b777af77a2..c2bde5fa8f1 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -86,7 +86,7 @@ TEST(ParserDictionaryDDL, SimpleDictionary) auto * primary_key = create->dictionary->primary_key; EXPECT_EQ(primary_key->children.size(), 1); - EXPECT_EQ(primary_key->children[0]->as()->name, "key_column"); + EXPECT_EQ(primary_key->children[0]->as()->name(), "key_column"); /// range test auto * range = 
create->dictionary->range; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7beb0a4d706..8cae7866748 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -208,7 +208,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.index_name = ast_index_decl.name; if (command_ast->index) - command.after_index_name = command_ast->index->as().name; + command.after_index_name = command_ast->index->as().name(); command.if_not_exists = command_ast->if_not_exists; @@ -235,7 +235,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ast = command_ast->clone(); command.if_exists = command_ast->if_exists; command.type = AlterCommand::DROP_CONSTRAINT; - command.constraint_name = command_ast->constraint->as().name; + command.constraint_name = command_ast->constraint->as().name(); return command; } @@ -244,7 +244,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::DROP_INDEX; - command.index_name = command_ast->index->as().name; + command.index_name = command_ast->index->as().name(); command.if_exists = command_ast->if_exists; if (command_ast->clear_index) command.clear = true; @@ -290,8 +290,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::RENAME_COLUMN; - command.column_name = command_ast->column->as().name; - command.rename_to = command_ast->rename_to->as().name; + command.column_name = command_ast->column->as().name(); + command.rename_to = command_ast->rename_to->as().name(); command.if_exists = command_ast->if_exists; return command; } diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f3569c344d9..ba998dd5951 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -57,7 +57,7 @@ 
std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_INDEX; res.partition = command->partition; res.predicate = nullptr; - res.index_name = command->index->as().name; + res.index_name = command->index->as().name(); return res; } else if (parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN) @@ -88,7 +88,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MutationCommand::Type::DROP_INDEX; - res.column_name = command->index->as().name; + res.column_name = command->index->as().name(); if (command->partition) res.partition = command->partition; if (command->clear_index) @@ -100,8 +100,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MutationCommand::Type::RENAME_COLUMN; - res.column_name = command->column->as().name; - res.rename_to = command->rename_to->as().name; + res.column_name = command->column->as().name(); + res.rename_to = command->rename_to->as().name(); return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_TTL) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b858239d637..9046940b3f7 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -271,7 +271,7 @@ std::optional getOptimizedQueryProcessingStage(const if (!id) return false; /// TODO: if GROUP BY contains multiIf()/if() it should contain only columns from sharding_key - if (!sharding_key_block.has(id->name)) + if (!sharding_key_block.has(id->name())) return false; } return true; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 81a42f1fe63..a3660cf2dec 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -74,7 +74,7 @@ static bool extractPathImpl(const IAST & elem, String & res, 
const Context & con else return false; - if (ident->name != "path") + if (ident->name() != "path") return false; auto evaluated = evaluateConstantExpressionAsLiteral(value, context); diff --git a/tests/queries/server.py b/tests/queries/server.py index e9f7361a6fe..c4f8968e08a 100644 --- a/tests/queries/server.py +++ b/tests/queries/server.py @@ -137,19 +137,34 @@ ServerThread.DEFAULT_SERVER_CONFIG = \ - - - localhost - {tcp_port} - - - - - localhost - {tcp_port} - - - + + + localhost + {tcp_port} + + + + + localhost + {tcp_port} + + + + + + + + 127.0.0.1 + {tcp_port} + + + + + 127.0.0.2 + {tcp_port} + + + diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp index 84ad07056b9..c8aae4a56f3 100644 --- a/utils/db-generator/query_db_generator.cpp +++ b/utils/db-generator/query_db_generator.cpp @@ -622,7 +622,7 @@ FuncRet arrayJoinFunc(DB::ASTPtr ch, std::map & columns) { auto ident = std::dynamic_pointer_cast(arg); if (ident) - indents.insert(ident->name); + indents.insert(ident->name()); } for (const auto & indent : indents) { @@ -654,7 +654,7 @@ FuncRet inFunc(DB::ASTPtr ch, std::map & columns) auto ident = std::dynamic_pointer_cast(arg); if (ident) { - indents.insert(ident->name); + indents.insert(ident->name()); } auto literal = std::dynamic_pointer_cast(arg); if (literal) @@ -734,7 +734,7 @@ FuncRet arrayFunc(DB::ASTPtr ch, std::map & columns) if (ident) { no_indent = false; - indents.insert(ident->name); + indents.insert(ident->name()); } auto literal = std::dynamic_pointer_cast(arg); if (literal) @@ -784,7 +784,7 @@ FuncRet arithmeticFunc(DB::ASTPtr ch, std::map & columns) if (ident) { no_indent = false; - indents.insert(ident->name); + indents.insert(ident->name()); } auto literal = std::dynamic_pointer_cast(arg); if (literal) @@ -848,7 +848,7 @@ FuncRet likeFunc(DB::ASTPtr ch, std::map & columns) { auto ident = std::dynamic_pointer_cast(arg); if (ident) - indents.insert(ident->name); + indents.insert(ident->name()); 
auto literal = std::dynamic_pointer_cast(arg); if (literal) { @@ -905,7 +905,7 @@ FuncRet simpleFunc(DB::ASTPtr ch, std::map & columns) if (ident) { no_indent = false; - indents.insert(ident->name); + indents.insert(ident->name()); } auto literal = std::dynamic_pointer_cast(arg); if (literal) @@ -1046,7 +1046,7 @@ std::set getIndent(DB::ASTPtr ch) std::set ret = {}; auto x = std::dynamic_pointer_cast(ch); if (x) - ret.insert(x->name); + ret.insert(x->name()); for (const auto & child : (*ch).children) { auto child_ind = getIndent(child); From 2e23fc242fe3cc295571c126f487a874b8e8fb8f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 22:48:16 +0300 Subject: [PATCH 163/174] Remove flaky LIVE VIEW test --- .../__init__.py | 0 .../configs/remote_servers.xml | 18 -- .../configs/set_distributed_defaults.xml | 35 --- .../test_distributed_over_live_view/test.py | 276 ------------------ 4 files changed, 329 deletions(-) delete mode 100644 tests/integration/test_distributed_over_live_view/__init__.py delete mode 100644 tests/integration/test_distributed_over_live_view/configs/remote_servers.xml delete mode 100644 tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml delete mode 100644 tests/integration/test_distributed_over_live_view/test.py diff --git a/tests/integration/test_distributed_over_live_view/__init__.py b/tests/integration/test_distributed_over_live_view/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_distributed_over_live_view/configs/remote_servers.xml b/tests/integration/test_distributed_over_live_view/configs/remote_servers.xml deleted file mode 100644 index ebce4697529..00000000000 --- a/tests/integration/test_distributed_over_live_view/configs/remote_servers.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - node1 - 9000 - - - - - node2 - 9000 - - - - - diff --git a/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml 
b/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml deleted file mode 100644 index 194eb1ebb87..00000000000 --- a/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - 3 - 1000 - 1 - - - 5 - 3000 - 1 - - - - - - - - ::/0 - - default - default - - - - - ::/0 - - delays - default - - - - - diff --git a/tests/integration/test_distributed_over_live_view/test.py b/tests/integration/test_distributed_over_live_view/test.py deleted file mode 100644 index 78b90024ebf..00000000000 --- a/tests/integration/test_distributed_over_live_view/test.py +++ /dev/null @@ -1,276 +0,0 @@ - - -import sys -import time - -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.uclient import client, prompt, end_of_block - -cluster = ClickHouseCluster(__file__) - -# log = sys.stdout -log = None - -NODES = {'node' + str(i): cluster.add_instance( - 'node' + str(i), - main_configs=['configs/remote_servers.xml'], - user_configs=['configs/set_distributed_defaults.xml'], -) for i in (1, 2)} - -CREATE_TABLES_SQL = ''' -DROP TABLE IF EXISTS lv_over_base_table; -DROP TABLE IF EXISTS distributed_table; -DROP TABLE IF EXISTS base_table; - -SET allow_experimental_live_view = 1; - -CREATE TABLE - base_table( - node String, - key Int32, - value Int32 - ) -ENGINE = Memory; - -CREATE LIVE VIEW lv_over_base_table AS SELECT * FROM base_table; - -CREATE TABLE - distributed_table -AS base_table -ENGINE = Distributed(test_cluster, default, base_table, rand()); -''' - -INSERT_SQL_TEMPLATE = "INSERT INTO base_table VALUES ('{node_id}', {key}, {value})" - - -@pytest.fixture(scope="function") -def started_cluster(): - try: - cluster.start() - for node_index, (node_name, node) in enumerate(NODES.items()): - node.query(CREATE_TABLES_SQL) - for i in range(0, 2): - sql = INSERT_SQL_TEMPLATE.format(node_id=node_name, key=i, value=i + (node_index * 10)) - node.query(sql) - yield cluster - - 
finally: - cluster.shutdown() - -def poll_query(node, query, expected, timeout): - """Repeatedly execute query until either expected result is returned or timeout occurs. - """ - start_time = time.time() - while node.query(query) != expected and time.time() - start_time < timeout: - pass - assert node.query(query) == expected - -@pytest.mark.parametrize("node", list(NODES.values())[:1]) -@pytest.mark.parametrize("source", ["lv_over_distributed_table"]) -class TestLiveViewOverDistributedSuite: - def test_distributed_over_live_view_order_by_node(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT * FROM distributed_over_lv ORDER BY node, key FORMAT CSV" - select_query_dist_table = "SELECT * FROM distributed_table ORDER BY node, key FORMAT CSV" - select_count_query = "SELECT count() FROM distributed_over_lv" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node1",1,1') - client1.expect('"node2",0,10') - client1.expect('"node2",1,11') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 2, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_count_query, "7\n", timeout=60) - print("\n--DEBUG1--") - print(select_query) - print(node0.query(select_query)) - print("---------") - print("\n--DEBUG2--") - print(select_query_dist_table) - 
print(node0.query(select_query_dist_table)) - print("---------") - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node1",1,1') - client1.expect('"node1",1,3') - client1.expect('"node1",2,3') - client1.expect('"node1",3,3') - client1.expect('"node2",0,10') - client1.expect('"node2",1,11') - client1.expect(prompt) - - def test_distributed_over_live_view_order_by_key(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT * FROM distributed_over_lv ORDER BY key, node FORMAT CSV" - select_count_query = "SELECT count() FROM distributed_over_lv" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node2",0,10') - client1.expect('"node1",1,1') - client1.expect('"node2",1,11') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 2, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_count_query, "7\n", timeout=60) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node2",0,10') - client1.expect('"node1",1,1') - client1.expect('"node1",1,3') - client1.expect('"node2",1,11') - client1.expect('"node1",2,3') - client1.expect('"node1",3,3') - client1.expect(prompt) - - def test_distributed_over_live_view_group_by_node(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query 
= "SELECT node, SUM(value) FROM distributed_over_lv GROUP BY node ORDER BY node FORMAT CSV" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",1') - client1.expect('"node2",21') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, select_query, '"node1",3\n"node2",21\n', timeout=60) - - client1.send(select_query) - client1.expect('"node1",3') - client1.expect('"node2",21') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 3, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_query, '"node1",12\n"node2",21\n', timeout=60) - - client1.send(select_query) - client1.expect('"node1",12') - client1.expect('"node2",21') - client1.expect(prompt) - - def test_distributed_over_live_view_group_by_key(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT key, SUM(value) FROM distributed_over_lv GROUP BY key ORDER BY key FORMAT CSV" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv 
AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,12') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, "SELECT count() FROM (%s)" % select_query.rsplit("FORMAT")[0], "3\n", timeout=60) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,12') - client1.expect('2,2') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, "SELECT count() FROM (%s)" % select_query.rsplit("FORMAT")[0], "4\n", timeout=60) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,15') - client1.expect('2,2') - client1.expect('3,3') - client1.expect(prompt) - - def test_distributed_over_live_view_sum(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send("SELECT sum(value) FROM distributed_over_lv") - client1.expect(r"22" + end_of_block) - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, "SELECT sum(value) FROM distributed_over_lv", "24\n", timeout=60) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3), ('node1', 4, 4)") - client2.expect(prompt) - - poll_query(node0, "SELECT sum(value) 
FROM distributed_over_lv", "31\n", timeout=60) From 9d50921e529402ec7dbd76b61506fa40df84e2f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 23:52:32 +0300 Subject: [PATCH 164/174] Fix performance test "functions logical" after move to clang --- src/Functions/FunctionsLogical.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 58e1c52a60c..defa4f4493b 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -290,10 +290,9 @@ private: /// Apply target function by feeding it "batches" of N columns -/// Combining 10 columns per pass is the fastest for large columns sizes. -/// For small columns sizes - more columns is faster. +/// Combining 8 columns per pass is the fastest method, because it's the maximum when clang vectorizes a loop. template < - typename Op, template typename OperationApplierImpl, size_t N = 10> + typename Op, template typename OperationApplierImpl, size_t N = 8> struct OperationApplier { template From 8b21ef5d4fd6c9d2e3a49135d819676ed09fffd0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Oct 2020 00:50:52 +0300 Subject: [PATCH 165/174] Remove excessive exclamation marks --- .../AggregateFunctionTimeSeriesGroupSum.h | 2 +- src/Core/tests/CMakeLists.txt | 3 --- src/Core/tests/gtest_move_field.cpp | 22 ++++++++++++++++ src/Core/tests/move_field.cpp | 25 ------------------- src/Interpreters/tests/two_level_hash_map.cpp | 2 +- 5 files changed, 24 insertions(+), 30 deletions(-) create mode 100644 src/Core/tests/gtest_move_field.cpp delete mode 100644 src/Core/tests/move_field.cpp diff --git a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index be0a3eb4af5..b755fbf081b 100644 --- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h 
@@ -92,7 +92,7 @@ struct AggregateFunctionTimeSeriesGroupSumData it_ss->second.add(t, v); } if (result.size() > 0 && t < result.back().first) - throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc!!!", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR}; if (result.size() > 0 && t == result.back().first) { //do not add new point diff --git a/src/Core/tests/CMakeLists.txt b/src/Core/tests/CMakeLists.txt index d609e49f247..cd6450633ff 100644 --- a/src/Core/tests/CMakeLists.txt +++ b/src/Core/tests/CMakeLists.txt @@ -5,9 +5,6 @@ target_include_directories (string_pool SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLU add_executable (field field.cpp) target_link_libraries (field PRIVATE dbms) -add_executable (move_field move_field.cpp) -target_link_libraries (move_field PRIVATE clickhouse_common_io) - add_executable (string_ref_hash string_ref_hash.cpp) target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) diff --git a/src/Core/tests/gtest_move_field.cpp b/src/Core/tests/gtest_move_field.cpp new file mode 100644 index 00000000000..9c807039c6a --- /dev/null +++ b/src/Core/tests/gtest_move_field.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace DB; + +GTEST_TEST(Field, Move) +{ + Field f; + + f = Field{String{"Hello, world (1)"}}; + ASSERT_EQ(f.get(), "Hello, world (1)"); + f = Field{String{"Hello, world (2)"}}; + ASSERT_EQ(f.get(), "Hello, world (2)"); + f = Field{Array{Field{String{"Hello, world (3)"}}}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (3)"); + f = String{"Hello, world (4)"}; + ASSERT_EQ(f.get(), "Hello, world (4)"); + f = Array{Field{String{"Hello, world (5)"}}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); + f = Array{String{"Hello, world (6)"}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); +} diff --git a/src/Core/tests/move_field.cpp b/src/Core/tests/move_field.cpp deleted file mode 100644 
index 2780abffc40..00000000000 --- a/src/Core/tests/move_field.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - - -int main(int, char **) -{ - using namespace DB; - - Field f; - - f = Field{String{"Hello, world"}}; - std::cerr << f.get() << "\n"; - f = Field{String{"Hello, world!"}}; - std::cerr << f.get() << "\n"; - f = Field{Array{Field{String{"Hello, world!!"}}}}; - std::cerr << f.get()[0].get() << "\n"; - f = String{"Hello, world!!!"}; - std::cerr << f.get() << "\n"; - f = Array{Field{String{"Hello, world!!!!"}}}; - std::cerr << f.get()[0].get() << "\n"; - f = Array{String{"Hello, world!!!!!"}}; - std::cerr << f.get()[0].get() << "\n"; - - return 0; -} diff --git a/src/Interpreters/tests/two_level_hash_map.cpp b/src/Interpreters/tests/two_level_hash_map.cpp index f79be16e095..33fd5ee8305 100644 --- a/src/Interpreters/tests/two_level_hash_map.cpp +++ b/src/Interpreters/tests/two_level_hash_map.cpp @@ -128,7 +128,7 @@ int main(int argc, char ** argv) std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl; if (sum_counts != n) - std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cerr << "Error!" << std::endl; } return 0; From 942828f4d5c810235cdcd281de90ffb4350ef07b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Oct 2020 00:54:17 +0300 Subject: [PATCH 166/174] Check style --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 4983782c00d..ef569c9f73e 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -99,3 +99,6 @@ find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*. 
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' && echo "Files should not have UTF-8 BOM" find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' && echo "Files should not have UTF-16LE BOM" find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' && echo "Files should not have UTF-16BE BOM" + +# Too many exclamation marks +find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -F '!!!' && echo "Too many exclamation marks (looks dirty, unconfident)." From 2613012fd1f888f5ec72f1ee0b4296e7a672596a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Oct 2020 04:43:06 +0300 Subject: [PATCH 167/174] Improve performance of FunctionsLogical a little by adding "restrict" --- src/Functions/FunctionsLogical.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index defa4f4493b..3e19516daaa 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -299,28 +299,29 @@ struct OperationApplier static void apply(Columns & in, ResultData & result_data, bool use_result_data_as_input = false) { if (!use_result_data_as_input) - doBatchedApply(in, result_data); + doBatchedApply(in, result_data.data(), result_data.size()); while (!in.empty()) - doBatchedApply(in, result_data); + doBatchedApply(in, result_data.data(), result_data.size()); } - template - static void NO_INLINE doBatchedApply(Columns & in, ResultData & result_data) + template + static void NO_INLINE doBatchedApply(Columns & in, Result * __restrict result_data, size_t size) { if (N > in.size()) { OperationApplier - ::template doBatchedApply(in, result_data); + 
::template doBatchedApply(in, result_data, size); return; } const OperationApplierImpl operation_applier_impl(in); - size_t i = 0; - for (auto & res : result_data) + for (size_t i = 0; i < size; ++i) + { if constexpr (CarryResult) - res = Op::apply(res, operation_applier_impl.apply(i++)); + result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); else - res = operation_applier_impl.apply(i++); + result_data[i] = operation_applier_impl.apply(i); + } in.erase(in.end() - N, in.end()); } @@ -331,7 +332,7 @@ template < struct OperationApplier { template - static void NO_INLINE doBatchedApply(Columns &, Result &) + static void NO_INLINE doBatchedApply(Columns &, Result &, size_t) { throw Exception( "OperationApplier<...>::apply(...): not enough arguments to run this method", From 98f073a3a541db7e9bed9ed4d78057bde3eef4ac Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 25 Oct 2020 05:38:54 +0300 Subject: [PATCH 168/174] Text updated ant translated to Russian --- .../{crash_log.md => crash-log.md} | 16 +++++-- docs/ru/operations/system-tables/crash-log.md | 48 +++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) rename docs/en/operations/system-tables/{crash_log.md => crash-log.md} (77%) create mode 100644 docs/ru/operations/system-tables/crash-log.md diff --git a/docs/en/operations/system-tables/crash_log.md b/docs/en/operations/system-tables/crash-log.md similarity index 77% rename from docs/en/operations/system-tables/crash_log.md rename to docs/en/operations/system-tables/crash-log.md index d38ce31584f..5bdf402513a 100644 --- a/docs/en/operations/system-tables/crash_log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -1,4 +1,4 @@ -## system.crash_log {#system-tables_crash_log} +# system.crash_log {#system-tables_crash_log} Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. 
@@ -10,11 +10,11 @@ Columns: - `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. - `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. -- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Array of traces. -- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of full traces. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process. - `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. -- `build_id` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server build. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by compiler. 
**Example** @@ -23,9 +23,12 @@ Query: ``` sql SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; ``` + Result (not full): ``` text +Row 1: +────── event_date: 2020-10-14 event_time: 2020-10-14 15:47:40 timestamp_ns: 1602679660271312710 @@ -39,4 +42,7 @@ revision: 54442 build_id: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash_log) +**See also** +- [trace_log](../../operations/system_tables/trace_log.md) system table + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash-log) diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md new file mode 100644 index 00000000000..19e9d19b198 --- /dev/null +++ b/docs/ru/operations/system-tables/crash-log.md @@ -0,0 +1,48 @@ +# system.crash_log {#system-tables_crash_log} + +Содержит информацию о трассировках стека для фатальных ошибок. Таблица не содержится в базе данных по умолчанию, а создается только при возникновении фатальных ошибок. + +Колонки: + +- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Дата события. +- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Время события. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Время события с наносекундами. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Номер сигнала, пришедшего в поток. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Идентификатор треда. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Идентификатор запроса. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Трассировка стека в момент ошибки. Представляет собой список физических адресов, по которым расположены вызываемые методы. 
+- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Трассировка стека в момент ошибки. Содержит вызываемые методы. +- `version` ([String](../../sql-reference/data-types/string.md)) — Версия сервера ClickHouse. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Ревизия сборки сервера ClickHouse. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID, сгенерированный компилятором. + +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; +``` + +Результат (приведён не полностью): + +``` text +Row 1: +────── +event_date: 2020-10-14 +event_time: 2020-10-14 15:47:40 +timestamp_ns: 1602679660271312710 +signal: 11 +thread_id: 23624 +query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69 +trace: [188531193,...] +trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] +version: ClickHouse 20.11.1.1 +revision: 54442 +build_id: +``` + +**См. также** +- Системная таблица [trace_log](../../operations/system_tables/trace_log.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash-log) From 300f07bdba532ccdd789ce6300cf4388683a36ed Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 25 Oct 2020 05:58:39 +0300 Subject: [PATCH 169/174] Links fixed. 
--- docs/en/operations/system-tables/crash-log.md | 4 ++-- docs/ru/operations/system-tables/crash-log.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 5bdf402513a..5b338237b83 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -43,6 +43,6 @@ build_id: ``` **See also** -- [trace_log](../../operations/system_tables/trace_log.md) system table +- [trace_log](../../operations/system-tables/trace_log.md) system table -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash-log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log) diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md index 19e9d19b198..7b645a06b2d 100644 --- a/docs/ru/operations/system-tables/crash-log.md +++ b/docs/ru/operations/system-tables/crash-log.md @@ -43,6 +43,6 @@ build_id: ``` **См. 
также** -- Системная таблица [trace_log](../../operations/system_tables/trace_log.md) +- Системная таблица [trace_log](../../operations/system-tables/trace_log.md) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/crash-log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log) From edc8d6e5e76560eca7b59feb62eb1c06c4167d9d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 25 Oct 2020 14:14:52 +0300 Subject: [PATCH 170/174] Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication --- programs/server/config.xml | 16 +++++++++ src/Interpreters/Cluster.cpp | 11 ++++-- ..._directory_monitor_batch_inserts.reference | 16 +++++++++ ...ibuted_directory_monitor_batch_inserts.sql | 36 ++++++++++++++++++- 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 9850d77abb7..5bdec5377fd 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -392,6 +392,22 @@ + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 8a98e8282a6..9c2766ae7d6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -614,13 +614,18 @@ const std::string & Cluster::ShardInfo::pathForInsert(bool prefer_localhost_repl if (!has_internal_replication) throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); - if (dir_name_for_internal_replication.empty() || dir_name_for_internal_replication_with_local.empty()) - throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); - if (prefer_localhost_replica) + { + if (dir_name_for_internal_replication.empty()) + throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); return dir_name_for_internal_replication; + } else + { + if (dir_name_for_internal_replication_with_local.empty()) + 
throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); return dir_name_for_internal_replication_with_local; + } } bool Cluster::maybeCrossReplication() const diff --git a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference index 5565ed6787f..03e58c13ff2 100644 --- a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference +++ b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference @@ -1,4 +1,20 @@ +test_cluster_two_shards prefer_localhost_replica=0 +0 0 1 +1 +test_cluster_two_shards prefer_localhost_replica=1 +0 0 1 +1 +test_cluster_two_shards_internal_replication prefer_localhost_replica=0 +0 +0 +1 +1 +test_cluster_two_shards_internal_replication prefer_localhost_replica=1 +0 +0 +1 +1 diff --git a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql index dbec319ab76..dec748789c8 100644 --- a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql +++ b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql @@ -7,6 +7,40 @@ DROP TABLE IF EXISTS dist_test_01040; CREATE TABLE test_01040 (key UInt64) ENGINE=TinyLog(); CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards, currentDatabase(), test_01040, key); + +-- internal_replication=false +SELECT 'test_cluster_two_shards prefer_localhost_replica=0'; +SET prefer_localhost_replica=0; INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); SYSTEM FLUSH DISTRIBUTED dist_test_01040; -SELECT * FROM dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +SELECT 'test_cluster_two_shards prefer_localhost_replica=1'; +SET prefer_localhost_replica=1; +INSERT INTO 
dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +DROP TABLE dist_test_01040; + +-- internal_replication=true +CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards_internal_replication, currentDatabase(), test_01040, key); +SELECT 'test_cluster_two_shards_internal_replication prefer_localhost_replica=0'; +SET prefer_localhost_replica=0; +INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +SELECT 'test_cluster_two_shards_internal_replication prefer_localhost_replica=1'; +SET prefer_localhost_replica=1; +INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + + +DROP TABLE dist_test_01040; +DROP TABLE test_01040; From f97e6beb70984d65aa705dfe3b4bba467d824d2f Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 25 Oct 2020 15:34:18 +0300 Subject: [PATCH 171/174] Remove message broker pool from context --- src/Interpreters/Context.cpp | 13 ------------- src/Interpreters/Context.h | 1 - 2 files changed, 14 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7f2ada8a426..9d2ea6ded86 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -335,7 +335,6 @@ struct ContextShared std::optional background_move_pool; /// The thread pool for the background moves performed by the tables. 
std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) std::optional distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) - std::optional message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used in kafka streaming) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. @@ -438,7 +437,6 @@ struct ContextShared schedule_pool.reset(); distributed_schedule_pool.reset(); ddl_worker.reset(); - message_broker_schedule_pool.reset(); /// Stop trace collector if any trace_collector.reset(); @@ -1441,17 +1439,6 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() return *shared->distributed_schedule_pool; } -BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() -{ - auto lock = getLock(); - if (!shared->message_broker_schedule_pool) - shared->message_broker_schedule_pool.emplace( - settings.background_message_broker_schedule_pool_size, - CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask, - "BgMBSchPool"); - return *shared->message_broker_schedule_pool; -} - void Context::setDDLWorker(std::unique_ptr ddl_worker) { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index bd5e17fe2e4..075fc3837ef 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -511,7 +511,6 @@ public: BackgroundProcessingPool & getBackgroundPool(); BackgroundProcessingPool & getBackgroundMovePool(); BackgroundSchedulePool & getSchedulePool(); - BackgroundSchedulePool & getMessageBrokerSchedulePool(); BackgroundSchedulePool & getDistributedSchedulePool(); void setDDLWorker(std::unique_ptr ddl_worker); From 4bcbcfed1f243a1759b10b3df58409e23909f82b Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: 
Sun, 25 Oct 2020 09:40:19 -0400 Subject: [PATCH 172/174] Fixing procedure of getting log file size in LDAP tests. Increasing default timeouts. --- .../ldap/authentication/tests/common.py | 16 ++++++++-------- .../ldap/external_user_directory/tests/common.py | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index 4e3d1e16647..1c662b3898b 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -47,7 +47,7 @@ ASCII_CHARS = string.ascii_lowercase + string.ascii_uppercase + string.digits def randomword(length, chars=ASCII_CHARS): return ''.join(random.choice(chars) for i in range(length)) -def restart(node=None, safe=False, timeout=20): +def restart(node=None, safe=False, timeout=60): """Restart ClickHouse server and wait for config to be reloaded. """ with When("I restart ClickHouse server node"): @@ -62,7 +62,7 @@ def restart(node=None, safe=False, timeout=20): with And("getting current log size"): logsize = \ - node.command("ls -s --block-size=1 /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ + node.command("stat --format=%s /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ 0].strip() with And("restarting ClickHouse server"): @@ -78,7 +78,7 @@ def restart(node=None, safe=False, timeout=20): f"ConfigReloader: Loaded config '/etc/clickhouse-server/config.xml', performed update on configuration", timeout=timeout) -def add_config(config, timeout=20, restart=False): +def add_config(config, timeout=60, restart=False): """Add dynamic configuration file to ClickHouse. 
:param node: node @@ -108,7 +108,7 @@ def add_config(config, timeout=20, restart=False): with And("I get the current log size"): logsize = \ - node.command("ls -s --block-size=1 /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ + node.command("stat --format=%s /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ 0].strip() with And("I restart ClickHouse to apply the config changes"): @@ -189,7 +189,7 @@ def create_ldap_servers_config_content(servers, config_d_dir="/etc/clickhouse-se @contextmanager def ldap_servers(servers, config_d_dir="/etc/clickhouse-server/config.d", config_file="ldap_servers.xml", - timeout=20, restart=False): + timeout=60, restart=False): """Add LDAP servers configuration. """ config = create_ldap_servers_config_content(servers, config_d_dir, config_file) @@ -236,7 +236,7 @@ def add_users_identified_with_ldap(*users): @contextmanager def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users.d", - config_file=None, timeout=20, restart=True, config=None, rbac=False): + config_file=None, timeout=60, restart=True, config=None, rbac=False): """Add LDAP authenticated users. """ if rbac: @@ -248,7 +248,7 @@ def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users. config = create_ldap_users_config_content(*users, config_d_dir=config_d_dir, config_file=config_file) return add_config(config, restart=restart) -def invalid_server_config(servers, message=None, tail=13, timeout=20): +def invalid_server_config(servers, message=None, tail=13, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP servers configuration file. 
""" node = current().context.node @@ -277,7 +277,7 @@ def invalid_server_config(servers, message=None, tail=13, timeout=20): with By("removing the config file", description=config.path): node.command(f"rm -rf {config.path}", exitcode=0) -def invalid_user_config(servers, config, message=None, tail=13, timeout=20): +def invalid_user_config(servers, config, message=None, tail=13, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP users configuration file. """ node = current().context.node diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index d6f414e617a..38b53ca6e9f 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -129,7 +129,7 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d return Config(content, path, name, uid, "config.xml") -def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=20, config=None): +def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=60, config=None): """Check that ClickHouse errors when trying to load invalid LDAP external user directory configuration file. """ @@ -181,7 +181,7 @@ def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, @contextmanager def ldap_external_user_directory(server, roles, config_d_dir="/etc/clickhouse-server/config.d", - config_file=None, timeout=20, restart=True, config=None): + config_file=None, timeout=60, restart=True, config=None): """Add LDAP external user directory. """ if config_file is None: From be95d3d854dc9bc96995944882482b090d34b44c Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Sun, 25 Oct 2020 21:09:14 -0400 Subject: [PATCH 173/174] Fixing another issue in LDAP tests. 
--- .../ldap/authentication/tests/common.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index 1c662b3898b..0fd9670fae0 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -85,19 +85,29 @@ def add_config(config, timeout=60, restart=False): :param config: configuration file description :param timeout: timeout, default: 20 sec """ - def check_preprocessed_config_is_updated(): + def check_preprocessed_config_is_updated(after_removal=False): """Check that preprocessed config is updated. """ started = time.time() command = f"cat /var/lib/clickhouse/preprocessed_configs/{config.preprocessed_name} | grep {config.uid}{' > /dev/null' if not settings.debug else ''}" + while time.time() - started < timeout: exitcode = node.command(command, steps=False).exitcode - if exitcode == 0: - break + if after_removal: + if exitcode == 1: + break + else: + if exitcode == 0: + break time.sleep(1) + if settings.debug: node.command(f"cat /var/lib/clickhouse/preprocessed_configs/{config.preprocessed_name}") - assert exitcode == 0, error() + + if after_removal: + assert exitcode == 1, error() + else: + assert exitcode == 0, error() def wait_for_config_to_be_loaded(): """Wait for config to be loaded. 
@@ -160,7 +170,7 @@ def add_config(config, timeout=60, restart=False): node.command(f"rm -rf {config.path}", exitcode=0) with Then(f"{config.preprocessed_name} should be updated", description=f"timeout {timeout}"): - check_preprocessed_config_is_updated() + check_preprocessed_config_is_updated(after_removal=True) with And("I wait for config to be reloaded"): wait_for_config_to_be_loaded() From 18458f36e773ae2a04b35bc244d2751308a553f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Oct 2020 08:20:37 +0300 Subject: [PATCH 174/174] Fix trash --- docs/en/operations/system-tables/crash-log.md | 6 +++--- docs/ru/operations/system-tables/crash-log.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 5b338237b83..5e9fec53429 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -1,6 +1,6 @@ -# system.crash_log {#system-tables_crash_log} +# system.crash_log {#system-tables_crash_log} -Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. +Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. Columns: @@ -39,7 +39,7 @@ trace: [188531193,...] trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] 
version: ClickHouse 20.11.1.1 revision: 54442 -build_id: +build_id: ``` **See also** diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md index 7b645a06b2d..d2b3ae5c6f5 100644 --- a/docs/ru/operations/system-tables/crash-log.md +++ b/docs/ru/operations/system-tables/crash-log.md @@ -1,6 +1,6 @@ -# system.crash_log {#system-tables_crash_log} +# system.crash_log {#system-tables_crash_log} -Содержит информацию о трассировках стека для фатальных ошибок. Таблица не содержится в базе данных по умолчанию, а создается только при возникновении фатальных ошибок. +Содержит информацию о трассировках стека для фатальных ошибок. Таблица не содержится в базе данных по умолчанию, а создается только при возникновении фатальных ошибок. Колонки: @@ -39,7 +39,7 @@ trace: [188531193,...] trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] version: ClickHouse 20.11.1.1 revision: 54442 -build_id: +build_id: ``` **См. также**