Merge branch 'master' into better-diagnostic-on-create-syntax-error

This commit is contained in:
Alexey Milovidov 2020-11-08 01:21:07 +03:00
commit 5ea17f9896
61 changed files with 1271 additions and 383 deletions

View File

@ -14,6 +14,11 @@ unset (_current_dir_name)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
if (SANITIZE STREQUAL "undefined")
# Third-party libraries are usually not written to be UBSan-clean, so the
# undefined-behaviour sanitizer is switched off with a directory-scoped option
# instead of per-library flags.
add_compile_options(-fno-sanitize=undefined)
endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (boost-cmake)
@ -157,9 +162,6 @@ if(USE_INTERNAL_SNAPPY_LIBRARY)
add_subdirectory(snappy)
set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
if(SANITIZE STREQUAL "undefined")
target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif()
endif()
if (USE_INTERNAL_PARQUET_LIBRARY)

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 757d947235b307675cff964f29b19d388140a9eb
Subproject commit f49c6ab8d3aa71828bd1b411485c21722e8c9d82

View File

@ -240,6 +240,10 @@ TESTS_TO_SKIP=(
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro

View File

@ -36,6 +36,7 @@ toc_title: Adopters
| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://deeplay.io/eng/" class="favicon">Deeplay</a> | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) |
@ -45,6 +46,7 @@ toc_title: Adopters
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
@ -68,6 +70,7 @@ toc_title: Adopters
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |

View File

@ -1765,6 +1765,23 @@ Default value: `0`.
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine.
Possible values:
- 0 — Uses `user[:password]@host:port#default_database` directory format.
- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.
Default value: `1`.
!!! note "Note"
- With `use_compact_format_in_distributed_parts_names=0`, changes to the cluster definition will not be applied to async INSERT.
- With `use_compact_format_in_distributed_parts_names=1`, changing the order of the nodes in the cluster definition will change the `shard_index`/`replica_index`, so be aware.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can't be changed in a user session.

View File

@ -4,6 +4,6 @@ toc_priority: 140
# sumWithOverflow {#sumwithoverflowx}
Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error.
Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, it is calculated with overflow.
Only works for numbers.

View File

@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел «Производительность»).
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/)).
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/) <!--hide-->

View File

@ -0,0 +1 @@
../../../tests/config/config.d/test_cluster_with_incorrect_pw.xml

View File

@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
offsets_data.resize_assume_reserved(offsets_data.size() - n);
}
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
{
const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
size_t rhs_size = rhs.sizeAt(m);
size_t min_size = std::min(lhs_size, rhs_size);
for (size_t i = 0; i < min_size; ++i)
if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint))
{
int res;
if (collator)
res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
else
res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
if (res)
return res;
}
return lhs_size < rhs_size
? -1
@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
: 1);
}
/// Compares row n of this column with row m of rhs_ without collation.
/// Forwards to compareAtImpl and leaves its collator argument at the default.
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint);
}
/// Collation-aware counterpart of compareAt: same comparison, but the given
/// collator is handed to compareAtImpl by address.
int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -352,27 +368,26 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
namespace
template <bool positive>
struct ColumnArray::Cmp
{
template <bool positive>
struct Less
const ColumnArray & parent;
int nan_direction_hint;
const Collator * collator;
Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr)
: parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
int operator()(size_t lhs, size_t rhs) const
{
const ColumnArray & parent;
int nan_direction_hint;
Less(const ColumnArray & parent_, int nan_direction_hint_)
: parent(parent_), nan_direction_hint(nan_direction_hint_) {}
bool operator()(size_t lhs, size_t rhs) const
{
if (positive)
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
else
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
}
};
}
int res;
if (collator)
res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
else
res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
return positive ? res : -res;
}
};
void ColumnArray::reserve(size_t n)
{
@ -753,7 +768,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
INSTANTIATE_INDEX_IMPL(ColumnArray)
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename Comparator>
void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = size();
if (limit >= s)
@ -763,23 +779,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
for (size_t i = 0; i < s; ++i)
res[i] = i;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
}
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
}
std::sort(res.begin(), res.end(), less);
}
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
template <typename Comparator>
void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
{
if (equal_range.empty())
return;
@ -792,20 +801,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
if (limit)
--number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
EqualRanges new_ranges;
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto & [first, last] = equal_range[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::sort(res.begin() + first, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -827,14 +835,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -845,7 +850,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0)
if (cmp(res[new_first], res[j]) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
@ -859,6 +864,39 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
equal_range = std::move(new_ranges);
}
/// Builds the sort permutation in res using plain (non-collated) comparison.
/// Cmp<false> and Cmp<true> are distinct template instantiations, so the sort
/// direction has to be chosen with a branch rather than passed as a value.
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint));
else
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint));
}
/// Refines an existing permutation inside the given equal ranges using plain
/// (non-collated) comparison; the actual work is done by updatePermutationImpl.
/// The comparator type (Cmp<false> / Cmp<true>) is selected from `reverse`.
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint));
else
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint));
}
/// Same as getPermutation, but the comparator is constructed with a pointer to
/// the collator so that it can take the collation-aware comparison path.
void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint, &collator));
else
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint, &collator));
}
/// Same as updatePermutation, but the comparator is constructed with a pointer
/// to the collator so that it can take the collation-aware comparison path.
void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint, &collator));
else
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
if (replicate_offsets.empty())

View File

@ -77,8 +77,11 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -132,6 +135,8 @@ public:
return false;
}
/// The answer is taken from the nested data column.
bool isCollationSupported() const override { return getData().isCollationSupported(); }
private:
WrappedPtr data;
WrappedPtr offsets;
@ -169,6 +174,17 @@ private:
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
template <bool positive>
struct Cmp;
};

View File

@ -248,6 +248,8 @@ public:
/// The constant value. It is valid even if the size of the column is 0.
template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
/// The answer is delegated to the wrapped data column.
bool isCollationSupported() const override { return data->isCollationSupported(); }
};
}

View File

@ -1,5 +1,6 @@
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h>
#include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashMap.h>
@ -278,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
/// Compares row n of this column with row m of rhs, which must also be a ColumnLowCardinality.
/// Both rows are first translated into positions inside their dictionaries. Without a collator
/// the dictionaries are compared directly; with one, the dictionaries' nested columns are
/// compared through compareAtWithCollation.
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
    const auto & rhs_low_cardinality = assert_cast<const ColumnLowCardinality &>(rhs);

    const size_t lhs_position = getIndexes().getUInt(n);
    const size_t rhs_position = rhs_low_cardinality.getIndexes().getUInt(m);

    if (!collator)
        return getDictionary().compareAt(lhs_position, rhs_position, rhs_low_cardinality.getDictionary(), nan_direction_hint);

    return getDictionary().getNestedColumn()->compareAtWithCollation(
        lhs_position, rhs_position, *rhs_low_cardinality.getDictionary().getNestedColumn(), nan_direction_hint, *collator);
}
/// Non-collated comparison of row n with row m of rhs.
/// Forwards to compareAtImpl and leaves its collator argument at the default.
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
/// Collation-aware comparison of row n with row m of rhs: the collator is
/// handed to compareAtImpl by address.
int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -295,14 +308,17 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num
compare_results, direction, nan_direction_hint);
}
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const
{
if (limit == 0)
limit = size();
size_t unique_limit = getDictionary().size();
Permutation unique_perm;
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
if (collator)
getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
else
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
/// TODO: optimize with sse.
@ -330,7 +346,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
}
}
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
template <typename Cmp>
void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
{
if (equal_ranges.empty())
return;
@ -345,20 +362,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
EqualRanges new_ranges;
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto& [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
std::sort(res.begin() + first, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (comparator(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -379,17 +393,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0)
if (comparator(res[new_first],res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -401,7 +410,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0)
if (comparator(res[new_first], res[j]) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
@ -412,6 +421,38 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
}
}
/// Non-collated entry point: forwards to getPermutationImpl and leaves its
/// collator argument at the default.
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, nan_direction_hint, res);
}
/// Refines the permutation inside the given equal ranges without collation.
/// Rows are compared through their dictionary positions; when `reverse` is set
/// the sign of every comparison is flipped before it reaches updatePermutationImpl.
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
    auto compare_rows = [this, nan_direction_hint, reverse](size_t left_row, size_t right_row)
    {
        const int order = getDictionary().compareAt(
            getIndexes().getUInt(left_row), getIndexes().getUInt(right_row), getDictionary(), nan_direction_hint);

        if (reverse)
            return -order;
        return order;
    };

    updatePermutationImpl(limit, res, equal_ranges, compare_rows);
}
/// Collation-aware entry point: forwards to getPermutationImpl with the
/// collator passed by address.
void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator);
}
/// Refines the permutation inside the given equal ranges using a collator.
/// Rows are mapped to dictionary positions and compared inside the dictionary's
/// nested column with compareAtWithCollation; when `reverse` is set the sign of
/// every comparison is flipped before it reaches updatePermutationImpl.
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
    auto compare_rows = [this, &collator, reverse, nan_direction_hint](size_t left_row, size_t right_row)
    {
        const auto & nested_column = getDictionary().getNestedColumn();
        const int order = nested_column->compareAtWithCollation(
            getIndexes().getUInt(left_row), getIndexes().getUInt(right_row), *nested_column, nan_direction_hint, collator);

        if (reverse)
            return -order;
        return order;
    };

    updatePermutationImpl(limit, res, equal_ranges, compare_rows);
}
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{
auto columns = getIndexes().scatter(num_columns, selector);

View File

@ -125,10 +125,16 @@ public:
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
ColumnPtr replicate(const Offsets & offsets) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
@ -170,6 +176,7 @@ public:
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
bool isNumeric() const override { return getDictionary().isNumeric(); }
bool lowCardinality() const override { return true; }
/// The answer is taken from the dictionary's nested column.
bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); }
/**
* Checks if the dictionary column is Nullable(T).
@ -309,6 +316,13 @@ private:
void compactInplace();
void compactIfSharedDictionary();
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
};

View File

@ -6,6 +6,7 @@
#include <Common/WeakHash.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h>
@ -223,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
return ColumnNullable::create(indexed_data, indexed_null_map);
}
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
{
/// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint
@ -245,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
}
const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
if (collator)
return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator);
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
/// Non-collated comparison of row n with row m of rhs_.
/// Forwards to compareAtImpl without supplying a collator, so the parameter's
/// declared default applies (presumably nullptr; the declaration is not visible here).
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint);
}
/// Collation-aware comparison of row n with row m of rhs_: the collator is
/// handed to compareAtImpl by address.
int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -256,10 +270,14 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{
/// Cannot pass limit because of unknown amount of NULLs.
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if (collator)
getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res);
else
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if ((null_direction_hint > 0) != reverse)
{
@ -329,7 +347,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
}
}
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{
if (equal_ranges.empty())
return;
@ -432,12 +450,35 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire
}
}
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
if (collator)
getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
else
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
equal_ranges = std::move(new_ranges);
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
}
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, null_direction_hint, res);
}
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges);
}
void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, null_direction_hint, res, &collator);
}
void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator);
}
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);

View File

@ -6,6 +6,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
class Collator;
namespace DB
{
@ -92,8 +93,12 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -129,6 +134,7 @@ public:
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
bool onlyNull() const override { return nested_column->isDummy(); }
bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
/// Return the column that represents values.
@ -164,6 +170,13 @@ private:
template <bool negative>
void applyNullMapImpl(const ColumnUInt8 & map);
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
void updatePermutationImpl(
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
};
ColumnPtr makeNullable(const ColumnPtr & column);

View File

@ -285,21 +285,22 @@ void ColumnString::compareColumn(
}
template <bool positive>
struct ColumnString::less
struct ColumnString::Cmp
{
const ColumnString & parent;
explicit less(const ColumnString & parent_) : parent(parent_) {}
bool operator()(size_t lhs, size_t rhs) const
explicit Cmp(const ColumnString & parent_) : parent(parent_) {}
int operator()(size_t lhs, size_t rhs) const
{
int res = memcmpSmallAllowOverflow15(
parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
return positive ? (res < 0) : (res > 0);
return positive ? res : -res;
}
};
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
template <typename Comparator>
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = offsets.size();
res.resize(s);
@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
if (limit >= s)
limit = 0;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
}
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
{
if (reverse)
std::sort(res.begin(), res.end(), less<false>(*this));
else
std::sort(res.begin(), res.end(), less<true>(*this));
}
std::sort(res.begin(), res.end(), less);
}
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
template <typename Comparator>
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const
{
if (equal_ranges.empty())
return;
@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
if (limit)
--number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto & [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
else
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
std::sort(res.begin() + first, res.begin() + last, less);
size_t new_first = first;
for (size_t j = first + 1; j < last; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
size_t new_last = limit;
for (size_t j = limit; j < last; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0)
if (cmp(res[j], res[new_first]) == 0)
{
std::swap(res[j], res[new_last]);
++new_last;
@ -408,6 +391,56 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
}
}
/// Builds the sorting permutation using the non-collated comparator Cmp.
/// The sort direction is a template argument of the comparator, so `reverse` picks one of two instantiations.
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Cmp<true>(*this));
        return;
    }

    getPermutationImpl(limit, res, Cmp<false>(*this));
}
/// Refines the permutation inside `equal_ranges` using the non-collated comparator Cmp.
/// The sort direction is a template argument of the comparator, so `reverse` picks one of two instantiations.
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
{
    if (!reverse)
    {
        updatePermutationImpl(limit, res, equal_ranges, Cmp<true>(*this));
        return;
    }

    updatePermutationImpl(limit, res, equal_ranges, Cmp<false>(*this));
}
/// Three-way comparator of two rows of a ColumnString that delegates to a Collator.
/// With `positive == true` the collator result is returned as is, otherwise it is negated.
template <bool positive>
struct ColumnString::CmpWithCollation
{
    const ColumnString & parent;
    const Collator & collator;

    CmpWithCollation(const ColumnString & parent_, const Collator & collator_)
        : parent(parent_), collator(collator_)
    {
    }

    int operator()(size_t lhs, size_t rhs) const
    {
        const char * lhs_data = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]);
        const char * rhs_data = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]);

        /// Sizes are passed exactly as sizeAt() reports them.
        const int order = collator.compare(lhs_data, parent.sizeAt(lhs), rhs_data, parent.sizeAt(rhs));

        if constexpr (positive)
            return order;
        else
            return -order;
    }
};
/// Builds the sorting permutation using the collation-aware comparator CmpWithCollation.
/// The unnamed int parameter (the direction hint in the interface) is not used here.
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, CmpWithCollation<true>(*this, collator));
        return;
    }

    getPermutationImpl(limit, res, CmpWithCollation<false>(*this, collator));
}
/// Refines the permutation inside `equal_ranges` using the collation-aware comparator CmpWithCollation.
/// The unnamed int parameter (the direction hint in the interface) is not used here.
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
    if (!reverse)
    {
        updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<true>(*this, collator));
        return;
    }

    updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<false>(*this, collator));
}
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{
size_t col_size = size();
@ -476,13 +509,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const
size_t min_idx = 0;
size_t max_idx = 0;
less<true> less_op(*this);
Cmp<true> cmp_op(*this);
for (size_t i = 1; i < col_size; ++i)
{
if (less_op(i, min_idx))
if (cmp_op(i, min_idx) < 0)
min_idx = i;
else if (less_op(max_idx, i))
else if (cmp_op(max_idx, i) < 0)
max_idx = i;
}
@ -491,7 +524,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
}
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
{
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
@ -500,134 +533,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs
reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m));
}
template <bool positive>
struct ColumnString::lessWithCollation
{
const ColumnString & parent;
const Collator & collator;
lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
bool operator()(size_t lhs, size_t rhs) const
{
int res = collator.compare(
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
return positive ? (res < 0) : (res > 0);
}
};
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
{
size_t s = offsets.size();
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (limit >= s)
limit = 0;
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<true>(*this, collator));
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), lessWithCollation<false>(*this, collator));
else
std::sort(res.begin(), res.end(), lessWithCollation<true>(*this, collator));
}
}
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
if (equal_ranges.empty())
return;
if (limit >= size() || limit >= equal_ranges.back().second)
limit = 0;
size_t number_of_ranges = equal_ranges.size();
if (limit)
--number_of_ranges;
EqualRanges new_ranges;
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto& [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<true>(*this, collator));
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
if (last - new_first > 1)
new_ranges.emplace_back(new_first, last);
}
if (limit)
{
const auto & [first, last] = equal_ranges.back();
if (limit < first || limit > last)
return;
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
}
}
if (new_last - new_first > 1)
new_ranges.emplace_back(new_first, new_last);
}
}
void ColumnString::protect()
{
getChars().protect();

View File

@ -43,14 +43,20 @@ private:
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
template <bool positive>
struct less;
struct Cmp;
template <bool positive>
struct lessWithCollation;
struct CmpWithCollation;
ColumnString() = default;
ColumnString(const ColumnString & src);
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
public:
const char * getFamilyName() const override { return "String"; }
TypeIndex getDataType() const override { return TypeIndex::String; }
@ -229,16 +235,16 @@ public:
int direction, int nan_direction_hint) const override;
/// Variant of compareAt for string comparison with respect of collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
/// Sorting with respect of collation.
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
@ -270,6 +276,8 @@ public:
// Throws an exception if offsets/chars are messed up
void validate() const;
bool isCollationSupported() const override { return true; }
};

View File

@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se
return res;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
if (int res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint))
{
int res;
if (collator && columns[i]->isCollationSupported())
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
else
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
if (res)
return res;
}
return 0;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
template <bool positive>
struct ColumnTuple::Less
{
TupleColumns columns;
int nan_direction_hint;
const Collator * collator;
Less(const TupleColumns & columns_, int nan_direction_hint_)
: columns(columns_), nan_direction_hint(nan_direction_hint_)
Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr)
: columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_)
{
}
@ -308,7 +325,11 @@ struct ColumnTuple::Less
{
for (const auto & column : columns)
{
int res = column->compareAt(a, b, *column, nan_direction_hint);
int res;
if (collator && column->isCollationSupported())
res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
else
res = column->compareAt(a, b, *column, nan_direction_hint);
if (res < 0)
return positive;
else if (res > 0)
@ -318,7 +339,8 @@ struct ColumnTuple::Less
}
};
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename LessOperator>
void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const
{
size_t rows = size();
res.resize(rows);
@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(columns, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(columns, nan_direction_hint));
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(columns, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(columns, nan_direction_hint));
std::sort(res.begin(), res.end(), less);
}
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{
if (equal_ranges.empty())
return;
for (const auto & column : columns)
{
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
if (collator && column->isCollationSupported())
column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges);
else
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
equal_ranges.pop_back();
@ -361,6 +380,32 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio
}
}
/// Builds the sorting permutation with the Less comparator and no collator.
/// The sort direction is a template argument of Less, so `reverse` picks one of two instantiations.
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint));
        return;
    }

    getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint));
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint, &collator));
else
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint, &collator));
}
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator);
}
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
@ -433,5 +478,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
return false;
}
bool ColumnTuple::isCollationSupported() const
{
for (const auto& column : columns)
{
if (column->isCollationSupported())
return true;
}
return false;
}
}

View File

@ -75,15 +75,19 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
size_t tupleSize() const { return columns.size(); }
@ -94,6 +98,15 @@ public:
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename LessOperator>
void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const;
void updatePermutationImpl(
bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const;
};

View File

@ -9,7 +9,7 @@
class SipHash;
class Collator;
namespace DB
{
@ -18,6 +18,7 @@ namespace ErrorCodes
{
extern const int CANNOT_GET_SIZE_OF_FIELD;
extern const int NOT_IMPLEMENTED;
extern const int BAD_COLLATION;
}
class Arena;
@ -250,6 +251,12 @@ public:
*/
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
/// Equivalent to compareAt, but collator is used to compare values.
/// The default implementation always throws BAD_COLLATION; column types that support
/// collation override it.
virtual int compareAtWithCollation(size_t /*n*/, size_t /*m*/, const IColumn & /*rhs*/, int /*nan_direction_hint*/, const Collator & /*collator*/) const
{
    /// Same wording as in getPermutationWithCollation / updatePermutationWithCollation.
    throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Compare the whole column with single value from rhs column.
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
@ -277,6 +284,18 @@ public:
*/
virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0;
/** Collation-aware variants of getPermutation and updatePermutation: a collator is used to compare values.
  * Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them.
  * The default implementation always throws BAD_COLLATION.
  */
virtual void getPermutationWithCollation(
    const Collator & /*collator*/, bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation & /*res*/) const
{
    throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Collation-aware variant of updatePermutation. The default implementation always throws BAD_COLLATION.
virtual void updatePermutationWithCollation(
    const Collator & /*collator*/, bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation & /*res*/, EqualRanges & /*equal_ranges*/) const
{
    throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/** Copies each element according offsets parameter.
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary in ARRAY JOIN operation.
@ -402,6 +421,8 @@ public:
virtual bool lowCardinality() const { return false; }
virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
IColumn() = default;
IColumn(const IColumn &) = default;

View File

@ -1,76 +1,44 @@
#pragma once
#include <Common/Arena.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/bit_cast.h>
#include <cstdlib>
#include <memory>
#include <common/unaligned.h>
namespace DB
{
/** Can allocate memory objects of fixed size with deletion support.
* For small `object_size`s allocated no less than getMinAllocationSize() bytes. */
* For small `object_size`s allocated no less than pointer size.
*/
class SmallObjectPool
{
private:
struct Block { Block * next; };
static constexpr auto getMinAllocationSize() { return sizeof(Block); }
const size_t object_size;
Arena pool;
Block * free_list{};
char * free_list = nullptr;
public:
SmallObjectPool(
const size_t object_size_, const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: object_size{std::max(object_size_, getMinAllocationSize())},
pool{initial_size, growth_factor, linear_growth_threshold}
SmallObjectPool(size_t object_size_)
: object_size{std::max(object_size_, sizeof(char *))}
{
if (pool.size() < object_size)
return;
const auto num_objects = pool.size() / object_size;
auto head = free_list = ext::bit_cast<Block *>(pool.alloc(num_objects * object_size));
for (const auto i : ext::range(0, num_objects - 1))
{
(void) i;
head->next = ext::bit_cast<Block *>(ext::bit_cast<char *>(head) + object_size);
head = head->next;
}
head->next = nullptr;
}
char * alloc()
{
if (free_list)
{
const auto res = reinterpret_cast<char *>(free_list);
free_list = free_list->next;
char * res = free_list;
free_list = unalignedLoad<char *>(free_list);
return res;
}
return pool.alloc(object_size);
}
void free(const void * ptr)
void free(char * ptr)
{
union
{
const void * p_v;
Block * block;
};
p_v = ptr;
block->next = free_list;
free_list = block;
unalignedStore<char *>(ptr, free_list);
free_list = ptr;
}
/// The size of the allocated pool in bytes
@ -81,5 +49,4 @@ public:
};
}

View File

@ -216,7 +216,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
if (!jobs.empty())
{
job = jobs.top().job;
job = std::move(jobs.top().job);
jobs.pop();
}
else

View File

@ -374,9 +374,8 @@ class IColumn;
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
\
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
@ -385,7 +384,6 @@ class IColumn;
M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \
M(Bool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \
M(String, function_implementation, "", "Choose function implementation for specific target or variant (experimental). If empty enable all of them.", 0) \
\
M(Bool, allow_experimental_geo_types, false, "Allow geo data types such as Point, Ring, Polygon, MultiPolygon", 0) \
M(Bool, allow_experimental_bigint_types, false, "Allow Int128, Int256, UInt256 and Decimal256 types", 0) \
M(Bool, data_type_default_nullable, false, "Data types without NULL or NOT NULL will make Nullable", 0) \
@ -394,20 +392,18 @@ class IColumn;
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
\
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0)
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -96,7 +96,7 @@ struct SortCursorImpl
: column_desc.column_number;
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String)
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
has_collation |= need_collation[j];
}
@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
int nulls_direction = desc.nulls_direction;
int res;
if (impl->need_collation[i])
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]);
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
}
res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator);
else
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);

View File

@ -16,6 +16,8 @@
#include <common/StringRef.h>
#include <ext/bit_cast.h>
#include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/scope_guard.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"

View File

@ -20,6 +20,7 @@
# include <Poco/Net/PrivateKeyPassphraseHandler.h>
# include <Poco/Net/RejectCertificateHandler.h>
# include <Poco/Net/SSLManager.h>
# include <Poco/Net/SecureStreamSocket.h>
#endif
#include <Poco/Net/HTTPServerResponse.h>
@ -68,27 +69,27 @@ namespace
throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME);
}
HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host=true)
HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host = true)
{
HTTPSessionPtr session;
if (https)
{
#if USE_SSL
session = std::make_shared<Poco::Net::HTTPSClientSession>();
/// Cannot resolve host in advance, otherwise SNI won't work in Poco.
session = std::make_shared<Poco::Net::HTTPSClientSession>(host, port);
#else
throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
#endif
}
else
session = std::make_shared<Poco::Net::HTTPClientSession>();
{
String resolved_host = resolve_host ? DNSResolver::instance().resolveHost(host).toString() : host;
session = std::make_shared<Poco::Net::HTTPClientSession>(resolved_host, port);
}
ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections);
if (resolve_host)
session->setHost(DNSResolver::instance().resolveHost(host).toString());
else
session->setHost(host);
session->setPort(port);
/// doesn't work properly without patch
#if defined(POCO_CLICKHOUSE_PATCH)
session->setKeepAlive(keep_alive);

View File

@ -13,6 +13,7 @@
#include <IO/ConnectionTimeouts.h>
namespace Poco
{
namespace Net
@ -24,6 +25,7 @@ namespace Net
namespace DB
{
constexpr int HTTP_TOO_MANY_REQUESTS = 429;
class SingleEndpointHTTPSessionPool : public PoolBase<Poco::Net::HTTPClientSession>
@ -39,6 +41,7 @@ private:
public:
SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_);
};
using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry;
using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
@ -59,5 +62,7 @@ bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status);
*/
std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects);
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
void assertResponseIsOk(
const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
}

View File

@ -67,7 +67,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_,
bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds)
{
return offset() != buffer().size() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
}
}

View File

@ -1 +0,0 @@
#include <IO/ReadWriteBufferFromHTTP.h>

View File

@ -38,7 +38,6 @@ SRCS(
ReadBufferFromMemory.cpp
ReadBufferFromPocoSocket.cpp
ReadHelpers.cpp
ReadWriteBufferFromHTTP.cpp
SeekAvoidingReadBuffer.cpp
UseSSL.cpp
WriteBufferAIO.cpp

View File

@ -65,6 +65,7 @@
#include <Interpreters/DatabaseCatalog.h>
#include <Storages/MergeTree/BackgroundJobsExecutor.h>
namespace ProfileEvents
{
extern const Event ContextLock;
@ -153,7 +154,7 @@ public:
}
else if (it->second->key.first != context.client_info.current_user)
{
throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR);
throw Exception("Session belongs to a different user", ErrorCodes::SESSION_IS_LOCKED);
}
/// Use existing session.
@ -596,7 +597,8 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic
{
StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name);
if (tmp_policy->getVolumes().size() != 1)
throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume",
ErrorCodes::NO_ELEMENTS_IN_CONFIG);
shared->tmp_volume = tmp_policy->getVolume(0);
}
@ -1083,11 +1085,13 @@ String Context::getInitialQueryId() const
void Context::setCurrentDatabaseNameInGlobalContext(const String & name)
{
if (global_context != this)
throw Exception("Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot set current database for non global context, this method should be used during server initialization",
ErrorCodes::LOGICAL_ERROR);
auto lock = getLock();
if (!current_database.empty())
throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR);
throw Exception("Default database name cannot be changed in global context without server restart",
ErrorCodes::LOGICAL_ERROR);
current_database = name;
}
@ -1470,7 +1474,7 @@ DDLWorker & Context::getDDLWorker() const
{
auto lock = getLock();
if (!shared->ddl_worker)
throw Exception("DDL background thread is not initialized.", ErrorCodes::LOGICAL_ERROR);
throw Exception("DDL background thread is not initialized.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
return *shared->ddl_worker;
}

View File

@ -31,6 +31,7 @@
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
@ -110,6 +111,27 @@ struct CustomizeFunctionsSuffixData
char ifDistinct[] = "ifdistinct";
using CustomizeIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsSuffixData<ifDistinct>>, true>;
/// Used to rewrite all aggregate functions to add -OrNull suffix to them if setting `aggregate_functions_null_for_empty` is set.
struct CustomizeAggregateFunctionsSuffixData
{
    using TypeToVisit = ASTFunction;

    /// Suffix appended to the names of matching aggregate functions.
    const String & customized_func_suffix;

    /// Appends `customized_func_suffix` to `func.name` when all of the following hold:
    ///  - the name is known to AggregateFunctionFactory as an aggregate function;
    ///  - the name does not already end with the suffix;
    ///  - the factory can provide properties for the name and they do not
    ///    report `returns_default_when_only_null`.
    /// Otherwise the node is left untouched.
    void visit(ASTFunction & func, ASTPtr &)
    {
        const auto & factory = AggregateFunctionFactory::instance();

        if (!factory.isAggregateFunctionName(func.name))
            return;

        /// Avoid adding the suffix twice.
        if (endsWith(func.name, customized_func_suffix))
            return;

        auto func_properties = factory.tryGetProperties(func.name);
        if (!func_properties || func_properties->returns_default_when_only_null)
            return;

        func.name += customized_func_suffix;
    }
};
using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsSuffixData>, true>;
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
/// Expand asterisks and qualified asterisks with column names.
/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer.
@ -710,6 +732,13 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings &
CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query);
}
// Rewrite all aggregate functions to add -OrNull suffix to them
if (settings.aggregate_functions_null_for_empty)
{
CustomizeAggregateFunctionsOrNullVisitor::Data data_or_null{"OrNull"};
CustomizeAggregateFunctionsOrNullVisitor(data_or_null).visit(query);
}
/// Creates a dictionary `aliases`: alias -> ASTPtr
QueryAliasesVisitor(aliases).visit(query);

View File

@ -2,6 +2,8 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/typeid_cast.h>
#include <Functions/FunctionHelpers.h>
@ -86,8 +88,7 @@ struct PartialSortingLessWithCollation
}
else if (isCollationRequired(elem.description))
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*elem.column);
res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator);
res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator);
}
else
res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction);
@ -101,7 +102,6 @@ struct PartialSortingLessWithCollation
}
};
void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
{
if (!block)
@ -120,14 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
bool is_column_const = false;
if (isCollationRequired(description[0]))
{
/// it it's real string column, than we need sort
if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column))
column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm);
else if (checkAndGetColumnConstData<ColumnString>(column))
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
if (isColumnConst(*column))
is_column_const = true;
else
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION);
column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
}
else if (!isColumnConst(*column))
{
@ -163,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
const IColumn * column = columns_with_sort_desc[i].column;
if (isCollationRequired(description[i]))
{
if (!checkAndGetColumn<ColumnString>(column) && !checkAndGetColumnConstData<ColumnString>(column))
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION);
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
need_collation = true;
}
@ -187,10 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
if (isCollationRequired(column.description))
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*column.column);
column_string.updatePermutationWithCollation(
*column.description.collator,
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
column.column->updatePermutationWithCollation(
*column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
}
else
{

View File

@ -55,7 +55,11 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta
if (type)
{
settings.ostr << ' ';
type->formatImpl(settings, state, frame);
FormatStateStacked type_frame = frame;
type_frame.indent = 0;
type->formatImpl(settings, state, type_frame);
}
if (null_modifier)

View File

@ -10,6 +10,77 @@
#include <Common/typeid_cast.h>
#include <Databases/IDatabase.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
namespace
{
using namespace DB;
/// Drop "password" from the path.
///
/// In case of use_compact_format_in_distributed_parts_names=0 the path format is:
///
/// user[:password]@host:port#default_database format
///
/// And password should be masked out.
///
/// See:
/// - Cluster::Address::fromFullString()
/// - Cluster::Address::toFullString()
std::string maskDataPath(const std::string & path)
{
std::string masked_path = path;
if (!masked_path.ends_with('/'))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path format");
masked_path.pop_back();
size_t node_pos = masked_path.rfind('/');
/// Loop through each node, that separated with a comma
while (node_pos != std::string::npos)
{
++node_pos;
size_t user_pw_end = masked_path.find('@', node_pos);
if (user_pw_end == std::string::npos)
{
/// Likey new format (use_compact_format_in_distributed_parts_names=1)
return path;
}
size_t pw_start = masked_path.find(':', node_pos);
if (pw_start > user_pw_end)
{
/// No password in path
return path;
}
++pw_start;
size_t pw_length = user_pw_end - pw_start;
/// Replace with a single '*' to hide even the password length.
masked_path.replace(pw_start, pw_length, 1, '*');
/// "," cannot be in the node specification since it will be encoded in hex.
node_pos = masked_path.find(',', node_pos);
}
masked_path.push_back('/');
return masked_path;
}
}
namespace DB
{
@ -103,7 +174,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons
size_t col_num = 0;
res_columns[col_num++]->insert(database);
res_columns[col_num++]->insert(table);
res_columns[col_num++]->insert(status.path);
res_columns[col_num++]->insert(maskDataPath(status.path));
res_columns[col_num++]->insert(status.is_blocked);
res_columns[col_num++]->insert(status.error_count);
res_columns[col_num++]->insert(status.files_count);

View File

@ -0,0 +1,21 @@
<yandex>
<remote_servers>
<test_cluster_with_incorrect_pw>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
<!-- password is incorrect -->
<password>foo</password>
</replica>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
<!-- password is incorrect -->
<password>foo</password>
</replica>
</shard>
</test_cluster_with_incorrect_pw>
</remote_servers>
</yandex>

View File

@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/
ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/
ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/

View File

@ -47,10 +47,12 @@ def test_single_file(started_cluster, cluster):
def test_two_files(started_cluster, cluster):
node.query(
"create table test.distr_2 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster))
node.query("insert into test.distr_2 values (0, '_'), (1, 'a')",
settings={"use_compact_format_in_distributed_parts_names": "1"})
node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')",
settings={"use_compact_format_in_distributed_parts_names": "1"})
node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", settings={
"use_compact_format_in_distributed_parts_names": "1",
})
node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", settings={
"use_compact_format_in_distributed_parts_names": "1",
})
query = "select * from file('/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin', 'Distributed') order by x"
out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query])
@ -70,7 +72,9 @@ def test_two_files(started_cluster, cluster):
def test_single_file_old(started_cluster, cluster):
node.query(
"create table test.distr_3 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster))
node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')")
node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={
"use_compact_format_in_distributed_parts_names": "0",
})
query = "select * from file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')"
out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query])

View File

@ -48,7 +48,9 @@ def test_insert(start_cluster):
# manual only (but only for remote node)
node.query('SYSTEM STOP DISTRIBUTED SENDS test.dist_foo')
node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)')
node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)', settings={
'use_compact_format_in_distributed_parts_names': '0',
})
assert _files_in_dist_mon(node, 'disk1', 'dist_foo') == 1
assert _files_in_dist_mon(node, 'disk2', 'dist_foo') == 0
@ -61,7 +63,9 @@ def test_insert(start_cluster):
#
node.query('RENAME TABLE test.dist_foo TO test.dist2_foo')
node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)')
node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)', settings={
'use_compact_format_in_distributed_parts_names': '0',
})
assert _files_in_dist_mon(node, 'disk1', 'dist2_foo') == 0
assert _files_in_dist_mon(node, 'disk2', 'dist2_foo') == 1

View File

@ -0,0 +1,6 @@
syntax = "proto3";
message User {
string username = 1;
int32 timestamp = 2;
}

View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: social.proto
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='social.proto',
package='',
syntax='proto3',
serialized_options=None,
serialized_pb=b'\n\x0csocial.proto\"+\n\x04User\x12\x10\n\x08username\x18\x01 \x01(\t\x12\x11\n\ttimestamp\x18\x02 \x01(\x05\x62\x06proto3'
)
_USER = _descriptor.Descriptor(
name='User',
full_name='User',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='username', full_name='User.username', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='timestamp', full_name='User.timestamp', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=16,
serialized_end=59,
)
DESCRIPTOR.message_types_by_name['User'] = _USER
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
User = _reflection.GeneratedProtocolMessageType('User', (_message.Message,), {
'DESCRIPTOR' : _USER,
'__module__' : 'social_pb2'
# @@protoc_insertion_point(class_scope:User)
})
_sym_db.RegisterMessage(User)
# @@protoc_insertion_point(module_scope)

View File

@ -30,6 +30,8 @@ libprotoc 3.0.0
protoc --python_out=. kafka.proto
"""
from . import kafka_pb2
from . import social_pb2
# TODO: add test for run-time offset update in CH, if we manually update it on Kafka side.
# TODO: add test for SELECT LIMIT is working.
@ -115,6 +117,20 @@ def kafka_produce_protobuf_messages_no_delimeters(topic, start_index, num_messag
producer.flush()
print("Produced {} messages for topic {}".format(num_messages, topic))
def kafka_produce_protobuf_social(topic, start_index, num_messages):
    """Send `num_messages` serialized `social_pb2.User` records to `topic` as one Kafka message.

    Records are generated for indices `start_index .. start_index + num_messages - 1`.
    Each record is prefixed with its varint-encoded length and all records are
    concatenated into a single value, which is sent with one `producer.send` call.
    """
    records = []
    for index in range(start_index, start_index + num_messages):
        user = social_pb2.User()
        user.username = 'John Doe {}'.format(index)
        user.timestamp = 1000000 + index
        payload = user.SerializeToString()
        # Length-delimited framing: varint size followed by the record itself.
        records.append(_VarintBytes(len(payload)) + payload)
    data = b''.join(records)

    producer = KafkaProducer(bootstrap_servers="localhost:9092", value_serializer=producer_serializer)
    producer.send(topic=topic, value=data)
    producer.flush()
    print("Produced {} messages for topic {}".format(num_messages, topic))
def avro_confluent_message(schema_registry_client, value):
# type: (CachedSchemaRegistryClient, dict) -> str
@ -982,6 +998,84 @@ def test_kafka_protobuf(kafka_cluster):
kafka_check_result(result, True)
@pytest.mark.timeout(180)
def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster):
# https://github.com/ClickHouse/ClickHouse/issues/12615
# Reads Protobuf messages whose first field is a string (schema 'social:User':
# username = 1, timestamp = 2) through a Kafka engine table and checks that
# every produced row is returned.
instance.query('''
CREATE TABLE test.kafka (
username String,
timestamp Int32
) ENGINE = Kafka()
SETTINGS
kafka_broker_list = 'kafka1:19092',
kafka_topic_list = 'string_field_on_first_position_in_protobuf',
kafka_group_name = 'string_field_on_first_position_in_protobuf',
kafka_format = 'Protobuf',
kafka_schema = 'social:User';
SELECT * FROM test.kafka;
''')
# Three producer calls with 20, 1 and 29 records (indices 0..49, 50 rows in total).
kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 0, 20)
kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 20, 1)
kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 21, 29)
result = instance.query('SELECT * FROM test.kafka', ignore_error=True)
# One expected row per produced record: 'John Doe <i>' and 1000000 + i.
expected = '''\
John Doe 0 1000000
John Doe 1 1000001
John Doe 2 1000002
John Doe 3 1000003
John Doe 4 1000004
John Doe 5 1000005
John Doe 6 1000006
John Doe 7 1000007
John Doe 8 1000008
John Doe 9 1000009
John Doe 10 1000010
John Doe 11 1000011
John Doe 12 1000012
John Doe 13 1000013
John Doe 14 1000014
John Doe 15 1000015
John Doe 16 1000016
John Doe 17 1000017
John Doe 18 1000018
John Doe 19 1000019
John Doe 20 1000020
John Doe 21 1000021
John Doe 22 1000022
John Doe 23 1000023
John Doe 24 1000024
John Doe 25 1000025
John Doe 26 1000026
John Doe 27 1000027
John Doe 28 1000028
John Doe 29 1000029
John Doe 30 1000030
John Doe 31 1000031
John Doe 32 1000032
John Doe 33 1000033
John Doe 34 1000034
John Doe 35 1000035
John Doe 36 1000036
John Doe 37 1000037
John Doe 38 1000038
John Doe 39 1000039
John Doe 40 1000040
John Doe 41 1000041
John Doe 42 1000042
John Doe 43 1000043
John Doe 44 1000044
John Doe 45 1000045
John Doe 46 1000046
John Doe 47 1000047
John Doe 48 1000048
John Doe 49 1000049
'''
# Compare as TSV so that the check is done on parsed rows.
assert TSV(result) == TSV(expected)
@pytest.mark.timeout(30)
def test_kafka_protobuf_no_delimiter(kafka_cluster):
instance.query('''
@ -2117,7 +2211,7 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster):
kafka_format = 'JSONEachRow',
kafka_max_block_size = 20,
kafka_flush_interval_ms = 1000;
SELECT * FROM test.kafka LIMIT 1; /* do subscription & assignment in advance (it can take different time, test rely on timing, so can flap otherwise) */
''')

View File

@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --multiquery --query "
INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000);
OPTIMIZE TABLE bug FINAL;"
$CLICKHOUSE_BENCHMARK --database "$CLICKHOUSE_DATABASE" --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err
$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err
cat "$CLICKHOUSE_TMP"/err | grep Exception
cat "$CLICKHOUSE_TMP"/err | grep Loaded

View File

@ -1,12 +1,12 @@
CREATE TABLE default.tuple
(
`j` Tuple( a Int8, b String)
`j` Tuple(a Int8, b String)
)
ENGINE = Memory
j Tuple(a Int8, b String)
CREATE TABLE default.tuple
(
`j` Tuple( a Int8, b String)
`j` Tuple(a Int8, b String)
)
ENGINE = Memory
j Tuple(a Int8, b String)

View File

@ -10,9 +10,7 @@ SHOW CREATE TABLE tuple FORMAT TSVRaw;
DESC tuple;
DROP TABLE tuple;
CREATE TABLE tuple
ENGINE = Memory AS
SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j;
CREATE TABLE tuple ENGINE = Memory AS SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j;
SHOW CREATE TABLE tuple FORMAT TSVRaw;
DESC tuple;

View File

@ -0,0 +1,16 @@
0
\N
0
\N
\N
\N
0
\N
45
45
10
10
45
45
10
10

View File

@ -0,0 +1,36 @@
DROP TABLE IF EXISTS defaults;
CREATE TABLE defaults
(
n Int8
)ENGINE = Memory();
SELECT sum(n) FROM defaults;
SELECT sumOrNull(n) FROM defaults;
SELECT count(n) FROM defaults;
SELECT countOrNull(n) FROM defaults;
SET aggregate_functions_null_for_empty=1;
SELECT sum(n) FROM defaults;
SELECT sumOrNull(n) FROM defaults;
SELECT count(n) FROM defaults;
SELECT countOrNull(n) FROM defaults;
INSERT INTO defaults SELECT * FROM numbers(10);
SET aggregate_functions_null_for_empty=0;
SELECT sum(n) FROM defaults;
SELECT sumOrNull(n) FROM defaults;
SELECT count(n) FROM defaults;
SELECT countOrNull(n) FROM defaults;
SET aggregate_functions_null_for_empty=1;
SELECT sum(n) FROM defaults;
SELECT sumOrNull(n) FROM defaults;
SELECT count(n) FROM defaults;
SELECT countOrNull(n) FROM defaults;
DROP TABLE defaults;

View File

@ -0,0 +1,64 @@
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
Order by tuple without collate
1 Ё
1 а
1 ё
2 А
2 Я
2 я
Order by tuple with collate
1 а
1 ё
1 Ё
2 А
2 я
2 Я
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
1 \N
2 \N
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
1 \N
2 \N
Order by tuple without collate
1 Ё
1 а
1 ё
1 \N
2 А
2 Я
2 я
2 \N
Order by tuple with collate
1 а
1 ё
1 Ё
1 \N
2 А
2 я
2 Я
2 \N

View File

@ -0,0 +1,33 @@
DROP TABLE IF EXISTS test_collate;
DROP TABLE IF EXISTS test_collate_null;
CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory();
CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory();
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я');
INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null);
SELECT 'Order by without collate';
SELECT * FROM test_collate ORDER BY s;
SELECT 'Order by with collate';
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
SELECT 'Order by without collate';
SELECT * FROM test_collate_null ORDER BY s;
SELECT 'Order by with collate';
SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru';
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate_null ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru';
DROP TABLE test_collate;
DROP TABLE test_collate_null;

View File

@ -0,0 +1,36 @@
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
1 \N
2 \N
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
1 \N
2 \N
Order by tuple without collate
1 Ё
1 а
1 ё
1 \N
2 А
2 Я
2 я
2 \N
Order by tuple with collate
1 а
1 ё
1 Ё
1 \N
2 А
2 я
2 Я
2 \N

View File

@ -0,0 +1,18 @@
DROP TABLE IF EXISTS test_collate;
CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory();
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null);
SELECT 'Order by without collate';
SELECT * FROM test_collate ORDER BY s;
SELECT 'Order by with collate';
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
DROP TABLE test_collate;

View File

@ -0,0 +1,50 @@
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а']
1 ['ё','я']
1 ['Ё']
2 ['я','а']
2 ['Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё','я']
1 ['Ё']
2 ['А']
2 ['ё','а','а']
2 ['я','а']
2 ['Я']
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а',NULL]
1 ['ё',NULL,'я']
1 ['Ё']
2 ['я']
2 [NULL,'Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё',NULL,'я']
1 ['Ё']
2 ['А']
2 ['ё','а','а',NULL]
2 ['я']
2 [NULL,'Я']
2 [['а','а'],['я','ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['а','а'],['я','ё']]
2 [['ё']]

View File

@ -0,0 +1,34 @@
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory();
INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']);
INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]);
INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]);
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;

View File

@ -0,0 +1,60 @@
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (2,'а')
2 (2,'А')
2 (2,'Я')
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (2,'а')
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (1,NULL)
2 (2,'А')
2 (2,'Я')
1 (2,NULL)
2 (2,NULL)
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (1,NULL)
1 (2,NULL)
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
2 (2,NULL)
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
1 (1,(1,['Ё']))
2 (1,(1,['Я']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (2,(1,['я']))
1 (1,(1,['Ё']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
2 (1,(1,['Я']))
2 (2,(1,['я']))

View File

@ -0,0 +1,34 @@
-- Exercises ORDER BY ... COLLATE 'ru' on Tuple columns that contain string data.
-- Start from a clean state in case the tables already exist.
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
-- The sorted column s is a tuple in every table. Its second element is a String,
-- a LowCardinality(Nullable(String)), or a nested Tuple(UInt32, Array(String)).
CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory();
-- Rows mix upper- and lower-case Cyrillic strings; collate_test2 additionally stores
-- NULL in the string element, and collate_test3 stores arrays of different lengths.
INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я'));
INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null));
INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а'])));
-- Each table is queried twice: ordered by s with the 'ru' collation, then ordered by x
-- followed by s with the 'ru' collation. Every SELECT '' outputs one row holding an
-- empty string after the preceding result (presumably as a visual separator).
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
-- Remove the tables created by this script.
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;

View File

@ -0,0 +1,12 @@
CREATE TABLE test
(
`a` Int64,
`b` NESTED(a Int64)
)
ENGINE = TinyLog
CREATE TABLE test
(
`a` Int64,
`b` TUPLE(a Int64)
)
ENGINE = TinyLog

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Pipes two CREATE TABLE statements through $CLICKHOUSE_FORMAT: one declares
# column `b` as NESTED(...), the other as TUPLE(...), both spelled in upper case.

# Locate the directory holding this script and source the shared test
# configuration that lives one level above it.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

# Statements to format, in the order their output must appear.
statements=(
    "CREATE TABLE test(a Int64, b NESTED(a Int64)) ENGINE=TinyLog"
    "CREATE TABLE test(a Int64, b TUPLE(a Int64)) ENGINE=TinyLog"
)

# CLICKHOUSE_FORMAT stays unquoted so that it is word-split exactly as before
# (presumably it is defined by shell_config.sh and may carry arguments -- confirm).
for statement in "${statements[@]}"; do
    printf '%s\n' "$statement" | $CLICKHOUSE_FORMAT
done

View File

@ -155,8 +155,13 @@
01509_dictionary_preallocate
01526_max_untracked_memory
01530_drop_database_atomic_sync
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
01546_log_queries_min_query_duration_ms
01547_query_log_current_database
01548_query_log_query_execution_ms
01552_dict_fixedstring
01555_system_distribution_queue_mask
01557_max_parallel_replicas_no_sample.sql

View File

@ -7,6 +7,7 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL
[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
[ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
[ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client}
@ -17,7 +18,7 @@ export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICK
[ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY} local"}
export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"}
export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfuscator"}
export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark"}
export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark ${CLICKHOUSE_BENCHMARK_OPT0:-}"}
export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"}
export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"}

View File

@ -19,6 +19,7 @@ issue_14674 = "https://github.com/ClickHouse/ClickHouse/issues/14674"
issue_14810 = "https://github.com/ClickHouse/ClickHouse/issues/14810"
issue_15165 = "https://github.com/ClickHouse/ClickHouse/issues/15165"
issue_15980 = "https://github.com/ClickHouse/ClickHouse/issues/15980"
issue_16403 = "https://github.com/ClickHouse/ClickHouse/issues/16403"
xfails = {
"syntax/show create quota/I show create quota current":
@ -89,6 +90,12 @@ xfails = {
[(Fail, ".inner table is not created as expected")],
"views/materialized view/select from source table privilege granted directly or via role/select from implicit target table, privilege granted through a role":
[(Fail, ".inner table is not created as expected")],
"privileges/alter move/:/:/:/:/move partition to implicit target table of a materialized view":
[(Fail, ".inner table is not created as expected")],
"privileges/alter move/:/:/:/:/user without ALTER MOVE PARTITION privilege/":
[(Fail, issue_16403)],
"privileges/alter move/:/:/:/:/user with revoked ALTER MOVE PARTITION privilege/":
[(Fail, issue_16403)],
}
xflags = {

View File

@ -7,7 +7,7 @@ from rbac.helper.common import *
def feature(self):
tasks = []
pool = Pool(10)
pool = Pool(16)
try:
try:
@ -21,6 +21,12 @@ def feature(self):
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_constraint", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_ttl", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_settings", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_update", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_delete", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_freeze", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_fetch", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_move", "feature"), flags=TE), {})
run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.grant_option", "feature"), flags=TE), {})
finally:
join(tasks)
finally: