From f354b704e0c7579533ac2b7378576c41ebdccce2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 16 Mar 2022 13:27:58 +0100 Subject: [PATCH 001/117] Add more checks, different counting for download size --- src/Common/FileCache.cpp | 20 +++- src/Common/FileCache.h | 2 + src/Common/FileSegment.cpp | 92 +++++++++++++------ src/Common/FileSegment.h | 8 +- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 79 ++++++++++++---- src/Disks/IO/CachedReadBufferFromRemoteFS.h | 2 + 6 files changed, 149 insertions(+), 54 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index dffa4fac44d..e13106511e0 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -315,7 +315,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t LRUFileCache::FileSegmentCell * LRUFileCache::addCell( const Key & key, size_t offset, size_t size, FileSegment::State state, - std::lock_guard & /* cache_lock */) + std::lock_guard & cache_lock) { /// Create a file segment cell and put it in `files` map by [key][offset]. @@ -323,8 +323,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( return nullptr; /// Empty files are not cached. if (files[key].contains(offset)) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Cache already exists for key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size); + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Cache already exists for key: `{}`, offset: {}, size: {}, current cache structure: {}", + keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock)); auto file_segment = std::make_shared(offset, size, key, this, state); FileSegmentCell cell(std::move(file_segment), queue); @@ -340,8 +342,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( auto [it, inserted] = offsets.insert({offset, std::move(cell)}); if (!inserted) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Failed to insert into cache key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Failed to insert into cache key: `{}`, offset: {}, size: {}", + keyToStr(key), offset, size); return &(it->second); } @@ -688,6 +692,12 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU } String LRUFileCache::dumpStructure(const Key & key_) +{ + std::lock_guard cache_lock(mutex); + return dumpStructureImpl(key_, cache_lock); +} + +String LRUFileCache::dumpStructureImpl(const Key & key_, std::lock_guard & /* cache_lock */) { std::lock_guard cache_lock(mutex); diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index d51dfe7a9ff..3ffa578a997 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -196,6 +196,8 @@ private: FileSegments splitRangeIntoEmptyCells( const Key & key, size_t offset, size_t size, std::lock_guard & cache_lock); + String dumpStructureImpl(const Key & key_, std::lock_guard & cache_lock); + public: struct Stat { diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index fb61a5bfc01..02b43f1bbe2 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -46,16 +46,20 @@ FileSegment::State FileSegment::state() const size_t FileSegment::getDownloadOffset() const { std::lock_guard segment_lock(mutex); - return range().left + getDownloadedSize(segment_lock); + return getDownloadOffsetImpl(segment_lock); } -size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_lock */) const +size_t FileSegment::getDownloadOffsetImpl(std::lock_guard & /* segment_lock 
*/) const { - if (download_state == State::DOWNLOADED) - return downloaded_size; + return range().left + getDownloadedSize(); +} - std::lock_guard download_lock(download_mutex); - return downloaded_size; +size_t FileSegment::getDownloadedSize() const +{ + auto path = cache->getPathInLocalCache(key(), offset()); + if (std::filesystem::exists(path)) + return std::filesystem::file_size(path); + return 0; } String FileSegment::getCallerId() @@ -119,7 +123,7 @@ void FileSegment::resetDownloader() void FileSegment::resetDownloaderImpl(std::lock_guard & segment_lock) { - if (downloaded_size == range().size()) + if (getDownloadedSize() == range().size()) setDownloaded(segment_lock); else download_state = State::PARTIALLY_DOWNLOADED; @@ -159,7 +163,7 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) remote_file_reader = remote_file_reader_; } -void FileSegment::write(const char * from, size_t size) +void FileSegment::write(const char * from, size_t size, size_t offset_) { if (!size) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); @@ -174,6 +178,15 @@ void FileSegment::write(const char * from, size_t size) "Only downloader can do the downloading. (CallerId: {}, DownloaderId: {})", getCallerId(), downloader_id); + { + std::lock_guard segment_lock(mutex); + auto download_offset = getDownloadOffsetImpl(segment_lock); + if (offset_ != download_offset) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Attempt to write {} bytes to offset: {}, but current download offset is {} ({})", + size, offset_, download_offset, getInfoForLogImpl(segment_lock)); + } + if (!cache_writer) { auto download_path = cache->getPathInLocalCache(key(), offset()); @@ -183,12 +196,7 @@ void FileSegment::write(const char * from, size_t size) try { cache_writer->write(from, size); - - std::lock_guard download_lock(download_mutex); - cache_writer->next(); - - downloaded_size += size; } catch (...) { @@ -201,6 +209,8 @@ void FileSegment::write(const char * from, size_t size) cache_writer->finalize(); cache_writer.reset(); + cv.notify_all(); + throw; } } @@ -240,12 +250,12 @@ bool FileSegment::reserve(size_t size) if (downloader_id != caller_id) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Space can be reserved only by downloader (current: {}, expected: {})", caller_id, downloader_id); - if (downloaded_size + size > range().size()) + if (getDownloadedSize() + size > range().size()) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to reserve space too much space ({}) for file segment with range: {} (downloaded size: {})", - size, range().toString(), downloaded_size); + size, range().toString(), getDownloadedSize()); - assert(reserved_size >= downloaded_size); + assert(reserved_size >= getDownloadedSize()); } /** @@ -253,7 +263,7 @@ bool FileSegment::reserve(size_t size) * in case previous downloader did not fully download current file_segment * and the caller is going to continue; */ - size_t free_space = reserved_size - downloaded_size; + size_t free_space = reserved_size - getDownloadedSize(); size_t size_to_reserve = size - free_space; std::lock_guard cache_lock(cache->mutex); @@ -292,7 +302,7 @@ void FileSegment::completeBatchAndResetDownloader() resetDownloaderImpl(segment_lock); - LOG_TEST(log, "Complete batch. Current downloaded size: {}", downloaded_size); + LOG_TEST(log, "Complete batch. 
Current downloaded size: {}", getDownloadedSize()); cv.notify_all(); } @@ -322,7 +332,20 @@ void FileSegment::complete(State state) download_state = state; } - completeImpl(); + try + { + completeImpl(); + } + catch (...) + { + std::lock_guard segment_lock(mutex); + if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + downloader_id.clear(); + + cv.notify_all(); + throw; + } + cv.notify_all(); } @@ -334,14 +357,24 @@ void FileSegment::complete() if (download_state == State::SKIP_CACHE || detached) return; - if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) + if (download_state != State::DOWNLOADED && getDownloadedSize() == range().size()) setDownloaded(segment_lock); - - if (download_state == State::DOWNLOADING || download_state == State::EMPTY) - download_state = State::PARTIALLY_DOWNLOADED; } - completeImpl(true); + try + { + completeImpl(true); + } + catch (...) + { + std::lock_guard segment_lock(mutex); + if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + downloader_id.clear(); + + cv.notify_all(); + throw; + } + cv.notify_all(); } @@ -361,7 +394,7 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) if (!download_can_continue) { - size_t current_downloaded_size = getDownloadedSize(segment_lock); + size_t current_downloaded_size = getDownloadedSize(); if (current_downloaded_size == 0) { download_state = State::SKIP_CACHE; @@ -387,10 +420,7 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) } if (!downloader_id.empty() && downloader_id == getCallerIdImpl(allow_non_strict_checking)) - { - LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); - } if (!download_can_continue && cache_writer) { @@ -405,11 +435,15 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) String FileSegment::getInfoForLog() const { std::lock_guard segment_lock(mutex); + return getInfoForLogImpl(segment_lock); +} +String FileSegment::getInfoForLogImpl(std::lock_guard & /* segment_lock */) const +{ WriteBufferFromOwnString info; info << "File segment: " << range().toString() << ", "; info << "state: " << download_state << ", "; - info << "downloaded size: " << getDownloadedSize(segment_lock) << ", "; + info << "downloaded size: " << getDownloadedSize() << ", "; info << "downloader id: " << downloader_id << ", "; info << "caller id: " << getCallerId(); diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index ff854adf089..736e5b0c505 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -95,7 +95,7 @@ public: bool reserve(size_t size); - void write(const char * from, size_t size); + void write(const char * from, size_t size, size_t offset); RemoteFileReaderPtr getRemoteFileReader(); @@ -129,7 +129,9 @@ private: void setDownloaded(std::lock_guard & segment_lock); static String getCallerIdImpl(bool allow_non_strict_checking = false); void resetDownloaderImpl(std::lock_guard & segment_lock); - size_t getDownloadedSize(std::lock_guard & segment_lock) const; + size_t getDownloadedSize() const; + size_t getDownloadOffsetImpl(std::lock_guard & segment_lock) const; + String getInfoForLogImpl(std::lock_guard & segment_lock) const; const Range segment_range; @@ -143,7 +145,7 @@ private: size_t reserved_size = 0; mutable std::mutex mutex; - std::condition_variable cv; + mutable std::condition_variable cv; /// Protects downloaded_size access with actual write into fs. 
/// downloaded_size is not protected by download_mutex in methods which diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 5cab2cb2995..3d0d9a895ca 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -63,7 +63,11 @@ void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size) SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const { - return std::make_shared(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size); + auto path = cache->getPathInLocalCache(cache_key, offset); + auto buf = std::make_shared(path, settings.local_fs_buffer_size); + if (buf->size() == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path); + return buf; } SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_) @@ -296,17 +300,23 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File { case ReadType::CACHED: { + auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); + size_t file_size = file_reader->size(); + + if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected state of cache file. Cache file size: {}, cache file offset: {}, " + "expected file size to be non-zero and file downloaded size to exceed current file read offset (expected: {} > {})", + file_size, range.left, range.left + file_size, file_offset_of_buffer_end); + size_t seek_offset = file_offset_of_buffer_end - range.left; read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); - auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); - size_t file_size = file_reader->size(); auto state = file_segment->state(); - LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}", file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset()); - assert(file_size > 0); break; } case ReadType::REMOTE_FS_READ_BYPASS_CACHE: @@ -384,6 +394,7 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId()); assert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment->getDownloadOffset()); + size_t current_offset = file_segment->getDownloadOffset(); while (true) { @@ -418,7 +429,8 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) { LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size()); - file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size); + file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset); + current_offset += current_predownload_size; bytes_to_predownload -= current_predownload_size; implementation_buffer->position() += current_predownload_size; @@ -532,7 +544,7 @@ bool CachedReadBufferFromRemoteFS::nextImpl() } catch (Exception & e) { - e.addMessage("Cache info: {}", getInfoForLog()); + e.addMessage("Cache info: {}", nextimpl_step_log_info); throw; } } @@ -549,6 +561,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() return false; SCOPE_EXIT({ + /// Save state of current file segment before it is completed. 
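        /// (This saved string is what nextImpl() attaches to a failed read via e.addMessage(),
        /// so the reported cache info reflects the state at read time, before completion.)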
+ nextimpl_step_log_info = getInfoForLog(); + if (current_file_segment_it == file_segments_holder->file_segments.end()) return; @@ -611,13 +626,25 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; if (download_current_segment != file_segment->isDownloader()) + { throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}", toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state()); + } if (!result) { + if (auto * cache_file_reader = typeid_cast(implementation_buffer.get())) + { + auto cache_file_size = cache_file_reader->size(); + if (cache_file_size == 0) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Attempt to read from an empty cache file: {} (just before actual read)", + cache_file_size); + } + result = implementation_buffer->next(); size = implementation_buffer->buffer().size(); } @@ -630,7 +657,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (file_segment->reserve(size)) { - file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size); + file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size, file_offset_of_buffer_end); } else { @@ -687,9 +714,16 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() read_until_position, first_offset, file_segments_holder->toString()); if (size == 0 && file_offset_of_buffer_end < read_until_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Having zero bytes, but range is not finished: file offset: {}, reading until: {}", - file_offset_of_buffer_end, read_until_position); + { + std::optional cache_file_size; + if (auto * cache_file_reader = assert_cast(implementation_buffer.get())) + cache_file_size = cache_file_reader->size(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Having zero bytes, but range is not finished: file offset: {}, reading until: {}, read type: {}, cache file size: {}", + file_offset_of_buffer_end, read_until_position, toString(read_type), cache_file_size ? std::to_string(*cache_file_size) : "None"); + } + return result; } @@ -752,12 +786,23 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() String CachedReadBufferFromRemoteFS::getInfoForLog() { - return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}", - remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end, - (implementation_buffer ? - std::to_string(implementation_buffer->getRemainingReadRange().left) + '-' + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None") - : "None"), - (current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog())); + auto implementation_buffer_read_range_str = + implementation_buffer ? + std::to_string(implementation_buffer->getRemainingReadRange().left) + + '-' + + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None") + : "None"; + + auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? 
"None" : (*current_file_segment_it)->getInfoForLog(); + + return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, " + "read_type: {}, file segment info: {}", + remote_fs_object_path, + getHexUIntLowercase(cache_key), + file_offset_of_buffer_end, + implementation_buffer_read_range_str, + toString(read_type), + current_file_segment_info); } } diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index 3d03debcd01..760d4360662 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -97,7 +97,9 @@ private: return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; } } + size_t first_offset = 0; + String nextimpl_step_log_info; }; } From 59c0fb024451b8a8e18908f1842cc88b0ab3eded Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 16 Mar 2022 14:29:21 +0100 Subject: [PATCH 002/117] Revert changes with downloaded size --- src/Common/FileSegment.cpp | 46 ++++++++++++++++---------------------- src/Common/FileSegment.h | 14 ++---------- 2 files changed, 21 insertions(+), 39 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 02b43f1bbe2..7a68cac9cb7 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -46,20 +46,7 @@ FileSegment::State FileSegment::state() const size_t FileSegment::getDownloadOffset() const { std::lock_guard segment_lock(mutex); - return getDownloadOffsetImpl(segment_lock); -} - -size_t FileSegment::getDownloadOffsetImpl(std::lock_guard & /* segment_lock */) const -{ - return range().left + getDownloadedSize(); -} - -size_t FileSegment::getDownloadedSize() const -{ - auto path = cache->getPathInLocalCache(key(), offset()); - if (std::filesystem::exists(path)) - return std::filesystem::file_size(path); - return 0; + return range().left + downloaded_size; } String FileSegment::getCallerId() @@ -123,7 +110,7 @@ void FileSegment::resetDownloader() void FileSegment::resetDownloaderImpl(std::lock_guard & segment_lock) { - if (getDownloadedSize() == range().size()) + if (downloaded_size == range().size()) setDownloaded(segment_lock); else download_state = State::PARTIALLY_DOWNLOADED; @@ -180,7 +167,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) { std::lock_guard segment_lock(mutex); - auto download_offset = getDownloadOffsetImpl(segment_lock); + auto download_offset = range().left + downloaded_size; if (offset_ != download_offset) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to write {} bytes to offset: {}, but current download offset is {} ({})", @@ -213,6 +200,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) throw; } + + std::lock_guard segment_lock(mutex); + downloaded_size += size; } FileSegment::State FileSegment::wait() @@ -250,12 +240,12 @@ bool FileSegment::reserve(size_t size) if (downloader_id != caller_id) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Space can be reserved only by downloader (current: {}, expected: {})", caller_id, downloader_id); - if (getDownloadedSize() + size > range().size()) + if (downloaded_size + size > range().size()) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to reserve space too much space ({}) for file segment with range: {} (downloaded size: {})", - size, range().toString(), getDownloadedSize()); + size, range().toString(), downloaded_size); - assert(reserved_size >= getDownloadedSize()); + assert(reserved_size >= 
downloaded_size); } /** @@ -263,7 +253,7 @@ bool FileSegment::reserve(size_t size) * in case previous downloader did not fully download current file_segment * and the caller is going to continue; */ - size_t free_space = reserved_size - getDownloadedSize(); + size_t free_space = reserved_size - downloaded_size; size_t size_to_reserve = size - free_space; std::lock_guard cache_lock(cache->mutex); @@ -302,7 +292,7 @@ void FileSegment::completeBatchAndResetDownloader() resetDownloaderImpl(segment_lock); - LOG_TEST(log, "Complete batch. Current downloaded size: {}", getDownloadedSize()); + LOG_TEST(log, "Complete batch. Current downloaded size: {}", downloaded_size); cv.notify_all(); } @@ -357,7 +347,7 @@ void FileSegment::complete() if (download_state == State::SKIP_CACHE || detached) return; - if (download_state != State::DOWNLOADED && getDownloadedSize() == range().size()) + if (downloaded_size == range().size() && download_state != State::DOWNLOADED) setDownloaded(segment_lock); } @@ -394,11 +384,10 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) if (!download_can_continue) { - size_t current_downloaded_size = getDownloadedSize(); - if (current_downloaded_size == 0) + if (!downloaded_size) { download_state = State::SKIP_CACHE; - LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString()); + LOG_TEST(log, "Remove cell {} (downloaded: {})", range().toString(), downloaded_size); cache->remove(key(), offset(), cache_lock, segment_lock); detached = true; @@ -411,7 +400,7 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) * in FileSegmentsHolder represent a contiguous range, so we can resize * it only when nobody needs it. */ - LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size); + LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), downloaded_size); cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock); detached = true; @@ -420,7 +409,10 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) } if (!downloader_id.empty() && downloader_id == getCallerIdImpl(allow_non_strict_checking)) + { + LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); + } if (!download_can_continue && cache_writer) { @@ -443,7 +435,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard & /* segment_l WriteBufferFromOwnString info; info << "File segment: " << range().toString() << ", "; info << "state: " << download_state << ", "; - info << "downloaded size: " << getDownloadedSize() << ", "; + info << "downloaded size: " << downloaded_size << ", "; info << "downloader id: " << downloader_id << ", "; info << "caller id: " << getCallerId(); diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index 736e5b0c505..a6f006f299d 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -95,7 +95,7 @@ public: bool reserve(size_t size); - void write(const char * from, size_t size, size_t offset); + void write(const char * from, size_t size, size_t offset_); RemoteFileReaderPtr getRemoteFileReader(); @@ -129,8 +129,6 @@ private: void setDownloaded(std::lock_guard & segment_lock); static String getCallerIdImpl(bool allow_non_strict_checking = false); void resetDownloaderImpl(std::lock_guard & segment_lock); - size_t getDownloadedSize() const; - size_t getDownloadOffsetImpl(std::lock_guard & segment_lock) const; String getInfoForLogImpl(std::lock_guard & segment_lock) const; const Range 
segment_range; @@ -145,15 +143,7 @@ private: size_t reserved_size = 0; mutable std::mutex mutex; - mutable std::condition_variable cv; - - /// Protects downloaded_size access with actual write into fs. - /// downloaded_size is not protected by download_mutex in methods which - /// can never be run in parallel to FileSegment::write() method - /// as downloaded_size is updated only in FileSegment::write() method. - /// Such methods are identified by isDownloader() check at their start, - /// e.g. they are executed strictly by the same thread, sequentially. - mutable std::mutex download_mutex; + std::condition_variable cv; Key file_key; IFileCache * cache; From 36f5c0eec62a51d655ee7b641c5931c47bff7a1a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 17 Mar 2022 21:14:57 +0800 Subject: [PATCH 003/117] improve check access in table functions --- src/TableFunctions/Hive/TableFunctionHive.h | 2 +- src/TableFunctions/ITableFunction.cpp | 7 ++++++- src/TableFunctions/ITableFunction.h | 4 ++++ src/TableFunctions/TableFunctionHDFSCluster.h | 2 ++ src/TableFunctions/TableFunctionS3Cluster.h | 2 ++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h index 0973bdda329..dba838389e5 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.h +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -10,7 +10,7 @@ class TableFunctionHive : public ITableFunction { public: static constexpr auto name = "hive"; - static constexpr auto storage_type_name = "hive"; + static constexpr auto storage_type_name = "Hive"; std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 42b24abdbbe..639240fd105 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -14,11 +14,16 @@ namespace ProfileEvents namespace DB { +AccessType ITableFunction::getSourceAccessType() const +{ + return StorageFactory::instance().getSourceAccessType(getStorageTypeName()); +} + StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool use_global_context) const { ProfileEvents::increment(ProfileEvents::TableFunctionExecute); - context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName())); + context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | getSourceAccessType()); auto context_to_use = use_global_context ? 
context->getGlobalContext() : context; diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 9c8d694865b..b419c4cfeed 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,10 @@ public: private: virtual StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; + virtual const char * getStorageTypeName() const = 0; + + virtual AccessType getSourceAccessType() const; }; using TableFunctionPtr = std::shared_ptr; diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h index 58d1c3d9b05..b5464e2fd19 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -39,6 +39,8 @@ protected: const char * getStorageTypeName() const override { return "HDFSCluster"; } + AccessType getSourceAccessType() const override { return AccessType::HDFS; } + ColumnsDescription getActualTableStructure(ContextPtr) const override; void parseArguments(const ASTPtr &, ContextPtr) override; diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 35d18631ae1..ed628ab4434 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -39,6 +39,8 @@ protected: const char * getStorageTypeName() const override { return "S3Cluster"; } + AccessType getSourceAccessType() const override { return AccessType::S3; } + ColumnsDescription getActualTableStructure(ContextPtr) const override; void parseArguments(const ASTPtr &, ContextPtr) override; From 1b6e7fea2fb62b5452e2310eb97ed1ec4c8c4f38 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 14 Mar 2022 17:33:29 +0100 Subject: [PATCH 004/117] Finally fixed --- src/Common/FileCache.cpp | 2 +- src/Common/FileSegment.cpp | 50 ++++++++++++------- src/Common/FileSegment.h | 9 ++++ src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 49 ++++++++++++------ src/Disks/IO/CachedReadBufferFromRemoteFS.h | 1 + src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 9 ++-- 6 files changed, 83 insertions(+), 37 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index e13106511e0..568b5b12ef4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -325,7 +325,7 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( if (files[key].contains(offset)) throw Exception( ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Cache already exists for key: `{}`, offset: {}, size: {}, current cache structure: {}", + "Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}", keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock)); auto file_segment = std::make_shared(offset, size, key, this, state); diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 7a68cac9cb7..b4ee7e7ca31 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -46,7 +46,16 @@ FileSegment::State FileSegment::state() const size_t FileSegment::getDownloadOffset() const { std::lock_guard segment_lock(mutex); - return range().left + downloaded_size; + return range().left + getDownloadedSize(segment_lock); +} + +size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_lock */) const +{ + if (download_state == State::DOWNLOADED) + return downloaded_size; + + std::lock_guard 
download_lock(download_mutex); + return downloaded_size; } String FileSegment::getCallerId() @@ -165,17 +174,17 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) "Only downloader can do the downloading. (CallerId: {}, DownloaderId: {})", getCallerId(), downloader_id); - { - std::lock_guard segment_lock(mutex); - auto download_offset = range().left + downloaded_size; - if (offset_ != download_offset) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Attempt to write {} bytes to offset: {}, but current download offset is {} ({})", - size, offset_, download_offset, getInfoForLogImpl(segment_lock)); - } + auto download_offset = range().left + downloaded_size; + if (offset_ != download_offset) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Attempt to write {} bytes to offset: {}, but current download offset is {} ({})", + size, offset_, download_offset); if (!cache_writer) { + if (downloaded_size > 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer should be finalized (downloaded size: {})", downloaded_size); + auto download_path = cache->getPathInLocalCache(key(), offset()); cache_writer = std::make_unique(download_path); } @@ -183,12 +192,15 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) try { cache_writer->write(from, size); + + std::lock_guard download_lock(download_mutex); + cache_writer->next(); + + downloaded_size += size; } catch (...) { - std::lock_guard segment_lock(mutex); - LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLog()); download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; @@ -201,8 +213,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) throw; } - std::lock_guard segment_lock(mutex); - downloaded_size += size; + assert(getDownloadOffset() == offset_ + size); } FileSegment::State FileSegment::wait() @@ -347,7 +358,7 @@ void FileSegment::complete() if (download_state == State::SKIP_CACHE || detached) return; - if (downloaded_size == range().size() && download_state != State::DOWNLOADED) + if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) setDownloaded(segment_lock); } @@ -384,10 +395,11 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) if (!download_can_continue) { - if (!downloaded_size) + size_t current_downloaded_size = getDownloadedSize(segment_lock); + if (current_downloaded_size == 0) { download_state = State::SKIP_CACHE; - LOG_TEST(log, "Remove cell {} (downloaded: {})", range().toString(), downloaded_size); + LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString()); cache->remove(key(), offset(), cache_lock, segment_lock); detached = true; @@ -400,7 +412,7 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) * in FileSegmentsHolder represent a contiguous range, so we can resize * it only when nobody needs it. 
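             * (In that case the cell is not removed but shrunk to the downloaded size
             * via reduceSizeToDownloaded() below.)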
*/ - LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), downloaded_size); + LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size); cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock); detached = true; @@ -430,12 +442,12 @@ String FileSegment::getInfoForLog() const return getInfoForLogImpl(segment_lock); } -String FileSegment::getInfoForLogImpl(std::lock_guard & /* segment_lock */) const +String FileSegment::getInfoForLogImpl(std::lock_guard & segment_lock) const { WriteBufferFromOwnString info; info << "File segment: " << range().toString() << ", "; info << "state: " << download_state << ", "; - info << "downloaded size: " << downloaded_size << ", "; + info << "downloaded size: " << getDownloadedSize(segment_lock) << ", "; info << "downloader id: " << downloader_id << ", "; info << "caller id: " << getCallerId(); diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index a6f006f299d..03e2a1ee866 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -130,6 +130,7 @@ private: static String getCallerIdImpl(bool allow_non_strict_checking = false); void resetDownloaderImpl(std::lock_guard & segment_lock); String getInfoForLogImpl(std::lock_guard & segment_lock) const; + size_t getDownloadedSize(std::lock_guard & segment_lock) const; const Range segment_range; @@ -145,6 +146,14 @@ private: mutable std::mutex mutex; std::condition_variable cv; + /// Protects downloaded_size access with actual write into fs. + /// downloaded_size is not protected by download_mutex in methods which + /// can never be run in parallel to FileSegment::write() method + /// as downloaded_size is updated only in FileSegment::write() method. + /// Such methods are identified by isDownloader() check at their start, + /// e.g. they are executed strictly by the same thread, sequentially. + mutable std::mutex download_mutex; + Key file_key; IFileCache * cache; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 3d0d9a895ca..f5bb52e9134 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -242,6 +242,9 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); + + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); } download_offset = file_segment->getDownloadOffset(); @@ -300,6 +303,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File { case ReadType::CACHED: { +#ifdef NDEBUG auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); size_t file_size = file_reader->size(); @@ -309,14 +313,11 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File "Unexpected state of cache file. Cache file size: {}, cache file offset: {}, " "expected file size to be non-zero and file downloaded size to exceed current file read offset (expected: {} > {})", file_size, range.left, range.left + file_size, file_offset_of_buffer_end); +#endif size_t seek_offset = file_offset_of_buffer_end - range.left; read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); - auto state = file_segment->state(); - LOG_TEST(log, "Cache file: {}. 
Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}", - file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset()); - break; } case ReadType::REMOTE_FS_READ_BYPASS_CACHE: @@ -424,12 +425,23 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) } size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload); + if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) + { + size_t remaining_size_to_read = std::min(file_segment->range().right, read_until_position - 1) - file_offset_of_buffer_end + 1; + current_predownload_size = std::min(current_predownload_size, remaining_size_to_read); + } if (file_segment->reserve(current_predownload_size)) { LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size()); + assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); + file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset); + + assert(file_segment->getDownloadOffset() <= std::min(file_segment->range().right + 1, read_until_position)); + assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd()); + current_offset += current_predownload_size; bytes_to_predownload -= current_predownload_size; @@ -551,6 +563,8 @@ bool CachedReadBufferFromRemoteFS::nextImpl() bool CachedReadBufferFromRemoteFS::nextImplStep() { + last_caller_id = FileSegment::getCallerId(); + if (IFileCache::shouldBypassCache()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed"); @@ -626,15 +640,14 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; if (download_current_segment != file_segment->isDownloader()) - { throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}", toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state()); - } if (!result) { +#ifdef NDEBUG if (auto * cache_file_reader = typeid_cast(implementation_buffer.get())) { auto cache_file_size = cache_file_reader->size(); @@ -644,6 +657,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() "Attempt to read from an empty cache file: {} (just before actual read)", cache_file_size); } +#endif result = implementation_buffer->next(); size = implementation_buffer->buffer().size(); @@ -651,13 +665,26 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (result) { + if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) + { + size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; + size = std::min(size, remaining_size_to_read); + assert(implementation_buffer->buffer().size() <= nextimpl_working_buffer_offset + size); + implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); + } + if (download_current_segment) { assert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right); if (file_segment->reserve(size)) { + assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); + file_segment->write(needed_to_predownload ? 
implementation_buffer->position() : implementation_buffer->buffer().begin(), size, file_offset_of_buffer_end); + + assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1); + assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd()); } else { @@ -687,13 +714,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() } } - if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) - { - size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; - size = std::min(size, remaining_size_to_read); - implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); - } - file_offset_of_buffer_end += size; } @@ -796,12 +816,13 @@ String CachedReadBufferFromRemoteFS::getInfoForLog() auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog(); return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, " - "read_type: {}, file segment info: {}", + "read_type: {}, last caller: {}, file segment info: {}", remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end, implementation_buffer_read_range_str, toString(read_type), + last_caller_id, current_file_segment_info); } diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index 760d4360662..aa363a836e8 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -100,6 +100,7 @@ private: size_t first_offset = 0; String nextimpl_step_log_info; + String last_caller_id; }; } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index bdb012a6376..d07373df3b7 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,5 +1,6 @@ #include "ThreadPoolRemoteFSReader.h" +#include #include #include #include @@ -62,6 +63,11 @@ std::future ThreadPoolRemoteFSReader::submit(Reques if (running_group) thread_status.attachQuery(running_group); + SCOPE_EXIT_SAFE({ + if (running_group) + CurrentThread::detachQuery(); + }); + setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; @@ -74,9 +80,6 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); - if (running_group) - thread_status.detachQuery(); - return Result{ .size = bytes_read, .offset = offset }; }); From c698fb9539bd942455a6203ae5956ba1fc98d12f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 17 Mar 2022 17:50:51 +0100 Subject: [PATCH 005/117] Debug --- src/Common/FileSegment.cpp | 15 ++++++++++++--- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 14 +++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index b4ee7e7ca31..e3c45949fe4 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -174,16 +174,23 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) "Only downloader can do the downloading. 
(CallerId: {}, DownloaderId: {})", getCallerId(), downloader_id); + if (downloaded_size == range().size()) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Attempt to write {} bytes to offset: {}, but current file segment is already fully downloaded", + size, offset_); + auto download_offset = range().left + downloaded_size; if (offset_ != download_offset) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Attempt to write {} bytes to offset: {}, but current download offset is {} ({})", + "Attempt to write {} bytes to offset: {}, but current download offset is {}", size, offset_, download_offset); if (!cache_writer) { if (downloaded_size > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer should be finalized (downloaded size: {})", downloaded_size); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cache writer should be finalized (downloaded size: {}, state: {})", + downloaded_size, stateToString(download_state)); auto download_path = cache->getPathInLocalCache(key(), offset()); cache_writer = std::make_unique(download_path); @@ -201,7 +208,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) } catch (...) { - LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLog()); + std::lock_guard segment_lock(mutex); + + LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock)); download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index f5bb52e9134..bdfee262901 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -95,12 +95,19 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe auto remote_fs_segment_reader = file_segment->getRemoteFileReader(); if (remote_fs_segment_reader) + { + if (remote_fs_segment_reader->hasPendingData()) + { + remote_fs_segment_reader = remote_file_reader_creator(); + file_segment->setRemoteFileReader(remote_fs_segment_reader); + } + return remote_fs_segment_reader; + } remote_fs_segment_reader = remote_file_reader_creator(); file_segment->setRemoteFileReader(remote_fs_segment_reader); - ///TODO: add check for pending data return remote_fs_segment_reader; } case ReadType::REMOTE_FS_READ_BYPASS_CACHE: @@ -242,9 +249,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); - - read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; - return getRemoteFSReadBuffer(file_segment, read_type); } download_offset = file_segment->getDownloadOffset(); @@ -669,7 +673,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; size = std::min(size, remaining_size_to_read); - assert(implementation_buffer->buffer().size() <= nextimpl_working_buffer_offset + size); + assert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size); implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); } From a7c9fe5c782eb2471a67b642bc5122dd74f2199e Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 17 Mar 2022 20:29:07 +0100 Subject: [PATCH 006/117] Finally-finally fixed --- src/Common/FileCache.h | 1 + src/Common/FileSegment.cpp | 203 
+++++++++++------- src/Common/FileSegment.h | 53 +++-- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 19 +- 4 files changed, 160 insertions(+), 116 deletions(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 3ffa578a997..e788c10bf0e 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -25,6 +25,7 @@ namespace DB class IFileCache : private boost::noncopyable { friend class FileSegment; +friend struct FileSegmentsHolder; public: using Key = UInt128; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index e3c45949fe4..f4e36965574 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -159,6 +159,17 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) remote_file_reader = remote_file_reader_; } +void FileSegment::resetRemoteFileReader() +{ + if (!isDownloader()) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader"); + + if (!remote_file_reader) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader does not exist"); + + remote_file_reader.reset(); +} + void FileSegment::write(const char * from, size_t size, size_t offset_) { if (!size) @@ -206,11 +217,14 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) downloaded_size += size; } - catch (...) + catch (Exception & e) { std::lock_guard segment_lock(mutex); - LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock)); + auto info = getInfoForLogImpl(segment_lock); + e.addMessage("while writing into cache, info: " + info); + + LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info); download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; @@ -290,7 +304,6 @@ void FileSegment::setDownloaded(std::lock_guard & /* segment_lock */ download_state = State::DOWNLOADED; is_downloaded = true; - assert(cache_writer); if (cache_writer) { cache_writer->finalize(); @@ -319,81 +332,72 @@ void FileSegment::completeBatchAndResetDownloader() void FileSegment::complete(State state) { - { - std::lock_guard segment_lock(mutex); - - bool is_downloader = downloader_id == getCallerId(); - if (!is_downloader) - { - cv.notify_all(); - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "File segment can be completed only by downloader or downloader's FileSegmentsHodler"); - } - - if (state != State::DOWNLOADED - && state != State::PARTIALLY_DOWNLOADED - && state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) - { - cv.notify_all(); - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Cannot complete file segment with state: {}", stateToString(state)); - } - - download_state = state; - } - - try - { - completeImpl(); - } - catch (...) - { - std::lock_guard segment_lock(mutex); - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) - downloader_id.clear(); - - cv.notify_all(); - throw; - } - - cv.notify_all(); -} - -void FileSegment::complete() -{ - { - std::lock_guard segment_lock(mutex); - - if (download_state == State::SKIP_CACHE || detached) - return; - - if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) - setDownloaded(segment_lock); - } - - try - { - completeImpl(true); - } - catch (...) 
- { - std::lock_guard segment_lock(mutex); - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) - downloader_id.clear(); - - cv.notify_all(); - throw; - } - - cv.notify_all(); -} - -void FileSegment::completeImpl(bool allow_non_strict_checking) -{ - /// cache lock is always taken before segment lock. std::lock_guard cache_lock(cache->mutex); std::lock_guard segment_lock(mutex); + bool is_downloader = downloader_id == getCallerId(); + if (!is_downloader) + { + cv.notify_all(); + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "File segment can be completed only by downloader or downloader's FileSegmentsHodler"); + } + + if (state != State::DOWNLOADED + && state != State::PARTIALLY_DOWNLOADED + && state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) + { + cv.notify_all(); + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Cannot complete file segment with state: {}", stateToString(state)); + } + + download_state = state; + + try + { + completeImpl(cache_lock, segment_lock); + } + catch (...) + { + if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + downloader_id.clear(); + + cv.notify_all(); + throw; + } + + cv.notify_all(); +} + +void FileSegment::complete(std::lock_guard & cache_lock) +{ + std::lock_guard segment_lock(mutex); + + if (download_state == State::SKIP_CACHE || detached) + return; + + if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) + setDownloaded(segment_lock); + + try + { + completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true); + } + catch (...) + { + if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + downloader_id.clear(); + + cv.notify_all(); + throw; + } + + cv.notify_all(); +} + +void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock, bool allow_non_strict_checking) +{ bool download_can_continue = false; if (download_state == State::PARTIALLY_DOWNLOADED @@ -435,12 +439,12 @@ void FileSegment::completeImpl(bool allow_non_strict_checking) downloader_id.clear(); } - if (!download_can_continue && cache_writer) - { - cache_writer->finalize(); - cache_writer.reset(); - remote_file_reader.reset(); - } + // if (!download_can_continue && cache_writer) + // { + // cache_writer->finalize(); + // cache_writer.reset(); + // remote_file_reader.reset(); + // } assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } @@ -482,6 +486,43 @@ String FileSegment::stateToString(FileSegment::State state) } } +FileSegmentsHolder::~FileSegmentsHolder() +{ + /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from + /// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here + /// remain only uncompleted file segments. + + IFileCache * cache = nullptr; + + for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) + { + auto current_file_segment_it = file_segment_it++; + auto & file_segment = *current_file_segment_it; + + if (!cache) + cache = file_segment->cache; + + try + { + /// File segment pointer must be reset right after calling complete() and + /// under the same mutex, because complete() checks for segment pointers. + std::lock_guard cache_lock(cache->mutex); + + file_segment->complete(cache_lock); + + file_segments.erase(current_file_segment_it); + } + catch (...) 
+ { +#ifndef NDEBUG + throw; +#else + tryLogCurrentException(__PRETTY_FUNCTION__); +#endif + } + } +} + String FileSegmentsHolder::toString() { String ranges; diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index 03e2a1ee866..ec6e17a1e5c 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -101,6 +101,8 @@ public: void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_); + void resetRemoteFileReader(); + String getOrSetDownloader(); String getDownloader() const; @@ -123,15 +125,29 @@ public: private: size_t availableSize() const { return reserved_size - downloaded_size; } - bool lastFileSegmentHolder() const; - void complete(); - void completeImpl(bool allow_non_strict_checking = false); - void setDownloaded(std::lock_guard & segment_lock); - static String getCallerIdImpl(bool allow_non_strict_checking = false); - void resetDownloaderImpl(std::lock_guard & segment_lock); - String getInfoForLogImpl(std::lock_guard & segment_lock) const; + size_t getDownloadedSize(std::lock_guard & segment_lock) const; + void setDownloaded(std::lock_guard & segment_lock); + + bool lastFileSegmentHolder() const; + + /// complete() without any completion state is called from destructor of + /// FileSegmentsHolder. complete() might check if the caller of the method + /// is the last alive holder of the segment. Therefore, complete() and destruction + /// of the file segment pointer must be done under the same cache mutex. + void complete(std::lock_guard & cache_lock); + + void completeImpl( + std::lock_guard & cache_lock, + std::lock_guard & segment_lock, bool allow_non_strict_checking = false); + + static String getCallerIdImpl(bool allow_non_strict_checking = false); + + void resetDownloaderImpl(std::lock_guard & segment_lock); + + String getInfoForLogImpl(std::lock_guard & segment_lock) const; + const Range segment_range; State download_state; @@ -169,28 +185,7 @@ struct FileSegmentsHolder : private boost::noncopyable explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {} FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {} - ~FileSegmentsHolder() - { - /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from - /// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here - /// remain only uncompleted file segments. - - for (auto & segment : file_segments) - { - try - { - segment->complete(); - } - catch (...) - { -#ifndef NDEBUG - throw; -#else - tryLogCurrentException(__PRETTY_FUNCTION__); -#endif - } - } - } + ~FileSegmentsHolder(); FileSegments file_segments{}; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index bdfee262901..c555846b2cf 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -96,13 +96,20 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe if (remote_fs_segment_reader) { - if (remote_fs_segment_reader->hasPendingData()) - { - remote_fs_segment_reader = remote_file_reader_creator(); - file_segment->setRemoteFileReader(remote_fs_segment_reader); - } + /// There might be pending data if some previous downloader has downloaded the data, but + /// failed to fully write it. + if (!remote_fs_segment_reader->hasPendingData()) + return remote_fs_segment_reader; - return remote_fs_segment_reader; + /// TODO: Finish this. 
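        /// (The sketch below would reuse the already opened remote reader by skipping the
        /// unneeded bytes with ignore() instead of re-creating it; until it is finished,
        /// the reader is simply reset below and created anew.)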
+ // if (remote_fs_segment_reader->getPosition() >= file_offset_of_buffer_end) + // { + // auto to_ignore = remote_fs_segment_reader->getFileOffsetOfBufferEnd() - file_offset_of_buffer_end; + // remote_fs_segment_reader->ignore(to_ignore); + // return remote_fs_segment_reader; + // } + + file_segment->resetRemoteFileReader(); } remote_fs_segment_reader = remote_file_reader_creator(); From 44fb6fb86fcd40cd48b49f263a934e7d40ba79df Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Mar 2022 10:16:06 +0100 Subject: [PATCH 007/117] Revert some changes --- src/Common/FileSegment.cpp | 2 +- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 42 ++++++------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index f4e36965574..33ee181be6b 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -200,7 +200,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) { if (downloaded_size > 0) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cache writer should be finalized (downloaded size: {}, state: {})", + "Cache writer was finalized (downloaded size: {}, state: {})", downloaded_size, stateToString(download_state)); auto download_path = cache->getPathInLocalCache(key(), offset()); diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index c555846b2cf..f2ce48cf050 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -95,22 +95,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe auto remote_fs_segment_reader = file_segment->getRemoteFileReader(); if (remote_fs_segment_reader) - { - /// There might be pending data if some previous downloader has downloaded the data, but - /// failed to fully write it. - if (!remote_fs_segment_reader->hasPendingData()) - return remote_fs_segment_reader; - - /// TODO: Finish this. 
- // if (remote_fs_segment_reader->getPosition() >= file_offset_of_buffer_end) - // { - // auto to_ignore = remote_fs_segment_reader->getFileOffsetOfBufferEnd() - file_offset_of_buffer_end; - // remote_fs_segment_reader->ignore(to_ignore); - // return remote_fs_segment_reader; - // } - - file_segment->resetRemoteFileReader(); - } + return remote_fs_segment_reader; remote_fs_segment_reader = remote_file_reader_creator(); file_segment->setRemoteFileReader(remote_fs_segment_reader); @@ -436,11 +421,6 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) } size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload); - if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) - { - size_t remaining_size_to_read = std::min(file_segment->range().right, read_until_position - 1) - file_offset_of_buffer_end + 1; - current_predownload_size = std::min(current_predownload_size, remaining_size_to_read); - } if (file_segment->reserve(current_predownload_size)) { @@ -676,14 +656,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (result) { - if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) - { - size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; - size = std::min(size, remaining_size_to_read); - assert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size); - implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); - } - if (download_current_segment) { assert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right); @@ -725,6 +697,18 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() } } + /// - If last file segment was read from remote fs, then we read up to segment->range().right, but + /// the requested right boundary cound be segment->range().left < requested_right_boundary < segment->range().right. + /// Therefore need to resize to a smaller size. And resize must be done after write into cache. + /// - If last file segment was read from local fs, then we could read more than file_segemnt->range().right, so resize is also needed. 
+ if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) + { + size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; + size = std::min(size, remaining_size_to_read); + assert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size); + implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); + } + file_offset_of_buffer_end += size; } From 4bc0128800a7e87f12aa1d48b7c64e362cd1841b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Mar 2022 10:33:43 +0100 Subject: [PATCH 008/117] Add test --- .../02235_remote_fs_cache_stress.reference | 32 +++++++++ .../02235_remote_fs_cache_stress.sh | 69 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 tests/queries/0_stateless/02235_remote_fs_cache_stress.reference create mode 100755 tests/queries/0_stateless/02235_remote_fs_cache_stress.sh diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.reference b/tests/queries/0_stateless/02235_remote_fs_cache_stress.reference new file mode 100644 index 00000000000..1fa6905307d --- /dev/null +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.reference @@ -0,0 +1,32 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh new file mode 100755 index 00000000000..c291f66f169 --- /dev/null +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" + +DROP TABLE IF EXISTS t_01411; +DROP TABLE IF EXISTS t_01411_num; + +CREATE TABLE t_01411( + str LowCardinality(String), + arr Array(LowCardinality(String)) default [str] +) ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(1000000); + +CREATE TABLE t_01411_num( + num UInt8, + arr Array(LowCardinality(Int64)) default [num] +) ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000); + +""" + +function go() +{ + +${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" + +SELECT count() FROM t_01411 WHERE str = 'asdf337'; +SELECT count() FROM t_01411 WHERE arr[1] = 'asdf337'; +SELECT count() FROM t_01411 WHERE has(arr, 'asdf337'); +SELECT count() FROM t_01411 WHERE indexOf(arr, 'asdf337') > 0; + +SELECT count() FROM t_01411 WHERE arr[1] = str; +SELECT count() FROM t_01411 WHERE has(arr, str); +SELECT count() FROM t_01411 WHERE indexOf(arr, str) > 0; + +SELECT count() FROM t_01411_num WHERE num = 42; +SELECT count() FROM t_01411_num WHERE arr[1] = 42; +SELECT count() FROM t_01411_num WHERE has(arr, 42); +SELECT count() FROM t_01411_num WHERE indexOf(arr, 42) > 0; + +SELECT count() FROM t_01411_num WHERE arr[1] = num; +SELECT count() FROM t_01411_num WHERE has(arr, num); +SELECT count() FROM t_01411_num WHERE indexOf(arr, num) > 0; +SELECT count() FROM t_01411_num WHERE indexOf(arr, num % 337) > 0; + +SELECT indexOf(['a', 'b', 'c'], toLowCardinality('a')); +SELECT indexOf(['a', 'b', NULL], toLowCardinality('a')); + +""" +} + +for i in `seq 1 32`; do go | grep -q "Exception" && echo 'FAIL' || echo 'OK' ||: & done + +wait + +${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +DROP TABLE IF EXISTS t_01411; +DROP TABLE IF EXISTS t_01411_num; +""" From b756af7a146ab144ca4dc157db1ce01e7c35e091 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Mar 2022 10:57:32 +0100 Subject: [PATCH 009/117] Uncomment code, update test --- src/Common/FileSegment.cpp | 12 ++++++------ .../0_stateless/02235_remote_fs_cache_stress.sh | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 33ee181be6b..fc6806ff55f 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -439,12 +439,12 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo downloader_id.clear(); } - // if (!download_can_continue && cache_writer) - // { - // cache_writer->finalize(); - // cache_writer.reset(); - // remote_file_reader.reset(); - // } + if (!download_can_continue && cache_writer) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index c291f66f169..2259ed7b378 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -17,7 +17,7 @@ CREATE TABLE t_01411( ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(1000000); +INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(2000000); CREATE TABLE 
t_01411_num( num UInt8, @@ -25,7 +25,7 @@ CREATE TABLE t_01411_num( ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000); +INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(200000); """ From 7d50bd1eb3281acaa1be55bd26ec7f1abd86de15 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 21 Mar 2022 11:19:45 +0800 Subject: [PATCH 010/117] add access type hive --- src/Access/Common/AccessType.h | 1 + src/Storages/Hive/StorageHive.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 0b69bd5fd0e..fb5eafbe679 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -182,6 +182,7 @@ enum class AccessType M(JDBC, "", GLOBAL, SOURCES) \ M(HDFS, "", GLOBAL, SOURCES) \ M(S3, "", GLOBAL, SOURCES) \ + M(HIVE, "", GLOBAL, SOURCES) \ M(SOURCES, "", GROUP, ALL) \ \ M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \ diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 7b6a8db568f..89da6a10e47 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -740,6 +740,7 @@ void registerStorageHive(StorageFactory & factory) StorageFactory::StorageFeatures{ .supports_settings = true, .supports_sort_order = true, + .source_access_type = AccessType::HIVE, }); } From 6fcdb9dd783bcf231abfa83b11e9022ee61c1641 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 21 Mar 2022 14:28:48 +0800 Subject: [PATCH 011/117] fix fasttest --- tests/queries/0_stateless/01271_show_privileges.reference | 1 + .../0_stateless/02117_show_create_table_system.reference | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 06bd6ab04e4..b27c0d10d3b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -133,6 +133,7 @@ ODBC [] GLOBAL SOURCES JDBC [] GLOBAL SOURCES HDFS [] GLOBAL SOURCES S3 [] GLOBAL SOURCES +HIVE [] GLOBAL SOURCES SOURCES [] \N ALL ALL ['ALL PRIVILEGES'] \N \N NONE ['USAGE','NO PRIVILEGES'] \N \N diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index cecdd0498b1..22c51e230f8 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -35,7 +35,7 @@ CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n 
`secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR 
PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'SOURCES\' = 135, \'ALL\' = 136, \'NONE\' = 137),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD 
INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'SOURCES\' = 135, \'ALL\' = 136, \'NONE\' = 137))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' 
= 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 138),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, 
\'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 
138))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `distributed_depth` UInt64,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n 
`data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' From e5c870add20d0110d677b2e1b2ebf34cc919e44c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 19:48:13 +0100 Subject: [PATCH 012/117] Fix bug, fix unit tests --- src/Common/FileCache.cpp | 38 ++++++++++++++--------- src/Common/FileCache.h | 1 + src/Common/FileSegment.cpp | 27 +++++++++++++++- src/Common/FileSegment.h | 3 ++ src/Common/tests/gtest_lru_file_cache.cpp | 2 +- 5 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 568b5b12ef4..2a64b64ee63 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -241,6 +241,10 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t std::lock_guard cache_lock(mutex); +#ifndef NDEBUG + assertCacheCorrectness(key, cache_lock); +#endif + /// Get all segments which intersect with the given range. auto file_segments = getImpl(key, range, cache_lock); @@ -691,28 +695,32 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU } } -String LRUFileCache::dumpStructure(const Key & key_) +String LRUFileCache::dumpStructure(const Key & key) { std::lock_guard cache_lock(mutex); - return dumpStructureImpl(key_, cache_lock); + return dumpStructureImpl(key, cache_lock); } -String LRUFileCache::dumpStructureImpl(const Key & key_, std::lock_guard & /* cache_lock */) +String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard & /* cache_lock */) { - std::lock_guard cache_lock(mutex); - WriteBufferFromOwnString result; - for (auto it = queue.begin(); it != queue.end(); ++it) - { - auto [key, offset] = *it; - if (key == key_) - { - auto * cell = getCell(key, offset, cache_lock); - result << (it != queue.begin() ? 
", " : "") << cell->file_segment->range().toString(); - result << "(state: " << cell->file_segment->download_state << ")"; - } - } + const auto & cells_by_offset = files[key]; + + for (const auto & [offset, cell] : cells_by_offset) + result << cell.file_segment->getInfoForLog() << "\n"; + return result.str(); } +void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard & /* cache_lock */) +{ + const auto & cells_by_offset = files[key]; + + for (const auto & [_, cell] : cells_by_offset) + { + const auto & file_segment = cell.file_segment; + file_segment->assertCorrectness(); + } +} + } diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index e788c10bf0e..d58711cef0a 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -211,6 +211,7 @@ public: Stat getStat(); String dumpStructure(const Key & key_) override; + void assertCacheCorrectness(const Key & key, std::lock_guard & cache_lock); }; } diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index fc6806ff55f..40e270f4fab 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -380,6 +380,18 @@ void FileSegment::complete(std::lock_guard & cache_lock) if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) setDownloaded(segment_lock); + if (download_state == State::DOWNLOADING || download_state == State::EMPTY) + { + /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the + /// downloader or the only owner of the segment. + + bool can_update_segment_state = downloader_id == getCallerIdImpl(true) + || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + + if (can_update_segment_state) + download_state = State::PARTIALLY_DOWNLOADED; + } + try { completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true); @@ -446,7 +458,7 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo remote_file_reader.reset(); } - assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); + assertCorrectnessImpl(segment_lock); } String FileSegment::getInfoForLog() const @@ -486,6 +498,19 @@ String FileSegment::stateToString(FileSegment::State state) } } +void FileSegment::assertCorrectness() const +{ + std::lock_guard segment_lock(mutex); + assertCorrectnessImpl(segment_lock); +} + +void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment_lock */) const +{ + assert(downloader_id.empty() == (download_state != FileSegment::State::DOWNLOADING)); + assert(!downloader_id.empty() == (download_state == FileSegment::State::DOWNLOADING)); + assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); +} + FileSegmentsHolder::~FileSegmentsHolder() { /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index 5edc34cc90b..b7501640913 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -123,11 +123,14 @@ public: String getInfoForLog() const; + void assertCorrectness() const; + private: size_t availableSize() const { return reserved_size - downloaded_size; } size_t getDownloadedSize(std::lock_guard & segment_lock) const; String getInfoForLogImpl(std::lock_guard & segment_lock) const; + void assertCorrectnessImpl(std::lock_guard & segment_lock) const; void setDownloaded(std::lock_guard & segment_lock); 
diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index d5a76f9daad..c08b12857a1 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -67,7 +67,7 @@ void download(DB::FileSegmentPtr file_segment) fs::create_directories(subdir); std::string data(size, '0'); - file_segment->write(data.data(), size); + file_segment->write(data.data(), size, file_segment->getDownloadOffset()); } void prepareAndDownload(DB::FileSegmentPtr file_segment) From ccdf7d02316382b78f9dcaca007fd94de75501da Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 21:51:17 +0100 Subject: [PATCH 013/117] Fix typos check --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index f2ce48cf050..150d3ff7207 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -698,7 +698,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() } /// - If last file segment was read from remote fs, then we read up to segment->range().right, but - /// the requested right boundary cound be segment->range().left < requested_right_boundary < segment->range().right. + /// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right. /// Therefore need to resize to a smaller size. And resize must be done after write into cache. /// - If last file segment was read from local fs, then we could read more than file_segemnt->range().right, so resize is also needed. if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) From d8739318eae37d1921f6788a39c4f166beec5b01 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 22 Mar 2022 10:39:58 +0100 Subject: [PATCH 014/117] Fix bug --- src/Common/FileSegment.cpp | 67 ++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 40e270f4fab..370a6a59a6f 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -410,54 +410,51 @@ void FileSegment::complete(std::lock_guard & cache_lock) void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock, bool allow_non_strict_checking) { - bool download_can_continue = false; + bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + bool download_can_continue = !is_last_holder + && download_state != State::DOWNLOADED + && download_state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; - if (download_state == State::PARTIALLY_DOWNLOADED - || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) + if (!download_can_continue + && (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)) { - bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); - download_can_continue = !is_last_holder && download_state == State::PARTIALLY_DOWNLOADED; - - if (!download_can_continue) + size_t current_downloaded_size = getDownloadedSize(segment_lock); + if (current_downloaded_size == 0) { - size_t current_downloaded_size = getDownloadedSize(segment_lock); - if (current_downloaded_size == 0) - { - download_state = State::SKIP_CACHE; - LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString()); - 
cache->remove(key(), offset(), cache_lock, segment_lock); + download_state = State::SKIP_CACHE; + LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString()); + cache->remove(key(), offset(), cache_lock, segment_lock); - detached = true; - } - else if (is_last_holder) - { - /** - * Only last holder of current file segment can resize the cell, - * because there is an invariant that file segments returned to users - * in FileSegmentsHolder represent a contiguous range, so we can resize - * it only when nobody needs it. - */ - LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size); - cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock); + detached = true; + } + else if (is_last_holder) + { + /** + * Only last holder of current file segment can resize the cell, + * because there is an invariant that file segments returned to users + * in FileSegmentsHolder represent a contiguous range, so we can resize + * it only when nobody needs it. + */ + LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size); + cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock); - detached = true; - } + detached = true; + } + + if (cache_writer) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); } } - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(allow_non_strict_checking)) + if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder)) { LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); } - if (!download_can_continue && cache_writer) - { - cache_writer->finalize(); - cache_writer.reset(); - remote_file_reader.reset(); - } - assertCorrectnessImpl(segment_lock); } From a5aa75f102301dd0c3adf7ec9c10e49708af4d47 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 22 Mar 2022 11:25:08 +0100 Subject: [PATCH 015/117] Update test --- tests/queries/0_stateless/02235_remote_fs_cache_stress.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index 2259ed7b378..1a2f70ecdd7 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -10,6 +10,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" DROP TABLE IF EXISTS t_01411; DROP TABLE IF EXISTS t_01411_num; +drop table if exists lc_dict_reading; CREATE TABLE t_01411( str LowCardinality(String), @@ -27,6 +28,8 @@ ORDER BY tuple(); INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(200000); +create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; +insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; """ function go() @@ -34,6 +37,8 @@ function go() ${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4; + SELECT count() FROM t_01411 WHERE str = 'asdf337'; SELECT count() FROM t_01411 WHERE arr[1] = 'asdf337'; SELECT count() FROM t_01411 WHERE has(arr, 'asdf337'); @@ -55,7 +60,6 @@ SELECT count() FROM t_01411_num WHERE indexOf(arr, num % 337) > 0; SELECT indexOf(['a', 'b', 'c'], toLowCardinality('a')); 
SELECT indexOf(['a', 'b', NULL], toLowCardinality('a')); - """ } From 9d88494b430fd5092d6640311e9fe157454fd9a3 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 23 Mar 2022 10:55:07 +0800 Subject: [PATCH 016/117] fix fasttest --- .../0_stateless/02117_show_create_table_system.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 22c51e230f8..dc586341030 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -20,7 +20,7 @@ CREATE TABLE system.errors\n(\n `name` String,\n `code` Int32,\n `value CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `description` String\n)\nENGINE = SystemEvents()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, 
\'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'SOURCES\' = 135, \'ALL\' = 136, \'NONE\' = 137),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE 
PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 138),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' From f84a88e48fdb7b3d5895d07113355a6be52267fc Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 23 Mar 2022 21:41:42 +0800 Subject: [PATCH 017/117] push again --- src/TableFunctions/ITableFunction.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index b419c4cfeed..44ec3757bef 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -74,7 +74,6 @@ private: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; 
virtual const char * getStorageTypeName() const = 0; - virtual AccessType getSourceAccessType() const; }; From 95dd4a5c14aa7f4f1d3b235bf1e4c5610317ae31 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 24 Mar 2022 11:32:35 +0800 Subject: [PATCH 018/117] push again --- src/TableFunctions/ITableFunction.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 44ec3757bef..b419c4cfeed 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -74,6 +74,7 @@ private: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; virtual const char * getStorageTypeName() const = 0; + virtual AccessType getSourceAccessType() const; }; From f9b538fb0c99099e98f9e5ceba7fd4d5477058b4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 24 Mar 2022 12:12:17 +0100 Subject: [PATCH 019/117] Fix --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 -- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 28 +++++++++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 150d3ff7207..e31cf7a5da3 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -429,8 +429,6 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset); - - assert(file_segment->getDownloadOffset() <= std::min(file_segment->range().right + 1, read_until_position)); assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd()); current_offset += current_predownload_size; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index d07373df3b7..3aa7f8d9d65 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -63,24 +63,34 @@ std::future ThreadPoolRemoteFSReader::submit(Reques if (running_group) thread_status.attachQuery(running_group); - SCOPE_EXIT_SAFE({ - if (running_group) - CurrentThread::detachQuery(); - }); - setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); Stopwatch watch(CLOCK_MONOTONIC); - auto [bytes_read, offset] = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); + + ReadBufferFromRemoteFSGather::ReadResult result; + try + { + result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); + } + catch (...) + { + if (running_group) + CurrentThread::detachQuery(); + throw; + } + watch.stop(); - ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); + if (running_group) + CurrentThread::detachQuery(); - return Result{ .size = bytes_read, .offset = offset }; + ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? 
result.size - result.offset : result.size); + + return Result{ .size = result.size, .offset = result.offset }; }); auto future = task->get_future(); From dad4e5b0c5477113605c3fae1395bd37ff348ef2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 24 Mar 2022 17:10:04 +0100 Subject: [PATCH 020/117] Fix --- src/Storages/StorageS3.cpp | 8 +++++++- tests/integration/test_storage_hdfs/test.py | 10 ++++++++++ tests/integration/test_storage_s3/test.py | 9 +++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f319bd1097b..e3bead8a42c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -648,8 +648,14 @@ Pipe StorageS3::read( Block block_for_format; if (isColumnOriented()) { + auto fetch_columns = column_names; + fetch_columns.erase(std::remove_if(fetch_columns.begin(), fetch_columns.end(), + [](const String & col){return col == "_path" || col == "_file"; })); + if (fetch_columns.empty()) + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 7f340424ccf..81182d44ab2 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -554,6 +554,16 @@ def test_insert_select_schema_inference(started_cluster): assert int(result) == 1 +def test_virtual_column(started_cluster): + hdfs_api = started_cluster.hdfs_api + + table_function = (f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')") + node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") + + result = node1.query(f"SELECT _path FROM {table_function}") + assert result.strip() == "parquet" + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dd29d0a5d6a..5a2c7722e2b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1375,3 +1375,12 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 + + +def test_virtual_columns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + name = "test_table" + + result = instance.query("SELECT _path FROM s3(s3_parquet, format='Parquet')") + assert result.strip() == "root/test_parquet" From e91e30aaa41a7e7321e7f178b0b8cb2e524e7ef1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 24 Mar 2022 17:10:04 +0100 Subject: [PATCH 021/117] Fix --- src/Storages/StorageS3.cpp | 8 +++++++- tests/integration/test_storage_hdfs/test.py | 10 ++++++++++ tests/integration/test_storage_s3/test.py | 9 +++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ce31308fdd7..ab48afda533 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ 
-660,8 +660,14 @@ Pipe StorageS3::read( Block block_for_format; if (isColumnOriented()) { + auto fetch_columns = column_names; + fetch_columns.erase(std::remove_if(fetch_columns.begin(), fetch_columns.end(), + [](const String & col){return col == "_path" || col == "_file"; })); + if (fetch_columns.empty()) + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 7f340424ccf..81182d44ab2 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -554,6 +554,16 @@ def test_insert_select_schema_inference(started_cluster): assert int(result) == 1 +def test_virtual_column(started_cluster): + hdfs_api = started_cluster.hdfs_api + + table_function = (f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')") + node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") + + result = node1.query(f"SELECT _path FROM {table_function}") + assert result.strip() == "parquet" + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dd29d0a5d6a..5a2c7722e2b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1375,3 +1375,12 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 + + +def test_virtual_columns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + name = "test_table" + + result = instance.query("SELECT _path FROM s3(s3_parquet, format='Parquet')") + assert result.strip() == "root/test_parquet" From edf0436f5c6b1eb3df6c35c160bb45b75559e2c4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Mar 2022 10:14:14 +0100 Subject: [PATCH 022/117] Fix style check --- tests/integration/test_storage_hdfs/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 81182d44ab2..1f4b61c1030 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -557,7 +557,9 @@ def test_insert_select_schema_inference(started_cluster): def test_virtual_column(started_cluster): hdfs_api = started_cluster.hdfs_api - table_function = (f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')") + table_function = ( + f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')" + ) node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") From 9aaba43a0ab0d58ef19cacb989d59d3637b4152f Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Mar 2022 16:29:30 +0100 Subject: [PATCH 023/117] Remove incorrect assertion --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 1 - 1 
file changed, 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index e31cf7a5da3..84b46f36222 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -429,7 +429,6 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset); - assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd()); current_offset += current_predownload_size; From e59dbe72c7c280da0d33159154b3ccffec3a2833 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Mar 2022 16:32:56 +0100 Subject: [PATCH 024/117] Revert accidental commit --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 1 - src/Storages/StorageS3.cpp | 8 +------- tests/integration/test_storage_hdfs/test.py | 10 ---------- tests/integration/test_storage_s3/test.py | 9 --------- 4 files changed, 1 insertion(+), 27 deletions(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 84b46f36222..7cd4f4b491a 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -122,7 +122,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( { auto range = file_segment->range(); - /// Each wait() call has a timeout of 1 second. size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec; size_t wait_download_tries = 0; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ab48afda533..ce31308fdd7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -660,14 +660,8 @@ Pipe StorageS3::read( Block block_for_format; if (isColumnOriented()) { - auto fetch_columns = column_names; - fetch_columns.erase(std::remove_if(fetch_columns.begin(), fetch_columns.end(), - [](const String & col){return col == "_path" || col == "_file"; })); - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 81182d44ab2..7f340424ccf 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -554,16 +554,6 @@ def test_insert_select_schema_inference(started_cluster): assert int(result) == 1 -def test_virtual_column(started_cluster): - hdfs_api = started_cluster.hdfs_api - - table_function = (f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')") - node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") - - result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "parquet" - - if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git 
a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 5a2c7722e2b..dd29d0a5d6a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1375,12 +1375,3 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 - - -def test_virtual_columns(started_cluster): - bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - name = "test_table" - - result = instance.query("SELECT _path FROM s3(s3_parquet, format='Parquet')") - assert result.strip() == "root/test_parquet" From c6706b8bfa0676f18760e9e163e078339c77bb1d Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Mar 2022 11:42:51 +0100 Subject: [PATCH 025/117] Better --- src/Storages/StorageS3.cpp | 8 ++++++-- tests/integration/test_storage_s3/test.py | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ab48afda533..6c610271ba7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -661,8 +661,12 @@ Pipe StorageS3::read( if (isColumnOriented()) { auto fetch_columns = column_names; - fetch_columns.erase(std::remove_if(fetch_columns.begin(), fetch_columns.end(), - [](const String & col){return col == "_path" || col == "_file"; })); + const auto & virtuals = getVirtuals(); + std::erase_if( + fetch_columns, + [&](const String & col) + { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); + if (fetch_columns.empty()) fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 5a2c7722e2b..cb56468ae82 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1382,5 +1382,6 @@ def test_virtual_columns(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "test_table" + instance.query("insert into table function s3(s3_parquet, format='Parquet') select 1, 'kek' settings s3_truncate_on_insert=1") result = instance.query("SELECT _path FROM s3(s3_parquet, format='Parquet')") assert result.strip() == "root/test_parquet" From 96b22ab2ab9da6ff8db1be2e890614bda15bf8cb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 27 Mar 2022 14:59:05 +0200 Subject: [PATCH 026/117] Ping CI From 9d20339dcbae560ffc2876968a9da94ec7154465 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 28 Mar 2022 09:25:18 +0200 Subject: [PATCH 027/117] Mark test as long --- tests/queries/0_stateless/02235_remote_fs_cache_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index 1a2f70ecdd7..65caf0eaef6 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest +# Tags: no-fasttest, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From a2013ebe2b189ab0ecc7279a78f98da96a721c65 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 28 Mar 2022 
21:18:20 +0200 Subject: [PATCH 028/117] Better --- docs/en/operations/named-collections.md | 5 +- src/Storages/HDFS/StorageHDFS.cpp | 155 ++++++++---------- src/Storages/HDFS/StorageHDFS.h | 21 +-- src/Storages/StorageS3.cpp | 74 ++++----- src/Storages/StorageS3.h | 9 +- src/Storages/getVirtualsForStorage.cpp | 22 +++ src/Storages/getVirtualsForStorage.h | 9 + src/TableFunctions/TableFunctionS3.cpp | 3 + tests/config/config.d/named_collection.xml | 6 + tests/integration/test_storage_s3/test.py | 10 -- .../02245_s3_virtual_columns.reference | 15 ++ .../0_stateless/02245_s3_virtual_columns.sql | 15 ++ 12 files changed, 192 insertions(+), 152 deletions(-) create mode 100644 src/Storages/getVirtualsForStorage.cpp create mode 100644 src/Storages/getVirtualsForStorage.h create mode 100644 tests/queries/0_stateless/02245_s3_virtual_columns.reference create mode 100644 tests/queries/0_stateless/02245_s3_virtual_columns.sql diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index dce7938f98b..ab972c72345 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -36,6 +36,7 @@ Example of configuration: AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY CSV + https://s3.us-east-1.amazonaws.com/yourbucket/mydata/ @@ -44,12 +45,12 @@ Example of configuration: ### Example of using named connections with the s3 function ```sql -INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', +INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') SELECT * FROM numbers(10000); SELECT count() -FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') +FROM s3(s3_mydata, filename = 'test_file.tsv.gz') ┌─count()─┐ │ 10000 │ diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 74f6937dbae..51c887b7a17 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -164,6 +165,13 @@ StorageHDFS::StorageHDFS( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); } ColumnsDescription StorageHDFS::getTableStructureFromData( @@ -273,36 +281,6 @@ private: Strings::iterator uris_iter; }; -Block HDFSSource::getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) -{ - auto header = metadata_snapshot->getSampleBlock(); - /// Note: AddingDefaultsBlockInputStream doesn't change header. 
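For reference, the behaviour of the getVirtualsForStorage() helper that this patch introduces (and that the constructor above now calls to fill the new virtual_columns member) can be reproduced with a small standard-library sketch: keep only those default virtual columns whose names are not shadowed by a real storage column. The (name, type) string pair below is merely a stand-in for ClickHouse's NameAndTypePair, and the sample table mirrors the 02245_s3_virtual_columns test added later in this patch.

```cpp
// Minimal sketch of the getVirtualsForStorage() logic; std::pair<std::string, std::string>
// stands in for ClickHouse's NameAndTypePair and std::vector for NamesAndTypesList.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using NameAndType = std::pair<std::string, std::string>; // {column name, type name}

/// Keep only the default virtual columns not shadowed by a storage column of the same name.
std::vector<NameAndType> getVirtualsForStorage(std::vector<NameAndType> storage_columns,
                                               std::vector<NameAndType> default_virtuals)
{
    auto by_name = [](const NameAndType & lhs, const NameAndType & rhs) { return lhs.first < rhs.first; };
    std::sort(storage_columns.begin(), storage_columns.end(), by_name);
    std::sort(default_virtuals.begin(), default_virtuals.end(), by_name);

    std::vector<NameAndType> result;
    std::set_difference(
        default_virtuals.begin(), default_virtuals.end(),
        storage_columns.begin(), storage_columns.end(),
        std::back_inserter(result), by_name);
    return result;
}

int main()
{
    /// The table declares its own `_path` column, so only `_file` stays virtual.
    auto virtuals = getVirtualsForStorage(
        {{"a", "UInt64"}, {"_path", "Int32"}},
        {{"_path", "String"}, {"_file", "String"}});

    for (const auto & [name, type] : virtuals)
        std::cout << name << ' ' << type << '\n'; // prints: _file String
}
```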
- if (need_path_column) - header.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - if (need_file_column) - header.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - return header; -} - -Block HDFSSource::getBlockForSource( - const StorageHDFSPtr & storage, - const StorageSnapshotPtr & storage_snapshot, - const ColumnsDescription & columns_description, - bool need_path_column, - bool need_file_column) -{ - if (storage->isColumnOriented()) - return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - else - return getHeader(storage_snapshot->metadata, need_path_column, need_file_column); -} - HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) : pimpl(std::make_shared(context_, uri)) {} @@ -321,22 +299,28 @@ String HDFSSource::URISIterator::next() return pimpl->next(); } +Block HDFSSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) +{ + for (const auto & virtual_column : requested_virtual_columns) + sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); + + return sample_block; +} + HDFSSource::HDFSSource( StorageHDFSPtr storage_, - const StorageSnapshotPtr & storage_snapshot_, + const Block & block_for_format_, + const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - bool need_path_column_, - bool need_file_column_, std::shared_ptr file_iterator_, ColumnsDescription columns_description_) - : SourceWithProgress(getBlockForSource(storage_, storage_snapshot_, columns_description_, need_path_column_, need_file_column_)) + : SourceWithProgress(getHeader(block_for_format_, requested_virtual_columns_)) , WithContext(context_) , storage(std::move(storage_)) - , storage_snapshot(storage_snapshot_) + , block_for_format(block_for_format_) + , requested_virtual_columns(requested_virtual_columns_) , max_block_size(max_block_size_) - , need_path_column(need_path_column_) - , need_file_column(need_file_column_) , file_iterator(file_iterator_) , columns_description(std::move(columns_description_)) { @@ -361,14 +345,7 @@ bool HDFSSource::initialize() auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef()), compression); - auto get_block_for_format = [&]() -> Block - { - if (storage->isColumnOriented()) - return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - return storage_snapshot->metadata->getSampleBlock(); - }; - - auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size); + auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -402,20 +379,21 @@ Chunk HDFSSource::generate() Columns columns = chunk.getColumns(); UInt64 num_rows = chunk.getNumRows(); - /// Enrich with virtual columns. 
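Below, StorageHDFS::read() picks up the same filtering pattern already applied to StorageS3::read() earlier in this series: requested virtual columns are stripped from the list of columns fetched from the file itself, and if nothing is left, one (ideally the smallest) physical column is still read. A rough standard-library sketch, with plain std::string names standing in for ClickHouse's Names and a hard-coded column standing in for ExpressionActions::getSmallestColumn():

```cpp
// Sketch of the virtual-column filtering in StorageS3::read()/StorageHDFS::read()
// (requires C++20 for std::erase_if on std::vector).
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    const std::vector<std::string> column_names = {"_path", "_file", "a"};
    const std::vector<std::string> virtuals     = {"_path", "_file"};

    /// Virtual columns are produced by the source itself, not read from the file.
    auto fetch_columns = column_names;
    std::erase_if(fetch_columns, [&](const std::string & col)
    {
        return std::any_of(virtuals.begin(), virtuals.end(),
                           [&](const std::string & virtual_col) { return col == virtual_col; });
    });

    /// If only virtual columns were requested, still fetch one physical column.
    if (fetch_columns.empty())
        fetch_columns.push_back("a"); // stand-in for ExpressionActions::getSmallestColumn(...)

    for (const auto & col : fetch_columns)
        std::cout << col << '\n'; // prints: a
}
```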
- if (need_path_column) + for (const auto & virtual_column : requested_virtual_columns) { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - columns.push_back(column->convertToFullColumnIfConst()); - } + if (virtual_column.name == "_path") + { + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); + columns.push_back(column->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); - if (need_file_column) - { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - columns.push_back(column->convertToFullColumnIfConst()); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); + columns.push_back(column->convertToFullColumnIfConst()); + } } return Chunk(std::move(columns), num_rows); @@ -526,17 +504,6 @@ Pipe StorageHDFS::read( size_t max_block_size, unsigned num_streams) { - bool need_path_column = false; - bool need_file_column = false; - - for (const auto & column : column_names) - { - if (column == "_path") - need_path_column = true; - if (column == "_file") - need_file_column = true; - } - std::shared_ptr iterator_wrapper{nullptr}; if (distributed_processing) { @@ -563,27 +530,51 @@ Pipe StorageHDFS::read( }); } + std::unordered_set column_names_set(column_names.begin(), column_names.end()); + std::vector requested_virtual_columns; + + for (const auto & virtual_column : getVirtuals()) + { + if (column_names_set.contains(virtual_column.name)) + requested_virtual_columns.push_back(virtual_column); + } + + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + auto fetch_columns = column_names; + const auto & virtuals = getVirtuals(); + std::erase_if( + fetch_columns, + [&](const String & col) + { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); + + if (fetch_columns.empty()) + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + + columns_description = ColumnsDescription{ + storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = storage_snapshot->metadata->getColumns(); + block_for_format = storage_snapshot->metadata->getSampleBlock(); + } + Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) { - const auto get_columns_for_format = [&]() -> ColumnsDescription - { - if (isColumnOriented()) - return ColumnsDescription{storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; - else - return storage_snapshot->metadata->getColumns(); - }; - pipes.emplace_back(std::make_shared( this_ptr, - storage_snapshot, + block_for_format, + requested_virtual_columns, context_, max_block_size, - need_path_column, - need_file_column, iterator_wrapper, - get_columns_for_format())); + columns_description)); } return Pipe::unitePipes(std::move(pipes)); } @@ -715,9 +706,7 @@ void 
registerStorageHDFS(StorageFactory & factory) NamesAndTypesList StorageHDFS::getVirtuals() const { - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; + return virtual_columns; } } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index e87564aef32..08842de3bd6 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -76,6 +76,7 @@ private: const bool distributed_processing; ASTPtr partition_by; bool is_path_with_globs; + NamesAndTypesList virtual_columns; Poco::Logger * log = &Poco::Logger::get("StorageHDFS"); }; @@ -110,25 +111,14 @@ public: using IteratorWrapper = std::function; using StorageHDFSPtr = std::shared_ptr; - static Block getHeader( - const StorageMetadataPtr & metadata_snapshot, - bool need_path_column, - bool need_file_column); - - static Block getBlockForSource( - const StorageHDFSPtr & storage, - const StorageSnapshotPtr & storage_snapshot_, - const ColumnsDescription & columns_description, - bool need_path_column, - bool need_file_column); + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); HDFSSource( StorageHDFSPtr storage_, - const StorageSnapshotPtr & storage_snapshot_, + const Block & block_for_foramt_, + const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - bool need_path_column_, - bool need_file_column_, std::shared_ptr file_iterator_, ColumnsDescription columns_description_); @@ -140,7 +130,8 @@ public: private: StorageHDFSPtr storage; - StorageSnapshotPtr storage_snapshot; + Block block_for_format; + std::vector requested_virtual_columns; UInt64 max_block_size; bool need_path_column; bool need_file_column; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6c610271ba7..334a87f429c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -208,25 +209,16 @@ String StorageS3Source::KeysIterator::next() return pimpl->next(); } -Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column) +Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) { - if (with_path_column) - sample_block.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - if (with_file_column) - sample_block.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); + for (const auto & virtual_column : requested_virtual_columns) + sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); return sample_block; } StorageS3Source::StorageS3Source( - bool need_path, - bool need_file, + const std::vector & requested_virtual_columns_, const String & format_, String name_, const Block & sample_block_, @@ -239,7 +231,7 @@ StorageS3Source::StorageS3Source( const std::shared_ptr & client_, const String & bucket_, std::shared_ptr file_iterator_) - : SourceWithProgress(getHeader(sample_block_, need_path, need_file)) + : SourceWithProgress(getHeader(sample_block_, requested_virtual_columns_)) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) @@ -251,8 +243,7 @@ StorageS3Source::StorageS3Source( , client(client_) , sample_block(sample_block_) , format_settings(format_settings_) - , with_file_column(need_file) - , 
with_path_column(need_path) + , requested_virtual_columns(requested_virtual_columns_) , file_iterator(file_iterator_) { initialize(); @@ -314,16 +305,18 @@ Chunk StorageS3Source::generate() { UInt64 num_rows = chunk.getNumRows(); - if (with_path_column) - chunk.addColumn(DataTypeLowCardinality{std::make_shared()} - .createColumnConst(num_rows, file_path) - ->convertToFullColumnIfConst()); - if (with_file_column) + for (const auto & virtual_column : requested_virtual_columns) { - size_t last_slash_pos = file_path.find_last_of('/'); - chunk.addColumn(DataTypeLowCardinality{std::make_shared()} - .createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)) - ->convertToFullColumnIfConst()); + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } } return chunk; @@ -597,6 +590,13 @@ StorageS3::StorageS3( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); } std::shared_ptr StorageS3::createFileIterator(const ClientAuthentication & client_auth, const std::vector & keys, bool is_key_with_globs, bool distributed_processing, ContextPtr local_context) @@ -644,14 +644,14 @@ Pipe StorageS3::read( updateClientAndAuthSettings(local_context, client_auth); Pipes pipes; - bool need_path_column = false; - bool need_file_column = false; - for (const auto & column : column_names) + + std::unordered_set column_names_set(column_names.begin(), column_names.end()); + std::vector requested_virtual_columns; + + for (const auto & virtual_column : getVirtuals()) { - if (column == "_path") - need_path_column = true; - if (column == "_file") - need_file_column = true; + if (column_names_set.contains(virtual_column.name)) + requested_virtual_columns.push_back(virtual_column); } std::shared_ptr iterator_wrapper = createFileIterator(client_auth, keys, is_key_with_globs, distributed_processing, local_context); @@ -683,8 +683,7 @@ Pipe StorageS3::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - need_path_column, - need_file_column, + requested_virtual_columns, format_name, getName(), block_for_format, @@ -860,11 +859,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt configuration.access_key_id = arg_value->as()->value.safeGet(); else if (arg_name == "secret_access_key") configuration.secret_access_key = arg_value->as()->value.safeGet(); + else if (arg_name == "filename") + configuration.url = std::filesystem::path(configuration.url) / arg_value->as()->value.safeGet(); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", arg_name); } + std::cerr << "\n\n\nkssenii: " << configuration.url << "\n\n\n\n\n"; } else { @@ -1059,9 +1061,7 @@ void 
registerStorageCOS(StorageFactory & factory) NamesAndTypesList StorageS3::getVirtuals() const { - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; + return virtual_columns; } bool StorageS3::supportsPartitionBy() const diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 300b7becb93..ce918a2c364 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -58,11 +58,10 @@ public: using IteratorWrapper = std::function; - static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column); + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); StorageS3Source( - bool need_path, - bool need_file, + const std::vector & requested_virtual_columns_, const String & format, String name_, const Block & sample_block, @@ -102,8 +101,7 @@ private: /// onCancel and generate can be called concurrently std::mutex reader_mutex; bool initialized = false; - bool with_file_column = false; - bool with_path_column = false; + std::vector requested_virtual_columns; std::shared_ptr file_iterator; /// Recreate ReadBuffer and BlockInputStream for each file. @@ -191,6 +189,7 @@ private: ClientAuthentication client_auth; std::vector keys; + NamesAndTypesList virtual_columns; String format_name; UInt64 max_single_read_retries; diff --git a/src/Storages/getVirtualsForStorage.cpp b/src/Storages/getVirtualsForStorage.cpp new file mode 100644 index 00000000000..93b2aa97856 --- /dev/null +++ b/src/Storages/getVirtualsForStorage.cpp @@ -0,0 +1,22 @@ +#include "getVirtualsForStorage.h" + +namespace DB +{ + +NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_) +{ + auto default_virtuals = default_virtuals_; + auto storage_columns = storage_columns_; + default_virtuals.sort(); + storage_columns.sort(); + + NamesAndTypesList result_virtuals; + std::set_difference( + default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(), + std::back_inserter(result_virtuals), + [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; }); + + return result_virtuals; +} + +} diff --git a/src/Storages/getVirtualsForStorage.h b/src/Storages/getVirtualsForStorage.h new file mode 100644 index 00000000000..861b1e564fc --- /dev/null +++ b/src/Storages/getVirtualsForStorage.h @@ -0,0 +1,9 @@ +#include + + +namespace DB +{ + +NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_); + +} diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index f844772983a..9134593a26a 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -12,6 +12,7 @@ #include #include #include "registerTableFunctions.h" +#include namespace DB @@ -56,6 +57,8 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con configuration.access_key_id = arg_value->as()->value.safeGet(); else if (arg_name == "secret_access_key") configuration.secret_access_key = arg_value->as()->value.safeGet(); + else if (arg_name == "filename") + configuration.url = std::filesystem::path(configuration.url) / arg_value->as()->value.safeGet(); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for StorageS3, expected: " diff --git 
a/tests/config/config.d/named_collection.xml b/tests/config/config.d/named_collection.xml index f3b7074e1ce..72fa3d43f15 100644 --- a/tests/config/config.d/named_collection.xml +++ b/tests/config/config.d/named_collection.xml @@ -14,5 +14,11 @@ default s
+ + http://localhost:11111/test/ + test + testtest + auto + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index cb56468ae82..dd29d0a5d6a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1375,13 +1375,3 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 - - -def test_virtual_columns(started_cluster): - bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - name = "test_table" - - instance.query("insert into table function s3(s3_parquet, format='Parquet') select 1, 'kek' settings s3_truncate_on_insert=1") - result = instance.query("SELECT _path FROM s3(s3_parquet, format='Parquet')") - assert result.strip() == "root/test_parquet" diff --git a/tests/queries/0_stateless/02245_s3_virtual_columns.reference b/tests/queries/0_stateless/02245_s3_virtual_columns.reference new file mode 100644 index 00000000000..c876207357f --- /dev/null +++ b/tests/queries/0_stateless/02245_s3_virtual_columns.reference @@ -0,0 +1,15 @@ +-- { echo } +drop table if exists test_02245; +create table test_02245 (a UInt64) engine = S3(s3_conn, filename='test_02245', format=Parquet); +insert into test_02245 select 1 settings s3_truncate_on_insert=1; +select * from test_02245; +1 +select _path from test_02245; +clickhouse-experiment-kseniia-eu-west-1.clickhouse-dev.com/stateless/test_02245 +drop table if exists test_02245_2; +create table test_02245_2 (a UInt64, _path Int32) engine = S3(s3_conn, filename='test_02245_2', format=Parquet); +insert into test_02245_2 select 1, 2 settings s3_truncate_on_insert=1; +select * from test_02245_2; +1 2 +select _path from test_02245_2; +2 diff --git a/tests/queries/0_stateless/02245_s3_virtual_columns.sql b/tests/queries/0_stateless/02245_s3_virtual_columns.sql new file mode 100644 index 00000000000..e86344d2094 --- /dev/null +++ b/tests/queries/0_stateless/02245_s3_virtual_columns.sql @@ -0,0 +1,15 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +-- { echo } +drop table if exists test_02245; +create table test_02245 (a UInt64) engine = S3(s3_conn, filename='test_02245', format=Parquet); +insert into test_02245 select 1 settings s3_truncate_on_insert=1; +select * from test_02245; +select _path from test_02245; + +drop table if exists test_02245_2; +create table test_02245_2 (a UInt64, _path Int32) engine = S3(s3_conn, filename='test_02245_2', format=Parquet); +insert into test_02245_2 select 1, 2 settings s3_truncate_on_insert=1; +select * from test_02245_2; +select _path from test_02245_2; From c05bf7beb4ee275cd05166265888f45de40cc1f2 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Mon, 28 Mar 2022 23:09:17 -0600 Subject: [PATCH 029/117] Testing new /en folder --- docs/en/_category_.yml | 8 + docs/en/commercial/cloud.md | 9 - docs/en/commercial/index.md | 13 - docs/en/commercial/support.md | 9 - docs/en/development/_category_.yml | 7 + docs/en/development/index.md | 10 - docs/en/engines/_category_.yml | 7 + docs/en/engines/index.md | 15 - .../mergetree-family/mergetree.md | 6 +- docs/en/example-datasets/_category_.yml | 7 + .../example-datasets/amplab-benchmark.md | 0 .../example-datasets/brown-benchmark.md | 0 .../example-datasets/cell-towers.md | 0 .../example-datasets/criteo.md | 0 .../example-datasets/github-events.md | 0 
.../example-datasets/menus.md | 0 .../example-datasets/metrica.md | 0 .../example-datasets/nyc-taxi.md | 0 .../example-datasets/ontime.md | 0 .../example-datasets/opensky.md | 0 .../example-datasets/recipes.md | 0 .../example-datasets/star-schema.md | 0 .../example-datasets/uk-price-paid.md | 0 .../example-datasets/wikistat.md | 0 docs/en/faq/general/columnar-database.md | 25 - docs/en/faq/general/dbms-naming.md | 17 - .../how-do-i-contribute-code-to-clickhouse.md | 15 - docs/en/faq/general/index.md | 25 - docs/en/faq/general/mapreduce.md | 13 - docs/en/faq/general/ne-tormozit.md | 26 - docs/en/faq/general/olap.md | 39 -- .../en/faq/general/who-is-using-clickhouse.md | 19 - .../faq/general/why-clickhouse-is-so-fast.md | 63 -- docs/en/faq/index.md | 47 -- docs/en/faq/integration/file-export.md | 37 - docs/en/faq/integration/index.md | 19 - docs/en/faq/integration/json-import.md | 33 - docs/en/faq/integration/oracle-odbc.md | 15 - docs/en/faq/operations/delete-old-data.md | 42 -- docs/en/faq/operations/index.md | 19 - .../operations/multi-region-replication.md | 13 - docs/en/faq/operations/production.md | 70 -- docs/en/faq/use-cases/index.md | 18 - docs/en/faq/use-cases/key-value.md | 17 - docs/en/faq/use-cases/time-series.md | 15 - .../getting-started/example-datasets/index.md | 28 - docs/en/getting-started/index.md | 15 - docs/en/getting-started/playground.md | 59 -- docs/en/getting-started/tutorial.md | 662 ------------------ docs/en/guides/apply-catboost-model.md | 242 ------- docs/en/guides/index.md | 14 - docs/en/index.md | 95 --- docs/en/{getting-started => }/install.md | 94 ++- docs/en/interfaces/index.md | 7 +- docs/en/introduction/adopters.md | 199 ------ docs/en/introduction/distinctive-features.md | 96 --- docs/en/introduction/history.md | 54 -- docs/en/introduction/index.md | 6 - docs/en/introduction/performance.md | 30 - docs/en/operations/_category_.yml | 7 + docs/en/sql-reference/_category_.yml | 7 + .../functions/encoding-functions.md | 4 +- .../sql-reference/statements/select/sample.md | 11 +- docs/en/whats-new/changelog/2017.md | 4 +- docs/en/whats-new/changelog/2018.md | 4 +- docs/en/whats-new/changelog/2019.md | 4 +- docs/en/whats-new/changelog/2020.md | 4 +- docs/en/whats-new/changelog/2021.md | 6 +- docs/en/whats-new/changelog/index.md | 499 ++++++++++++- docs/en/whats-new/index.md | 8 +- docs/en/whats-new/roadmap.md | 1 - docs/en/whats-new/security-changelog.md | 10 +- 72 files changed, 660 insertions(+), 2188 deletions(-) create mode 100644 docs/en/_category_.yml delete mode 100644 docs/en/commercial/cloud.md delete mode 100644 docs/en/commercial/index.md delete mode 100644 docs/en/commercial/support.md create mode 100644 docs/en/development/_category_.yml delete mode 100644 docs/en/development/index.md create mode 100644 docs/en/engines/_category_.yml delete mode 100644 docs/en/engines/index.md create mode 100644 docs/en/example-datasets/_category_.yml rename docs/en/{getting-started => }/example-datasets/amplab-benchmark.md (100%) rename docs/en/{getting-started => }/example-datasets/brown-benchmark.md (100%) rename docs/en/{getting-started => }/example-datasets/cell-towers.md (100%) rename docs/en/{getting-started => }/example-datasets/criteo.md (100%) rename docs/en/{getting-started => }/example-datasets/github-events.md (100%) rename docs/en/{getting-started => }/example-datasets/menus.md (100%) rename docs/en/{getting-started => }/example-datasets/metrica.md (100%) rename docs/en/{getting-started => }/example-datasets/nyc-taxi.md (100%) rename 
docs/en/{getting-started => }/example-datasets/ontime.md (100%) rename docs/en/{getting-started => }/example-datasets/opensky.md (100%) rename docs/en/{getting-started => }/example-datasets/recipes.md (100%) rename docs/en/{getting-started => }/example-datasets/star-schema.md (100%) rename docs/en/{getting-started => }/example-datasets/uk-price-paid.md (100%) rename docs/en/{getting-started => }/example-datasets/wikistat.md (100%) delete mode 100644 docs/en/faq/general/columnar-database.md delete mode 100644 docs/en/faq/general/dbms-naming.md delete mode 100644 docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md delete mode 100644 docs/en/faq/general/index.md delete mode 100644 docs/en/faq/general/mapreduce.md delete mode 100644 docs/en/faq/general/ne-tormozit.md delete mode 100644 docs/en/faq/general/olap.md delete mode 100644 docs/en/faq/general/who-is-using-clickhouse.md delete mode 100644 docs/en/faq/general/why-clickhouse-is-so-fast.md delete mode 100644 docs/en/faq/index.md delete mode 100644 docs/en/faq/integration/file-export.md delete mode 100644 docs/en/faq/integration/index.md delete mode 100644 docs/en/faq/integration/json-import.md delete mode 100644 docs/en/faq/integration/oracle-odbc.md delete mode 100644 docs/en/faq/operations/delete-old-data.md delete mode 100644 docs/en/faq/operations/index.md delete mode 100644 docs/en/faq/operations/multi-region-replication.md delete mode 100644 docs/en/faq/operations/production.md delete mode 100644 docs/en/faq/use-cases/index.md delete mode 100644 docs/en/faq/use-cases/key-value.md delete mode 100644 docs/en/faq/use-cases/time-series.md delete mode 100644 docs/en/getting-started/example-datasets/index.md delete mode 100644 docs/en/getting-started/index.md delete mode 100644 docs/en/getting-started/playground.md delete mode 100644 docs/en/getting-started/tutorial.md delete mode 100644 docs/en/guides/apply-catboost-model.md delete mode 100644 docs/en/guides/index.md delete mode 100644 docs/en/index.md rename docs/en/{getting-started => }/install.md (69%) delete mode 100644 docs/en/introduction/adopters.md delete mode 100644 docs/en/introduction/distinctive-features.md delete mode 100644 docs/en/introduction/history.md delete mode 100644 docs/en/introduction/index.md delete mode 100644 docs/en/introduction/performance.md create mode 100644 docs/en/operations/_category_.yml create mode 100644 docs/en/sql-reference/_category_.yml diff --git a/docs/en/_category_.yml b/docs/en/_category_.yml new file mode 100644 index 00000000000..8009b548223 --- /dev/null +++ b/docs/en/_category_.yml @@ -0,0 +1,8 @@ +position: 50 +label: 'Reference Guides' +collapsible: true +collapsed: true +link: + type: generated-index + title: Reference Guides + slug: /en \ No newline at end of file diff --git a/docs/en/commercial/cloud.md b/docs/en/commercial/cloud.md deleted file mode 100644 index afa2e23b7a8..00000000000 --- a/docs/en/commercial/cloud.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -toc_priority: 1 -toc_title: Cloud ---- - -# ClickHouse Cloud Service {#clickhouse-cloud-service} - -!!! info "Info" - Detailed public description for ClickHouse cloud services is not ready yet, please [contact us](https://clickhouse.com/company/#contact) to learn more. 
diff --git a/docs/en/commercial/index.md b/docs/en/commercial/index.md deleted file mode 100644 index 1f1911b8c4d..00000000000 --- a/docs/en/commercial/index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -toc_folder_title: Commercial -toc_priority: 70 -toc_title: Introduction ---- - -# ClickHouse Commercial Services {#clickhouse-commercial-services} - -Service categories: - -- [Cloud](../commercial/cloud.md) -- [Support](../commercial/support.md) - diff --git a/docs/en/commercial/support.md b/docs/en/commercial/support.md deleted file mode 100644 index 33b69b40b2d..00000000000 --- a/docs/en/commercial/support.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -toc_priority: 3 -toc_title: Support ---- - -# ClickHouse Commercial Support Service {#clickhouse-commercial-support-service} - -!!! info "Info" - Detailed public description for ClickHouse support services is not ready yet, please [contact us](https://clickhouse.com/company/#contact) to learn more. diff --git a/docs/en/development/_category_.yml b/docs/en/development/_category_.yml new file mode 100644 index 00000000000..ef272510d47 --- /dev/null +++ b/docs/en/development/_category_.yml @@ -0,0 +1,7 @@ +position: 100 +label: 'Development' +collapsible: true +collapsed: true +link: + type: generated-index + title: Reference \ No newline at end of file diff --git a/docs/en/development/index.md b/docs/en/development/index.md deleted file mode 100644 index f9f0d644973..00000000000 --- a/docs/en/development/index.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -toc_folder_title: Development -toc_hidden: true -toc_priority: 58 -toc_title: hidden ---- - -# ClickHouse Development {#clickhouse-development} - -[Original article](https://clickhouse.com/docs/en/development/) diff --git a/docs/en/engines/_category_.yml b/docs/en/engines/_category_.yml new file mode 100644 index 00000000000..f8554057fdc --- /dev/null +++ b/docs/en/engines/_category_.yml @@ -0,0 +1,7 @@ +position: 30 +label: 'Database & Table Engines' +collapsible: true +collapsed: true +link: + type: generated-index + title: Database & Table Engines \ No newline at end of file diff --git a/docs/en/engines/index.md b/docs/en/engines/index.md deleted file mode 100644 index b3f4a4f7b69..00000000000 --- a/docs/en/engines/index.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -toc_folder_title: Engines -toc_hidden: true -toc_priority: 25 -toc_title: hidden ---- - -# ClickHouse Engines {#clickhouse-engines} - -There are two key engine kinds in ClickHouse: - -- [Table engines](../engines/table-engines/index.md) -- [Database engines](../engines/database-engines/index.md) - -{## [Original article](https://clickhouse.com/docs/en/engines/) ##} diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index a0acda5d5c6..9d820e4961b 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -304,8 +304,8 @@ CREATE TABLE table_name Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries: ``` sql -SELECT count() FROM table WHERE s < 'z' -SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +SELECT count() FROM table WHERE s < 'z' +SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` #### Available Types of Indices {#available-types-of-indices} @@ -364,7 +364,7 @@ The `set` index can be used with all functions. 
Function subsets for other index | Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | |------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| | [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | | [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | | [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | diff --git a/docs/en/example-datasets/_category_.yml b/docs/en/example-datasets/_category_.yml new file mode 100644 index 00000000000..5824de77e1d --- /dev/null +++ b/docs/en/example-datasets/_category_.yml @@ -0,0 +1,7 @@ +position: 10 +label: 'Example Datasets' +collapsible: true +collapsed: true +link: + type: generated-index + title: Example Datasets \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/amplab-benchmark.md b/docs/en/example-datasets/amplab-benchmark.md similarity index 100% rename from docs/en/getting-started/example-datasets/amplab-benchmark.md rename to docs/en/example-datasets/amplab-benchmark.md diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/example-datasets/brown-benchmark.md similarity index 100% rename from docs/en/getting-started/example-datasets/brown-benchmark.md rename to docs/en/example-datasets/brown-benchmark.md diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/example-datasets/cell-towers.md similarity index 100% rename from docs/en/getting-started/example-datasets/cell-towers.md rename to docs/en/example-datasets/cell-towers.md diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/example-datasets/criteo.md similarity index 100% rename from docs/en/getting-started/example-datasets/criteo.md rename to docs/en/example-datasets/criteo.md diff --git a/docs/en/getting-started/example-datasets/github-events.md b/docs/en/example-datasets/github-events.md similarity index 100% rename from docs/en/getting-started/example-datasets/github-events.md rename to docs/en/example-datasets/github-events.md diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/example-datasets/menus.md similarity index 100% rename from docs/en/getting-started/example-datasets/menus.md rename to docs/en/example-datasets/menus.md diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/example-datasets/metrica.md similarity index 100% rename from docs/en/getting-started/example-datasets/metrica.md rename to docs/en/example-datasets/metrica.md diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/example-datasets/nyc-taxi.md similarity index 100% rename from docs/en/getting-started/example-datasets/nyc-taxi.md rename to docs/en/example-datasets/nyc-taxi.md diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/example-datasets/ontime.md similarity index 100% rename from docs/en/getting-started/example-datasets/ontime.md rename to 
docs/en/example-datasets/ontime.md diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/example-datasets/opensky.md similarity index 100% rename from docs/en/getting-started/example-datasets/opensky.md rename to docs/en/example-datasets/opensky.md diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/example-datasets/recipes.md similarity index 100% rename from docs/en/getting-started/example-datasets/recipes.md rename to docs/en/example-datasets/recipes.md diff --git a/docs/en/getting-started/example-datasets/star-schema.md b/docs/en/example-datasets/star-schema.md similarity index 100% rename from docs/en/getting-started/example-datasets/star-schema.md rename to docs/en/example-datasets/star-schema.md diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/example-datasets/uk-price-paid.md similarity index 100% rename from docs/en/getting-started/example-datasets/uk-price-paid.md rename to docs/en/example-datasets/uk-price-paid.md diff --git a/docs/en/getting-started/example-datasets/wikistat.md b/docs/en/example-datasets/wikistat.md similarity index 100% rename from docs/en/getting-started/example-datasets/wikistat.md rename to docs/en/example-datasets/wikistat.md diff --git a/docs/en/faq/general/columnar-database.md b/docs/en/faq/general/columnar-database.md deleted file mode 100644 index 11bbd2e63f6..00000000000 --- a/docs/en/faq/general/columnar-database.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: What is a columnar database? -toc_hidden: true -toc_priority: 101 ---- - -# What Is a Columnar Database? {#what-is-a-columnar-database} - -A columnar database stores data of each column independently. This allows to read data from disks only for those columns that are used in any given query. The cost is that operations that affect whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system. - -Key columnar database advantages are: - -- Queries that use only a few columns out of many. -- Aggregating queries against large volumes of data. -- Column-wise data compression. - -Here is the illustration of the difference between traditional row-oriented systems and columnar databases when building reports: - -**Traditional row-oriented** -![Traditional row-oriented](https://clickhouse.com/docs/en/images/row-oriented.gif#) - -**Columnar** -![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#) - -A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing and data warehousing, because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. diff --git a/docs/en/faq/general/dbms-naming.md b/docs/en/faq/general/dbms-naming.md deleted file mode 100644 index d4e87ff450a..00000000000 --- a/docs/en/faq/general/dbms-naming.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: "What does \u201CClickHouse\u201D mean?" -toc_hidden: true -toc_priority: 10 ---- - -# What Does “ClickHouse” Mean? 
{#what-does-clickhouse-mean} - -It’s a combination of “**Click**stream” and “Data ware**House**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet, and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page. - -This two-part meaning has two consequences: - -- The only correct way to write Click**H**ouse is with capital H. -- If you need to abbreviate it, use **CH**. For some historical reasons, abbreviating as CK is also popular in China, mostly because one of the first talks about ClickHouse in Chinese used this form. - -!!! info "Fun fact" - Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres. diff --git a/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md deleted file mode 100644 index 731dc9dface..00000000000 --- a/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: How do I contribute code to ClickHouse? -toc_hidden: true -toc_priority: 120 ---- - -# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse} - -ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse). - -As customary, contribution instructions are published in [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository. - -If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, to discuss it with maintainers and community first. [Examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc). - -If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too. diff --git a/docs/en/faq/general/index.md b/docs/en/faq/general/index.md deleted file mode 100644 index 51fff9a53ae..00000000000 --- a/docs/en/faq/general/index.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: General questions about ClickHouse -toc_hidden_folder: true -toc_priority: 1 -toc_title: General ---- - -# General Questions About ClickHouse {#general-questions} - -Questions: - -- [What is ClickHouse?](../../index.md#what-is-clickhouse) -- [Why ClickHouse is so fast?](../../faq/general/why-clickhouse-is-so-fast.md) -- [Who is using ClickHouse?](../../faq/general/who-is-using-clickhouse.md) -- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md) -- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md) -- [What is OLAP?](../../faq/general/olap.md) -- [What is a columnar database?](../../faq/general/columnar-database.md) -- [Why not use something like MapReduce?](../../faq/general/mapreduce.md) -- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md) - -!!! info "Don’t see what you were looking for?" - Check out [other F.A.Q. 
categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. - -{## [Original article](https://clickhouse.com/docs/en/faq/general/) ##} diff --git a/docs/en/faq/general/mapreduce.md b/docs/en/faq/general/mapreduce.md deleted file mode 100644 index 30cae65cba2..00000000000 --- a/docs/en/faq/general/mapreduce.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Why not use something like MapReduce? -toc_hidden: true -toc_priority: 110 ---- - -# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} - -We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Large IT companies often have proprietary in-house solutions. - -These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. - -Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. diff --git a/docs/en/faq/general/ne-tormozit.md b/docs/en/faq/general/ne-tormozit.md deleted file mode 100644 index e8dc7388eff..00000000000 --- a/docs/en/faq/general/ne-tormozit.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ - \u201D mean?" -toc_hidden: true -toc_priority: 11 ---- - -# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean} - -This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. - -Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, Yandex. That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. - -One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. 
Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. - -So, what does it mean? Here are some ways to translate *“не тормозит”*: - -- If you translate it literally, it’d be something like *“ClickHouse does not press the brake pedal”*. -- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*. -- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse does not lag”* or just *“ClickHouse is fast”*. - -If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one: - -![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) - -P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.com/#meet), usually for best questions or other forms of active participation. diff --git a/docs/en/faq/general/olap.md b/docs/en/faq/general/olap.md deleted file mode 100644 index 1f6df183f8c..00000000000 --- a/docs/en/faq/general/olap.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: What is OLAP? -toc_hidden: true -toc_priority: 100 ---- - -# What Is OLAP? {#what-is-olap} - -[OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: - -Processing -: Some source data is processed… - -Analytical -: …to produce some analytical reports and insights… - -Online -: …in real-time. - -## OLAP from the Business Perspective {#olap-from-the-business-perspective} - -In recent years, business people started to realize the value of data. Companies who make their decisions blindly, more often than not fail to keep up with the competition. The data-driven approach of successful companies forces them to collect all data that might be remotely useful for making business decisions and need mechanisms to timely analyze them. Here’s where OLAP database management systems (DBMS) come in. - -In a business sense, OLAP allows companies to continuously plan, analyze, and report operational activities, thus maximizing efficiency, reducing expenses, and ultimately conquering the market share. It could be done either in an in-house system or outsourced to SaaS providers like web/mobile analytics services, CRM services, etc. OLAP is the technology behind many BI applications (Business Intelligence). - -ClickHouse is an OLAP database management system that is pretty often used as a backend for those SaaS solutions for analyzing domain-specific data. However, some businesses are still reluctant to share their data with third-party providers and an in-house data warehouse scenario is also viable. - -## OLAP from the Technical Perspective {#olap-from-the-technical-perspective} - -All database management systems could be classified into two groups: OLAP (Online **Analytical** Processing) and OLTP (Online **Transactional** Processing). 
Former focuses on building reports, each based on large volumes of historical data, but doing it not so frequently. While the latter usually handle a continuous stream of transactions, constantly modifying the current state of data. - -In practice OLAP and OLTP are not categories, it’s more like a spectrum. Most real systems usually focus on one of them but provide some solutions or workarounds if the opposite kind of workload is also desired. This situation often forces businesses to operate multiple storage systems integrated, which might be not so big deal but having more systems make it more expensive to maintain. So the trend of recent years is HTAP (**Hybrid Transactional/Analytical Processing**) when both kinds of the workload are handled equally well by a single database management system. - -Even if a DBMS started as a pure OLAP or pure OLTP, they are forced to move towards that HTAP direction to keep up with their competition. And ClickHouse is no exception, initially, it has been designed as [fast-as-possible OLAP system](../../faq/general/why-clickhouse-is-so-fast.md) and it still does not have full-fledged transaction support, but some features like consistent read/writes and mutations for updating/deleting data had to be added. - -The fundamental trade-off between OLAP and OLTP systems remains: - -- To build analytical reports efficiently it’s crucial to be able to read columns separately, thus most OLAP databases are [columnar](../../faq/general/columnar-database.md), -- While storing columns separately increases costs of operations on rows, like append or in-place modification, proportionally to the number of columns (which can be huge if the systems try to collect all details of an event just in case). Thus, most OLTP systems store data arranged by rows. diff --git a/docs/en/faq/general/who-is-using-clickhouse.md b/docs/en/faq/general/who-is-using-clickhouse.md deleted file mode 100644 index b7ff867d726..00000000000 --- a/docs/en/faq/general/who-is-using-clickhouse.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Who is using ClickHouse? -toc_hidden: true -toc_priority: 9 ---- - -# Who Is Using ClickHouse? {#who-is-using-clickhouse} - -Being an open-source product makes this question not so straightforward to answer. You do not have to tell anyone if you want to start using ClickHouse, you just go grab source code or pre-compiled packages. There’s no contract to sign and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution. - -Also, the technology stack is often in a grey zone of what’s covered by an NDA. Some companies consider technologies they use as a competitive advantage even if they are open-source and do not allow employees to share any details publicly. Some see some PR risks and allow employees to share implementation details only with their PR department approval. - -So how to tell who is using ClickHouse? - -One way is to **ask around**. If it’s not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. We’re talking with users regularly on [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, that’s not reproducible and we try to treat such stories as if they were told under NDA to avoid any potential troubles. 
But you can come to any of our future meetups and talk with other users on your own. There are multiple ways how meetups are announced, for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/). - -The second way is to look for companies **publicly saying** that they use ClickHouse. It’s more substantial because there’s usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect the collection of links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links you’ve stumbled upon (but try not to violate your NDA in the process). - -You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe. diff --git a/docs/en/faq/general/why-clickhouse-is-so-fast.md b/docs/en/faq/general/why-clickhouse-is-so-fast.md deleted file mode 100644 index 1ccf2595768..00000000000 --- a/docs/en/faq/general/why-clickhouse-is-so-fast.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Why ClickHouse is so fast? -toc_hidden: true -toc_priority: 8 ---- - -# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast} - -It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system. - -ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: - -Column-oriented storage -: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted. - -Indexes -: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns. - -Data compression -: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs) that can make data even more compact. - -Vectorized query execution -: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage. 
- -Scalability -: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well. - -But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider: - -- Which hash function to choose? -- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? -- Memory layout: one array for keys and values or separate arrays? Will it store small or large values? -- Fill factor: when and how to resize? How to move values around on resize? -- Will values be removed and which algorithm will work better if they will? -- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching? - -Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query. - -The same goes for algorithms, for example, in sorting you might consider: - -- What will be sorted: an array of numbers, tuples, strings, or structures? -- Is all data available completely in RAM? -- Do we need a stable sort? -- Do we need a full sort? Maybe partial sort or n-th element will suffice? -- How to implement comparisons? -- Are we sorting data that has already been partially sorted? - -Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/). - -Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time you’ll indeed find a gem. - -!!! info "Tips for building your own high-performance software" - - - - Keep in mind low-level details when designing your system. - - Design based on hardware capabilities. - - Choose data structures and abstractions based on the needs of the task. - - Provide specializations for special cases. - - Try new, “best” algorithms, that you read about yesterday. - - Choose an algorithm in runtime based on statistics. - - Benchmark on real datasets. - - Test for performance regressions in CI. - - Measure and observe everything. diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md deleted file mode 100644 index 891e1ea464e..00000000000 --- a/docs/en/faq/index.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -toc_folder_title: F.A.Q. -toc_hidden: true -toc_priority: 76 ---- - -# ClickHouse F.A.Q {#clickhouse-f-a-q} - -This section of the documentation is a place to collect answers to ClickHouse-related questions that arise often. 
- -Categories: - -- **[General](../faq/general/index.md)** - - [What is ClickHouse?](../index.md#what-is-clickhouse) - - [Why ClickHouse is so fast?](../faq/general/why-clickhouse-is-so-fast.md) - - [Who is using ClickHouse?](../faq/general/who-is-using-clickhouse.md) - - [What does “ClickHouse” mean?](../faq/general/dbms-naming.md) - - [What does “Не тормозит” mean?](../faq/general/ne-tormozit.md) - - [What is OLAP?](../faq/general/olap.md) - - [What is a columnar database?](../faq/general/columnar-database.md) - - [Why not use something like MapReduce?](../faq/general/mapreduce.md) -- **[Use Cases](../faq/use-cases/index.md)** - - [Can I use ClickHouse as a time-series database?](../faq/use-cases/time-series.md) - - [Can I use ClickHouse as a key-value storage?](../faq/use-cases/key-value.md) -- **[Operations](../faq/operations/index.md)** - - [Which ClickHouse version to use in production?](../faq/operations/production.md) - - [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md) - - [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md) -- **[Integration](../faq/integration/index.md)** - - [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md) - - [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md) - -{## TODO -Question candidates: -- How to choose a primary key? -- How to add a column in ClickHouse? -- Too many parts -- How to filter ClickHouse table by an array column contents? -- How to insert all rows from one table to another of identical structure? -- How to kill a process (query) in ClickHouse? -- How to implement pivot (like in pandas)? -- How to remove the default ClickHouse user through users.d? -- Importing MySQL dump to ClickHouse -- Window function workarounds (row_number, lag/lead, running diff/sum/average) -##} - -{## [Original article](https://clickhouse.com/docs/en/faq) ##} diff --git a/docs/en/faq/integration/file-export.md b/docs/en/faq/integration/file-export.md deleted file mode 100644 index f8f458929f9..00000000000 --- a/docs/en/faq/integration/file-export.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: How do I export data from ClickHouse to a file? -toc_hidden: true -toc_priority: 10 ---- - -# How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} - -## Using INTO OUTFILE Clause {#using-into-outfile-clause} - -Add an [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query. - -For example: - -``` sql -SELECT * FROM table INTO OUTFILE 'file' -``` - -By default, ClickHouse uses the [TabSeparated](../../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../../interfaces/formats.md), use the [FORMAT clause](../../sql-reference/statements/select/format.md#format-clause). - -For example: - -``` sql -SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV -``` - -## Using a File-Engine Table {#using-a-file-engine-table} - -See [File](../../engines/table-engines/special/file.md) table engine. - -## Using Command-Line Redirection {#using-command-line-redirection} - -``` bash -$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt -``` - -See [clickhouse-client](../../interfaces/cli.md). 
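The File-engine option mentioned above is only linked, not shown, so here is a minimal sketch of that approach; the table and column names are hypothetical and the format argument (CSV here) could be any supported output format:

``` sql
-- Hypothetical table: rows inserted into it are written as CSV into the table's data directory
-- on the server (typically data/<database>/csv_export/data.CSV under the ClickHouse data path).
CREATE TABLE csv_export (id UInt64, value String) ENGINE = File(CSV);

INSERT INTO csv_export SELECT number, toString(number) FROM system.numbers LIMIT 10;
```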
diff --git a/docs/en/faq/integration/index.md b/docs/en/faq/integration/index.md deleted file mode 100644 index 51a2593b751..00000000000 --- a/docs/en/faq/integration/index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Questions about integrating ClickHouse and other systems -toc_hidden_folder: true -toc_priority: 4 -toc_title: Integration ---- - -# Questions About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems} - -Questions: - -- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md) -- [How to import JSON into ClickHouse?](../../faq/integration/json-import.md) -- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md) - -!!! info "Don’t see what you were looking for?" - Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. - -{## [Original article](https://clickhouse.com/docs/en/faq/integration/) ##} diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md deleted file mode 100644 index 3fa026c794a..00000000000 --- a/docs/en/faq/integration/json-import.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: How to import JSON into ClickHouse? -toc_hidden: true -toc_priority: 11 ---- - -# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse} - -ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline. - -## Examples {#examples} - -Using [HTTP interface](../../interfaces/http.md): - -``` bash -$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @- -``` - -Using [CLI interface](../../interfaces/cli.md): - -``` bash -$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow" -``` - -Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead. - -## Useful Settings {#useful-settings} - -- `input_format_skip_unknown_fields` allows to insert JSON even if there were additional fields not present in table schema (by discarding them). -- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. - -!!! note "Note" - Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. diff --git a/docs/en/faq/integration/oracle-odbc.md b/docs/en/faq/integration/oracle-odbc.md deleted file mode 100644 index 91265a3daa2..00000000000 --- a/docs/en/faq/integration/oracle-odbc.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: What if I have a problem with encodings when using Oracle via ODBC? -toc_hidden: true -toc_priority: 20 ---- - -# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings} - -If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. 
For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). - -**Example** - -``` bash -NLS_LANG=RUSSIAN_RUSSIA.UTF8 -``` diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md deleted file mode 100644 index 32fc485e98a..00000000000 --- a/docs/en/faq/operations/delete-old-data.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Is it possible to delete old records from a ClickHouse table? -toc_hidden: true -toc_priority: 20 ---- - -# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table} - -The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed at different scenarios. - -## TTL {#ttl} - -ClickHouse allows you to automatically drop values when some condition is met. This condition is configured as an expression based on any columns, usually just a static offset from a timestamp column. - -The key advantage of this approach is that it does not need any external system to trigger it: once TTL is configured, data removal happens automatically in the background. - -!!! note "Note" - TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD. - -More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). - -## ALTER DELETE {#alter-delete} - -ClickHouse does not have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them is mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish them from normal `DELETE` or `UPDATE`, as they are asynchronous batch operations, not immediate modifications. The rest of the syntax after the `ALTER TABLE` prefix is similar. - -`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations, since mutations rewrite complete parts even if there’s only a single row to be deleted. - -This is the most common approach to making a system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant. - -More details on [mutations](../../sql-reference/statements/alter/index.md#alter-mutations). - -## DROP PARTITION {#drop-partition} - -`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It’s not that flexible and needs a proper partitioning scheme configured at table creation, but it still covers most common cases. Like mutations, it needs to be executed from an external system for regular use. - -More details on [manipulating partitions](../../sql-reference/statements/alter/partition.md#alter_drop-partition). - -## TRUNCATE {#truncate} - -It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. - -More details on [table truncation](../../sql-reference/statements/truncate.md).
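As a minimal sketch of the TTL and `ALTER DELETE` approaches described above (the table and column names are hypothetical):

``` sql
-- Hypothetical table: rows older than one month are dropped automatically in the background.
CREATE TABLE events
(
    event_time DateTime,
    message String
)
ENGINE = MergeTree
ORDER BY event_time
TTL event_time + INTERVAL 1 MONTH;

-- The mutation-based alternative: an asynchronous batch operation that rewrites affected parts.
ALTER TABLE events DELETE WHERE event_time < now() - INTERVAL 1 MONTH;
```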
diff --git a/docs/en/faq/operations/index.md b/docs/en/faq/operations/index.md deleted file mode 100644 index 81aec18b9cf..00000000000 --- a/docs/en/faq/operations/index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Question about operating ClickHouse servers and clusters -toc_hidden_folder: true -toc_priority: 3 -toc_title: Operations ---- - -# Question About Operating ClickHouse Servers and Clusters {#question-about-operating-clickhouse-servers-and-clusters} - -Questions: - -- [Which ClickHouse version to use in production?](../../faq/operations/production.md) -- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md) -- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md) - -!!! info "Don’t see what you were looking for?" - Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. - -{## [Original article](https://clickhouse.com/docs/en/faq/production/) ##} diff --git a/docs/en/faq/operations/multi-region-replication.md b/docs/en/faq/operations/multi-region-replication.md deleted file mode 100644 index 7d78737544a..00000000000 --- a/docs/en/faq/operations/multi-region-replication.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Does ClickHouse support multi-region replication? -toc_hidden: true -toc_priority: 30 ---- - -# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication} - -The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in two-digit range, otherwise write performance will suffer as it goes through distributed consensus protocol. For example, replication between US coasts will likely work fine, but between the US and Europe won't. - -Configuration-wise there's no difference compared to single-region replication, simply use hosts that are located in different locations for replicas. - -For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md). diff --git a/docs/en/faq/operations/production.md b/docs/en/faq/operations/production.md deleted file mode 100644 index 52ca300ced0..00000000000 --- a/docs/en/faq/operations/production.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Which ClickHouse version to use in production? -toc_hidden: true -toc_priority: 10 ---- - -# Which ClickHouse Version to Use in Production? {#which-clickhouse-version-to-use-in-production} - -First of all, let’s discuss why people ask this question in the first place. There are two key reasons: - -1. ClickHouse is developed with pretty high velocity and usually, there are 10+ stable releases per year. It makes a wide range of releases to choose from, which is not so trivial choice. -2. Some users want to avoid spending time figuring out which version works best for their use case and just follow someone else’s advice. - -The second reason is more fundamental, so we’ll start with it and then get back to navigating through various ClickHouse releases. - -## Which ClickHouse Version Do You Recommend? {#which-clickhouse-version-do-you-recommend} - -It’s tempting to hire consultants or trust some known experts to get rid of responsibility for your production environment. You install some specific ClickHouse version that someone else recommended, now if there’s some issue with it - it’s not your fault, it’s someone else’s. This line of reasoning is a big trap. 
No external person knows better what’s going on in your company’s production environment. - -So how to properly choose which ClickHouse version to upgrade to? Or how to choose your first ClickHouse version? First of all, you need to invest in setting up a **realistic pre-production environment**. In an ideal world, it could be a completely identical shadow copy, but that’s usually expensive. - -Here’re some key points to get reasonable fidelity in a pre-production environment with not so high costs: - -- Pre-production environment needs to run an as close set of queries as you intend to run in production: - - Don’t make it read-only with some frozen data. - - Don’t make it write-only with just copying data without building some typical reports. - - Don’t wipe it clean instead of applying schema migrations. -- Use a sample of real production data and queries. Try to choose a sample that’s still representative and makes `SELECT` queries return reasonable results. Use obfuscation if your data is sensitive and internal policies do not allow it to leave the production environment. -- Make sure that pre-production is covered by your monitoring and alerting software the same way as your production environment does. -- If your production spans across multiple datacenters or regions, make your pre-production does the same. -- If your production uses complex features like replication, distributed table, cascading materialize views, make sure they are configured similarly in pre-production. -- There’s a trade-off on using the roughly same number of servers or VMs in pre-production as in production, but of smaller size, or much less of them, but of the same size. The first option might catch extra network-related issues, while the latter is easier to manage. - -The second area to invest in is **automated testing infrastructure**. Don’t assume that if some kind of query has executed successfully once, it’ll continue to do so forever. It’s ok to have some unit tests where ClickHouse is mocked but make sure your product has a reasonable set of automated tests that are run against real ClickHouse and check that all important use cases are still working as expected. - -Extra step forward could be contributing those automated tests to [ClickHouse’s open-source test infrastructure](https://github.com/ClickHouse/ClickHouse/tree/master/tests) that’s continuously used in its day-to-day development. It definitely will take some additional time and effort to learn [how to run it](../../development/tests.md) and then how to adapt your tests to this framework, but it’ll pay off by ensuring that ClickHouse releases are already tested against them when they are announced stable, instead of repeatedly losing time on reporting the issue after the fact and then waiting for a bugfix to be implemented, backported and released. Some companies even have such test contributions to infrastructure by its use as an internal policy, most notably it’s called [Beyonce’s Rule](https://www.oreilly.com/library/view/software-engineering-at/9781492082781/ch01.html#policies_that_scale_well) at Google. - -When you have your pre-production environment and testing infrastructure in place, choosing the best version is straightforward: - -1. Routinely run your automated tests against new ClickHouse releases. You can do it even for ClickHouse releases that are marked as `testing`, but going forward to the next steps with them is not recommended. -2. 
Deploy the ClickHouse release that passed the tests to pre-production and check that all processes are running as expected. -3. Report any issues you discovered to [ClickHouse GitHub Issues](https://github.com/ClickHouse/ClickHouse/issues). -4. If there were no major issues, it should be safe to start deploying ClickHouse release to your production environment. Investing in gradual release automation that implements an approach similar to [canary releases](https://martinfowler.com/bliki/CanaryRelease.html) or [green-blue deployments](https://martinfowler.com/bliki/BlueGreenDeployment.html) might further reduce the risk of issues in production. - -As you might have noticed, there’s nothing specific to ClickHouse in the approach described above, people do that for any piece of infrastructure they rely on if they take their production environment seriously. - -## How to Choose Between ClickHouse Releases? {#how-to-choose-between-clickhouse-releases} - -If you look into contents of ClickHouse package repository, you’ll see four kinds of packages: - -1. `testing` -2. `prestable` -3. `stable` -4. `lts` (long-term support) - -As was mentioned earlier, `testing` is good mostly to notice issues early, running them in production is not recommended because each of them is not tested as thoroughly as other kinds of packages. - -`prestable` is a release candidate which generally looks promising and is likely to become announced as `stable` soon. You can try them out in pre-production and report issues if you see any. - -For production use, there are two key options: `stable` and `lts`. Here is some guidance on how to choose between them: - -- `stable` is the kind of package we recommend by default. They are released roughly monthly (and thus provide new features with reasonable delay) and three latest stable releases are supported in terms of diagnostics and backporting of bugfixes. -- `lts` are released twice a year and are supported for a year after their initial release. You might prefer them over `stable` in the following cases: - - Your company has some internal policies that do not allow for frequent upgrades or using non-LTS software. - - You are using ClickHouse in some secondary products that either does not require any complex ClickHouse features and do not have enough resources to keep it updated. - -Many teams who initially thought that `lts` is the way to go, often switch to `stable` anyway because of some recent feature that’s important for their product. - -!!! warning "Important" - One more thing to keep in mind when upgrading ClickHouse: we’re always keeping eye on compatibility across releases, but sometimes it’s not reasonable to keep and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes. diff --git a/docs/en/faq/use-cases/index.md b/docs/en/faq/use-cases/index.md deleted file mode 100644 index aac5493b105..00000000000 --- a/docs/en/faq/use-cases/index.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Questions about ClickHouse use cases -toc_hidden_folder: true -toc_priority: 2 -toc_title: Use Cases ---- - -# Questions About ClickHouse Use Cases {#questions-about-clickhouse-use-cases} - -Questions: - -- [Can I use ClickHouse as a time-series database?](../../faq/use-cases/time-series.md) -- [Can I use ClickHouse as a key-value storage?](../../faq/use-cases/key-value.md) - -!!! info "Don’t see what you were looking for?" - Check out [other F.A.Q. 
categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. - -{## [Original article](https://clickhouse.com/docs/en/faq/use-cases/) ##} diff --git a/docs/en/faq/use-cases/key-value.md b/docs/en/faq/use-cases/key-value.md deleted file mode 100644 index 2827dd2fa58..00000000000 --- a/docs/en/faq/use-cases/key-value.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Can I use ClickHouse as a key-value storage? -toc_hidden: true -toc_priority: 101 ---- - -# Can I Use ClickHouse As a Key-Value Storage? {#can-i-use-clickhouse-as-a-key-value-storage} - -The short answer is **“no”**. The key-value workload is among top positions in the list of cases when **NOT**{.text-danger} to use ClickHouse. It’s an [OLAP](../../faq/general/olap.md) system after all, while there are many excellent key-value storage systems out there. - -However, there might be situations where it still makes sense to use ClickHouse for key-value-like queries. Usually, it’s some low-budget products where the main workload is analytical in nature and fits ClickHouse well, but there’s also some secondary process that needs a key-value pattern with not so high request throughput and without strict latency requirements. If you had an unlimited budget, you would have installed a secondary key-value database for thus secondary workload, but in reality, there’s an additional cost of maintaining one more storage system (monitoring, backups, etc.) which might be desirable to avoid. - -If you decide to go against recommendations and run some key-value-like queries against ClickHouse, here’re some tips: - -- The key reason why point queries are expensive in ClickHouse is its sparse primary index of main [MergeTree table engine family](../../engines/table-engines/mergetree-family/mergetree.md). This index can’t point to each specific row of data, instead, it points to each N-th and the system has to scan from the neighboring N-th row to the desired one, reading excessive data along the way. In a key-value scenario, it might be useful to reduce the value of N with the `index_granularity` setting. -- ClickHouse keeps each column in a separate set of files, so to assemble one complete row it needs to go through each of those files. Their count increases linearly with the number of columns, so in the key-value scenario, it might be worth to avoid using many columns and put all your payload in a single `String` column encoded in some serialization format like JSON, Protobuf or whatever makes sense. -- There’s an alternative approach that uses [Join](../../engines/table-engines/special/join.md) table engine instead of normal `MergeTree` tables and [joinGet](../../sql-reference/functions/other-functions.md#joinget) function to retrieve the data. It can provide better query performance but might have some usability and reliability issues. Here’s an [usage example](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51). diff --git a/docs/en/faq/use-cases/time-series.md b/docs/en/faq/use-cases/time-series.md deleted file mode 100644 index bf97ac4b1e2..00000000000 --- a/docs/en/faq/use-cases/time-series.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Can I use ClickHouse as a time-series database? -toc_hidden: true -toc_priority: 101 ---- - -# Can I Use ClickHouse As a Time-Series Database? 
{#can-i-use-clickhouse-as-a-time-series-database} - -ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouse’s [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there, so we’re not going to conduct one here. Instead, let’s focus on ClickHouse features that are important to use if that’s your use case. - -First of all, there are **[specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)** which make typical time-series data much more compact: either common algorithms like `DoubleDelta` and `Gorilla`, or ClickHouse-specific ones like `T64`. - -Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast NVMe/SSD drives and high-capacity HDD drives. The ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature allows you to keep fresh, hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it. - -Even though it’s against the ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create/view.md) to fit even tighter latency or cost requirements. diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md deleted file mode 100644 index d4c9bab2441..00000000000 --- a/docs/en/getting-started/example-datasets/index.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -toc_folder_title: Example Datasets -toc_priority: 10 -toc_title: Introduction ---- - -# Example Datasets {#example-datasets} - -This section describes how to obtain example datasets and import them into ClickHouse. For some datasets, example queries are also available.
- -The list of documented datasets: - -- [GitHub Events](../../getting-started/example-datasets/github-events.md) -- [Anonymized Web Analytics Dataset](../../getting-started/example-datasets/metrica.md) -- [Recipes](../../getting-started/example-datasets/recipes.md) -- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) -- [WikiStat](../../getting-started/example-datasets/wikistat.md) -- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) -- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) -- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) -- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) -- [OpenSky](../../getting-started/example-datasets/opensky.md) -- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md) -- [Cell Towers](../../getting-started/example-datasets/cell-towers.md) -- [What's on the Menu?](../../getting-started/example-datasets/menus.md) -- [OnTime](../../getting-started/example-datasets/ontime.md) - -[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets) diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md deleted file mode 100644 index 372e8d7bd64..00000000000 --- a/docs/en/getting-started/index.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -toc_folder_title: Getting Started -toc_hidden: true -toc_priority: 8 -toc_title: hidden ---- - -# Getting Started {#getting-started} - -If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can: - -- [Go through detailed tutorial](../getting-started/tutorial.md) -- [Experiment with example datasets](../getting-started/example-datasets/ontime.md) - -[Original article](https://clickhouse.com/docs/en/getting_started/) diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md deleted file mode 100644 index 6c44f250242..00000000000 --- a/docs/en/getting-started/playground.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -toc_priority: 14 -toc_title: Playground ---- - -# ClickHouse Playground {#clickhouse-playground} - -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. - -[ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with. - -You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). 
- -## Credentials {#credentials} - -| Parameter | Value | -|:--------------------|:----------------------------------------| -| HTTPS endpoint | `https://play-api.clickhouse.com:8443` | -| Native TCP endpoint | `play-api.clickhouse.com:9440` | -| User | `playground` | -| Password | `clickhouse` | - -There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above): - -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` - -!!! note "Note" - All these endpoints require a secure TLS connection. - -## Limitations {#limitations} - -The queries are executed as a read-only user. It implies some limitations: - -- DDL queries are not allowed -- INSERT queries are not allowed - -The following settings are also enforced: - -- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time) - -## Examples {#examples} - -HTTPS endpoint example with `curl`: - -``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" -``` - -TCP endpoint example with [CLI](../interfaces/cli.md): - -``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" -``` diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md deleted file mode 100644 index 9f43cc8769d..00000000000 --- a/docs/en/getting-started/tutorial.md +++ /dev/null @@ -1,662 +0,0 @@ ---- -toc_priority: 12 -toc_title: Tutorial ---- - -# ClickHouse Tutorial {#clickhouse-tutorial} - -## What to Expect from This Tutorial? {#what-to-expect-from-this-tutorial} - -By going through this tutorial, you’ll learn how to set up a simple ClickHouse cluster. It’ll be small, but fault-tolerant and scalable. Then we will use one of the example datasets to fill it with data and execute some demo queries. - -## Single Node Setup {#single-node-setup} - -To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them. - -For example, you have chosen `deb` packages and executed: - -``` bash -{% include 'install/deb.sh' %} -``` - -What do we have in the packages that got installed: - -- `clickhouse-client` package contains [clickhouse-client](../interfaces/cli.md) application, interactive ClickHouse console client. -- `clickhouse-common` package contains a ClickHouse executable file. -- `clickhouse-server` package contains configuration files to run ClickHouse as a server. - -Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity; the default value is `/var/lib/clickhouse/`. 
If you want to adjust the configuration, it’s not handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in config.d directory](../operations/configuration-files.md) which serve as “patches” to config.xml. - -As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won’t be automatically restarted after updates, either. The way you start the server depends on your init system, usually, it is: - -``` bash -sudo service clickhouse-server start -``` - -or - -``` bash -sudo /etc/init.d/clickhouse-server start -``` - -The default location for server logs is `/var/log/clickhouse-server/`. The server is ready to handle client connections once it logs the `Ready for connections` message. - -Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`. - -
- -Quick tips for clickhouse-client - -Interactive mode: - -``` bash -clickhouse-client -clickhouse-client --host=... --port=... --user=... --password=... -``` - -Enable multiline queries: - -``` bash -clickhouse-client -m -clickhouse-client --multiline -``` - -Run queries in batch-mode: - -``` bash -clickhouse-client --query='SELECT 1' -echo 'SELECT 1' | clickhouse-client -clickhouse-client <<< 'SELECT 1' -``` - -Insert data from a file in specified format: - -``` bash -clickhouse-client --query='INSERT INTO table VALUES' < data.txt -clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv -``` - -
- -## Import Sample Dataset {#import-sample-dataset} - -Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized web analytics data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. - -### Download and Extract Table Data {#download-and-extract-table-data} - -``` bash -curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv -curl https://datasets.clickhouse.com/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv -``` - -The extracted files are about 10GB in size. - -### Create Tables {#create-tables} - -As in most databases management systems, ClickHouse logically groups tables into “databases”. There’s a `default` database, but we’ll create a new one named `tutorial`: - -``` bash -clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial" -``` - -Syntax for creating tables is way more complicated compared to databases (see [reference](../sql-reference/statements/create/table.md). In general `CREATE TABLE` statement has to specify three key things: - -1. Name of table to create. -2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). -3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. - -There are two tables to create: - -- `hits` is a table with each action done by all users on all websites covered by the service. -- `visits` is a table that contains pre-built sessions instead of individual actions. - -Let’s see and execute the real create table queries for these tables: - -``` sql -CREATE TABLE tutorial.hits_v1 -( - `WatchID` UInt64, - `JavaEnable` UInt8, - `Title` String, - `GoodEvent` Int16, - `EventTime` DateTime, - `EventDate` Date, - `CounterID` UInt32, - `ClientIP` UInt32, - `ClientIP6` FixedString(16), - `RegionID` UInt32, - `UserID` UInt64, - `CounterClass` Int8, - `OS` UInt8, - `UserAgent` UInt8, - `URL` String, - `Referer` String, - `URLDomain` String, - `RefererDomain` String, - `Refresh` UInt8, - `IsRobot` UInt8, - `RefererCategories` Array(UInt16), - `URLCategories` Array(UInt16), - `URLRegions` Array(UInt32), - `RefererRegions` Array(UInt32), - `ResolutionWidth` UInt16, - `ResolutionHeight` UInt16, - `ResolutionDepth` UInt8, - `FlashMajor` UInt8, - `FlashMinor` UInt8, - `FlashMinor2` String, - `NetMajor` UInt8, - `NetMinor` UInt8, - `UserAgentMajor` UInt16, - `UserAgentMinor` FixedString(2), - `CookieEnable` UInt8, - `JavascriptEnable` UInt8, - `IsMobile` UInt8, - `MobilePhone` UInt8, - `MobilePhoneModel` String, - `Params` String, - `IPNetworkID` UInt32, - `TraficSourceID` Int8, - `SearchEngineID` UInt16, - `SearchPhrase` String, - `AdvEngineID` UInt8, - `IsArtifical` UInt8, - `WindowClientWidth` UInt16, - `WindowClientHeight` UInt16, - `ClientTimeZone` Int16, - `ClientEventTime` DateTime, - `SilverlightVersion1` UInt8, - `SilverlightVersion2` UInt8, - `SilverlightVersion3` UInt32, - `SilverlightVersion4` UInt16, - `PageCharset` String, - `CodeVersion` UInt32, - `IsLink` UInt8, - `IsDownload` UInt8, - `IsNotBounce` UInt8, - `FUniqID` UInt64, - `HID` UInt32, - `IsOldCounter` UInt8, - `IsEvent` UInt8, - `IsParameter` UInt8, - `DontCountHits` UInt8, - `WithHash` UInt8, - `HitColor` FixedString(1), - `UTCEventTime` DateTime, - `Age` UInt8, - `Sex` UInt8, - `Income` UInt8, - `Interests` 
UInt16, - `Robotness` UInt8, - `GeneralInterests` Array(UInt16), - `RemoteIP` UInt32, - `RemoteIP6` FixedString(16), - `WindowName` Int32, - `OpenerName` Int32, - `HistoryLength` Int16, - `BrowserLanguage` FixedString(2), - `BrowserCountry` FixedString(2), - `SocialNetwork` String, - `SocialAction` String, - `HTTPError` UInt16, - `SendTiming` Int32, - `DNSTiming` Int32, - `ConnectTiming` Int32, - `ResponseStartTiming` Int32, - `ResponseEndTiming` Int32, - `FetchTiming` Int32, - `RedirectTiming` Int32, - `DOMInteractiveTiming` Int32, - `DOMContentLoadedTiming` Int32, - `DOMCompleteTiming` Int32, - `LoadEventStartTiming` Int32, - `LoadEventEndTiming` Int32, - `NSToDOMContentLoadedTiming` Int32, - `FirstPaintTiming` Int32, - `RedirectCount` Int8, - `SocialSourceNetworkID` UInt8, - `SocialSourcePage` String, - `ParamPrice` Int64, - `ParamOrderID` String, - `ParamCurrency` FixedString(3), - `ParamCurrencyID` UInt16, - `GoalsReached` Array(UInt32), - `OpenstatServiceName` String, - `OpenstatCampaignID` String, - `OpenstatAdID` String, - `OpenstatSourceID` String, - `UTMSource` String, - `UTMMedium` String, - `UTMCampaign` String, - `UTMContent` String, - `UTMTerm` String, - `FromTag` String, - `HasGCLID` UInt8, - `RefererHash` UInt64, - `URLHash` UInt64, - `CLID` UInt32, - `YCLID` UInt64, - `ShareService` String, - `ShareURL` String, - `ShareTitle` String, - `ParsedParams` Nested( - Key1 String, - Key2 String, - Key3 String, - Key4 String, - Key5 String, - ValueDouble Float64), - `IslandID` FixedString(16), - `RequestNum` UInt32, - `RequestTry` UInt8 -) -ENGINE = MergeTree() -PARTITION BY toYYYYMM(EventDate) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SAMPLE BY intHash32(UserID) -``` - -``` sql -CREATE TABLE tutorial.visits_v1 -( - `CounterID` UInt32, - `StartDate` Date, - `Sign` Int8, - `IsNew` UInt8, - `VisitID` UInt64, - `UserID` UInt64, - `StartTime` DateTime, - `Duration` UInt32, - `UTCStartTime` DateTime, - `PageViews` Int32, - `Hits` Int32, - `IsBounce` UInt8, - `Referer` String, - `StartURL` String, - `RefererDomain` String, - `StartURLDomain` String, - `EndURL` String, - `LinkURL` String, - `IsDownload` UInt8, - `TraficSourceID` Int8, - `SearchEngineID` UInt16, - `SearchPhrase` String, - `AdvEngineID` UInt8, - `PlaceID` Int32, - `RefererCategories` Array(UInt16), - `URLCategories` Array(UInt16), - `URLRegions` Array(UInt32), - `RefererRegions` Array(UInt32), - `IsYandex` UInt8, - `GoalReachesDepth` Int32, - `GoalReachesURL` Int32, - `GoalReachesAny` Int32, - `SocialSourceNetworkID` UInt8, - `SocialSourcePage` String, - `MobilePhoneModel` String, - `ClientEventTime` DateTime, - `RegionID` UInt32, - `ClientIP` UInt32, - `ClientIP6` FixedString(16), - `RemoteIP` UInt32, - `RemoteIP6` FixedString(16), - `IPNetworkID` UInt32, - `SilverlightVersion3` UInt32, - `CodeVersion` UInt32, - `ResolutionWidth` UInt16, - `ResolutionHeight` UInt16, - `UserAgentMajor` UInt16, - `UserAgentMinor` UInt16, - `WindowClientWidth` UInt16, - `WindowClientHeight` UInt16, - `SilverlightVersion2` UInt8, - `SilverlightVersion4` UInt16, - `FlashVersion3` UInt16, - `FlashVersion4` UInt16, - `ClientTimeZone` Int16, - `OS` UInt8, - `UserAgent` UInt8, - `ResolutionDepth` UInt8, - `FlashMajor` UInt8, - `FlashMinor` UInt8, - `NetMajor` UInt8, - `NetMinor` UInt8, - `MobilePhone` UInt8, - `SilverlightVersion1` UInt8, - `Age` UInt8, - `Sex` UInt8, - `Income` UInt8, - `JavaEnable` UInt8, - `CookieEnable` UInt8, - `JavascriptEnable` UInt8, - `IsMobile` UInt8, - `BrowserLanguage` UInt16, - `BrowserCountry` UInt16, - 
`Interests` UInt16, - `Robotness` UInt8, - `GeneralInterests` Array(UInt16), - `Params` Array(String), - `Goals` Nested( - ID UInt32, - Serial UInt32, - EventTime DateTime, - Price Int64, - OrderID String, - CurrencyID UInt32), - `WatchIDs` Array(UInt64), - `ParamSumPrice` Int64, - `ParamCurrency` FixedString(3), - `ParamCurrencyID` UInt16, - `ClickLogID` UInt64, - `ClickEventID` Int32, - `ClickGoodEvent` Int32, - `ClickEventTime` DateTime, - `ClickPriorityID` Int32, - `ClickPhraseID` Int32, - `ClickPageID` Int32, - `ClickPlaceID` Int32, - `ClickTypeID` Int32, - `ClickResourceID` Int32, - `ClickCost` UInt32, - `ClickClientIP` UInt32, - `ClickDomainID` UInt32, - `ClickURL` String, - `ClickAttempt` UInt8, - `ClickOrderID` UInt32, - `ClickBannerID` UInt32, - `ClickMarketCategoryID` UInt32, - `ClickMarketPP` UInt32, - `ClickMarketCategoryName` String, - `ClickMarketPPName` String, - `ClickAWAPSCampaignName` String, - `ClickPageName` String, - `ClickTargetType` UInt16, - `ClickTargetPhraseID` UInt64, - `ClickContextType` UInt8, - `ClickSelectType` Int8, - `ClickOptions` String, - `ClickGroupBannerID` Int32, - `OpenstatServiceName` String, - `OpenstatCampaignID` String, - `OpenstatAdID` String, - `OpenstatSourceID` String, - `UTMSource` String, - `UTMMedium` String, - `UTMCampaign` String, - `UTMContent` String, - `UTMTerm` String, - `FromTag` String, - `HasGCLID` UInt8, - `FirstVisit` DateTime, - `PredLastVisit` Date, - `LastVisit` Date, - `TotalVisits` UInt32, - `TraficSource` Nested( - ID Int8, - SearchEngineID UInt16, - AdvEngineID UInt8, - PlaceID UInt16, - SocialSourceNetworkID UInt8, - Domain String, - SearchPhrase String, - SocialSourcePage String), - `Attendance` FixedString(16), - `CLID` UInt32, - `YCLID` UInt64, - `NormalizedRefererHash` UInt64, - `SearchPhraseHash` UInt64, - `RefererDomainHash` UInt64, - `NormalizedStartURLHash` UInt64, - `StartURLDomainHash` UInt64, - `NormalizedEndURLHash` UInt64, - `TopLevelDomain` UInt64, - `URLScheme` UInt64, - `OpenstatServiceNameHash` UInt64, - `OpenstatCampaignIDHash` UInt64, - `OpenstatAdIDHash` UInt64, - `OpenstatSourceIDHash` UInt64, - `UTMSourceHash` UInt64, - `UTMMediumHash` UInt64, - `UTMCampaignHash` UInt64, - `UTMContentHash` UInt64, - `UTMTermHash` UInt64, - `FromHash` UInt64, - `WebVisorEnabled` UInt8, - `WebVisorActivity` UInt32, - `ParsedParams` Nested( - Key1 String, - Key2 String, - Key3 String, - Key4 String, - Key5 String, - ValueDouble Float64), - `Market` Nested( - Type UInt8, - GoalID UInt32, - OrderID String, - OrderPrice Int64, - PP UInt32, - DirectPlaceID UInt32, - DirectOrderID UInt32, - DirectBannerID UInt32, - GoodID String, - GoodName String, - GoodQuantity Int32, - GoodPrice Int64), - `IslandID` FixedString(16) -) -ENGINE = CollapsingMergeTree(Sign) -PARTITION BY toYYYYMM(StartDate) -ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) -SAMPLE BY intHash32(UserID) -``` - -You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want. - -As we can see, `hits_v1` uses the [basic MergeTree engine](../engines/table-engines/mergetree-family/mergetree.md), while the `visits_v1` uses the [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md) variant. - -### Import Data {#import-data} - -Data import to ClickHouse is done via [INSERT INTO](../sql-reference/statements/insert-into.md) query like in many other SQL databases. 
However, data is usually provided in one of the [supported serialization formats](../interfaces/formats.md) instead of the `VALUES` clause (which is also supported). - -The files we downloaded earlier are in tab-separated format, so here’s how to import them via the console client: - -``` bash -clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv -clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv -``` - -ClickHouse has a lot of [settings to tune](../operations/settings/index.md), and one way to specify them in the console client is via command-line arguments, as we did with `--max_insert_block_size`. The easiest way to figure out which settings are available, what they mean, and what their defaults are is to query the `system.settings` table: - -``` sql -SELECT name, value, changed, description -FROM system.settings -WHERE name LIKE '%max_insert_b%' -FORMAT TSV - -max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." -``` - -Optionally, you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from the MergeTree family always merge data parts in the background to optimize data storage (or at least check whether it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: - -``` bash -clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" -clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" -``` - -These queries start an I/O- and CPU-intensive operation, so if the table consistently receives new data, it’s better to leave it alone and let merges run in the background. - -Now we can check whether the table import was successful: - -``` bash -clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1" -clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1" -``` - -## Example Queries {#example-queries} - -``` sql -SELECT - StartURL AS URL, - AVG(Duration) AS AvgDuration -FROM tutorial.visits_v1 -WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30' -GROUP BY URL -ORDER BY AvgDuration DESC -LIMIT 10 -``` - -``` sql -SELECT - sum(Sign) AS visits, - sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits, - (100. * goal_visits) / visits AS goal_percent -FROM tutorial.visits_v1 -WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') -``` - -## Cluster Deployment {#cluster-deployment} - -A ClickHouse cluster is a homogeneous cluster. Steps to set it up: - -1. Install ClickHouse server on all machines of the cluster -2. Set up cluster configs in configuration files -3. Create local tables on each instance -4. Create a [Distributed table](../engines/table-engines/special/distributed.md) - -A [Distributed table](../engines/table-engines/special/distributed.md) is actually a kind of “view” over the local tables of a ClickHouse cluster. A SELECT query against a distributed table executes using the resources of all the cluster’s shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters.
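As a rough illustration of how such a distributed SELECT fans out, the sketch below counts the rows stored on each shard with the `cluster` table function. It assumes the `perftest_3shards_1replicas` cluster and the `tutorial.hits_local` tables that are defined just below; the per-host counts should add up to `count()` taken over the Distributed table.

``` sql
-- Rows stored on each shard host; the cluster and table names are the ones defined below.
SELECT
    hostName() AS shard_host,
    count() AS rows_on_shard
FROM cluster('perftest_3shards_1replicas', tutorial, hits_local)
GROUP BY shard_host
```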
- -Example config for a cluster with three shards, one replica each: - -``` xml - - - - - example-perftest01j - 9000 - - - - - example-perftest02j - 9000 - - - - - example-perftest03j - 9000 - - - - -``` - -For further demonstration, let’s create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but different table name: - -``` sql -CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ... -``` - -Creating a distributed table providing a view into local tables of the cluster: - -``` sql -CREATE TABLE tutorial.hits_all AS tutorial.hits_local -ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); -``` - -A common practice is to create similar Distributed tables on all machines of the cluster. It allows running distributed queries on any machine of the cluster. Also there’s an alternative option to create temporary distributed table for a given SELECT query using [remote](../sql-reference/table-functions/remote.md) table function. - -Let’s run [INSERT SELECT](../sql-reference/statements/insert-into.md) into the Distributed table to spread the table to multiple servers. - -``` sql -INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; -``` - -!!! warning "Notice" - This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utilities/clickhouse-copier.md) that can re-shard arbitrary large tables. - -As you could expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one. - -In this case, we have used a cluster with 3 shards, and each contains a single replica. - -To provide resilience in a production environment, we recommend that each shard should contain 2-3 replicas spread between multiple availability zones or datacenters (or at least racks). Note that ClickHouse supports an unlimited number of replicas. - -Example config for a cluster of one shard containing three replicas: - -``` xml - - ... - - - - example-perftest01j - 9000 - - - example-perftest02j - 9000 - - - example-perftest03j - 9000 - - - - -``` - -To enable native replication [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs restore procedure after failure automatically. It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes including ClickHouse are running). - -!!! note "Note" - ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application. - -ZooKeeper locations are specified in the configuration file: - -``` xml - - - zoo01 - 2181 - - - zoo02 - 2181 - - - zoo03 - 2181 - - -``` - -Also, we need to set macros for identifying each shard and replica which are used on table creation: - -``` xml - - 01 - 01 - -``` - -If there are no replicas at the moment on replicated table creation, a new first replica is instantiated. If there are already live replicas, the new replica clones data from existing ones. You have an option to create all replicated tables first, and then insert data to it. Another option is to create some replicas and add the others after or during data insertion. - -``` sql -CREATE TABLE tutorial.hits_replica (...) 
-ENGINE = ReplicatedMergeTree( - '/clickhouse_perftest/tables/{shard}/hits', - '{replica}' -) -... -``` - -Here we use [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) table engine. In parameters we specify ZooKeeper path containing shard and replica identifiers. - -``` sql -INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; -``` - -Replication operates in multi-master mode. Data can be loaded into any replica, and the system then syncs it with other instances automatically. Replication is asynchronous so at a given moment, not all replicas may contain recently inserted data. At least one replica should be up to allow data ingestion. Others will sync up data and repair consistency once they will become active again. Note that this approach allows for the low possibility of a loss of recently inserted data. - -[Original article](https://clickhouse.com/docs/en/getting_started/tutorial/) diff --git a/docs/en/guides/apply-catboost-model.md b/docs/en/guides/apply-catboost-model.md deleted file mode 100644 index 859703a31df..00000000000 --- a/docs/en/guides/apply-catboost-model.md +++ /dev/null @@ -1,242 +0,0 @@ ---- -toc_priority: 41 -toc_title: Applying CatBoost Models ---- - -# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} - -[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at Yandex for machine learning. - -With this instruction, you will learn to apply pre-trained models in ClickHouse by running model inference from SQL. - -To apply a CatBoost model in ClickHouse: - -1. [Create a Table](#create-table). -2. [Insert the Data to the Table](#insert-data-to-table). -3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (Optional step). -4. [Run the Model Inference from SQL](#run-model-inference). - -For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training). - -You can reload CatBoost models if the configuration was updated without restarting the server using [RELOAD MODEL](../sql-reference/statements/system.md#query_language-system-reload-model) and [RELOAD MODELS](../sql-reference/statements/system.md#query_language-system-reload-models) system queries. - -## Prerequisites {#prerequisites} - -If you do not have the [Docker](https://docs.docker.com/install/) yet, install it. - -!!! note "Note" - [Docker](https://www.docker.com) is a software platform that allows you to create containers that isolate a CatBoost and ClickHouse installation from the rest of the system. - -Before applying a CatBoost model: - -**1.** Pull the [Docker image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry: - -``` bash -$ docker pull yandex/tutorial-catboost-clickhouse -``` - -This Docker image contains everything you need to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files. - -**2.** Make sure the Docker image has been successfully pulled: - -``` bash -$ docker image ls -REPOSITORY TAG IMAGE ID CREATED SIZE -yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB -``` - -**3.** Start a Docker container based on this image: - -``` bash -$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse -``` - -## 1. 
Create a Table {#create-table} - -To create a ClickHouse table for the training sample: - -**1.** Start ClickHouse console client in the interactive mode: - -``` bash -$ clickhouse client -``` - -!!! note "Note" - The ClickHouse server is already running inside the Docker container. - -**2.** Create the table using the command: - -``` sql -:) CREATE TABLE amazon_train -( - date Date MATERIALIZED today(), - ACTION UInt8, - RESOURCE UInt32, - MGR_ID UInt32, - ROLE_ROLLUP_1 UInt32, - ROLE_ROLLUP_2 UInt32, - ROLE_DEPTNAME UInt32, - ROLE_TITLE UInt32, - ROLE_FAMILY_DESC UInt32, - ROLE_FAMILY UInt32, - ROLE_CODE UInt32 -) -ENGINE = MergeTree ORDER BY date -``` - -**3.** Exit from ClickHouse console client: - -``` sql -:) exit -``` - -## 2. Insert the Data to the Table {#insert-data-to-table} - -To insert the data: - -**1.** Run the following command: - -``` bash -$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv -``` - -**2.** Start ClickHouse console client in the interactive mode: - -``` bash -$ clickhouse client -``` - -**3.** Make sure the data has been uploaded: - -``` sql -:) SELECT count() FROM amazon_train - -SELECT count() -FROM amazon_train - -+-count()-+ -| 65538 | -+-------+ -``` - -## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse} - -!!! note "Note" - **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse. - -To integrate CatBoost into ClickHouse: - -**1.** Build the evaluation library. - -The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.` library. For more information about how to build the library, see [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). - -**2.** Create a new directory anywhere and with any name, for example, `data` and put the created library in it. The Docker image already contains the library `data/libcatboostmodel.so`. - -**3.** Create a new directory for config model anywhere and with any name, for example, `models`. - -**4.** Create a model configuration file with any name, for example, `models/amazon_model.xml`. - -**5.** Describe the model configuration: - -``` xml - - - - catboost - - amazon - - /home/catboost/tutorial/catboost_model.bin - - 0 - - -``` - -**6.** Add the path to CatBoost and the model configuration to the ClickHouse configuration: - -``` xml - -/home/catboost/data/libcatboostmodel.so -/home/catboost/models/*_model.xml -``` - -!!! note "Note" - You can change path to the CatBoost model configuration later without restarting server. - -## 4. Run the Model Inference from SQL {#run-model-inference} - -For test model run the ClickHouse client `$ clickhouse client`. - -Let’s make sure that the model is working: - -``` sql -:) SELECT - modelEvaluate('amazon', - RESOURCE, - MGR_ID, - ROLE_ROLLUP_1, - ROLE_ROLLUP_2, - ROLE_DEPTNAME, - ROLE_TITLE, - ROLE_FAMILY_DESC, - ROLE_FAMILY, - ROLE_CODE) > 0 AS prediction, - ACTION AS target -FROM amazon_train -LIMIT 10 -``` - -!!! note "Note" - Function [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) returns tuple with per-class raw predictions for multiclass models. - -Let’s predict the probability: - -``` sql -:) SELECT - modelEvaluate('amazon', - RESOURCE, - MGR_ID, - ROLE_ROLLUP_1, - ROLE_ROLLUP_2, - ROLE_DEPTNAME, - ROLE_TITLE, - ROLE_FAMILY_DESC, - ROLE_FAMILY, - ROLE_CODE) AS prediction, - 1. 
/ (1 + exp(-prediction)) AS probability, - ACTION AS target -FROM amazon_train -LIMIT 10 -``` - -!!! note "Note" - More info about [exp()](../sql-reference/functions/math-functions.md) function. - -Let’s calculate LogLoss on the sample: - -``` sql -:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss -FROM -( - SELECT - modelEvaluate('amazon', - RESOURCE, - MGR_ID, - ROLE_ROLLUP_1, - ROLE_ROLLUP_2, - ROLE_DEPTNAME, - ROLE_TITLE, - ROLE_FAMILY_DESC, - ROLE_FAMILY, - ROLE_CODE) AS prediction, - 1. / (1. + exp(-prediction)) AS prob, - ACTION AS tg - FROM amazon_train -) -``` - -!!! note "Note" - More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions. - -[Original article](https://clickhouse.com/docs/en/guides/apply_catboost_model/) diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md deleted file mode 100644 index eb4ca9af367..00000000000 --- a/docs/en/guides/index.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -toc_folder_title: Guides -toc_priority: 38 -toc_title: Overview ---- - -# ClickHouse Guides {#clickhouse-guides} - -List of detailed step-by-step instructions that help to solve various tasks using ClickHouse: - -- [Tutorial on simple cluster set-up](../getting-started/tutorial.md) -- [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md) - -[Original article](https://clickhouse.com/docs/en/guides/) diff --git a/docs/en/index.md b/docs/en/index.md deleted file mode 100644 index 532be035bbc..00000000000 --- a/docs/en/index.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -toc_priority: 0 -toc_title: Overview ---- - -# What Is ClickHouse? {#what-is-clickhouse} - -ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP). - -In a “normal” row-oriented DBMS, data is stored in this order: - -| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | -|-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | - -In other words, all the values related to a row are physically stored next to each other. - -Examples of a row-oriented DBMS are MySQL, Postgres, and MS SQL Server. - -In a column-oriented DBMS, data is stored like this: - -| Row: | #0 | #1 | #2 | #N | -|-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | - -These examples only show the order that data is arranged in. The values from different columns are stored separately, and data from the same column is stored together. - -Examples of a column-oriented DBMS: Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, and kdb+. - -Different orders for storing data are better suited to different scenarios. 
The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on. - -The higher the load on the system, the more important it is to customize the system set up to match the requirements of the usage scenario, and the more fine grained this customization becomes. There is no system that is equally well-suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, under a high load, the system will handle all the scenarios equally poorly, or will work well for just one or few of possible scenarios. - -## Key Properties of OLAP Scenario {#key-properties-of-olap-scenario} - -- The vast majority of requests are for read access. -- Data is updated in fairly large batches (\> 1000 rows), not by single rows; or it is not updated at all. -- Data is added to the DB but is not modified. -- For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are “wide,” meaning they contain a large number of columns. -- Queries are relatively rare (usually hundreds of queries per server or less per second). -- For simple queries, latencies around 50 ms are allowed. -- Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). -- Requires high throughput when processing a single query (up to billions of rows per second per server). -- Transactions are not necessary. -- Low requirements for data consistency. -- There is one large table per query. All tables are small, except for one. -- A query result is significantly smaller than the source data. In other words, data is filtered or aggregated, so the result fits in a single server’s RAM. - -It is easy to see that the OLAP scenario is very different from other popular scenarios (such as OLTP or Key-Value access). So it does not make sense to try to use OLTP or a Key-Value DB for processing analytical queries if you want to get decent performance. For example, if you try to use MongoDB or Redis for analytics, you will get very poor performance compared to OLAP databases. - -## Why Column-Oriented Databases Work Better in the OLAP Scenario {#why-column-oriented-databases-work-better-in-the-olap-scenario} - -Column-oriented databases are better suited to OLAP scenarios: they are at least 100 times faster in processing most queries. The reasons are explained in detail below, but the fact is easier to demonstrate visually: - -**Row-oriented DBMS** - -![Row-oriented](images/row-oriented.gif#) - -**Column-oriented DBMS** - -![Column-oriented](images/column-oriented.gif#) - -See the difference? - -### Input/output {#inputoutput} - -1. For an analytical query, only a small number of table columns need to be read. In a column-oriented database, you can read just the data you need. For example, if you need 5 columns out of 100, you can expect a 20-fold reduction in I/O. -2. Since data is read in packets, it is easier to compress. Data in columns is also easier to compress. This further reduces the I/O volume. -3. Due to the reduced I/O, more data fits in the system cache. 
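To make the first point concrete, the next paragraph works through the query “count the number of records for each advertising platform”; as SQL it is roughly the sketch below (the table and column names follow the web-analytics schema used elsewhere in these docs and are only illustrative).

``` sql
-- Only the small AdvEngineID column has to be read; the other ~100 columns
-- of the wide table are not touched at all.
SELECT
    AdvEngineID,
    count() AS records
FROM hits
GROUP BY AdvEngineID
ORDER BY records DESC
```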
- -For example, the query “count the number of records for each advertising platform” requires reading one “advertising platform ID” column, which takes up 1 byte uncompressed. If most of the traffic was not from advertising platforms, you can expect at least 10-fold compression of this column. When using a quick compression algorithm, data decompression is possible at a speed of at least several gigabytes of uncompressed data per second. In other words, this query can be processed at a speed of approximately several billion rows per second on a single server. This speed is actually achieved in practice. - -### CPU {#cpu} - -Since executing a query requires processing a large number of rows, it helps to dispatch all operations for entire vectors instead of for separate rows, or to implement the query engine so that there is almost no dispatching cost. If you do not do this, with any half-decent disk subsystem, the query interpreter inevitably stalls the CPU. It makes sense to both store data in columns and process it, when possible, by columns. - -There are two ways to do this: - -1. A vector engine. All operations are written for vectors, instead of for separate values. This means you do not need to call operations very often, and dispatching costs are negligible. Operation code contains an optimized internal cycle. - -2. Code generation. The code generated for the query has all the indirect calls in it. - -This is not done in “normal” databases, because it does not make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) - -Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. - -{## [Original article](https://clickhouse.com/docs/en/) ##} diff --git a/docs/en/getting-started/install.md b/docs/en/install.md similarity index 69% rename from docs/en/getting-started/install.md rename to docs/en/install.md index cd734d4dc8b..b499b584865 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/install.md @@ -1,6 +1,8 @@ --- -toc_priority: 11 -toc_title: Installation +sidebar_label: Installation +sidebar_position: 1 +keywords: [clickhouse, install, installation, docs] +description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. --- # Installation {#installation} @@ -24,15 +26,36 @@ To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or P It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. Run these commands to install packages: ``` bash -{% include 'install/deb.sh' %} +sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. ``` -
Deprecated Method for installing deb-packages + ``` bash -{% include 'install/deb_repo.sh' %} +sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. ``` +
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. @@ -57,15 +80,28 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat First, you need to add the official repository: ``` bash -{% include 'install/rpm.sh' %} +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo +sudo yum install -y clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. ```
Deprecated Method for installing rpm-packages + ``` bash -{% include 'install/rpm_repo.sh' %} +sudo yum install yum-utils +sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo +sudo yum install clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. ``` +
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. @@ -86,14 +122,52 @@ The required version can be downloaded with `curl` or `wget` from repository htt After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -{% include 'install/tgz.sh' %} +LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) +export LATEST_VERSION +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" + +tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" + +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" + +tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" +sudo /etc/init.d/clickhouse-server start + +tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ```
Deprecated Method for installing tgz archives + ``` bash -{% include 'install/tgz_repo.sh' %} +export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz + +tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz +sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz +sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-server-$LATEST_VERSION.tgz +sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +sudo /etc/init.d/clickhouse-server start + +tar -xzvf clickhouse-client-$LATEST_VERSION.tgz +sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh ```
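Whichever of the methods above you used, a quick way to check that the server is up and answering is a sketch like this (it assumes a local server listening on the default ports):

``` bash
# The HTTP interface answers "Ok." on the default port 8123.
curl http://localhost:8123/

# The native client connects over the default TCP port 9000.
clickhouse-client --query "SELECT version()"
```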
diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index e747b93a1a6..16e97ed7c62 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -1,7 +1,8 @@ --- -toc_folder_title: Interfaces -toc_priority: 14 -toc_title: Introduction +sidebar_label: Interfaces +sidebar_position: 34 +keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, client, jdbc, odbc, driver] +description: ClickHouse provides three network interfaces --- # Interfaces {#interfaces} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md deleted file mode 100644 index 9c7fab7424d..00000000000 --- a/docs/en/introduction/adopters.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -toc_priority: 8 -toc_title: Adopters ---- - -# ClickHouse Adopters {#clickhouse-adopters} - -!!! warning "Disclaimer" - The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. - -| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------| -| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | -| Adapty | Subscription Analytics | Main product | — | — | [Tweet, November 2021](https://twitter.com/iwitaly/status/1462698148061659139) | -| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | -| AdScribe | Ads | TV Analytics | — | — | [A quote from CTO](https://altinity.com/24x7-support/) | -| Ahrefs | SEO | Analytics | — | — | [Job listing](https://ahrefs.com/jobs/data-scientist-search) | -| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | -| Alibaba Cloud | Cloud | E-MapReduce | — | — | [Official Website](https://help.aliyun.com/document_detail/212195.html) | -| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.com/meetup22/aloha.pdf) | -| Altinity | Cloud, SaaS | Main product | — | — | [Official Website](https://altinity.com/) | -| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | -| ApiRoad | API marketplace | Analytics | — | — | [Blog post, November 2018, March 2020](https://pixeljets.com/blog/clickhouse-vs-elasticsearch/) | -| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | -| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | -| Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | -| Avito | Classifieds | Monitoring | — | — | [Meetup, 
April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | -| Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) | -| Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | -| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | -| BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | -| BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | -| Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | -| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | -| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | -| CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | -| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | -| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| Checkly | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/tim_nolet/status/1445810665743081474?s=20) | -| ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | -| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | -| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | -| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | -| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| Crazypanda | Games | | — | — | Live session on ClickHouse 
meetup | -| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| Cryptology | Digital Assets Trading Platform | — | — | — | [Job advertisement, March 2021](https://career.habr.com/companies/cryptology/vacancies) | -| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| Deepl | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) | -| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | -| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | -| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | -| eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | -| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| EventBunker.io | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) | -| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | -| Firebolt | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) | -| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | -| FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | -| Futurra Group | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) | -| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | -| Gigapipe | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) | -| Gigasheet | Analytics | Main product | — | — | Direct Reference, February 2022| -| Glaber | Monitoring | Main product | — | — | [Website](https://glaber.io/) | -| GraphCDN | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) | -| Grouparoo | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) | -| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Hydrolix | Cloud data 
platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) | -| Hystax | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) | -| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | -| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| Infobaleen | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) | -| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| Instabug | APM Platform | Main product | — | — | [A quote from Co-Founder](https://altinity.com/) | -| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | -| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | -| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | -| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | -| Jitsu | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) | -| JuiceFS | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) | -| kakaocorp | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) | -| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | -| KGK Global | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) | -| LANCOM Systems | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) | -| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 
2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | -| Lever | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) | -| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| Lookforsale | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) | -| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| MAXILECT | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) | -| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | -| Mello | Marketing | Analytics | 1 server | — | [Article, October 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | -| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| Microsoft | Web Analytics | Clarity (Main Product) | — | — | [A question on GitHub](https://github.com/ClickHouse/ClickHouse/issues/21556) | -| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | -| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | -| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| Muse Group | Music Software | Performance Monitoring | — | — | [Blog post in Russian, January 2021](https://habr.com/en/post/647079/) | -| Netskope | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) | -| NIC Labs | Network Monitoring | RaTA-DNS | — | — | [Blog post, March 2021](https://niclabs.cl/ratadns/2021/03/Clickhouse) | -| NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) | -| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | -| Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) -| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | -| Nuna Inc. 
| Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | -| Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | -| Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | -| OneAPM | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| Opensee | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) | -| Open Targets | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) | -| OZON | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) | -| Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | -| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | -| PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) | -| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | -| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| PRANA | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) | -| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| R-Vision | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) | -| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | -| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| Replica | Urban Planning | Analytics | — | — | [Job 
advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) | -| Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | -| Rollbar | Software Development | Main Product | — | — | [Official Website](https://www.rollbar.com) | -| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | -| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | -| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| Sber | Banking, Fintech, Retail, Cloud, Media | — | 128 servers | >1 PB | [Job advertisement, March 2021](https://career.habr.com/vacancies/1000073536) | -| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | -| sembot.io | Shopping Ads | — | — | — | A comment on LinkedIn, 2020 | -| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| SGK | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | -| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | -| Sipfront | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/andreasgranig/status/1446404332337913895?s=20) | -| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | -| Spark New Zealand | Telecommunications | Security Operations | — | — | [Blog Post, Feb 2020](https://blog.n0p.me/2020/02/2020-02-05-dnsmonster/) | -| Splitbee | Analytics | Main Product | — | — | [Blog Post, Mai 2021](https://splitbee.io/blog/new-pricing) | -| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | -| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | -| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | -| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | -| Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 
2022](https://github.com/ClickHouse/ClickHouse/pull/33573) | -| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | -| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | -| Tesla | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | -| Timeflow | Software | Analytics | — | — | [Blog](https://timeflow.systems/why-we-moved-from-druid-to-clickhouse/ ) | -| Tinybird | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) | -| Traffic Stars | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) | -| UseTech | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) | -| UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) | -| Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | -| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | -| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | -| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | -| Wildberries | E-commerce | | — | — | [Official website](https://it.wildberries.ru/) | -| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | -| Xenoss | Marketing, Advertising | — | — | — | [Instagram, March 2021](https://www.instagram.com/p/CNATV7qBgB1/) | -| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | -| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | -| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) 
| -| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) | -| | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) | -| Yotascale | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) | -| Your Analytics | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) | -| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | -| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | -| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | -| Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) | -| ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | -| ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | -| АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | - -[Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/introduction/distinctive-features.md b/docs/en/introduction/distinctive-features.md deleted file mode 100644 index 951a8a9d3e5..00000000000 --- a/docs/en/introduction/distinctive-features.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -toc_priority: 4 -toc_title: Distinctive Features ---- - -# Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse} - -## True Column-Oriented Database Management System {#true-column-oriented-dbms} - -In a real column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. For example, a billion UInt8-type values should consume around 1 GB uncompressed, or this strongly affects the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data. - -It is worth noting because there are systems that can store values of different columns separately, but that can’t effectively process analytical queries due to their optimization for other scenarios. Examples are HBase, BigTable, Cassandra, and HyperTable. You would get throughput around a hundred thousand rows per second in these systems, but not hundreds of millions of rows per second. - -It’s also worth noting that ClickHouse is a database management system, not a single database. 
ClickHouse allows creating tables and databases in runtime, loading data, and running queries without reconfiguring and restarting the server. - -## Data Compression {#data-compression} - -Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance. - -In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create/table.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones. - -## Disk Storage of Data {#disk-storage-of-data} - -Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. - -ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. - -## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores} - -Large queries are parallelized naturally, taking all the necessary resources available on the current server. - -## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers} - -Almost none of the columnar DBMSs mentioned above have support for distributed query processing. - -In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user. - -## SQL Support {#sql-support} - -ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md). - -Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, [window functions](../sql-reference/window-functions/index.md) and scalar subqueries. - -Correlated (dependent) subqueries are not supported at the time of writing but might become available in the future. - -## Vector Computation Engine {#vector-engine} - -Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency. - -## Real-time Data Updates {#real-time-data-updates} - -ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested. - -## Primary Index {#primary-index} - -Having a data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. - -## Secondary Indexes {#secondary-indexes} - -Unlike other database management systems, secondary indexes in ClickHouse does not point to specific rows or row ranges. 
Instead, they allow the database to know in advance that all rows in some data parts wouldn’t match the query filtering conditions and do not read them at all, thus they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). - -## Suitable for Online Queries {#suitable-for-online-queries} - -Most OLAP database management systems do not aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even more which forces to prepare reports offline (in advance or by responding with “come back later”). - -In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. - -## Support for Approximated Calculations {#support-for-approximated-calculations} - -ClickHouse provides various ways to trade accuracy for performance: - -1. Aggregate functions for approximated calculation of the number of distinct values, medians, and quantiles. -2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. -3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. - -## Adaptive Join Algorithm {#adaptive-join-algorithm} - -ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if there’s more than one large table. - -## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} - -ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases. - -For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md). - -## Role-Based Access Control {#role-based-access-control} - -ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems. - -## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} - -1. No full-fledged transactions. -2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu). -3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys. 
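To make the "Support for Approximated Calculations" section above a bit more concrete, here is a minimal sketch of the first two options. The `hits` table and `UserID` column are hypothetical, and `SAMPLE` only applies to tables created with a `SAMPLE BY` expression:

```sql
-- Exact distinct count: precise, but memory usage grows with cardinality.
SELECT uniqExact(UserID) FROM hits;

-- Approximate distinct count: bounded memory, typically within a few percent.
SELECT uniq(UserID) FROM hits;

-- Same aggregation over roughly 10% of the data (requires a sampling key).
SELECT uniq(UserID) FROM hits SAMPLE 0.1;
```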
- -[Original article](https://clickhouse.com/docs/en/introduction/distinctive-features/) diff --git a/docs/en/introduction/history.md b/docs/en/introduction/history.md deleted file mode 100644 index d192eff80ea..00000000000 --- a/docs/en/introduction/history.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -toc_priority: 7 -toc_title: History ---- - -# ClickHouse History {#clickhouse-history} - -ClickHouse has been developed initially to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be the core component of this system. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development. - -Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. Doing so often requires building complex aggregates, such as the number of unique users. New data for building a report arrives in real-time. - -As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events must be stored to build custom reports. A single query may require scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds. - -## Usage in Yandex.Metrica and Other Yandex Services {#usage-in-yandex-metrica-and-other-yandex-services} - -ClickHouse serves multiple purposes in Yandex.Metrica. -Its main task is to build reports in online mode using non-aggregated data. It uses a cluster of 374 servers, which store over 20.3 trillion rows in the database. The volume of compressed data is about 2 PB, without accounting for duplicates and replicas. The volume of uncompressed data (in TSV format) would be approximately 17 PB. - -ClickHouse also plays a key role in the following processes: - -- Storing data for Session Replay from Yandex.Metrica. -- Processing intermediate data. -- Building global reports with Analytics. -- Running queries for debugging the Yandex.Metrica engine. -- Analyzing logs from the API and the user interface. - -Nowadays, there are multiple dozen ClickHouse installations in other Yandex services and departments: search verticals, e-commerce, advertisement, business analytics, mobile development, personal services, and others. - -## Aggregated and Non-aggregated Data {#aggregated-and-non-aggregated-data} - -There is a widespread opinion that to calculate statistics effectively, you must aggregate data since this reduces the volume of data. - -But data aggregation comes with a lot of limitations: - -- You must have a pre-defined list of required reports. -- The user can’t make custom reports. -- When aggregating over a large number of distinct keys, the data volume is barely reduced, so aggregation is useless. -- For a large number of reports, there are too many aggregation variations (combinatorial explosion). -- When aggregating keys with high cardinality (such as URLs), the volume of data is not reduced by much (less than twofold). -- For this reason, the volume of data with aggregation might grow instead of shrink. -- Users do not view all the reports we generate for them. A large portion of those calculations is useless. -- The logical integrity of data may be violated for various aggregations. 
- -If we do not aggregate anything and work with non-aggregated data, this might reduce the volume of calculations. - -However, with aggregation, a significant part of the work is taken offline and completed relatively calmly. In contrast, online calculations require calculating as fast as possible, since the user is waiting for the result. - -Yandex.Metrica has a specialized system for aggregating data called Metrage, which was used for the majority of reports. -Starting in 2009, Yandex.Metrica also used a specialized OLAP database for non-aggregated data called OLAPServer, which was previously used for the report builder. -OLAPServer worked well for non-aggregated data, but it had many restrictions that did not allow it to be used for all reports as desired. These included the lack of support for data types (only numbers), and the inability to incrementally update data in real-time (it could only be done by rewriting data daily). OLAPServer is not a DBMS, but a specialized DB. - -The initial goal for ClickHouse was to remove the limitations of OLAPServer and solve the problem of working with non-aggregated data for all reports, but over the years, it has grown into a general-purpose database management system suitable for a wide range of analytical tasks. - -[Original article](https://clickhouse.com/docs/en/introduction/history/) diff --git a/docs/en/introduction/index.md b/docs/en/introduction/index.md deleted file mode 100644 index ba80f9c2640..00000000000 --- a/docs/en/introduction/index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -toc_folder_title: Introduction -toc_priority: 1 ---- - - diff --git a/docs/en/introduction/performance.md b/docs/en/introduction/performance.md deleted file mode 100644 index 684b4ee4179..00000000000 --- a/docs/en/introduction/performance.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -toc_priority: 6 -toc_title: Performance ---- - -# Performance {#performance} - -ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). - -Numerous independent benchmarks came to similar conclusions. They are not difficult to find using an internet search, or you can see [our small collection of related links](https://clickhouse.com/#independent-benchmarks). - -## Throughput for a Single Large Query {#throughput-for-a-single-large-query} - -Throughput can be measured in rows per second or megabytes per second. If the data is placed in the page cache, a query that is not too complex is processed on modern hardware at a speed of approximately 2-10 GB/s of uncompressed data on a single server (for the most straightforward cases, the speed may reach 30 GB/s). If data is not placed in the page cache, the speed depends on the disk subsystem and the data compression rate. For example, if the disk subsystem allows reading data at 400 MB/s, and the data compression rate is 3, the speed is expected to be around 1.2 GB/s. To get the speed in rows per second, divide the speed in bytes per second by the total size of the columns used in the query. For example, if 10 bytes of columns are extracted, the speed is expected to be around 100-200 million rows per second. - -The processing speed increases almost linearly for distributed processing, but only if the number of rows resulting from aggregation or sorting is not too large. 
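As a quick back-of-the-envelope check of the throughput estimate above (the 400 MB/s disk speed, compression ratio of 3, and 10 bytes of used columns per row are the illustrative figures from the text, not measurements):

$$
\text{throughput} \approx 400\,\text{MB/s} \times 3 = 1.2\,\text{GB/s},
\qquad
\text{rows/s} \approx \frac{1.2\,\text{GB/s}}{10\,\text{bytes/row}} = 1.2\times10^{8}.
$$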
- -## Latency When Processing Short Queries {#latency-when-processing-short-queries} - -If a query uses a primary key and does not select too many columns and rows to process (hundreds of thousands), you can expect less than 50 milliseconds of latency (single digits of milliseconds in the best case) if data is placed in the page cache. Otherwise, latency is mostly dominated by the number of seeks. If you use rotating disk drives, for a system that is not overloaded, the latency can be estimated with this formula: `seek time (10 ms) * count of columns queried * count of data parts`. - -## Throughput When Processing a Large Quantity of Short Queries {#throughput-when-processing-a-large-quantity-of-short-queries} - -Under the same conditions, ClickHouse can handle several hundred queries per second on a single server (up to several thousand in the best case). Since this scenario is not typical for analytical DBMSs, we recommend expecting a maximum of 100 queries per second. - -## Performance When Inserting Data {#performance-when-inserting-data} - -We recommend inserting data in packets of at least 1000 rows, or no more than a single request per second. When inserting to a MergeTree table from a tab-separated dump, the insertion speed can be from 50 to 200 MB/s. If the inserted rows are around 1 KB in size, the speed will be from 50,000 to 200,000 rows per second. If the rows are small, the performance can be higher in rows per second (on Banner System data -`>` 500,000 rows per second; on Graphite data -`>` 1,000,000 rows per second). To improve performance, you can make multiple INSERT queries in parallel, which scales linearly. - -{## [Original article](https://clickhouse.com/docs/en/introduction/performance/) ##} diff --git a/docs/en/operations/_category_.yml b/docs/en/operations/_category_.yml new file mode 100644 index 00000000000..011ab58d26d --- /dev/null +++ b/docs/en/operations/_category_.yml @@ -0,0 +1,7 @@ +position: 70 +label: 'Operations' +collapsible: true +collapsed: true +link: + type: generated-index + title: Operations \ No newline at end of file diff --git a/docs/en/sql-reference/_category_.yml b/docs/en/sql-reference/_category_.yml new file mode 100644 index 00000000000..cfddcf46548 --- /dev/null +++ b/docs/en/sql-reference/_category_.yml @@ -0,0 +1,7 @@ +position: 15 +label: 'SQL Reference' +collapsible: true +collapsed: true +link: + type: generated-index + title: SQL Reference \ No newline at end of file diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index ec1524f1fa3..e3d5a4b18db 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -168,7 +168,7 @@ Result: Performs the opposite operation of [hex](#hex). It interprets each pair of hexadecimal digits (in the argument) as a number and converts it to the byte represented by the number. The return value is a binary string (BLOB). -If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions. +If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions. !!! 
note "Note" If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8. @@ -326,7 +326,7 @@ unbin(arg) Alias: `UNBIN`. -For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions. +For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions. !!! note "Note" If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8. diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index 2405cb0a03c..a587731e563 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -25,11 +25,12 @@ The features of data sampling are listed below: For the `SAMPLE` clause the following syntax is supported: -| SAMPLE Clause Syntax | Description | -|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `SAMPLE k` | Here `k` is the number from 0 to 1.
The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) |
-| `SAMPLE n` | Here `n` is a sufficiently large integer.<br/>
The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) |
-| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1.<br/>
The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | +| SAMPLE Clause Syntax | Description | +|----------------------|------------------------------| +| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) | +| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) | +| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | + ## SAMPLE K {#select-sample-k} diff --git a/docs/en/whats-new/changelog/2017.md b/docs/en/whats-new/changelog/2017.md index af82c69386a..6a9f599daa0 100644 --- a/docs/en/whats-new/changelog/2017.md +++ b/docs/en/whats-new/changelog/2017.md @@ -1,6 +1,6 @@ --- -toc_priority: 79 -toc_title: '2017' +sidebar_label: 2017 +sidebar_position: 26 --- ### ClickHouse Release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21} diff --git a/docs/en/whats-new/changelog/2018.md b/docs/en/whats-new/changelog/2018.md index db09bcd8a03..d4edca54e52 100644 --- a/docs/en/whats-new/changelog/2018.md +++ b/docs/en/whats-new/changelog/2018.md @@ -1,6 +1,6 @@ --- -toc_priority: 78 -toc_title: '2018' +sidebar_label: 2018 +sidebar_position: 25 --- ## ClickHouse Release 18.16 {#clickhouse-release-18-16} diff --git a/docs/en/whats-new/changelog/2019.md b/docs/en/whats-new/changelog/2019.md index aa06f5cb1e3..c41041705d9 100644 --- a/docs/en/whats-new/changelog/2019.md +++ b/docs/en/whats-new/changelog/2019.md @@ -1,6 +1,6 @@ --- -toc_priority: 77 -toc_title: '2019' +sidebar_label: 2019 +sidebar_position: 22 --- ## ClickHouse Release 19.17 {#clickhouse-release-v19-17} diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index e0afe256777..7ec37c51eb1 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -1,6 +1,6 @@ --- -toc_priority: 76 -toc_title: '2020' +sidebar_label: 2020 +sidebar_position: 21 --- ### ClickHouse release 20.12 diff --git a/docs/en/whats-new/changelog/2021.md b/docs/en/whats-new/changelog/2021.md index 2e81d981990..e4c430342ce 100644 --- a/docs/en/whats-new/changelog/2021.md +++ b/docs/en/whats-new/changelog/2021.md @@ -1,6 +1,8 @@ --- -toc_priority: 75 -toc_title: '2021' +sidebar_label: 2021 +sidebar_position: 20 +keywords: [clickhouse, changelog] +description: Changelog --- ### ClickHouse release v21.12, 2021-12-15 diff --git a/docs/en/whats-new/changelog/index.md b/docs/en/whats-new/changelog/index.md index 517ea16f3e7..22f6a30452d 100644 --- a/docs/en/whats-new/changelog/index.md +++ b/docs/en/whats-new/changelog/index.md @@ -1,7 +1,498 @@ --- -toc_folder_title: Changelog -toc_priority: 74 -toc_title: '2022' +sidebar_label: Changelog +sidebar_position: 1 +keywords: [clickhouse, changelog] +description: Changelog --- -{% include "content/changelog.md" %} +# ClickHouse Changelog + +### Table of Contents +**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
+**[ClickHouse release v22.2, 2022-02-17](#222)**<br/>
+**[ClickHouse release v22.1, 2022-01-18](#221)**<br/>
+**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br/>
+ + +## ClickHouse release v22.3-lts, 2022-03-17 + +#### Backward Incompatible Change + +* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). +* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). + +#### New Feature + +* Support for caching data locally for remote filesystems. It can be enabled for `s3` disks. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, we enabled the test suite on s3 filesystem and no more known issues exist, so it is started to be production ready. +* Add new table function `hive`. It can be used as follows `hive('', '', '', '', '')` for example `SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', 'id Nullable(String), score Nullable(Int32), day Nullable(String)', 'day')`. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). +* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). +* Support schema inference for inserting into table functions `file`/`hdfs`/`s3`/`url`. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). +* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. 
This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). +* Display CPU and memory metrics in clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). +* Implement `startsWith` and `endsWith` function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). +* Add three functions for Map data type: 1. `mapReplace(map1, map2)` - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. `mapFilter` 3. `mapMap`. mapFilter and mapMap are higher order functions, accepting two arguments, the first argument is a lambda function with k, v pair as arguments, the second argument is a column of type Map. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). +* Allow getting default user and password for clickhouse-client from the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). + +#### Experimental Feature + +* New data type `Object()`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. +* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it only allowed to only create `Replicated` tables or tables with stateless engines in `Replicated` databases. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Note that `Replicated` database is still an experimental feature. + +#### Performance Improvement + +* Improve performance of insertion into `MergeTree` tables by optimizing sorting. Up to 2x improvement is observed on realistic benchmarks. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). +* Columns pruning when reading Parquet, ORC and Arrow files from URL and S3. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Columns pruning when reading Parquet, ORC and Arrow files from Hive. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). +* A bunch of performance optimizations from a performance superhero. Improve performance of processing queries with large `IN` section. Improve performance of `direct` dictionary if its source is `ClickHouse`. Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Improve performance of `any` aggregate function by using more batching. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). +* Multiple improvements for performance of `clickhouse-keeper`: less locking [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)), lower memory usage by streaming reading and writing of snapshot instead of full copy. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)), optimizing compaction of log store in the RAFT implementation. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)), versioning of the internal data structure [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement + +* Allow asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). +* Implicit type casting of the key argument for functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants`. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). +* `EXPLAIN AST` query can output AST in form of a graph in Graphviz format: `EXPLAIN AST graph = 1 SELECT * FROM system.parts`. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). +* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). +* Play UI now correctly detects the preferred light/dark theme from the OS. [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). +* Added `date_time_input_format = 'best_effort_us'`. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). +* A new settings called `allow_plaintext_password` and `allow_no_password` are added in server configuration which turn on/off authentication types that can be potentially insecure in some environments. They are allowed by default. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). 
+* Support for `DateTime64` data type in `Arrow` format, closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). +* Reload `remote_url_allow_hosts` (filtering of outgoing connections) on config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Support `--testmode` parameter for `clickhouse-local`. This parameter enables interpretation of test hints that we use in functional tests. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `distributed_depth` to query log. It is like a more detailed variant of `is_initial_query` [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). +* Respect `remote_url_allow_hosts` for `MySQL` and `PostgreSQL` table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Added `disk_name` field to `system.part_log`. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). +* Do not retry non-rertiable errors when querying remote URLs. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support distributed INSERT SELECT queries (the setting `parallel_distributed_insert_select`) table function `view()`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)). +* More precise memory tracking during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). +* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). +* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use connection pool for Hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). +* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). +* Allow `LowCardinality` strings for `ngrambf_v1`/`tokenbf_v1` indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). 
+* Allow opening empty sqlite db if the file doesn't exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Implement memory statistics for FreeBSD - this is required for `max_server_memory_usage` to work correctly. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). +* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is an `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). +* Show hints when user mistyped the name of a data skipping index. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). +* Support `remote()`/`cluster()` table functions for `parallel_distributed_insert_select`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). +* Do not reset logging that configured via `--log-file`/`--errorlog-file` command line options in case of empty configuration in the config file. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). +* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow specifying argument names for executable UDFs. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). +* `MaterializedMySQL` (experimental feature) now supports `materialized_mysql_tables_list` (a comma-separated list of MySQL database tables, which will be replicated by the MaterializedMySQL database engine. Default value: empty list — means all the tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve OpenTelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). +* Make the znode `ctime` and `mtime` consistent between servers in ClickHouse Keeper. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). + +#### Build/Testing/Packaging Improvement + +* Package repository is migrated to JFrog Artifactory (**Mikhail f. Shiryaev**). +* Randomize some settings in functional tests, so more possible combinations of settings will be tested. This is yet another fuzzing method to ensure better test coverage. 
This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)). +* Drop PVS-Studio from our CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add an ability to build stripped binaries with CMake. In previous versions it was performed by dh-tools. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). +* Smaller "fat-free" `clickhouse-keeper` build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)). +* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `clickhouse-test` debian package as unneeded complication. CI use tests from repository and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* A fix for HDFS integration: When the inner buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multi times (>=3) for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)). +* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)). +* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). While using lz4 compression with a small max_read_buffer_size setting value. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). While using lzma compression with small `max_read_buffer_size` setting value. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). While using `brotli` compression with a small `max_read_buffer_size` setting value. 
The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible segfault in `JSONEachRow` schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)). +* Avoid std::terminate in case of exception in reading from remote VFS. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)). +* Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)). +* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)). +* Fix a corner case of `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)). +* Schema inference didn't work properly on case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix MaterializedPostgreSQL (experimental feature) `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix MaterializedPostgreSQL (experimental feature) adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)). +* Fixed slightly incorrect translation of YAML configs to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)). +* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)). +* The `update_lag` external dictionary configuration option was unusable showing the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)). +* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix missing alias after function is optimized to a subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)). +* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary` (relevant to operation on remote VFS). Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix unexpected result when use -State type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)). +* Fix possible segfault in FileLog (experimental feature). Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible rare error `Cannot push block to port which already has data`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)). +* Integration with Hive: Fix unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)). +* Avoid busy polling in ClickHouse Keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)). +* Fix DateTime64 conversion from PostgreSQL. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible "Part directory doesn't exist" during `INSERT` into MergeTree table backed by VFS over s3. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)). +* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix bugs for multiple columns group by in `WindowView` (experimental feature). [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)). +* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix bug for H3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix `No such file or directory` with enabled `fsync_part_directory` and vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so now it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)). +* Stop selecting parts to mutate when the other replica has already updated the transaction log for the `ReplicatedMergeTree` engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). +* Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). + + +### ClickHouse release v22.2, 2022-02-17 + +#### Upgrade Notes + +* Applying data skipping indexes for queries with FINAL may produce incorrect result. In this release we disabled data skipping indexes by default for queries with FINAL (a new setting `use_skip_indexes_if_final` is introduced and disabled by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)). + +#### New Feature + +* Projections are production ready. Set `allow_experimental_projection_optimization` by default and deprecate this setting. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* An option to create new files on insert for `File`/`S3`/`HDFS` engines. Allow to overwrite a file in `HDFS`. Throw an exception on an attempt to overwrite a file in `S3` by default. Throw an exception on an attempt to append data to a file in formats that have a suffix (and thus don't support appends, like `Parquet`, `ORC`). Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a setting that allows a user to provide their own deduplication semantics in `MergeTree`/`ReplicatedMergeTree`. If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)). +* Add support of `DEFAULT` keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331).
[#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/1over)). +* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([yakov-olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)). +* Allow set default table engine and to create tables without specifying ENGINE. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)). +* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)). +* Detect format in `clickhouse-local` by file name even in the case when it is passed to stdin. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)). +* Add schema inference for `values` table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)). +* Dynamic reload of server TLS certificates on config reload. Closes [#15764](https://github.com/ClickHouse/ClickHouse/issues/15764). [#15765](https://github.com/ClickHouse/ClickHouse/pull/15765) ([johnskopis](https://github.com/johnskopis)). [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). +* Fault-tolerant connections in clickhouse-client: `clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4`. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)). [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)). +* Add `DEGREES` and `RADIANS` functions for MySQL compatibility. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)). Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)). +* Add function `bitSlice` to extract bit subsequences from String/FixedString. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)). +* Implemented `meanZTest` aggregate function. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)). +* Add confidence intervals to T-tests aggregate functions. 
[#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)). +* Add function `addressToLineWithInlines`. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)). +* Added `#!` and `# ` as a recognised start of a single line comment. Closes [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)). + +#### Experimental Feature + +* Functions for text classification: language and charset detection. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. When the hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommitted query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait for another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)). +* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)). +* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL` (experimental feature). Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix various issues when projection is enabled by default. Each issue is described in a separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678). This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)). + +#### Performance Improvement + +* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if a table has sorting key `ORDER BY (a, b)` and a query has `WHERE a = const ORDER BY b` clauses, reading in order of the sorting key will now be applied instead of a full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). +* Improve performance of partitioned insert into table functions `URL`, `S3`, `File`, `HDFS`. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)). +* Multiple performance improvements of clickhouse-keeper. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)). +* `FlatDictionary`: improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944).
[#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)). +* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)). +* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have an effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimize `quantilesExact{Low,High}` to use `nth_element` instead of `sort`. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Danila Kutenin](https://github.com/danlark1)). +* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Minor improvement for analysis of scalar subqueries. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)). +* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831. It's beneficial to also apply to tuple columns. [#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)). +* Rework and reintroduce the scalar subqueries cache to Materialized Views execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)). +* Slightly improve performance of `ORDER BY` by adding x86-64 AVX-512 support for `memcmpSmall` functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)). +* Improve `range_hashed` dictionary performance if there are a lot of intervals for a key. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)). +* For inserts and merges into S3, write files in parallel whenever possible. [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve `clickhouse-keeper` performance and fix several memory leaks in NuRaft library. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)). + +#### Improvement + +* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)). +* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)). +* Improvements for `range_hashed` dictionaries.
Improve performance of load time if there are multiple attributes. Allow to create a dictionary without attributes. Added option `convert_null_range_bound_to_open` (by default `true`) to specify the strategy when intervals `start` and `end` have `Nullable` type. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify the range lookup type (`min` or `max`, by default `min`). Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)). +* `flat`, `hashed`, `hashed_array` dictionaries now support creating with empty attributes, with support of reading the keys and using `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)). +* Added support for `DateTime64` data type in dictionaries. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to write `s3(url, access_key_id, secret_access_key)` (autodetect of data format and table structure, but with explicit credentials). [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)). +* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)). +* Send ProfileEvents statistics in case of INSERT SELECT query (to display query metrics in `clickhouse-client` for this type of queries). [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)). +* Recognize `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)). +* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow to export arbitrarily large files to `s3`. Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` parts have been uploaded to S3 from a single query, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)).
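For illustration, a minimal sketch of how these multipart-upload settings might be applied to a large export (the bucket URL, column structure and setting values below are hypothetical):

```sql
-- The part size starts at s3_min_upload_part_size and is multiplied by
-- s3_upload_part_size_multiply_factor each time
-- s3_upload_part_size_multiply_parts_count_threshold parts have been uploaded.
INSERT INTO FUNCTION s3('https://my-bucket.s3.amazonaws.com/huge_export.native', 'Native', 'id UInt64, payload String')
SELECT number AS id, toString(number) AS payload
FROM numbers(1000000000)
SETTINGS s3_min_upload_part_size = 16777216,
         s3_upload_part_size_multiply_factor = 2,
         s3_upload_part_size_multiply_parts_count_threshold = 500;
```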
+* Allow to skip not found (404) URLs for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Default input and output formats for `clickhouse-local` that can be overridden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)). +* Add `max_query_size` and `max_parser_depth` options to `clickhouse-format`. This closes [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528). [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)). +* Better handling of pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308). [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)). +* `REGEXP_MATCHES` and `REGEXP_REPLACE` function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)). +* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)). +* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([tavplubix](https://github.com/tavplubix)). +* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add ability to compose PostgreSQL-style cast operator `::` with expressions using `[]` and `.` operators (array and tuple indexing). [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)). +* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow to parse dictionary's `PRIMARY KEY` as `PRIMARY KEY (id, value)`; previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135). [#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)). +* An optional argument for `splitByChar` to limit the number of resulting elements. Closes [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081).
[#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)). +* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123). [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)). +* Add `UUID` support in `MsgPack` input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)). +* Tracing context (for OpenTelemetry) is now propagated from GRPC client metadata (this change is relevant for GRPC client-server protocol). [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)). +* Support all types of `SYSTEM` queries with `ON CLUSTER` clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)). +* Improve memory accounting for queries that are using less than `max_untracked_memory`. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)). +* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by a different number of bytes. Example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Detect format and schema from stdin in `clickhouse-local`. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)). +* Correctly handle the case of misconfiguration when multiple disks are using the same path on the filesystem. [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)). +* Try every resolved IP address while getting S3 proxy. S3 proxies are rarely used, mostly in Yandex Cloud. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Support EXPLAIN AST CREATE FUNCTION query: `EXPLAIN AST CREATE FUNCTION mycast AS (n) -> cast(n as String)` will return `EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String')`. [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)). +* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)). +* Add some improvements and fixes for `Bool` data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)). +* Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)). +* Improvement for `fromUnixTimestamp64` family functions. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)).
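For example, a small sketch of the relaxed argument types (the timestamp values are arbitrary):

```sql
-- Previously the fromUnixTimestamp64* functions required an Int64 argument;
-- now any integer type that can be converted to Int64 is accepted.
SELECT fromUnixTimestamp64Milli(toUInt32(1234567890));
SELECT fromUnixTimestamp64Nano(toUInt16(42));
```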
+* Reimplement `_shard_num` from constants (see [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (see [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)). +* Enable binary arithmetic (plus, minus, multiply, division, least, greatest) between Decimal and Float (an illustrative query is sketched below, after the Build/Testing list). [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)). +* Respect cgroups limits in max_threads autodetection. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). +* Add new clickhouse-keeper setting `min_session_timeout_ms`. Now clickhouse-keeper will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings. [#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)). +* Added `UUID` data type support for functions `hex` and `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). +* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g. ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)). +* Fixes `parallel_view_processing = 0` not working when inserting into a table using `VALUES`. - Fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)). +* Fix parsing tables structure from ZooKeeper: now metadata from ZooKeeper is compared with local metadata in canonical form. It helps when canonical function names can change between ClickHouse versions. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)). +* Properly escape some characters for interaction with LDAP. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). + +#### Build/Testing/Packaging Improvement + +* Remove unbundled build support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)). +* Ensure that tests don't depend on the result of non-stable sorting of equal elements. Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)). +* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove `clickhouse-test` debian package because it's obsolete. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)). +* Multiple improvements for build system to remove the possibility of occasionally using packages from the OS and to enforce hermetic builds. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)).
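Illustrating the Decimal and Float binary arithmetic entry above, a minimal sketch (the constants are arbitrary and the result types are whatever the server infers):

```sql
-- Mixed Decimal/Float arithmetic and least/greatest now work directly.
SELECT toDecimal32('2.50', 2) * 1.5,
       toDecimal64('100.000', 3) / 4.0,
       least(toDecimal32('7.25', 2), 7.3),
       greatest(toDecimal32('7.25', 2), 7.2);
```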
+ +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. It can reproduce if you have mostly empty arrays, but not always. And reading is performed in backward direction with ORDER BY ... DESC. This error is extremely unlikely to happen. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong result of `round`/`roundBankers` if integer values of small types are rounded. Closes [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)). +* Sometimes query cancellation did not work immediately when we were reading multiple files from s3 or HDFS. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)). +* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1` and `distributed_aggregation_memory_efficient = 0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)). +* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)). +* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)). +* Fix possible error 'file_size: Operation not supported' in files' schema autodetection. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible error `Cannot convert column Function to mask` in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix potential crash when doing schema inference from url source. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). +* For UDFs access permissions were checked for database level instead of global level as it should be. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([tavplubix](https://github.com/tavplubix)). +* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix current_user/current_address client information fields for inter-server communication (before this patch current_user/current_address would be preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)). +* Fix metric `Query`, which shows the number of executing queries. In the last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). +* Fix schema inference for table function `s3`. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)). +* Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3 (virtual filesystem over s3 is an experimental feature that is not ready for production). [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). +* Fix inserts to distributed tables in case of a change of native protocol. The last change was in the version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible data race in `File` table engine that was introduced in [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960). Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). [#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([tavplubix](https://github.com/tavplubix)). +* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug which led to the inability of the server to start when both replicated access storage and keeper (embedded in clickhouse-server) are used.
Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). +* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix parsing IPv6 from query parameter (prepared statements) and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential race condition when doing remote disk read (virtual filesystem over s3 is an experimental feature that is not ready for production). [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)). +* Fix crash if SQL UDF is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([tavplubix](https://github.com/tavplubix)). +* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). [#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)). +* Fix index analysis with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)). +* Fix schema inference for `JSONEachRow` and `JSONCompactEachRow`. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix usage of external dictionaries with `redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). 
[#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)). +* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)). +* Throw exception when directory listing request has failed in storage HDFS. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)). +* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010). This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275). [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)). +* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug in "zero copy replication" (a feature that is under development and should not be used in production) which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)). +* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)). +* Export into `lz4` files has been fixed. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix potential crash if `group_by_overflow_mode` was set to `any` (approximate GROUP BY) and aggregation was performed by single column of type `LowCardinality`. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)). +* Fix inserting to temporary tables via gRPC client-server protocol. Fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. 
[#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix issue [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix issue [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). +* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). + + +### ClickHouse release v22.1, 2022-01-18 + +#### Upgrade Notes + +* The functions `left` and `right` were previously implemented in parser and now full-featured. Distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Resource usage by scalar subqueries is fully accounted since this version. With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature + +* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)). +* Detect format by file extension in `file`/`hdfs`/`s3`/`url` table functions and `HDFS`/`S3`/`URL` table engines and also for `SELECT INTO OUTFILE` and `INSERT FROM INFILE` [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([OnePiece](https://github.com/zhongyuankai)). +* A tool for collecting diagnostics data if you need support. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). +* Automatic cluster discovery via Zoo/Keeper. It allows to add replicas to the cluster without changing configuration on every server. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([vdimir](https://github.com/vdimir)). +* Implement hive table engine to access apache hive from clickhouse. This implements: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). 
[#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([taiyang-li](https://github.com/taiyang-li)). +* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by [Vanyok-All-is-OK](https://github.com/Vanyok-All-is-OK) and [antikvist](https://github.com/antikvist). +* Added table function `hdfsCluster` which allows processing files from HDFS in parallel from many nodes in a specified cluster, similarly to `s3Cluster`. [#32400](https://github.com/ClickHouse/ClickHouse/pull/32400) ([Zhichang Yu](https://github.com/yuzhichang)). +* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). +* Allow `COMMENT` in `CREATE VIEW` (for all VIEW kinds). [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). +* Dynamically reinitialize listening ports and protocols when configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Added `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add new functions for `H3` coordinate system: `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `MONTHNAME` function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)). +* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). +* Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). +* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). + + +#### Performance Improvement + +* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). +* More efficient handling of globs for URL storage. Now you can easily query million URLs in parallel with retries. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). 
[#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). +* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Non-significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip fewer partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). +* Improve `clickhouse-keeper` writing performance by optimizing the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). +* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). +* Improve query performance of system tables. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([OnePiece](https://github.com/zhongyuankai)). +* Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). +* Fix `sparse_hashed` dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). + + +#### Experimental Feature + +* Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_of_defaults_for_sparse_serialization`. Sparse serialization will be chosen dynamically for a column if its ratio of default values to all values is above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may vary between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). +* Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas.
[#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). +* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). +* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change ZooKeeper path for zero-copy marks for shared data. Note that "zero-copy replication" is a non-production feature (in early stages of development) that you shouldn't use anyway. But if you have used it, keep this change in mind. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). +* Events clause support for WINDOW VIEW watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). +* Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior is consistent with ZooKeeper and the generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). +* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). + + +#### Improvement + +* Now date time conversion functions that generate time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if a date truncation function would yield a result before the Unix epoch. +* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). +* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, the error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). +* Support hints for mistyped setting names for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237). [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). +* Allow to use virtual columns in Materialized Views. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([OnePiece](https://github.com/zhongyuankai)). +* Add config to disable IPv6 in clickhouse-keeper if needed. This closes [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). +* Add more info to `system.build_options` about current git revision. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([taiyang-li](https://github.com/taiyang-li)).
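A quick way to inspect this, assuming the usual `name`/`value` layout of `system.build_options` (the exact row names may differ between builds):

```sql
-- Show build metadata, including the git revision details added by this change.
SELECT name, value
FROM system.build_options
WHERE name ILIKE '%git%' OR name ILIKE '%version%';
```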
+* `clickhouse-local`: track memory under `--max_memory_usage_in_client` option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). +* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). +* Support `<secure/>` in cluster configuration, as an alternative form of `<secure>1</secure>`. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). +* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support Unix timestamp with milliseconds in `parseDateTimeBestEffort` function. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). +* Allow to cancel query while reading data from external table in the formats: `Arrow` / `Parquet` / `ORC` - it failed to be cancelled in case of big files and the setting `input_format_allow_seeks` set to false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If table engine supports `SETTINGS` clause, allow to pass the settings as key-value or via config. Add this support for MySQL. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly prevent Nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). +* Add retry for `PostgreSQL` connections in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095#issuecomment-1000577517). [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Send profile info inside `clickhouse-local`. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Short circuit evaluation: support for function `throwIf`. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). +* (This only happens in unofficial builds). Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939).
[#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). +* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). +* Flush all In-Memory data parts when WAL is not enabled while shutting down the server or detaching a table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). +* Allow to control connection timeouts for MySQL (previously it was supported only for the dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously the default `connect_timeout` was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `authSource` option for storage `MongoDB`. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `Date32` type in `generateRandom` table function. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). +* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` to control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)). +* Improve handling of nested structures with missing columns while reading data in `Protobuf` format. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow empty credentials for `MongoDB` engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable some optimizations for window functions that may lead to exceptions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to connect to MongoDB 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483). [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasFL)). +* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds.
Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)). +* Bitmap aggregate functions will give correct result for out of range argument instead of wraparound. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)). +* Fix parsing incorrect queries with `FROM INFILE` statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't allow to write into `S3` if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)). +* `--echo` option was not used by `clickhouse-client` in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)). +* Use `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix surprisingly bad code in SQL ordinary function `file`. Now it supports symlinks. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). +* Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). + + +#### Build/Testing/Packaging Improvement + +* Add packages, functional tests and Docker builds for AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) +* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add `actionlint` for GitHub Actions workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)). +* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
+* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)). +* Update `libc++` and `libc++abi` to the latest. [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)). +* Added integration test for external .NET client ([ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)). [#23230](https://github.com/ClickHouse/ClickHouse/pull/23230) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)). +* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)). +* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to adversary. Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed Apache Avro Union type index out of boundary issue in Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference in `LowCardinality` data when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). +* ClickHouse Keeper handler will correctly remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). +* Potential off-by-one miscalculation of quotas: quota limit was not reached, but the limit was exceeded. This fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). +* Fixed CASTing from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)) [#27914](https://github.com/ClickHouse/ClickHouse/pull/27914) ([Vasily Nemkov](https://github.com/Enmk)). +* Fixed an exception like `Unknown aggregate function nothing` during an execution on a remote server. This fixes [#16689](https://github.com/ClickHouse/ClickHouse/issues/16689). [#26074](https://github.com/ClickHouse/ClickHouse/pull/26074) ([hexiaoting](https://github.com/hexiaoting)). +* Fix wrong database for JOIN without explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix segfault in Apache `Avro` format that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault in Apache `Arrow` format if schema contains `Dictionary` type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)). +* Out of band `offset` and `limit` settings may be applied incorrectly for views. Close [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289). [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)). +* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix dictionary expressions for `range_hashed` range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible use-after-free for INSERT into Materialized View with concurrent DROP ([Azat Khuzhin](https://github.com/azat)). +* Do not try to read past EOF (to work around a bug in the Linux kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)). +* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify the comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)). +* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in the query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)). +* Fix sending `WHERE 1 = 0` expressions for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix DDL validation for MaterializedPostgreSQL. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952). [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix MaterializedPostgreSQL detach/attach (removing / adding to replication) tables with non-default schema.
Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The metric `StorageBufferBytes` sometimes was miscalculated. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)). +* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `s3` table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded the the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong tuple output in `CSV` format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix HDFS URL check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)). +* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `ORC` format stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([kreuzerkrieg](https://github.com/kreuzerkrieg)). +* `topKWeightedState` failed for some input types. [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([vdimir](https://github.com/vdimir)). +* Fix exception `Single chunk is expected from view inner query (LOGICAL_ERROR)` in materialized view. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix optimization with lazy seek for async reads from remote filesystems. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([tavplubix](https://github.com/tavplubix)). +* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query references the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)). +* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([tavplubix](https://github.com/tavplubix)). +* Fix crash when using the `fuzzBits` function, close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). +* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is parsed as `GROUP BY tuple(...)`) over `Kafka`/`RabbitMQ`. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)). +* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have a monotonic function in a prefix of the sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)). +* Fix LOGICAL_ERROR exception when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). +* Inserting into S3 with multipart upload to Google Cloud Storage may trigger abort. [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([vdimir](https://github.com/vdimir)). +* Fix possible exception at `RabbitMQ` storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). +* Fix async inserts with formats `CustomSeparated`, `Template`, `Regexp`, `MsgPack` and `JSONAsString`. Previously the async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix `groupBitmapAnd` function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)). +* Fix crash in JOIN found by fuzzer, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([vdimir](https://github.com/vdimir)). +* Proper handling of the case with Apache Arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)). +* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named `ALL` or `DISTINCT`. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). +* Fix broken select query when there are more than 2 row policies on same column, begin at second queries on the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)). +* Fix fractional unix timestamp conversion to `DateTime64`, fractional part was reversed for negative unix timestamps (before 1970-01-01). [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)). +* Some entries of replication queue might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([tavplubix](https://github.com/tavplubix)). +* Fix parsing of `APPLY lambda` column transformer which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `base64Encode` adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix hang up with command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). 
+ + +## [Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md) \ No newline at end of file diff --git a/docs/en/whats-new/index.md b/docs/en/whats-new/index.md index 8033fdf71d9..ac2b41a6637 100644 --- a/docs/en/whats-new/index.md +++ b/docs/en/whats-new/index.md @@ -1,8 +1,10 @@ --- -toc_folder_title: What's New -toc_priority: 82 +sidebar_label: What's New +sidebar_position: 500 +keywords: [clickhouse, what's, new, roadmap, changelog] +description: What's New in ClickHouse --- -# What’s New in ClickHouse? {#whats-new-in-clickhouse} +# What’s New in ClickHouse {#whats-new-in-clickhouse} There’s a short high-level [roadmap](../whats-new/roadmap.md) and a detailed [changelog](../whats-new/changelog/index.md) for releases that have already been published. diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md index 54f8f9d68a3..be7298ccd79 100644 --- a/docs/en/whats-new/roadmap.md +++ b/docs/en/whats-new/roadmap.md @@ -7,4 +7,3 @@ toc_title: Roadmap The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513). -{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##} diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md index bcfeaa06e24..0a5c926f227 100644 --- a/docs/en/whats-new/security-changelog.md +++ b/docs/en/whats-new/security-changelog.md @@ -1,8 +1,12 @@ --- -toc_priority: 76 -toc_title: Security Changelog +sidebar_label: Security Changelog +sidebar_position: 100 +keywords: [clickhouse, security, changelog] +description: Security Changelog --- +# Security Changelog + ## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12} ### CVE-2021-25263 {#cve-2021-25263} @@ -80,5 +84,3 @@ Credits: Andrey Krasichkov and Evgeny Sidorov of Yandex Information Security Tea Incorrect configuration in deb package could lead to the unauthorized use of the database. 
Credits: the UK’s National Cyber Security Centre (NCSC) - -{## [Original article](https://clickhouse.com/docs/en/security_changelog/) ##} From ef34f7dbd7f6bc92f5c381b22baeb03290305820 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 29 Mar 2022 03:54:13 -0400 Subject: [PATCH 030/117] allow EPHEMERAL without expression --- src/Parsers/ParserCreateQuery.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index a4dbe635664..f2b43d13962 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace DB @@ -185,8 +187,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E } Pos pos_before_specifier = pos; - if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || - s_ephemeral.ignore(pos, expected) || s_alias.ignore(pos, expected)) + if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected)) { default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); @@ -194,6 +195,15 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (!expr_parser.parse(pos, default_expression, expected)) return false; } + else + { + if (s_ephemeral.ignore(pos, expected)) + { + default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); + if (!expr_parser.parse(pos, default_expression, expected) && type) + default_expression = std::make_shared(DataTypeFactory::instance().get(type)->getDefault()); + } + } if (require_type && !type && !default_expression) return false; /// reject column name without type From de58f5988d3305c3084a82d498228f2af4977e2d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Tue, 29 Mar 2022 04:01:20 -0400 Subject: [PATCH 031/117] Update table.md allow EPHEMERAL without expression --- docs/en/sql-reference/statements/create/table.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 409ec422ade..ee663c92695 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. +Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. 
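For reference, a minimal sketch of the behavior this patch series enables (EPHEMERAL with the expression omitted, so the column type's default value is used). The statements mirror the cases added in `tests/queries/0_stateless/02205_ephemeral_1.sql`; the table name is only illustrative:

```sql
-- y has no explicit default expression, so its type (UInt32) is required
-- and the type default (0) acts as the ephemeral default
CREATE TABLE t_ephemeral_02205_1 (x UInt32 DEFAULT y, y UInt32 EPHEMERAL, z UInt32 DEFAULT 5) ENGINE = Memory;

-- INSERT without a column list skips the ephemeral column: only (x, z) participate
INSERT INTO t_ephemeral_02205_1 VALUES (1, 2);
SELECT * FROM t_ephemeral_02205_1;              -- 1  2

-- x defaults to y, and y's ephemeral default is the type default 0; z falls back to 5
INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (DEFAULT, DEFAULT);
SELECT * FROM t_ephemeral_02205_1 ORDER BY x;   -- 0  5  and  1  2
```

Because the ephemeral column is not stored, `SELECT *` returns only `x` and `z`, which keeps the SELECT/INSERT invariant described above.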
### ALIAS {#alias} From 4c14c47786f878f3e9ece17356a11bc5dad14ccd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Tue, 29 Mar 2022 04:06:01 -0400 Subject: [PATCH 032/117] Update table.md allow EPHEMERAL without expression --- docs/ru/sql-reference/statements/create/table.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index b9c2a4f0f0b..48cce437b8d 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1; ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. +Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. Если значение по умолчанию `expr` не указано, то тип колонки должен быть специфицирован. INSERT без списка столбцов игнорирует этот столбец, таким образом сохраняется инвариант - т.е. дамп, полученный путём `SELECT *`, можно вставить обратно в таблицу INSERT-ом без указания списка столбцов. ### ALIAS {#alias} From eab04674fa97cc5bf2562d5c3d9089c574f60379 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 01:59:53 +0200 Subject: [PATCH 033/117] Fix review comments --- src/Common/FileSegment.cpp | 15 ++++++--------- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 370a6a59a6f..4cd3f2e2eb3 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -411,11 +411,8 @@ void FileSegment::complete(std::lock_guard & cache_lock) void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock, bool allow_non_strict_checking) { bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); - bool download_can_continue = !is_last_holder - && download_state != State::DOWNLOADED - && download_state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; - if (!download_can_continue + if (is_last_holder && (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)) { size_t current_downloaded_size = getDownloadedSize(segment_lock); @@ -518,7 +515,7 @@ FileSegmentsHolder::~FileSegmentsHolder() for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) { - auto current_file_segment_it = file_segment_it++; + auto current_file_segment_it = file_segment_it; auto & file_segment = *current_file_segment_it; if (!cache) @@ -532,14 +529,14 @@ FileSegmentsHolder::~FileSegmentsHolder() file_segment->complete(cache_lock); - file_segments.erase(current_file_segment_it); + file_segment_it = file_segments.erase(current_file_segment_it); } catch (...) 
{ -#ifndef NDEBUG - throw; -#else +#ifdef NDEBUG tryLogCurrentException(__PRETTY_FUNCTION__); +#else + throw; #endif } } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 3aa7f8d9d65..af545d15c0e 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,6 +1,5 @@ #include "ThreadPoolRemoteFSReader.h" -#include #include #include #include From 7a70e3a6da522a41cd8fc53066d353062a74fab5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 14:08:24 +0200 Subject: [PATCH 034/117] Fix --- src/Common/FileSegment.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 4cd3f2e2eb3..03f1d358f2f 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -421,10 +421,8 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo download_state = State::SKIP_CACHE; LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString()); cache->remove(key(), offset(), cache_lock, segment_lock); - - detached = true; } - else if (is_last_holder) + else { /** * Only last holder of current file segment can resize the cell, @@ -434,10 +432,10 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo */ LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size); cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock); - - detached = true; } + detached = true; + if (cache_writer) { cache_writer->finalize(); From 4374415bd7adffcecb0a199592625b3c2dceb3f2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 01:44:30 +0200 Subject: [PATCH 035/117] Fix clang-tidy, style check --- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/StorageS3.cpp | 1 - src/Storages/getVirtualsForStorage.h | 2 +- src/TableFunctions/TableFunctionS3.cpp | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 08842de3bd6..d2cf77b3098 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -115,7 +115,7 @@ public: HDFSSource( StorageHDFSPtr storage_, - const Block & block_for_foramt_, + const Block & block_for_format_, const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 334a87f429c..b5e9ecbc0f7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -866,7 +866,6 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", arg_name); } - std::cerr << "\n\n\nkssenii: " << configuration.url << "\n\n\n\n\n"; } else { diff --git a/src/Storages/getVirtualsForStorage.h b/src/Storages/getVirtualsForStorage.h index 861b1e564fc..19e13425959 100644 --- a/src/Storages/getVirtualsForStorage.h +++ b/src/Storages/getVirtualsForStorage.h @@ -1,6 +1,6 @@ +#pragma once #include - namespace DB { diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index daba1ddb9ca..a80eab219e8 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -39,7 +39,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar else if (arg_name == 
"secret_access_key") s3_configuration.secret_access_key = arg_value->as()->value.safeGet(); else if (arg_name == "filename") - s3_configuration.url = std::filesystem::path(configuration.url) / arg_value->as()->value.safeGet(); + s3_configuration.url = std::filesystem::path(s3_configuration.url) / arg_value->as()->value.safeGet(); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); } From 46fd69d2bfe9a1d3fab17c05db127f07251ebbdd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Tue, 29 Mar 2022 09:15:27 -0400 Subject: [PATCH 036/117] Update ParserCreateQuery.h style fix --- src/Parsers/ParserCreateQuery.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index f2b43d13962..2f2c927c63b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -195,14 +195,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (!expr_parser.parse(pos, default_expression, expected)) return false; } - else + else if (s_ephemeral.ignore(pos, expected)) { - if (s_ephemeral.ignore(pos, expected)) - { - default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); - if (!expr_parser.parse(pos, default_expression, expected) && type) - default_expression = std::make_shared(DataTypeFactory::instance().get(type)->getDefault()); - } + default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); + if (!expr_parser.parse(pos, default_expression, expected) && type) + default_expression = std::make_shared(DataTypeFactory::instance().get(type)->getDefault()); } if (require_type && !type && !default_expression) From 0f10a2de9c593e0ff32a7c47ea2688602f727001 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Tue, 29 Mar 2022 09:20:05 -0400 Subject: [PATCH 037/117] Update 02205_ephemeral_1.sql tests without default added --- .../queries/0_stateless/02205_ephemeral_1.sql | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/queries/0_stateless/02205_ephemeral_1.sql b/tests/queries/0_stateless/02205_ephemeral_1.sql index 5d0565cbfc0..eecb074d42c 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.sql +++ b/tests/queries/0_stateless/02205_ephemeral_1.sql @@ -37,4 +37,43 @@ INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (21, 7); SELECT * FROM t_ephemeral_02205_1; +DROP TABLE IF EXISTS t_ephemeral_02205_1; + +# Test without default +CREATE TABLE t_ephemeral_02205_1 (x UInt32 DEFAULT y, y UInt32 EPHEMERAL, z UInt32 DEFAULT 5) ENGINE = Memory; + +DESCRIBE t_ephemeral_02205_1; + +# Test INSERT without columns list - should participate only ordinary columns (x, z) +INSERT INTO t_ephemeral_02205_1 VALUES (1, 2); +# SELECT * should only return ordinary columns (x, z) - ephemeral is not stored in the table +SELECT * FROM t_ephemeral_02205_1; + +TRUNCATE TABLE t_ephemeral_02205_1; + +INSERT INTO t_ephemeral_02205_1 VALUES (DEFAULT, 2); +SELECT * FROM t_ephemeral_02205_1; + +TRUNCATE TABLE t_ephemeral_02205_1; + +# Test INSERT using ephemerals default +INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (DEFAULT, DEFAULT); +SELECT * FROM t_ephemeral_02205_1; + +TRUNCATE TABLE t_ephemeral_02205_1; + +# Test INSERT using explicit ephemerals value +INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (DEFAULT, 7); +SELECT * FROM t_ephemeral_02205_1; + +# Test ALTER 
TABLE DELETE +ALTER TABLE t_ephemeral_02205_1 DELETE WHERE x = 7; +SELECT * FROM t_ephemeral_02205_1; + +TRUNCATE TABLE t_ephemeral_02205_1; + +# Test INSERT into column, defaulted to ephemeral, but explicitly provided with value +INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (21, 7); +SELECT * FROM t_ephemeral_02205_1; + DROP TABLE IF EXISTS t_ephemeral_02205_1; From f68f0cce04a06d3645274907fc8a5d801036b316 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Tue, 29 Mar 2022 09:24:00 -0400 Subject: [PATCH 038/117] Update 02205_ephemeral_1.reference tests without default is added --- tests/queries/0_stateless/02205_ephemeral_1.reference | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/02205_ephemeral_1.reference b/tests/queries/0_stateless/02205_ephemeral_1.reference index 6e98ffd2495..ba39033668f 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.reference +++ b/tests/queries/0_stateless/02205_ephemeral_1.reference @@ -6,3 +6,11 @@ z UInt32 DEFAULT 5 17 5 7 5 21 5 +x UInt32 DEFAULT y +y UInt32 EPHEMERAL 0 +z UInt32 DEFAULT 5 +1 2 +0 2 +0 5 +7 5 +21 5 From 33e28bcb18874dbd3514468ea81c577197a99f22 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Tue, 29 Mar 2022 08:26:35 -0600 Subject: [PATCH 039/117] Updated /development folder --- docs/en/development/_category_.yml | 4 +- docs/en/development/adding_test_queries.md | 6 + docs/en/development/architecture.md | 9 +- docs/en/development/browse-code.md | 11 +- docs/en/development/build-cross-arm.md | 9 +- docs/en/development/build-cross-osx.md | 9 +- docs/en/development/build-cross-riscv.md | 6 +- docs/en/development/build-osx.md | 19 +- docs/en/development/build.md | 7 +- docs/en/development/continuous-integration.md | 7 +- docs/en/development/contrib.md | 7 +- docs/en/development/developer-instruction.md | 28 +-- docs/en/development/style.md | 7 +- docs/en/development/tests.md | 9 +- .../database-engines/materialized-mysql.md | 2 +- docs/en/engines/database-engines/mysql.md | 2 - .../table-engines/integrations/hive.md | 2 +- docs/en/example-datasets/amplab-benchmark.md | 4 +- docs/en/example-datasets/brown-benchmark.md | 4 +- docs/en/example-datasets/cell-towers.md | 5 +- docs/en/example-datasets/criteo.md | 5 +- docs/en/example-datasets/github-events.md | 3 +- docs/en/example-datasets/menus.md | 5 +- docs/en/example-datasets/metrica.md | 6 +- docs/en/example-datasets/nyc-taxi.md | 6 +- docs/en/example-datasets/ontime.md | 6 +- docs/en/example-datasets/opensky.md | 8 +- docs/en/example-datasets/recipes.md | 3 +- docs/en/example-datasets/star-schema.md | 8 +- docs/en/example-datasets/uk-price-paid.md | 5 +- docs/en/example-datasets/wikistat.md | 7 +- docs/en/install.md | 27 ++- docs/en/interfaces/formats.md | 49 +--- docs/en/operations/caches.md | 2 +- docs/en/operations/clickhouse-keeper.md | 6 +- docs/en/operations/named-collections.md | 229 ------------------ docs/en/operations/quotas.md | 2 +- .../settings.md | 11 - .../operations/settings/memory-overcommit.md | 31 --- docs/en/operations/settings/settings.md | 43 +--- .../external-dicts-dict-layout.md | 26 +- .../dictionaries/internal-dicts.md | 2 + .../functions/array-functions.md | 12 - docs/en/sql-reference/functions/geo/h3.md | 181 -------------- .../functions/ip-address-functions.md | 36 +-- .../sql-reference/functions/json-functions.md | 4 +- .../functions/rounding-functions.md | 2 +- docs/en/sql-reference/functions/statistics.md | 48 ---- docs/en/whats-new/security-changelog.md | 43 ---- 49 
files changed, 160 insertions(+), 813 deletions(-) delete mode 100644 docs/en/operations/named-collections.md delete mode 100644 docs/en/operations/settings/memory-overcommit.md delete mode 100644 docs/en/sql-reference/functions/statistics.md diff --git a/docs/en/development/_category_.yml b/docs/en/development/_category_.yml index ef272510d47..e0291aa2aa6 100644 --- a/docs/en/development/_category_.yml +++ b/docs/en/development/_category_.yml @@ -1,7 +1,7 @@ position: 100 -label: 'Development' +label: 'Building ClickHouse' collapsible: true collapsed: true link: type: generated-index - title: Reference \ No newline at end of file + title: Building ClickHouse \ No newline at end of file diff --git a/docs/en/development/adding_test_queries.md b/docs/en/development/adding_test_queries.md index a73b0e1ac5d..9b993a96ed5 100644 --- a/docs/en/development/adding_test_queries.md +++ b/docs/en/development/adding_test_queries.md @@ -1,3 +1,9 @@ +--- +sidebar_label: Adding Test Queries +sidebar_position: 63 +description: Instructions on how to add a test case to ClickHouse continuous integration +--- + # How to add test queries to ClickHouse CI ClickHouse has hundreds (or even thousands) of features. Every commit gets checked by a complex set of tests containing many thousands of test cases. diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index b696c441374..d824ace0c65 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -1,11 +1,12 @@ --- -toc_priority: 62 -toc_title: Architecture Overview +sidebar_label: Architecture Overview +sidebar_position: 62 --- -# Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture} +# Overview of ClickHouse Architecture -ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution” and it helps lower the cost of actual data processing. +ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). +Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution” and it helps lower the cost of actual data processing. > This idea is nothing new. It dates back to the `APL` (A programming language, 1957) and its descendants: `A +` (APL dialect), `J` (1990), `K` (1993), and `Q` (programming language from Kx Systems, 2003). Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `VectorWise` system (also known as Actian Vector Analytic Database by Actian Corporation). diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md index 0fe8a46873c..da924c359ff 100644 --- a/docs/en/development/browse-code.md +++ b/docs/en/development/browse-code.md @@ -1,12 +1,13 @@ --- -toc_priority: 72 -toc_title: Source Code Browser +sidebar_label: Source Code Browser +sidebar_position: 72 +description: Various ways to browse and edit the source code --- -# Browse ClickHouse Source Code {#browse-clickhouse-source-code} +# Browse ClickHouse Source Code -You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 
It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. -If you’re interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favourite IDE. Vim and Emacs also count. +If you’re interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favorite IDE. Vim and Emacs also count. diff --git a/docs/en/development/build-cross-arm.md b/docs/en/development/build-cross-arm.md index eb99105a857..305c09ae217 100644 --- a/docs/en/development/build-cross-arm.md +++ b/docs/en/development/build-cross-arm.md @@ -1,11 +1,12 @@ --- -toc_priority: 67 -toc_title: Build on Linux for AARCH64 (ARM64) +sidebar_position: 67 +sidebar_label: Build on Linux for AARCH64 (ARM64) --- -# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} +# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. +This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. +This is intended for continuous integration checks that run on Linux servers. The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first. diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index c7e40013113..1dbd0ec6430 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -1,11 +1,12 @@ --- -toc_priority: 66 -toc_title: Build on Linux for Mac OS X +sidebar_position: 66 +sidebar_label: Build on Linux for Mac OS X --- -# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} +# How to Build ClickHouse on Linux for Mac OS X -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md). +This is for the case when you have a Linux machine and want to use it to build `clickhouse` binary that will run on OS X. +This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md). The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first. 
diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 5cdce710b41..94c0f47a05d 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -1,9 +1,9 @@ --- -toc_priority: 68 -toc_title: Build on Linux for RISC-V 64 +sidebar_position: 68 +sidebar_label: Build on Linux for RISC-V 64 --- -# How to Build ClickHouse on Linux for RISC-V 64 Architecture {#how-to-build-clickhouse-on-linux-for-risc-v-64-architecture} +# How to Build ClickHouse on Linux for RISC-V 64 Architecture As of writing (11.11.2021) building for risc-v considered to be highly experimental. Not all features can be enabled. diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 19d157bcd7d..5d5706f6e6b 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -1,16 +1,21 @@ --- -toc_priority: 65 -toc_title: Build on Mac OS X +sidebar_position: 65 +sidebar_label: Build on Mac OS X +description: How to build ClickHouse on Mac OS X --- -# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x} +# How to Build ClickHouse on Mac OS X -!!! info "You don't have to build ClickHouse yourself" - You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). - Follow `macOS (Intel)` or `macOS (Apple silicon)` installation instructions. +:::info You don't have to build ClickHouse yourself! +You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow **macOS (Intel)** or **macOS (Apple silicon)** installation instructions. +::: Build should work on x86_64 (Intel) and arm64 (Apple silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang. -It is always recommended to use vanilla `clang` compiler. It is possible to use XCode's `apple-clang` or `gcc` but it's strongly discouraged. +It is always recommended to use vanilla `clang` compiler. + +:::note +It is possible to use XCode's `apple-clang` or `gcc`, but it's strongly discouraged. +::: ## Install Homebrew {#install-homebrew} diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 5379fc37937..b128412a55e 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -1,9 +1,10 @@ --- -toc_priority: 64 -toc_title: Build on Linux +sidebar_position: 64 +sidebar_label: Build on Linux +description: How to build ClickHouse on Linux --- -# How to Build ClickHouse on Linux {#how-to-build-clickhouse-for-development} +# How to Build ClickHouse on Linux Supported platforms: diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index f9dfebff3f9..379b78a2c42 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -1,6 +1,7 @@ --- -toc_priority: 62 -toc_title: Continuous Integration Checks +sidebar_position: 62 +sidebar_label: Continuous Integration Checks +description: When you submit a pull request, some automated checks are ran for your code by the ClickHouse continuous integration (CI) system --- # Continuous Integration Checks @@ -71,8 +72,6 @@ This check means that the CI system started to process the pull request. When it Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally). 
If it fails, fix the style errors following the [code style guide](style.md). -Python code is checked with [black](https://github.com/psf/black/). - ### Report Details - [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt). diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 6c12a3d9055..7cbe32fdd8b 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -1,9 +1,10 @@ --- -toc_priority: 71 -toc_title: Third-Party Libraries Used +sidebar_position: 71 +sidebar_label: Third-Party Libraries +description: A list of third-party libraries used --- -# Third-Party Libraries Used {#third-party-libraries-used} +# Third-Party Libraries Used The list of third-party libraries: diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index db78637f104..291e57fef66 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -1,11 +1,12 @@ --- -toc_priority: 61 -toc_title: For Beginners +sidebar_position: 61 +sidebar_label: Getting Started +description: Prerequisites and an overview of how to build ClickHouse --- -# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction} +# Getting Started Guide for Building ClickHouse -Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. +The building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. @@ -229,25 +230,6 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion. -## Debugging - -Many graphical IDEs offer with an integrated debugger but you can also use a standalone debugger. 
- -### GDB - -### LLDB - - # tell LLDB where to find the source code - settings set target.source-map /path/to/build/dir /path/to/source/dir - - # configure LLDB to display code before/after currently executing line - settings set stop-line-count-before 10 - settings set stop-line-count-after 10 - - target create ./clickhouse-client - # - process launch -- --query="SELECT * FROM TAB" - ## Writing Code {#writing-code} The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 03121880555..82cd9273680 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,9 +1,10 @@ --- -toc_priority: 69 -toc_title: C++ Guide +sidebar_position: 69 +sidebar_label: C++ Guide +description: A list of recommendations regarding coding style, naming convention, formatting and more --- -# How to Write C++ Code {#how-to-write-c-code} +# How to Write C++ Code ## General Recommendations {#general-recommendations} diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index be9fc7907af..29b69f0b697 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,11 +1,12 @@ --- -toc_priority: 70 -toc_title: Testing +sidebar_position: 70 +sidebar_label: Testing +description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. --- -# ClickHouse Testing {#clickhouse-testing} +# ClickHouse Testing -## Functional Tests {#functional-tests} +## Functional Tests Functional tests are the most simple and convenient to use. Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index d7dcf21cb02..3dc14c87be7 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -76,7 +76,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | FLOAT | [Float32](../../sql-reference/data-types/float.md) | | DOUBLE | [Float64](../../sql-reference/data-types/float.md) | | DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | -| DATE, NEWDATE | [Date32](../../sql-reference/data-types/date32.md) | +| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | | YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index df4965b1f8c..c5a1bba44b2 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -49,8 +49,6 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md). 
-Because of the ClickHouse date type has a different range from the MySQL date range,If the MySQL date type is out of the range of ClickHouse date, you can use the setting mysql_datatypes_support_level to modify the mapping from the MySQL date type to the Clickhouse date type: date2Date32 (convert MySQL's date type to ClickHouse Date32) or date2String(convert MySQL's date type to ClickHouse String,this is usually used when your mysql data is less than 1925) or default(convert MySQL's date type to ClickHouse Date). - [Nullable](../../sql-reference/data-types/nullable.md) is supported. ## Global Variables Support {#global-variables-support} diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index 61147467690..b804b9c2279 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/en/example-datasets/amplab-benchmark.md b/docs/en/example-datasets/amplab-benchmark.md index b410a3595ec..a87ac53e2e3 100644 --- a/docs/en/example-datasets/amplab-benchmark.md +++ b/docs/en/example-datasets/amplab-benchmark.md @@ -1,6 +1,6 @@ --- -toc_priority: 19 -toc_title: AMPLab Big Data Benchmark +sidebar_label: AMPLab Big Data Benchmark +description: A benchmark dataset used for comparing the performance of data warehousing solutions. --- # AMPLab Big Data Benchmark {#amplab-big-data-benchmark} diff --git a/docs/en/example-datasets/brown-benchmark.md b/docs/en/example-datasets/brown-benchmark.md index 93049d1f76a..0960756dbe9 100644 --- a/docs/en/example-datasets/brown-benchmark.md +++ b/docs/en/example-datasets/brown-benchmark.md @@ -1,6 +1,6 @@ --- -toc_priority: 20 -toc_title: Brown University Benchmark +sidebar_label: Brown University Benchmark +description: A new analytical benchmark for machine-generated log data --- # Brown University Benchmark diff --git a/docs/en/example-datasets/cell-towers.md b/docs/en/example-datasets/cell-towers.md index 1f681fc32d8..7a35a28faa6 100644 --- a/docs/en/example-datasets/cell-towers.md +++ b/docs/en/example-datasets/cell-towers.md @@ -1,9 +1,8 @@ --- -toc_priority: 21 -toc_title: Cell Towers +sidebar_label: Cell Towers --- -# Cell Towers {#cell-towers} +# Cell Towers This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. 
diff --git a/docs/en/example-datasets/criteo.md b/docs/en/example-datasets/criteo.md index 08298172c70..2d1c700d15c 100644 --- a/docs/en/example-datasets/criteo.md +++ b/docs/en/example-datasets/criteo.md @@ -1,9 +1,8 @@ --- -toc_priority: 18 -toc_title: Terabyte Click Logs from Criteo +sidebar_label: Terabyte Click Logs from Criteo --- -# Terabyte of Click Logs from Criteo {#terabyte-of-click-logs-from-criteo} +# Terabyte of Click Logs from Criteo Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/ diff --git a/docs/en/example-datasets/github-events.md b/docs/en/example-datasets/github-events.md index e470e88b182..3a0cbc3324d 100644 --- a/docs/en/example-datasets/github-events.md +++ b/docs/en/example-datasets/github-events.md @@ -1,6 +1,5 @@ --- -toc_priority: 11 -toc_title: GitHub Events +sidebar_label: GitHub Events --- # GitHub Events Dataset diff --git a/docs/en/example-datasets/menus.md b/docs/en/example-datasets/menus.md index 665944b3e6f..c572dcdb491 100644 --- a/docs/en/example-datasets/menus.md +++ b/docs/en/example-datasets/menus.md @@ -1,9 +1,8 @@ --- -toc_priority: 21 -toc_title: Menus +sidebar_label: New York Public Library "What's on the Menu?" Dataset --- -# New York Public Library "What's on the Menu?" Dataset {#menus-dataset} +# New York Public Library "What's on the Menu?" Dataset The dataset is created by the New York Public Library. It contains historical data on the menus of hotels, restaurants and cafes with the dishes along with their prices. diff --git a/docs/en/example-datasets/metrica.md b/docs/en/example-datasets/metrica.md index d9d8beb0181..2194ad85091 100644 --- a/docs/en/example-datasets/metrica.md +++ b/docs/en/example-datasets/metrica.md @@ -1,9 +1,9 @@ --- -toc_priority: 15 -toc_title: Web Analytics Data +sidebar_label: Web Analytics Data +description: Dataset consists of two tables containing anonymized web analytics data with hits and visits --- -# Anonymized Web Analytics Data {#anonymized-web-analytics-data} +# Anonymized Web Analytics Data Dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`). diff --git a/docs/en/example-datasets/nyc-taxi.md b/docs/en/example-datasets/nyc-taxi.md index a7825988695..da7be71d46b 100644 --- a/docs/en/example-datasets/nyc-taxi.md +++ b/docs/en/example-datasets/nyc-taxi.md @@ -1,9 +1,9 @@ --- -toc_priority: 20 -toc_title: New York Taxi Data +sidebar_label: New York Taxi Data +description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) 
trips originating in New York City since 2009 --- -# New York Taxi Data {#new-york-taxi-data} +# New York Taxi Data This dataset can be obtained in two ways: diff --git a/docs/en/example-datasets/ontime.md b/docs/en/example-datasets/ontime.md index efc807b75fa..51df6186bd5 100644 --- a/docs/en/example-datasets/ontime.md +++ b/docs/en/example-datasets/ontime.md @@ -1,9 +1,9 @@ --- -toc_priority: 21 -toc_title: OnTime +sidebar_label: OnTime Airline Flight Data +description: Dataset containing the on-time performance of airline flights --- -# OnTime {#ontime} +# OnTime This dataset can be obtained in two ways: diff --git a/docs/en/example-datasets/opensky.md b/docs/en/example-datasets/opensky.md index 2d901397cb2..f55ebc79590 100644 --- a/docs/en/example-datasets/opensky.md +++ b/docs/en/example-datasets/opensky.md @@ -1,11 +1,11 @@ --- -toc_priority: 20 -toc_title: OpenSky +sidebar_label: Air Traffic Data +description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. --- -# Crowdsourced air traffic data from The OpenSky Network 2020 {#opensky} +# Crowdsourced air traffic data from The OpenSky Network 2020 -"The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic". +The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic. Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd diff --git a/docs/en/example-datasets/recipes.md b/docs/en/example-datasets/recipes.md index 70a56a0547f..9a27255e6a8 100644 --- a/docs/en/example-datasets/recipes.md +++ b/docs/en/example-datasets/recipes.md @@ -1,6 +1,5 @@ --- -toc_priority: 16 -toc_title: Recipes Dataset +sidebar_label: Recipes Dataset --- # Recipes Dataset diff --git a/docs/en/example-datasets/star-schema.md b/docs/en/example-datasets/star-schema.md index 14fa7cef654..a8949ef74b9 100644 --- a/docs/en/example-datasets/star-schema.md +++ b/docs/en/example-datasets/star-schema.md @@ -1,9 +1,11 @@ --- -toc_priority: 16 -toc_title: Star Schema Benchmark +sidebar_label: Star Schema Benchmark +description: "Dataset based on the TPC-H dbgen source. The coding style and architecture +follows the TPCH dbgen." --- -# Star Schema Benchmark {#star-schema-benchmark} +# Star Schema Benchmark + Compiling dbgen: diff --git a/docs/en/example-datasets/uk-price-paid.md b/docs/en/example-datasets/uk-price-paid.md index 4b0ba25907d..e0f20639aea 100644 --- a/docs/en/example-datasets/uk-price-paid.md +++ b/docs/en/example-datasets/uk-price-paid.md @@ -1,9 +1,8 @@ --- -toc_priority: 20 -toc_title: UK Property Price Paid +sidebar_label: UK Property Price Paid --- -# UK Property Price Paid {#uk-property-price-paid} +# UK Property Price Paid The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995. The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse. 
diff --git a/docs/en/example-datasets/wikistat.md b/docs/en/example-datasets/wikistat.md index 3e3f7b164ce..1185338a1da 100644 --- a/docs/en/example-datasets/wikistat.md +++ b/docs/en/example-datasets/wikistat.md @@ -1,11 +1,10 @@ --- -toc_priority: 17 -toc_title: WikiStat +sidebar_label: WikiStat --- -# WikiStat {#wikistat} +# WikiStat -See: http://dumps.wikimedia.org/other/pagecounts-raw/ +See http://dumps.wikimedia.org/other/pagecounts-raw/ for details. Creating a table: diff --git a/docs/en/install.md b/docs/en/install.md index b499b584865..ecb4eb93042 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -188,18 +188,29 @@ sudo ./clickhouse install ### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux} -For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). +For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). These builds are not recommended for use in production environments because they are less thoroughly tested, and they also only contain a subset of ClickHouse features available. -- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse` -- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` -- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse` -- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse` -After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. +- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) + ```bash + curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse + ``` +- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) + ```bash + curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse + ``` +- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) + ```bash + curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse + ``` +- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) + ```bash + curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse + ``` -Run `sudo ./clickhouse install` if you want to install clickhouse system-wide (also with needed configuration files, configuring users etc.). After that run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. +Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. 
-These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so on your own risk. They also have only a subset of ClickHouse features available. +Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. ### From Sources {#from-sources} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a7066fca087..058c9b6fd4a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -51,7 +51,6 @@ The supported formats are: | [PrettySpace](#prettyspace) | ✗ | ✔ | | [Protobuf](#protobuf) | ✔ | ✔ | | [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [ProtobufList](#protobuflist) | ✔ | ✔ | | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | @@ -65,7 +64,7 @@ The supported formats are: | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | | [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✗ | | [Regexp](#data-format-regexp) | ✔ | ✗ | | [RawBLOB](#rawblob) | ✔ | ✔ | | [MsgPack](#msgpack) | ✔ | ✔ | @@ -402,7 +401,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, strings are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv @@ -410,7 +409,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR \*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information. -When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Strings can also be arranged without quotes. 
In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing strings without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. +When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled, empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too. @@ -1231,38 +1230,7 @@ See also [how to read/write length-delimited protobuf messages in popular langua ## ProtobufSingle {#protobufsingle} -Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without length delimiter. -As a result, only a single table row can be written/read. - -## ProtobufList {#protobuflist} - -Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with fixed name "Envelope". - -Usage example: - -``` sql -SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType' -``` - -``` bash -cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'" -``` - -where the file `schemafile.proto` looks like this: - -``` capnp -syntax = "proto3"; - -message Envelope { - message MessageType { - string name = 1; - string surname = 2; - uint32 birthDate = 3; - repeated string phoneNumbers = 4; - }; - MessageType row = 1; -}; -``` +Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. 
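As with `Protobuf`, the schema is supplied through the `format_schema` setting; a minimal sketch with a hypothetical table and schema file:

```sql
-- Assumes a schema file `schemafile.proto` defining `MessageType`; all names are illustrative.
SELECT * FROM test.table LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = 'schemafile:MessageType'
```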
## Avro {#data-format-avro} @@ -1396,8 +1364,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1454,8 +1421,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1517,8 +1483,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 9aa6419d89c..279204a8af1 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -5,7 +5,7 @@ toc_title: Caches # Cache Types {#cache-types} -When performing queries, ClickHouse uses different caches. +When performing queries, ClichHouse uses different caches. Main cache types: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index a8ca2079070..35ec5d858f5 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -55,7 +55,7 @@ Internal coordination settings are located in `..` section and contain servers description. @@ -121,7 +121,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. 
There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a allow list configuration `four_letter_word_allow_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro". You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -201,7 +201,7 @@ Server stats reset. ``` server_id=1 tcp_port=2181 -four_letter_word_allow_list=* +four_letter_word_white_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md deleted file mode 100644 index dce7938f98b..00000000000 --- a/docs/en/operations/named-collections.md +++ /dev/null @@ -1,229 +0,0 @@ ---- -toc_priority: 69 -toc_title: "Named connections" ---- - -# Storing details for connecting to external sources in configuration files {#named-collections} - -Details for connecting to external sources (dictionaries, tables, table functions) can be saved -in configuration files and thus simplify the creation of objects and hide credentials -from users with only SQL access. - -Parameters can be set in XML `CSV` and overridden in SQL `, format = 'TSV'`. -The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`. - -Named connections are stored in the `config.xml` file of the ClickHouse server in the `` section and are applied when ClickHouse starts. - -Example of configuration: -```xml -$ cat /etc/clickhouse-server/config.d/named_collections.xml - - - ... - - -``` - -## Named connections for accessing S3. - -The description of parameters see [s3 Table Function](../sql-reference/table-functions/s3.md). - -Example of configuration: -```xml - - - - AKIAIOSFODNN7EXAMPLE - wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - CSV - - - -``` - -### Example of using named connections with the s3 function - -```sql -INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', - format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') -SELECT * FROM numbers(10000); - -SELECT count() -FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') - -┌─count()─┐ -│ 10000 │ -└─────────┘ -1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.) -``` - -### Example of using named connections with an S3 table - -```sql -CREATE TABLE s3_engine_table (number Int64) -ENGINE=S3(s3_mydata, url='https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', format = 'TSV') -SETTINGS input_format_with_names_use_header = 0; - -SELECT * FROM s3_engine_table LIMIT 3; -┌─number─┐ -│ 0 │ -│ 1 │ -│ 2 │ -└────────┘ -``` - -## Named connections for accessing MySQL database - -The description of parameters see [mysql](../sql-reference/table-functions/mysql.md). 
- -Example of configuration: -```xml - - - - myuser - mypass - 127.0.0.1 - 3306 - test - 8 - 1 - 1 - - - -``` - -### Example of using named connections with the mysql function - -```sql -SELECT count() FROM mysql(mymysql, table = 'test'); - -┌─count()─┐ -│ 3 │ -└─────────┘ -``` - -### Example of using named connections with an MySQL table - -```sql -CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0); -SELECT count() FROM mytable; - -┌─count()─┐ -│ 3 │ -└─────────┘ -``` - -### Example of using named connections with database with engine MySQL - -```sql -CREATE DATABASE mydatabase ENGINE = MySQL(mymysql); - -SHOW TABLES FROM mydatabase; - -┌─name───┐ -│ source │ -│ test │ -└────────┘ -``` - -### Example of using named connections with an external dictionary with source MySQL - -```sql -CREATE DICTIONARY dict (A Int64, B String) -PRIMARY KEY A -SOURCE(MYSQL(NAME mymysql TABLE 'source')) -LIFETIME(MIN 1 MAX 2) -LAYOUT(HASHED()); - -SELECT dictGet('dict', 'B', 2); - -┌─dictGet('dict', 'B', 2)─┐ -│ two │ -└─────────────────────────┘ -``` - -## Named connections for accessing PostgreSQL database - -The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). - -Example of configuration: -```xml - - - - pguser - jw8s0F4 - 127.0.0.1 - 5432 - test - test_schema - 8 - - - -``` - -### Example of using named connections with the postgresql function - -```sql -SELECT * FROM postgresql(mypg, table = 'test'); - -┌─a─┬─b───┐ -│ 2 │ two │ -│ 1 │ one │ -└───┴─────┘ - - -SELECT * FROM postgresql(mypg, table = 'test', schema = 'public'); - -┌─a─┐ -│ 1 │ -│ 2 │ -│ 3 │ -└───┘ -``` - - -### Example of using named connections with database with engine PostgreSQL - -```sql -CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public'); - -SELECT * FROM mypgtable; - -┌─a─┐ -│ 1 │ -│ 2 │ -│ 3 │ -└───┘ -``` - -### Example of using named connections with database with engine PostgreSQL - -```sql -CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg); - -SHOW TABLES FROM mydatabase - -┌─name─┐ -│ test │ -└──────┘ -``` - -### Example of using named connections with an external dictionary with source POSTGRESQL - -```sql -CREATE DICTIONARY dict (a Int64, b String) -PRIMARY KEY a -SOURCE(POSTGRESQL(NAME mypg TABLE test)) -LIFETIME(MIN 1 MAX 2) -LAYOUT(HASHED()); - -SELECT dictGet('dict', 'b', 2); - -┌─dictGet('dict', 'b', 2)─┐ -│ two │ -└─────────────────────────┘ -``` diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index 6c6fbbf9cfb..6d22a5f2a33 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -101,7 +101,7 @@ Quotas can use the “quota key” feature to report on resources for multiple k diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index cf3f92580aa..469a66d460f 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -13,18 +13,10 @@ Alias: `INET_NTOA`. ## IPv4StringToNum(s) {#ipv4stringtonums} -The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it throws exception. +The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0. Alias: `INET_ATON`. -## IPv4StringToNumOrDefault(s) {#ipv4stringtonums} - -Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns 0. 
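To make the `IPv4NumToString`/`IPv4StringToNum` pair described above concrete, a small round-trip sketch (the address is arbitrary):

```sql
SELECT
    IPv4NumToString(toUInt32(3232235521)) AS ip_string,  -- '192.168.0.1'
    IPv4StringToNum('192.168.0.1') AS ip_number;         -- 3232235521
```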
- -## IPv4StringToNumOrNull(s) {#ipv4stringtonums} - -Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns null. - ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} Similar to IPv4NumToString, but using xxx instead of the last octet. @@ -131,7 +123,7 @@ LIMIT 10 ## IPv6StringToNum {#ipv6stringtonums} -The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it throws exception. +The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it returns a string of null bytes. If the input string contains a valid IPv4 address, returns its IPv6 equivalent. HEX can be uppercase or lowercase. @@ -176,14 +168,6 @@ Result: - [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4). -## IPv6StringToNumOrDefault(s) {#ipv6stringtonums} - -Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns 0. - -## IPv6StringToNumOrNull(s) {#ipv6stringtonums} - -Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns null. - ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -277,14 +261,6 @@ SELECT └───────────────────────────────────┴──────────────────────────┘ ``` -## toIPv4OrDefault(string) {#toipv4ordefaultstring} - -Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. - -## toIPv4OrNull(string) {#toipv4ornullstring} - -Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. - ## toIPv6 {#toipv6string} Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. @@ -341,14 +317,6 @@ Result: └─────────────────────┘ ``` -## IPv6StringToNumOrDefault(s) {#toipv6ordefaultstring} - -Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0. - -## IPv6StringToNumOrNull(s) {#toipv6ornullstring} - -Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. - ## isIPv4String {#isipv4string} Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`. diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 8270864de74..d5622ac5fdc 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,7 +5,9 @@ toc_title: JSON # Functions for Working with JSON {#functions-for-working-with-json} -ClickHouse has special functions for working with this JSON. The `visitParam` functions make strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: +ClickHouse has special functions for working with this JSON. All the JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. + +The following assumptions are made: 1. The field name (function argument) must be a constant. 2. The field name is somehow canonically encoded in JSON. 
For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index c9044c62ca4..b224e7ab406 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -189,7 +189,7 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro ## roundDuration(num) {#rounddurationnum} -Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000. This function was specifically implemented for a web analytics use case for reporting on session lengths. +Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000. ## roundAge(num) {#roundagenum} diff --git a/docs/en/sql-reference/functions/statistics.md b/docs/en/sql-reference/functions/statistics.md deleted file mode 100644 index 3f337b05cbc..00000000000 --- a/docs/en/sql-reference/functions/statistics.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -toc_priority: 69 -toc_title: Statistics ---- - -# Functions for Working with Statistics {#functions-for-working-with-statistics} - -# proportionsZTest {#proportionsztest} - -Applies proportion z-test to samples from two populations (X and Y). The alternative is 'two-sided'. - -**Syntax** - -``` sql -proportionsZTest(successes_x, successes_y, trials_x, trials_y, significance_level, usevar) -``` - -**Arguments** - -- `successes_x` — The number of successes for X in trials. -- `successes_y` — The number of successes for X in trials. -- `trials_x` — The number of trials for X. -- `trials_y` — The number of trials for Y. -- `significance_level` -- `usevar` - It can be `'pooled'` or `'unpooled'`. - - `'pooled'` - The variance of the two populations are assumed to be equal. - - `'unpooled'` - The assumption of equal variances is dropped. - -**Returned value** - -- A tuple with the (z-statistic, p-value, confidence-interval-lower, confidence-interval-upper). - -Type: [Tuple](../../sql-reference/data-types/tuple.md). - -**Example** - -Query: - -``` sql -SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); -``` - -Result: - -``` text -(-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) -``` - diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md index aad3da91aa4..0a5c926f227 100644 --- a/docs/en/whats-new/security-changelog.md +++ b/docs/en/whats-new/security-changelog.md @@ -4,49 +4,6 @@ sidebar_position: 100 keywords: [clickhouse, security, changelog] description: Security Changelog --- -## Fixed in ClickHouse 21.10.2.15, 2021-10-18 {#fixed-in-clickhouse-release-21-10-2-215-2021-10-18} - -### CVE-2021-43304 {#cve-2021-43304} - -Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. - -Credits: JFrog Security Research Team - -### CVE-2021-43305 {#cve-2021-43305} - -Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. 
There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. This issue is very similar to CVE-2021-43304, but the vulnerable copy operation is in a different wildCopy call. - -Credits: JFrog Security Research Team - -### CVE-2021-42387 {#cve-2021-42387} - -Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the upper bounds of the source of the copy operation. - -Credits: JFrog Security Research Team - -### CVE-2021-42388 {#cve-2021-42388} - -Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the lower bounds of the source of the copy operation. - -Credits: JFrog Security Research Team - -### CVE-2021-42389 {#cve-2021-42389} - -Divide-by-zero in Clickhouse's Delta compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. - -Credits: JFrog Security Research Team - -### CVE-2021-42390 {#cve-2021-42390} - -Divide-by-zero in Clickhouse's DeltaDouble compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. - -Credits: JFrog Security Research Team - -### CVE-2021-42391 {#cve-2021-42391} - -Divide-by-zero in Clickhouse's Gorilla compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. 
- -Credits: JFrog Security Research Team # Security Changelog From e2370d9622584efcd9a1001103f82413453503cb Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 29 Mar 2022 19:11:36 +0200 Subject: [PATCH 040/117] Update 02235_remote_fs_cache_stress.sh --- tests/queries/0_stateless/02235_remote_fs_cache_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index 65caf0eaef6..7f1af5e854c 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long +# Tags: no-fasttest, long, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From caacc7d385575dc37a16fe57179aa3ba21a86206 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 29 Mar 2022 17:25:34 +0000 Subject: [PATCH 041/117] add quota for written bytes --- src/Access/Common/QuotaDefs.cpp | 5 +++ src/Access/Common/QuotaDefs.h | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 9 ++++ src/Interpreters/InterpreterInsertQuery.cpp | 5 ++- .../Transforms/CountingTransform.cpp | 10 ++++- src/Processors/Transforms/CountingTransform.h | 10 ++++- .../02247_written_bytes_quota.reference | 9 ++++ .../0_stateless/02247_written_bytes_quota.sh | 45 +++++++++++++++++++ 8 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02247_written_bytes_quota.reference create mode 100755 tests/queries/0_stateless/02247_written_bytes_quota.sh diff --git a/src/Access/Common/QuotaDefs.cpp b/src/Access/Common/QuotaDefs.cpp index 5d36a0bdd01..f9f8a56d534 100644 --- a/src/Access/Common/QuotaDefs.cpp +++ b/src/Access/Common/QuotaDefs.cpp @@ -107,6 +107,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type) static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */); return info; } + case QuotaType::WRITTEN_BYTES: + { + static const auto info = make_info("WRITTEN_BYTES", 1); + return info; + } case QuotaType::MAX: break; } throw Exception("Unexpected quota type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h index 7a69f811ea5..dfe2b56ef31 100644 --- a/src/Access/Common/QuotaDefs.h +++ b/src/Access/Common/QuotaDefs.h @@ -20,6 +20,7 @@ enum class QuotaType READ_ROWS, /// Number of rows read from tables. READ_BYTES, /// Number of bytes read from tables. EXECUTION_TIME, /// Total amount of query execution time in nanoseconds. + WRITTEN_BYTES, /// Number of bytes written to tables. MAX }; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 6102066f85b..c6f63c3c36e 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,14 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) copyData(*read_buf, write_buf); } + if (auto quota = query_context->getQuota()) + { + /// Do not throw if quota exceded right now, because + /// bytes are not written now actually. 
+ quota->checkExceeded(QuotaType::WRITTEN_BYTES); + quota->used(QuotaType::WRITTEN_BYTES, bytes.size(), /*check_exceeded=*/ false); + } + auto entry = std::make_shared(std::move(bytes), query_context->getCurrentQueryId()); InsertQuery key{query, settings}; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index ce369182766..49e63a91721 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -51,6 +52,8 @@ InterpreterInsertQuery::InterpreterInsertQuery( , async_insert(async_insert_) { checkStackSize(); + if (auto quota = getContext()->getQuota()) + quota->checkExceeded(QuotaType::WRITTEN_BYTES); } @@ -269,7 +272,7 @@ Chain InterpreterInsertQuery::buildChainImpl( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0)); } - auto counting = std::make_shared(out.getInputHeader(), thread_status); + auto counting = std::make_shared(out.getInputHeader(), thread_status, getContext()->getQuota()); counting->setProcessListElement(context_ptr->getProcessListElement()); out.addSource(std::move(counting)); diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index eb191b36586..daf154d28ca 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -18,11 +18,17 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { + if (quota) + { + /// Do not throw if quota exceded right now, because + /// bytes are not written now actually. + quota->checkExceeded(QuotaType::WRITTEN_BYTES); + quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes(), /*check_exceeded=*/ false); + } + Progress local_progress{WriteProgress(chunk.getNumRows(), chunk.bytes())}; progress.incrementPiecewiseAtomically(local_progress); - //std::cerr << "============ counting adding progress for " << static_cast(thread_status) << ' ' << chunk.getNumRows() << " rows\n"; - if (thread_status) { thread_status->performance_counters.increment(ProfileEvents::InsertedRows, local_progress.written_rows); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 877f6a0a543..0386a7f71af 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -14,8 +15,12 @@ class ThreadStatus; class CountingTransform final : public ExceptionKeepingTransform { public: - explicit CountingTransform(const Block & header, ThreadStatus * thread_status_ = nullptr) - : ExceptionKeepingTransform(header, header), thread_status(thread_status_) {} + explicit CountingTransform( + const Block & header, + ThreadStatus * thread_status_ = nullptr, + std::shared_ptr quota_ = nullptr) + : ExceptionKeepingTransform(header, header) + , thread_status(thread_status_), quota(std::move(quota_)) {} String getName() const override { return "CountingTransform"; } @@ -47,6 +52,7 @@ protected: ProgressCallback progress_callback; QueryStatus * process_elem = nullptr; ThreadStatus * thread_status = nullptr; + std::shared_ptr quota; Chunk cur_chunk; }; diff --git a/tests/queries/0_stateless/02247_written_bytes_quota.reference b/tests/queries/0_stateless/02247_written_bytes_quota.reference new file mode 100644 index 00000000000..37ec8358721 --- /dev/null +++ b/tests/queries/0_stateless/02247_written_bytes_quota.reference @@ -0,0 
+1,9 @@ +QUOTA_EXPIRED +QUOTA_EXPIRED +1 +2 +QUOTA_EXPIRED +QUOTA_EXPIRED +QUOTA_EXPIRED +1 +50 diff --git a/tests/queries/0_stateless/02247_written_bytes_quota.sh b/tests/queries/0_stateless/02247_written_bytes_quota.sh new file mode 100755 index 00000000000..17e66a6bc75 --- /dev/null +++ b/tests/queries/0_stateless/02247_written_bytes_quota.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS written_bytes_02247" +${CLICKHOUSE_CLIENT} -q "DROP ROLE IF EXISTS r02247" +${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS u02247" +${CLICKHOUSE_CLIENT} -q "DROP QUOTA IF EXISTS q02247" + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE written_bytes_02247(s String) ENGINE = Memory" + +${CLICKHOUSE_CLIENT} -q "CREATE ROLE r02247" +${CLICKHOUSE_CLIENT} -q "CREATE USER u02247" +${CLICKHOUSE_CLIENT} -q "GRANT ALL ON *.* TO r02247" +${CLICKHOUSE_CLIENT} -q "GRANT r02247 to u02247" +${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 10 TO r02247" + +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED + +${CLICKHOUSE_CLIENT} -q "SELECT written_bytes > 10 FROM system.quotas_usage WHERE quota_name = 'q02247'" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM written_bytes_02247" + +${CLICKHOUSE_CLIENT} -q "DROP QUOTA q02247" +${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 100 TO r02247" +${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE written_bytes_02247" + +${CLICKHOUSE_CLIENT} --user u02247 -q "INSERT INTO written_bytes_02247 SELECT toString(number) FROM numbers(50)" + +${CLICKHOUSE_CLIENT} --user u02247 -q "INSERT INTO written_bytes_02247 SELECT toString(number) FROM numbers(1)" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED + +${CLICKHOUSE_CLIENT} -q "SELECT written_bytes > 100 FROM system.quotas_usage WHERE quota_name = 'q02247'" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM written_bytes_02247" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS written_bytes_02247" +${CLICKHOUSE_CLIENT} -q "DROP ROLE IF EXISTS r02247" +${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS u02247" +${CLICKHOUSE_CLIENT} -q "DROP QUOTA IF EXISTS q02247" From 00ddb72eead2cf03006c32fb5bd9b2078951009e Mon Sep 17 00:00:00 2001 From: rfraposa Date: Tue, 29 Mar 2022 17:43:34 -0600 Subject: [PATCH 042/117] Update /engines docs --- docs/en/engines/database-engines/atomic.md | 17 ++- docs/en/engines/database-engines/index.md | 8 +- docs/en/engines/database-engines/lazy.md | 4 +- .../database-engines/materialized-mysql.md | 35 +++-- .../materialized-postgresql.md | 143 +++++++++--------- docs/en/engines/database-engines/mysql.md | 11 +- .../en/engines/database-engines/postgresql.md | 4 +- 
.../en/engines/database-engines/replicated.md | 4 +- docs/en/engines/database-engines/sqlite.md | 4 +- .../integrations/ExternalDistributed.md | 7 +- .../integrations/embedded-rocksdb.md | 4 +- .../table-engines/integrations/hdfs.md | 9 +- .../table-engines/integrations/hive.md | 6 +- .../table-engines/integrations/index.md | 4 +- .../table-engines/integrations/jdbc.md | 4 +- .../table-engines/integrations/kafka.md | 9 +- .../integrations/materialized-postgresql.md | 11 +- .../table-engines/integrations/mongodb.md | 4 +- .../table-engines/integrations/mysql.md | 6 +- .../table-engines/integrations/odbc.md | 4 +- .../table-engines/integrations/postgresql.md | 9 +- .../table-engines/integrations/rabbitmq.md | 4 +- .../engines/table-engines/integrations/s3.md | 11 +- .../table-engines/integrations/sqlite.md | 9 +- .../engines/table-engines/log-family/index.md | 5 +- .../engines/table-engines/log-family/log.md | 3 + .../mergetree-family/aggregatingmergetree.md | 9 +- .../mergetree-family/collapsingmergetree.md | 9 +- .../custom-partitioning-key.md | 21 ++- .../mergetree-family/graphitemergetree.md | 27 ++-- .../table-engines/mergetree-family/index.md | 5 +- .../mergetree-family/mergetree.md | 23 +-- .../mergetree-family/replacingmergetree.md | 14 +- .../mergetree-family/replication.md | 9 +- .../mergetree-family/summingmergetree.md | 9 +- .../versionedcollapsingmergetree.md | 9 +- .../engines/table-engines/special/buffer.md | 11 +- .../table-engines/special/dictionary.md | 6 +- .../table-engines/special/distributed.md | 30 ++-- .../table-engines/special/external-data.md | 5 +- docs/en/engines/table-engines/special/file.md | 11 +- .../engines/table-engines/special/generate.md | 6 +- .../en/engines/table-engines/special/index.md | 4 +- docs/en/engines/table-engines/special/join.md | 11 +- .../table-engines/special/materializedview.md | 6 +- .../engines/table-engines/special/memory.md | 6 +- .../en/engines/table-engines/special/merge.md | 8 +- docs/en/engines/table-engines/special/null.md | 12 +- docs/en/engines/table-engines/special/set.md | 6 +- docs/en/engines/table-engines/special/url.md | 6 +- docs/en/engines/table-engines/special/view.md | 6 +- docs/en/install.md | 2 +- docs/en/operations/settings/index.md | 6 +- 53 files changed, 337 insertions(+), 279 deletions(-) diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md index 1e555a0a502..878307121aa 100644 --- a/docs/en/engines/database-engines/atomic.md +++ b/docs/en/engines/database-engines/atomic.md @@ -1,9 +1,9 @@ --- -toc_priority: 32 -toc_title: Atomic +sidebar_label: Atomic +sidebar_position: 10 --- -# Atomic {#atomic} +# Atomic It supports non-blocking [DROP TABLE](#drop-detach-table) and [RENAME TABLE](#rename-table) queries and atomic [EXCHANGE TABLES](#exchange-tables) queries. `Atomic` database engine is used by default. @@ -18,14 +18,21 @@ CREATE DATABASE test [ENGINE = Atomic]; ### Table UUID {#table-uuid} All tables in database `Atomic` have persistent [UUID](../../sql-reference/data-types/uuid.md) and store data in directory `/clickhouse_path/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`, where `xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy` is UUID of the table. -Usually, the UUID is generated automatically, but the user can also explicitly specify the UUID in the same way when creating the table (this is not recommended). 
To display the `SHOW CREATE` query with the UUID you can use setting [show_table_uuid_in_table_create_query_if_not_nil](../../operations/settings/settings.md#show_table_uuid_in_table_create_query_if_not_nil). For example: +Usually, the UUID is generated automatically, but the user can also explicitly specify the UUID in the same way when creating the table (this is not recommended). + +For example: ```sql CREATE TABLE name UUID '28f1c61c-2970-457a-bffe-454156ddcfef' (n UInt64) ENGINE = ...; ``` + +:::note +You can use the [show_table_uuid_in_table_create_query_if_not_nil](../../operations/settings/settings.md#show_table_uuid_in_table_create_query_if_not_nil) setting to display the UUID with the `SHOW CREATE` query. +::: + ### RENAME TABLE {#rename-table} -[RENAME](../../sql-reference/statements/rename.md) queries are performed without changing UUID and moving table data. These queries do not wait for the completion of queries using the table and are executed instantly. +[RENAME](../../sql-reference/statements/rename.md) queries are performed without changing the UUID or moving table data. These queries do not wait for the completion of queries using the table and are executed instantly. ### DROP/DETACH TABLE {#drop-detach-table} diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index dd8959d2700..0cee580abcd 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -6,11 +6,11 @@ toc_title: Introduction # Database Engines {#database-engines} -Database engines allow you to work with tables. +Database engines allow you to work with tables. By default, ClickHouse uses the [Atomic](../../engines/database-engines/atomic.md) database engine, which provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md). -By default, ClickHouse uses database engine [Atomic](../../engines/database-engines/atomic.md). It provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md). +Here is a complete list of available database engines. Follow the links for more details: -You can also use the following database engines: +- [Atomic](../../engines/database-engines/atomic.md) - [MySQL](../../engines/database-engines/mysql.md) @@ -18,8 +18,6 @@ You can also use the following database engines: - [Lazy](../../engines/database-engines/lazy.md) -- [Atomic](../../engines/database-engines/atomic.md) - - [PostgreSQL](../../engines/database-engines/postgresql.md) - [Replicated](../../engines/database-engines/replicated.md) diff --git a/docs/en/engines/database-engines/lazy.md b/docs/en/engines/database-engines/lazy.md index ecd4b94f579..b95ade19df4 100644 --- a/docs/en/engines/database-engines/lazy.md +++ b/docs/en/engines/database-engines/lazy.md @@ -1,6 +1,6 @@ --- -toc_priority: 31 -toc_title: Lazy +sidebar_label: Lazy +sidebar_position: 20 --- # Lazy {#lazy} diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 3dc14c87be7..df072682097 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -1,16 +1,15 @@ --- -toc_priority: 29 -toc_title: MaterializedMySQL +sidebar_label: MaterializedMySQL +sidebar_position: 70 --- -# [experimental] MaterializedMySQL {#materialized-mysql} +# [experimental] MaterializedMySQL -!!! 
warning "Warning" - This is an experimental feature that should not be used in production. +:::warning +This is an experimental feature that should not be used in production. +::: -Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables. - -ClickHouse server works as MySQL replica. It reads binlog and performs DDL and DML queries. +Creates a ClickHouse database with all the tables existing in MySQL, and all the data in those tables. The ClickHouse server works as MySQL replica. It reads `binlog` and performs DDL and DML queries. ## Creating a Database {#creating-a-database} @@ -31,8 +30,6 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo - `max_rows_in_buffer` — Maximum number of rows that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. - `max_bytes_in_buffer` — Maximum number of bytes that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. -- `max_rows_in_buffers` — Maximum number of rows that data is allowed to cache in memory (for database and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. -- `max_bytes_in_buffers` — Maximum number of bytes that data is allowed to cache in memory (for database and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. - `max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. - `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. - `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). @@ -52,8 +49,9 @@ For the correct work of `MaterializedMySQL`, there are few mandatory `MySQL`-sid - `default_authentication_plugin = mysql_native_password` since `MaterializedMySQL` can only authorize with this method. - `gtid_mode = on` since GTID based logging is a mandatory for providing correct `MaterializedMySQL` replication. -!!! attention "Attention" - While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`. +:::note +While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`. +::: ## Virtual Columns {#virtual-columns} @@ -220,13 +218,14 @@ extra care needs to be taken. You may specify overrides for tables that do not exist yet. -!!! warning "Warning" - It is easy to break replication with table overrides if not used with care. For example: +:::warning +It is easy to break replication with table overrides if not used with care. For example: - * If an ALIAS column is added with a table override, and a column with the same name is later added to the source - MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops. - * It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in - `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop. 
+* If an ALIAS column is added with a table override, and a column with the same name is later added to the source + MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops. +* It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in + `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop. +::: ## Examples of Use {#examples-of-use} diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 56793435fac..ff8f7b192e0 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -1,6 +1,6 @@ --- -toc_priority: 30 -toc_title: MaterializedPostgreSQL +sidebar_label: MaterializedPostgreSQL +sidebar_position: 60 --- # [experimental] MaterializedPostgreSQL {#materialize-postgresql} @@ -46,7 +46,9 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de ATTACH TABLE postgres_database.new_table; ``` -Warning: before version 22.1 adding table to replication left unremoved temprorary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in clickhouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. Issue is fixed in 22.1. +:::warning +Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. +::: ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication} @@ -135,69 +137,70 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -!!! warning "Warning" - Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. +:::warning +Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. +::: ## Settings {#settings} -1. materialized_postgresql_tables_list {#materialized-postgresql-tables-list} +1. `materialized_postgresql_tables_list` {#materialized-postgresql-tables-list} -Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. + Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. -Default value: empty list — means whole PostgreSQL database will be replicated. + Default value: empty list — means whole PostgreSQL database will be replicated. -2. materialized_postgresql_schema {#materialized-postgresql-schema} +2. `materialized_postgresql_schema` {#materialized-postgresql-schema} -Default value: empty string. (Default schema is used) + Default value: empty string. (Default schema is used) -3. materialized_postgresql_schema_list {#materialized-postgresql-schema-list} +3. 
`materialized_postgresql_schema_list` {#materialized-postgresql-schema-list} -Default value: empty list. (Default schema is used) + Default value: empty list. (Default schema is used) -4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update} +4. `materialized_postgresql_allow_automatic_update` {#materialized-postgresql-allow-automatic-update} -Do not use this setting before 22.1 version. + Do not use this setting before 22.1 version. -Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. + Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. -Possible values: + Possible values: -- 0 — The table is not automatically updated in the background, when schema changes are detected. -- 1 — The table is automatically updated in the background, when schema changes are detected. + - 0 — The table is not automatically updated in the background, when schema changes are detected. + - 1 — The table is automatically updated in the background, when schema changes are detected. -Default value: `0`. + Default value: `0`. -5. materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size} +5. `materialized_postgresql_max_block_size` {#materialized-postgresql-max-block-size} -Sets the number of rows collected in memory before flushing data into PostgreSQL database table. + Sets the number of rows collected in memory before flushing data into PostgreSQL database table. -Possible values: + Possible values: -- Positive integer. + - Positive integer. -Default value: `65536`. + Default value: `65536`. -6. materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot} +6. `materialized_postgresql_replication_slot` {#materialized-postgresql-replication-slot} -A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. + A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. -7. materialized_postgresql_snapshot {#materialized-postgresql-snapshot} +7. `materialized_postgresql_snapshot` {#materialized-postgresql-snapshot} -A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. 
Must be used together with `materialized_postgresql_replication_slot`. + A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`. -``` sql -CREATE DATABASE database1 -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') -SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; + ``` sql + CREATE DATABASE database1 + ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') + SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; -SELECT * FROM database1.table1; -``` + SELECT * FROM database1.table1; + ``` -The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. + The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. -``` sql -ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; -``` + ``` sql + ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; + ``` ## Notes {#notes} @@ -213,47 +216,47 @@ Please note that this should be used only if it is actually needed. If there is 1. Configure replication slot in PostgreSQL. -```yaml -apiVersion: "acid.zalan.do/v1" -kind: postgresql -metadata: - name: acid-demo-cluster -spec: - numberOfInstances: 2 - postgresql: - parameters: - wal_level: logical - patroni: - slots: - clickhouse_sync: - type: logical - database: demodb - plugin: pgoutput -``` + ```yaml + apiVersion: "acid.zalan.do/v1" + kind: postgresql + metadata: + name: acid-demo-cluster + spec: + numberOfInstances: 2 + postgresql: + parameters: + wal_level: logical + patroni: + slots: + clickhouse_sync: + type: logical + database: demodb + plugin: pgoutput + ``` 2. Wait for replication slot to be ready, then begin a transaction and export the transaction snapshot identifier: -```sql -BEGIN; -SELECT pg_export_snapshot(); -``` + ```sql + BEGIN; + SELECT pg_export_snapshot(); + ``` 3. In ClickHouse create database: -```sql -CREATE DATABASE demodb -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') -SETTINGS - materialized_postgresql_replication_slot = 'clickhouse_sync', - materialized_postgresql_snapshot = '0000000A-0000023F-3', - materialized_postgresql_tables_list = 'table1,table2,table3'; -``` + ```sql + CREATE DATABASE demodb + ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') + SETTINGS + materialized_postgresql_replication_slot = 'clickhouse_sync', + materialized_postgresql_snapshot = '0000000A-0000023F-3', + materialized_postgresql_tables_list = 'table1,table2,table3'; + ``` 4. End the PostgreSQL transaction once replication to ClickHouse DB is confirmed. 
Verify that replication continues after failover: -```bash -kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force' -``` + ```bash + kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force' + ``` ### Required permissions diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index c5a1bba44b2..89a0786a9ec 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -1,9 +1,9 @@ --- -toc_priority: 30 -toc_title: MySQL +sidebar_position: 50 +sidebar_label: MySQL --- -# MySQL {#mysql} +# MySQL Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL. @@ -59,8 +59,9 @@ These variables are supported: - `version` - `max_allowed_packet` -!!! warning "Warning" - By now these variables are stubs and don't correspond to anything. +:::warning +By now these variables are stubs and don't correspond to anything. +::: Example: diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index 76ef484e773..bc5e93d0923 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -1,6 +1,6 @@ --- -toc_priority: 35 -toc_title: PostgreSQL +sidebar_position: 40 +sidebar_label: PostgreSQL --- # PostgreSQL {#postgresql} diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index bdc17d32393..07d6fcd9ece 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: Replicated +sidebar_position: 30 +sidebar_label: Replicated --- # [experimental] Replicated {#replicated} diff --git a/docs/en/engines/database-engines/sqlite.md b/docs/en/engines/database-engines/sqlite.md index ee9db90859f..2f8b44c9a09 100644 --- a/docs/en/engines/database-engines/sqlite.md +++ b/docs/en/engines/database-engines/sqlite.md @@ -1,6 +1,6 @@ --- -toc_priority: 32 -toc_title: SQLite +sidebar_position: 55 +sidebar_label: SQLite --- # SQLite {#sqlite} diff --git a/docs/en/engines/table-engines/integrations/ExternalDistributed.md b/docs/en/engines/table-engines/integrations/ExternalDistributed.md index 0ecbc5383e1..c9aae1934db 100644 --- a/docs/en/engines/table-engines/integrations/ExternalDistributed.md +++ b/docs/en/engines/table-engines/integrations/ExternalDistributed.md @@ -1,6 +1,6 @@ --- -toc_priority: 12 -toc_title: ExternalDistributed +sidebar_position: 12 +sidebar_label: ExternalDistributed --- # ExternalDistributed {#externaldistributed} @@ -51,3 +51,6 @@ You can specify any number of shards and any number of replicas for each shard. 
- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) - [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) - [Distributed table engine](../../../engines/table-engines/special/distributed.md) + + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 385abeb83ad..701d190f022 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -1,6 +1,6 @@ --- -toc_priority: 9 -toc_title: EmbeddedRocksDB +sidebar_position: 9 +sidebar_label: EmbeddedRocksDB --- # EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine} diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 0d6d90f9d31..503bd779abf 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -1,6 +1,6 @@ --- -toc_priority: 6 -toc_title: HDFS +sidebar_position: 6 +sidebar_label: HDFS --- # HDFS {#table_engines-hdfs} @@ -98,8 +98,9 @@ Table consists of all the files in both directories (all files should satisfy fo CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -!!! warning "Warning" - If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
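+
+For example, for files named `file_000` … `file_999`, a plain range such as `{0..999}` would not match the zero-padded names. A minimal sketch (the table name and directory are illustrative; the host follows the examples above):
+
+``` sql
+CREATE TABLE table_with_leading_zeros (name String, value UInt32)
+    ENGINE = HDFS('hdfs://hdfs1:9000/some_dir/file_{0..9}{0..9}{0..9}', 'TSV')
+```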
+::: **Example** diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index b804b9c2279..6731f0e7559 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -1,6 +1,6 @@ --- -toc_priority: 4 -toc_title: Hive +sidebar_position: 4 +sidebar_label: Hive --- # Hive {#hive} @@ -406,3 +406,5 @@ f_char: hello world f_bool: true day: 2021-09-18 ``` + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/hive/) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index a06b4c78394..9230ad624ba 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -1,6 +1,6 @@ --- -toc_folder_title: Integrations -toc_priority: 1 +sidebar_position: 40 +sidebar_label: Integrations --- # Table Engines for Integrations {#table-engines-for-integrations} diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 2f442fd7753..0ce31f36070 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -1,6 +1,6 @@ --- -toc_priority: 3 -toc_title: JDBC +sidebar_position: 3 +sidebar_label: JDBC --- # JDBC {#table-engine-jdbc} diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 1d80f143098..90e0925f531 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -1,6 +1,6 @@ --- -toc_priority: 8 -toc_title: Kafka +sidebar_position: 8 +sidebar_label: Kafka --- # Kafka {#kafka} @@ -87,8 +87,9 @@ Examples: Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects. If possible, switch old projects to the method described above. +:::warning +Do not use this method in new projects. If possible, switch old projects to the method described above. +::: ``` sql Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index fa349e49af5..61f97961ddb 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -1,6 +1,6 @@ --- -toc_priority: 12 -toc_title: MaterializedPostgreSQL +sidebar_position: 12 +sidebar_label: MaterializedPostgreSQL --- # MaterializedPostgreSQL {#materialize-postgresql} @@ -52,5 +52,8 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -!!! warning "Warning" - Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. +:::warning +Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. 
+::: + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/materialized-postgresql) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index 475416ffb94..d212ab4720f 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -1,6 +1,6 @@ --- -toc_priority: 5 -toc_title: MongoDB +sidebar_position: 5 +sidebar_label: MongoDB --- # MongoDB {#mongodb} diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index 7f28f16aa27..e962db58873 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -1,6 +1,6 @@ --- -toc_priority: 4 -toc_title: MySQL +sidebar_position: 4 +sidebar_label: MySQL --- # MySQL {#mysql} @@ -148,3 +148,5 @@ Default value: `16`. - [The mysql table function](../../../sql-reference/table-functions/mysql.md) - [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 0ef21d8565a..ed2b77d7ca3 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -1,6 +1,6 @@ --- -toc_priority: 2 -toc_title: ODBC +sidebar_position: 2 +sidebar_label: ODBC --- # ODBC {#table-engine-odbc} diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 789759ec521..d6826000a1a 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -1,6 +1,6 @@ --- -toc_priority: 11 -toc_title: PostgreSQL +sidebar_position: 11 +sidebar_label: PostgreSQL --- # PostgreSQL {#postgresql} @@ -73,8 +73,9 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL `Array` types are converted into ClickHouse arrays. -!!! info "Note" - Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. +:::warning +Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. +::: Supports multiple replicas that must be listed by `|`. 
For example: diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 78c144ac76f..6653b76594a 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -1,6 +1,6 @@ --- -toc_priority: 10 -toc_title: RabbitMQ +sidebar_position: 10 +sidebar_label: RabbitMQ --- # RabbitMQ Engine {#rabbitmq-engine} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index c7301a55bf0..42abc2a0b1e 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -1,6 +1,6 @@ --- -toc_priority: 7 -toc_title: S3 +sidebar_position: 7 +sidebar_label: S3 --- # S3 Table Engine {#table-engine-s3} @@ -66,8 +66,9 @@ For more information about virtual columns see [here](../../../engines/table-eng Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -!!! warning "Warning" - If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: **Example with wildcards 1** @@ -158,3 +159,5 @@ The following settings can be specified in configuration file for given endpoint ## See also - [s3 table function](../../../sql-reference/table-functions/s3.md) + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/s3/) diff --git a/docs/en/engines/table-engines/integrations/sqlite.md b/docs/en/engines/table-engines/integrations/sqlite.md index 391f1696291..45cc1cfc28a 100644 --- a/docs/en/engines/table-engines/integrations/sqlite.md +++ b/docs/en/engines/table-engines/integrations/sqlite.md @@ -1,6 +1,6 @@ --- -toc_priority: 7 -toc_title: SQLite +sidebar_position: 7 +sidebar_label: SQLite --- # SQLite {#sqlite} @@ -56,4 +56,7 @@ SELECT * FROM sqlite_db.table2 ORDER BY col1; **See Also** - [SQLite](../../../engines/database-engines/sqlite.md) engine -- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function \ No newline at end of file +- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function + + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/sqlite/) diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 910df09e67f..89eb08ad7b9 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: Log Family -toc_priority: 29 -toc_title: Introduction +sidebar_position: 20 +sidebar_label: Log Family --- # Log Engine Family {#log-engine-family} diff --git a/docs/en/engines/table-engines/log-family/log.md b/docs/en/engines/table-engines/log-family/log.md index 2aeef171128..8858699f045 100644 --- a/docs/en/engines/table-engines/log-family/log.md +++ b/docs/en/engines/table-engines/log-family/log.md @@ -10,3 +10,6 @@ The engine belongs to the family of `Log` engines. See the common properties of `Log` differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of "marks" resides with the column files. 
These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. The `Log` engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The `Log` engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/log-family/log/) + diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 8c9f8dd8ce3..7be10cec2f5 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 35 -toc_title: AggregatingMergeTree +sidebar_position: 60 +sidebar_label: AggregatingMergeTree --- # AggregatingMergeTree {#aggregatingmergetree} @@ -42,8 +42,9 @@ When creating a `AggregatingMergeTree` table the same [clauses](../../../engines Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects and, if possible, switch the old projects to the method described above. +:::warning +Do not use this method in new projects and, if possible, switch the old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 271b8b20fdb..22863611e79 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: CollapsingMergeTree +sidebar_position: 70 +sidebar_label: CollapsingMergeTree --- # CollapsingMergeTree {#table_engine-collapsingmergetree} @@ -42,8 +42,9 @@ When creating a `CollapsingMergeTree` table, the same [query clauses](../../../e Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects and, if possible, switch the old projects to the method described above. +:::warning +Do not use this method in new projects and, if possible, switch old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index b58e90a3d92..716528f8d77 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -1,12 +1,15 @@ --- -toc_priority: 32 -toc_title: Custom Partitioning Key +sidebar_position: 30 +sidebar_label: Custom Partitioning Key --- # Custom Partitioning Key {#custom-partitioning-key} -!!! warning "Warning" - In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). 
You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). +:::warning +In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). + +You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. +::: Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. @@ -40,8 +43,9 @@ By default, the floating-point partition key is not supported. To use it enable When inserting new data to a table, this data is stored as a separate part (chunk) sorted by the primary key. In 10-15 minutes after inserting, the parts of the same partition are merged into the entire part. -!!! info "Info" - A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. +:::info +A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. +::: Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table: @@ -78,8 +82,9 @@ Let’s break down the name of the part: `201901_1_9_2_11`: - `2` is the chunk level (the depth of the merge tree it is formed from). - `11` is the mutation version (if a part mutated) -!!! info "Info" - The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). +:::info +The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). +::: The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive. 
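+
+As a quick sketch (using the `visits` table assumed in the example above), the part names and the `active` flag can be inspected directly:
+
+``` sql
+SELECT partition, name, active
+FROM system.parts
+WHERE table = 'visits'
+```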
diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index e1d571c909c..35f3f99d5a9 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: GraphiteMergeTree +sidebar_position: 90 +sidebar_label: GraphiteMergeTree --- # GraphiteMergeTree {#graphitemergetree} @@ -54,8 +54,9 @@ When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/t Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects and, if possible, switch the old projects to the method described above. +:::warning +Do not use this method in new projects and, if possible, switch old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -119,12 +120,13 @@ default ... ``` -!!! warning "Attention" - Patterns must be strictly ordered: +:::warning +Patterns must be strictly ordered: - 1. Patterns without `function` or `retention`. - 1. Patterns with both `function` and `retention`. - 1. Pattern `default`. +1. Patterns without `function` or `retention`. +1. Patterns with both `function` and `retention`. +1. Pattern `default`. +::: When processing a row, ClickHouse checks the rules in the `pattern` sections. Each of `pattern` (including `default`) sections can contain `function` parameter for aggregation, `retention` parameters or both. If the metric name matches the `regexp`, the rules from the `pattern` section (or sections) are applied; otherwise, the rules from the `default` section are used. @@ -253,7 +255,6 @@ Valid values: ``` -!!! warning "Warning" - Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). - -[Original article](https://clickhouse.com/docs/en/operations/table_engines/graphitemergetree/) +:::warning +Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). 
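+
+A minimal sketch of such an unscheduled merge (the table name and partition value are hypothetical):
+
+``` sql
+OPTIMIZE TABLE graphite.metrics PARTITION 201902 FINAL;
+```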
+::: diff --git a/docs/en/engines/table-engines/mergetree-family/index.md b/docs/en/engines/table-engines/mergetree-family/index.md index 32796a252ac..37e7bf5b589 100644 --- a/docs/en/engines/table-engines/mergetree-family/index.md +++ b/docs/en/engines/table-engines/mergetree-family/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: MergeTree Family -toc_priority: 28 -toc_title: Introduction +sidebar_position: 10 +sidebar_label: MergeTree Family --- # MergeTree Engine Family {#mergetree-engine-family} diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9d820e4961b..095adc32505 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 30 -toc_title: MergeTree +sidebar_position: 11 +sidebar_label: MergeTree --- # MergeTree {#table_engines-mergetree} @@ -27,8 +27,9 @@ Main features: If necessary, you can set the data sampling method in the table. -!!! info "Info" - The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family. +:::info +The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family. +::: ## Creating a Table {#table_engine-mergetree-creating-a-table} @@ -127,8 +128,9 @@ The `index_granularity` setting can be omitted because 8192 is the default value Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects. If possible, switch old projects to the method described above. +:::warning +Do not use this method in new projects. If possible, switch old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -382,8 +384,10 @@ The `set` index can be used with all functions. Function subsets for other index Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. -!!! note "Note" - Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: +:::note +Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false. + +For example: - Can be optimized: - `s LIKE '%test%'` @@ -391,12 +395,13 @@ Functions with a constant argument that is less than ngram size can’t be used - `s = 1` - `NOT s != 1` - `startsWith(s, 'test')` -- Can’t be optimized: +- Can not be optimized: - `NOT s LIKE '%test%'` - `s NOT LIKE '%test%'` - `NOT s = 1` - `s != 1` - `NOT startsWith(s, 'test')` +::: ## Projections {#projections} Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined in part-level. It provides consistency guarantees along with automatic usage in queries. 
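+
+For example, a minimal sketch of adding and materializing an aggregating projection (the `visits` table and its `user_id` column are assumed for illustration):
+
+``` sql
+ALTER TABLE visits ADD PROJECTION user_agg
+(
+    SELECT user_id, count()
+    GROUP BY user_id
+);
+
+ALTER TABLE visits MATERIALIZE PROJECTION user_agg;
+```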
diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index ca0db24e640..47651527f99 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 33 -toc_title: ReplacingMergeTree +sidebar_position: 40 +sidebar_label: ReplacingMergeTree --- # ReplacingMergeTree {#replacingmergetree} @@ -29,8 +29,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). -!!! note "Attention" - Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. +:::warning +Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. +::: **ReplacingMergeTree Parameters** @@ -49,8 +50,9 @@ When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/t Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects and, if possible, switch the old projects to the method described above. +:::warning +Do not use this method in new projects and, if possible, switch old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index d574bd9449e..67c503854a9 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -1,6 +1,6 @@ --- -toc_priority: 31 -toc_title: Data Replication +sidebar_position: 20 +sidebar_label: Data Replication --- # Data Replication {#table_engines-replication} @@ -31,8 +31,9 @@ ClickHouse uses [Apache ZooKeeper](https://zookeeper.apache.org) for storing rep To use replication, set parameters in the [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) server configuration section. -!!! attention "Attention" - Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. +:::warning +Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. +::: Example of setting the addresses of the ZooKeeper cluster: diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index 5726acf000e..5d180782ed3 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 34 -toc_title: SummingMergeTree +sidebar_position: 50 +sidebar_label: SummingMergeTree --- # SummingMergeTree {#summingmergetree} @@ -41,8 +41,9 @@ When creating a `SummingMergeTree` table the same [clauses](../../../engines/tab Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects and, if possible, switch the old projects to the method described above. 
+:::warning +Do not use this method in new projects and, if possible, switch the old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 8266bf34876..77cf192dcda 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: VersionedCollapsingMergeTree +sidebar_position: 80 +sidebar_label: VersionedCollapsingMergeTree --- # VersionedCollapsingMergeTree {#versionedcollapsingmergetree} @@ -53,8 +53,9 @@ When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../. Deprecated Method for Creating a Table -!!! attention "Attention" - Do not use this method in new projects. If possible, switch the old projects to the method described above. +:::warning +Do not use this method in new projects. If possible, switch old projects to the method described above. +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index d1f92d347a4..a0aff2ec813 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: Buffer +sidebar_position: 120 +sidebar_label: Buffer --- # Buffer Table Engine {#buffer} @@ -54,8 +54,9 @@ If the set of columns in the Buffer table does not match the set of columns in a If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared. The same thing happens if the subordinate table does not exist when the buffer is flushed. -!!! attention "Attention" - Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. +:::warning +Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. +::: If the server is restarted abnormally, the data in the buffer is lost. @@ -73,4 +74,4 @@ A Buffer table is used when too many INSERTs are received from a large number of Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second, while inserting larger blocks of data can produce over a million rows per second (see the section “Performance”). 
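+
+As a sketch of the batching behaviour described above (database, table, and threshold values are illustrative), a 16-layer buffer in front of a destination table, flushed according to the configured time, row, and byte thresholds:
+
+``` sql
+CREATE TABLE merge.hits_buffer AS merge.hits
+    ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
+```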
-[Original article](https://clickhouse.com/docs/en/operations/table_engines/buffer/) +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/buffer/) diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md index d76adebe01e..67b97e37d44 100644 --- a/docs/en/engines/table-engines/special/dictionary.md +++ b/docs/en/engines/table-engines/special/dictionary.md @@ -1,6 +1,6 @@ --- -toc_priority: 35 -toc_title: Dictionary +sidebar_position: 20 +sidebar_label: Dictionary --- # Dictionary Table Engine {#dictionary} @@ -97,3 +97,5 @@ select * from products limit 1; **See Also** - [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function) + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/dictionary/) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 5072465687e..db89175e4d9 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -1,6 +1,6 @@ --- -toc_priority: 33 -toc_title: Distributed +sidebar_position: 10 +sidebar_label: Distributed --- # Distributed Table Engine {#distributed} @@ -64,19 +64,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 - `monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) -!!! note "Note" +:::note +**Durability settings** (`fsync_...`): - **Durability settings** (`fsync_...`): +- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards. +- May significantly decrease the inserts' performance +- Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings` - - Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards. - - May significantly decrease the inserts' performance - - Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. 
If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings` +For **Insert limit settings** (`..._insert`) see also: - For **Insert limit settings** (`..._insert`) see also: - - - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting - - [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting - - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` +- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting +- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting +- `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` +::: **Example** @@ -215,8 +215,9 @@ To learn more about how distibuted `in` and `global in` queries are processed, r - `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md). -!!! note "Note" - Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.md) table functions internally create temporary Distributed table, `_shard_num` is available there too. +:::note +Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.md) table functions internally create temporary Distributed table, `_shard_num` is available there too. +::: **See Also** @@ -225,3 +226,4 @@ To learn more about how distibuted `in` and `global in` queries are processed, r - [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/distributed/) diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 4ec90905fe5..1f4336c74fe 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: External Data +sidebar_position: 130 +sidebar_label: External Data --- # External Data for Query Processing {#external-data-for-query-processing} @@ -63,4 +63,3 @@ $ curl -F 'passwd=@passwd.tsv;' 'http://localhost:8123/?query=SELECT+shell,+coun For distributed query processing, the temporary tables are sent to all the remote servers. 
-[Original article](https://clickhouse.com/docs/en/operations/table_engines/external_data/) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 7673f45ca8d..6e4449bf1a9 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: File +sidebar_position: 40 +sidebar_label: File --- # File Table Engine {#table_engines-file} @@ -30,8 +30,9 @@ When creating table using `File(Format)` it creates empty subdirectory in that f You may manually create this subfolder and file in server filesystem and then [ATTACH](../../../sql-reference/statements/attach.md) it to table information with matching name, so you can query data from that file. -!!! warning "Warning" - Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. +:::warning +Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. +::: ## Example {#example} @@ -85,4 +86,4 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 - Indices - Replication -[Original article](https://clickhouse.com/docs/en/operations/table_engines/file/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/file/) diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index fabe31897bb..453f3b5db0b 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: GenerateRandom +sidebar_position: 140 +sidebar_label: GenerateRandom --- # GenerateRandom Table Engine {#table_engines-generate} @@ -56,4 +56,4 @@ SELECT * FROM generate_engine_table LIMIT 3 - Indices - Replication -[Original article](https://clickhouse.com/docs/en/operations/table_engines/generate/) +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/generate/) diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md index 872c01385e0..f87cd86c891 100644 --- a/docs/en/engines/table-engines/special/index.md +++ b/docs/en/engines/table-engines/special/index.md @@ -1,6 +1,6 @@ --- -toc_folder_title: Special -toc_priority: 31 +sidebar_position: 50 +sidebar_label: Special --- # Special Table Engines {#special-table-engines} diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index 4e4a5e9fc03..7d6f6e99b9f 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -1,14 +1,15 @@ --- -toc_priority: 40 -toc_title: Join +sidebar_position: 70 +sidebar_label: Join --- # Join Table Engine {#join} Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations. -!!! note "Note" - This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself. +:::note +This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself. 
+::: ## Creating a Table {#creating-a-table} @@ -125,3 +126,5 @@ ALTER TABLE id_val_join DELETE WHERE id = 3; │ 1 │ 21 │ └────┴─────┘ ``` + +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/join/) diff --git a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md index 75161829a7e..6c9a5e84f60 100644 --- a/docs/en/engines/table-engines/special/materializedview.md +++ b/docs/en/engines/table-engines/special/materializedview.md @@ -1,10 +1,10 @@ --- -toc_priority: 43 -toc_title: MaterializedView +sidebar_position: 100 +sidebar_label: MaterializedView --- # MaterializedView Table Engine {#materializedview} Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. -[Original article](https://clickhouse.com/docs/en/operations/table_engines/materializedview/) +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/materializedview/) diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index eb557d36c50..1e154a323d1 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: Memory +sidebar_position: 110 +sidebar_label: Memory --- # Memory Table Engine {#memory} @@ -15,4 +15,4 @@ Normally, using this table engine is not justified. However, it can be used for The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”). -[Original article](https://clickhouse.com/docs/en/operations/table_engines/memory/) +[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/memory/) diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index 27f783a3cea..bcad7a0c1f6 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: Merge +sidebar_position: 30 +sidebar_label: Merge --- # Merge Table Engine {#merge} @@ -12,7 +12,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When ## Creating a Table {#creating-a-table} ``` sql - CREATE TABLE ... Engine=Merge(db_name, tables_regexp) +CREATE TABLE ... Engine=Merge(db_name, tables_regexp) ``` **Engine Parameters** @@ -81,3 +81,5 @@ SELECT * FROM WatchLog; - [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) - [merge](../../../sql-reference/table-functions/merge.md) table function + +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/merge/) diff --git a/docs/en/engines/table-engines/special/null.md b/docs/en/engines/table-engines/special/null.md index 39ed9c1c1a6..309b09ba779 100644 --- a/docs/en/engines/table-engines/special/null.md +++ b/docs/en/engines/table-engines/special/null.md @@ -1,13 +1,15 @@ --- -toc_priority: 38 -toc_title: 'Null' +sidebar_position: 50 +sidebar_label: 'Null' --- # Null Table Engine {#null} When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty. -!!! 
info "Hint" - However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. +:::note +If you are wondering why this is useful, note that you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. +::: -[Original article](https://clickhouse.com/docs/en/operations/table_engines/null/) + +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/null/) diff --git a/docs/en/engines/table-engines/special/set.md b/docs/en/engines/table-engines/special/set.md index c38c2418093..5fd80ba55fe 100644 --- a/docs/en/engines/table-engines/special/set.md +++ b/docs/en/engines/table-engines/special/set.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: Set +sidebar_position: 60 +sidebar_label: Set --- # Set Table Engine {#set} @@ -20,4 +20,4 @@ When creating a table, the following settings are applied: - [persistent](../../../operations/settings/settings.md#persistent) -[Original article](https://clickhouse.com/docs/en/operations/table_engines/set/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/set/) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 26d928085ce..64642623f88 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: URL +sidebar_position: 80 +sidebar_label: URL --- # URL Table Engine {#table_engines-url} @@ -89,4 +89,4 @@ SELECT * FROM url_engine_table - Indexes. - Replication. -[Original article](https://clickhouse.com/docs/en/operations/table_engines/url/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/url/) diff --git a/docs/en/engines/table-engines/special/view.md b/docs/en/engines/table-engines/special/view.md index 9b847a0e2d5..455c301fb01 100644 --- a/docs/en/engines/table-engines/special/view.md +++ b/docs/en/engines/table-engines/special/view.md @@ -1,10 +1,10 @@ --- -toc_priority: 42 -toc_title: View +sidebar_position: 90 +sidebar_label: View --- # View Table Engine {#table_engines-view} Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). -[Original article](https://clickhouse.com/docs/en/operations/table_engines/view/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/view/) diff --git a/docs/en/install.md b/docs/en/install.md index ecb4eb93042..35021b5bb8d 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -188,7 +188,7 @@ sudo ./clickhouse install ### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux} -For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). These builds are not recommended for use in production environments because they are less thoroughly tested, and they also only contain a subset of ClickHouse features available. 
+For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). - [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index f2a6bfc515a..bca49690025 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,7 +1,7 @@ --- -toc_folder_title: Settings -toc_priority: 55 -toc_title: Introduction +sidebar_label: Introduction +sidebar_position: 27 +slug: index --- # Settings {#session-settings-intro} From 421812a877d0ed8200aa3a7a263c84207c65feea Mon Sep 17 00:00:00 2001 From: rfraposa Date: Tue, 29 Mar 2022 17:57:11 -0600 Subject: [PATCH 043/117] Updates /interfaces docs --- docs/en/interfaces/cli.md | 4 ++-- docs/en/interfaces/cpp.md | 4 ++-- docs/en/interfaces/formats.md | 19 +++++++++++-------- docs/en/interfaces/grpc.md | 4 ++-- docs/en/interfaces/http.md | 19 +++++++++++-------- docs/en/interfaces/jdbc.md | 4 ++-- docs/en/interfaces/mysql.md | 4 ++-- docs/en/interfaces/odbc.md | 4 ++-- docs/en/interfaces/tcp.md | 4 ++-- .../third-party/client-libraries.md | 9 +++++---- docs/en/interfaces/third-party/gui.md | 4 ++-- docs/en/interfaces/third-party/index.md | 7 ++++--- .../en/interfaces/third-party/integrations.md | 9 +++++---- docs/en/interfaces/third-party/proxy.md | 4 ++-- 14 files changed, 54 insertions(+), 45 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index eaf7a96ce42..2e78bad6445 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -1,6 +1,6 @@ --- -toc_priority: 17 -toc_title: Command-Line Client +sidebar_position: 17 +sidebar_label: Command-Line Client --- # Command-line Client {#command-line-client} diff --git a/docs/en/interfaces/cpp.md b/docs/en/interfaces/cpp.md index dcd1228ea0f..a7b4188799e 100644 --- a/docs/en/interfaces/cpp.md +++ b/docs/en/interfaces/cpp.md @@ -1,6 +1,6 @@ --- -toc_priority: 24 -toc_title: C++ Client Library +sidebar_position: 24 +sidebar_label: C++ Client Library --- # C++ Client Library {#c-client-library} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 058c9b6fd4a..801b7c1a14f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1,6 +1,6 @@ --- -toc_priority: 21 -toc_title: Input and Output Formats +sidebar_position: 21 +sidebar_label: Input and Output Formats --- # Formats for Input and Output Data {#formats} @@ -764,8 +764,9 @@ CREATE TABLE IF NOT EXISTS example_table - If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). - If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. -!!! note "Warning" - When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. +:::warning +When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. 
+::: ### Selecting Data {#selecting-data} @@ -787,8 +788,9 @@ The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 sequences. Values are escaped in the same way as for `JSON`. -!!! note "Note" - Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. +:::info +Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. +::: ### Usage of Nested Structures {#jsoneachrow-nested} @@ -1340,8 +1342,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -!!! note "Warning" - Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. +:::warning +Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. +::: ## Parquet {#data-format-parquet} diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index b30715082ec..6ada38c6220 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -1,6 +1,6 @@ --- -toc_priority: 19 -toc_title: gRPC Interface +sidebar_position: 19 +sidebar_label: gRPC Interface --- # gRPC Interface {#grpc-interface} diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index d72fb4d6f17..a97cf6671b2 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -1,6 +1,6 @@ --- -toc_priority: 19 -toc_title: HTTP Interface +sidebar_position: 19 +sidebar_label: HTTP Interface --- # HTTP Interface {#http-interface} @@ -178,8 +178,9 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. -!!! note "Note" - Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. +:::info +Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. +::: **Examples** @@ -439,8 +440,9 @@ Next are the configuration methods for different `type`. The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. -!!! note "Warning" - To keep the default `handlers` such as` query`, `play`,` ping`, use the `` rule. +:::warning +To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. 
+:::
 
 Example:
 
@@ -469,8 +471,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:
 max_final_threads 2
 ```
 
-!!! note "caution"
-    In one `predefined_query_handler` only supports one `query` of an insert type.
+:::warning
+Only one `query` of an insert type is supported in one `predefined_query_handler`.
+:::
 
 ### dynamic_query_handler {#dynamic_query_handler}
 
diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md
index cf97568a8de..0310156a872 100644
--- a/docs/en/interfaces/jdbc.md
+++ b/docs/en/interfaces/jdbc.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 22
-toc_title: JDBC Driver
+sidebar_position: 22
+sidebar_label: JDBC Driver
 ---
 
 # JDBC Driver {#jdbc-driver}
diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md
index 9932e6b6cb3..df8ef38d671 100644
--- a/docs/en/interfaces/mysql.md
+++ b/docs/en/interfaces/mysql.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 20
-toc_title: MySQL Interface
+sidebar_position: 20
+sidebar_label: MySQL Interface
 ---
 
 # MySQL Interface {#mysql-interface}
diff --git a/docs/en/interfaces/odbc.md b/docs/en/interfaces/odbc.md
index fa58ed8b43e..5327f6bb48a 100644
--- a/docs/en/interfaces/odbc.md
+++ b/docs/en/interfaces/odbc.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 23
-toc_title: ODBC Driver
+sidebar_position: 23
+sidebar_label: ODBC Driver
 ---
 
 # ODBC Driver {#odbc-driver}
diff --git a/docs/en/interfaces/tcp.md b/docs/en/interfaces/tcp.md
index b23f8110320..5f2f400799f 100644
--- a/docs/en/interfaces/tcp.md
+++ b/docs/en/interfaces/tcp.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 18
-toc_title: Native Interface (TCP)
+sidebar_position: 18
+sidebar_label: Native Interface (TCP)
 ---
 
 # Native Interface (TCP) {#native-interface-tcp}
diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md
index 8d1ff12cf0a..885e9f430f2 100644
--- a/docs/en/interfaces/third-party/client-libraries.md
+++ b/docs/en/interfaces/third-party/client-libraries.md
@@ -1,12 +1,13 @@
 ---
-toc_priority: 26
-toc_title: Client Libraries
+sidebar_position: 26
+sidebar_label: Client Libraries
 ---
 
 # Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers}
 
-!!! warning "Disclaimer"
-    ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality.
+:::warning
+ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality.
+::: - Python - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index c0e270b7207..92d00f2812c 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -1,6 +1,6 @@ --- -toc_priority: 28 -toc_title: Visual Interfaces +sidebar_position: 28 +sidebar_label: Visual Interfaces --- # Visual Interfaces from Third-party Developers {#visual-interfaces-from-third-party-developers} diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index caf100681b4..c9be2b6ada9 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: Third-Party -toc_priority: 24 +sidebar_position: 24 --- # Third-Party Interfaces {#third-party-interfaces} @@ -12,5 +12,6 @@ This is a collection of links to third-party tools that provide some sort of int - [GUI](../../interfaces/third-party/gui.md) - [Proxies](../../interfaces/third-party/proxy.md) -!!! note "Note" - Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. +:::note +Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. +::: \ No newline at end of file diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 3aac78f0878..ae055d63a9d 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -1,12 +1,13 @@ --- -toc_priority: 27 -toc_title: Integrations +sidebar_position: 27 +sidebar_label: Integrations --- # Integration Libraries from Third-party Developers {#integration-libraries-from-third-party-developers} -!!! warning "Disclaimer" - ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. +:::warning Disclaimer +ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. 
+::: ## Infrastructure Products {#infrastructure-products} diff --git a/docs/en/interfaces/third-party/proxy.md b/docs/en/interfaces/third-party/proxy.md index 31a2d5afae9..45077cb6a89 100644 --- a/docs/en/interfaces/third-party/proxy.md +++ b/docs/en/interfaces/third-party/proxy.md @@ -1,6 +1,6 @@ --- -toc_priority: 29 -toc_title: Proxies +sidebar_position: 29 +sidebar_label: Proxies --- # Proxy Servers from Third-party Developers {#proxy-servers-from-third-party-developers} From 01ec63c909115f37aef91507ea0dd738957db929 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Tue, 29 Mar 2022 20:38:50 -0600 Subject: [PATCH 044/117] Updates /operations docs --- docs/en/interfaces/jdbc.md | 3 +- docs/en/interfaces/odbc.md | 4 +- docs/en/operations/_category_.yml | 3 - docs/en/operations/access-rights.md | 14 ++-- docs/en/operations/backup.md | 9 +- docs/en/operations/caches.md | 4 +- docs/en/operations/clickhouse-keeper.md | 14 ++-- docs/en/operations/configuration-files.md | 4 +- .../external-authenticators/index.md | 7 +- .../external-authenticators/kerberos.md | 21 +++-- docs/en/operations/index.md | 7 +- docs/en/operations/monitoring.md | 4 +- docs/en/operations/opentelemetry.md | 9 +- .../optimizing-performance/index.md | 5 +- .../sampling-query-profiler.md | 4 +- docs/en/operations/performance-test.md | 4 +- docs/en/operations/quotas.md | 4 +- docs/en/operations/requirements.md | 4 +- .../server-configuration-parameters/index.md | 7 +- .../settings.md | 72 +++++++++------- .../settings/constraints-on-settings.md | 4 +- docs/en/operations/settings/index.md | 6 +- .../settings/permissions-for-queries.md | 4 +- .../operations/settings/query-complexity.md | 4 +- .../operations/settings/settings-profiles.md | 9 +- docs/en/operations/settings/settings-users.md | 14 ++-- docs/en/operations/settings/settings.md | 82 +++++++++++-------- docs/en/operations/ssl-zookeeper.md | 7 +- docs/en/operations/storing-data.md | 4 +- .../system-tables/asynchronous_metric_log.md | 2 +- .../system-tables/asynchronous_metrics.md | 2 +- docs/en/operations/system-tables/clusters.md | 2 +- docs/en/operations/system-tables/columns.md | 2 +- .../operations/system-tables/contributors.md | 2 +- docs/en/operations/system-tables/crash-log.md | 2 +- .../operations/system-tables/current-roles.md | 2 +- .../system-tables/data_skipping_indices.md | 2 +- .../system-tables/data_type_families.md | 2 +- docs/en/operations/system-tables/databases.md | 2 +- .../system-tables/detached_parts.md | 2 +- .../operations/system-tables/dictionaries.md | 2 +- docs/en/operations/system-tables/disks.md | 2 +- .../system-tables/distributed_ddl_queue.md | 2 +- .../system-tables/distribution_queue.md | 2 +- .../operations/system-tables/enabled-roles.md | 2 +- docs/en/operations/system-tables/errors.md | 2 +- docs/en/operations/system-tables/events.md | 2 +- docs/en/operations/system-tables/functions.md | 2 +- docs/en/operations/system-tables/grants.md | 2 +- .../system-tables/graphite_retentions.md | 2 +- docs/en/operations/system-tables/index.md | 4 +- docs/en/operations/system-tables/licenses.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/merges.md | 2 +- .../en/operations/system-tables/metric_log.md | 2 +- docs/en/operations/system-tables/metrics.md | 2 +- docs/en/operations/system-tables/mutations.md | 7 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- .../system-tables/opentelemetry_span_log.md | 2 +- 
docs/en/operations/system-tables/part_log.md | 2 +- docs/en/operations/system-tables/parts.md | 7 +- .../operations/system-tables/parts_columns.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- docs/en/operations/system-tables/query_log.md | 7 +- .../system-tables/query_thread_log.md | 2 +- .../system-tables/query_views_log.md | 2 +- .../operations/system-tables/quota_limits.md | 2 +- .../operations/system-tables/quota_usage.md | 2 +- docs/en/operations/system-tables/quotas.md | 2 +- .../operations/system-tables/quotas_usage.md | 2 +- docs/en/operations/system-tables/replicas.md | 2 +- .../system-tables/replicated_fetches.md | 2 +- .../system-tables/replication_queue.md | 2 +- .../operations/system-tables/role-grants.md | 2 +- docs/en/operations/system-tables/roles.md | 2 +- .../operations/system-tables/row_policies.md | 2 +- .../operations/system-tables/session_log.md | 2 +- docs/en/operations/system-tables/settings.md | 2 +- .../settings_profile_elements.md | 2 +- .../system-tables/settings_profiles.md | 2 +- .../operations/system-tables/stack_trace.md | 2 +- .../system-tables/storage_policies.md | 2 +- .../operations/system-tables/table_engines.md | 2 +- docs/en/operations/system-tables/tables.md | 2 +- docs/en/operations/system-tables/text_log.md | 2 +- .../en/operations/system-tables/time_zones.md | 2 +- docs/en/operations/system-tables/trace_log.md | 2 +- docs/en/operations/system-tables/users.md | 2 +- docs/en/operations/system-tables/zookeeper.md | 2 +- .../operations/system-tables/zookeeper_log.md | 2 +- docs/en/operations/tips.md | 11 +-- docs/en/operations/troubleshooting.md | 4 +- docs/en/operations/update.md | 9 +- .../utilities/clickhouse-benchmark.md | 6 +- .../utilities/clickhouse-compressor.md | 3 +- .../operations/utilities/clickhouse-copier.md | 11 +-- .../operations/utilities/clickhouse-format.md | 7 +- .../operations/utilities/clickhouse-local.md | 11 +-- .../utilities/clickhouse-obfuscator.md | 2 +- docs/en/operations/utilities/index.md | 7 +- 102 files changed, 294 insertions(+), 257 deletions(-) diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md index 0310156a872..4bea0600a2a 100644 --- a/docs/en/interfaces/jdbc.md +++ b/docs/en/interfaces/jdbc.md @@ -5,7 +5,8 @@ sidebar_label: JDBC Driver # JDBC Driver {#jdbc-driver} -- **[Official driver](https://github.com/ClickHouse/clickhouse-jdbc)** +Use the [official JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc) (and Java client) to access ClickHouse from your Java applications. + - Third-party drivers: - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) diff --git a/docs/en/interfaces/odbc.md b/docs/en/interfaces/odbc.md index 5327f6bb48a..4c807654c28 100644 --- a/docs/en/interfaces/odbc.md +++ b/docs/en/interfaces/odbc.md @@ -5,6 +5,8 @@ sidebar_label: ODBC Driver # ODBC Driver {#odbc-driver} -- [Official driver](https://github.com/ClickHouse/clickhouse-odbc) +Use the [official ODBC driver](https://github.com/ClickHouse/clickhouse-odbc) for accessing ClickHouse as a data source. 
+
+
 [Original article](https://clickhouse.com/docs/en/interfaces/odbc/)
diff --git a/docs/en/operations/_category_.yml b/docs/en/operations/_category_.yml
index 011ab58d26d..9d6dd1247db 100644
--- a/docs/en/operations/_category_.yml
+++ b/docs/en/operations/_category_.yml
@@ -2,6 +2,3 @@ position: 70
 label: 'Operations'
 collapsible: true
 collapsed: true
-link:
-  type: generated-index
-  title: Operations
\ No newline at end of file
diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md
index 52f7fb87ffd..7d75c47df2b 100644
--- a/docs/en/operations/access-rights.md
+++ b/docs/en/operations/access-rights.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 48
-toc_title: Access Control and Account Management
+sidebar_position: 48
+sidebar_label: Access Control and Account Management
 ---
 
 # Access Control and Account Management {#access-control}
@@ -24,8 +24,9 @@ You can configure access entities using:
 
 We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow.
 
-!!! note "Warning"
-    You can’t manage the same access entity by both configuration methods simultaneously.
+:::warning
+You can’t manage the same access entity by both configuration methods simultaneously.
+:::
 
 To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement.
 
@@ -101,8 +102,9 @@ Privileges can be granted to a role by the [GRANT](../sql-reference/statements/g
 
 Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy.
 
-!!! note "Warning"
-    Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies.
+:::warning
+Row policies make sense only for users with readonly access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
+:::
 
 Management queries:
 
diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md
index 7f0ed48928a..c39658aa4b0 100644
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@@ -1,6 +1,6 @@
 ---
-toc_priority: 49
-toc_title: Data Backup
+sidebar_position: 49
+sidebar_label: Data Backup
 ---
 
 # Data Backup {#data-backup}
@@ -11,8 +11,9 @@ In order to effectively mitigate possible human errors, you should carefully pre
 
 Each company has different resources available and business requirements, so there’s no universal solution for ClickHouse backups and restores that will fit every situation. What works for one gigabyte of data likely won’t work for tens of petabytes. There are a variety of possible approaches with their own pros and cons, which will be discussed below. It is a good idea to use several approaches instead of just one in order to compensate for their various shortcomings.
 
-!!! note "Note"
-    Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
+:::note +Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly. +::: ## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else} diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 279204a8af1..f2427810184 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -1,6 +1,6 @@ --- -toc_priority: 65 -toc_title: Caches +sidebar_position: 65 +sidebar_label: Caches --- # Cache Types {#cache-types} diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 35ec5d858f5..81547736441 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -1,14 +1,15 @@ --- -toc_priority: 66 -toc_title: ClickHouse Keeper +sidebar_position: 66 +sidebar_label: ClickHouse Keeper --- # [pre-production] ClickHouse Keeper {#clickHouse-keeper} ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. -!!! warning "Warning" - This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. +:::warning +This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. +::: ## Implementation details {#implementation-details} @@ -18,8 +19,9 @@ By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (lineari ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. Digest authentication scheme uses pair `username:password`. Password is encoded in Base64. -!!! info "Note" - External integrations are not supported. +:::note +External integrations are not supported. 
+::: ## Configuration {#configuration} diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index cbc139dd958..582e90544e0 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: Configuration Files +sidebar_position: 50 +sidebar_label: Configuration Files --- # Configuration Files {#configuration_files} diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index 850b6594b71..af2ba713ec1 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: External User Authenticators and Directories -toc_priority: 48 -toc_title: Introduction +sidebar_position: 48 +sidebar_label: External User Authenticators and Directories --- -# External User Authenticators and Directories {#external-authenticators} +# External User Authenticators and Directories ClickHouse supports authenticating and managing users using external services. diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index da84c1f6a89..3711bac79c3 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -51,12 +51,13 @@ With filtering by realm: ``` -!!! warning "Note" - You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. - -!!! warning "Note" - `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. +:::warning +You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. +::: +:::warning +`principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. +::: ## Kerberos as an external authenticator for existing users {#kerberos-as-an-external-authenticator-for-existing-users} @@ -94,11 +95,13 @@ Example (goes into `users.xml`): ``` -!!! warning "Warning" - Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. +:::warning +Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. +::: -!!! info "Reminder" - Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously. +:::info Reminder +Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously. 
+::: ### Enabling Kerberos using SQL {#enabling-kerberos-using-sql} diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md index b78633f2d6b..824e851e997 100644 --- a/docs/en/operations/index.md +++ b/docs/en/operations/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: Operations -toc_priority: 41 -toc_title: Introduction +sidebar_position: 41 +sidebar_label: Operations --- # Operations {#operations} @@ -23,4 +22,4 @@ ClickHouse operations manual consists of the following major sections: - [Settings](../operations/settings/index.md) - [Utilities](../operations/utilities/index.md) -{## [Original article](https://clickhouse.com/docs/en/operations/) ##} +[Original article](https://clickhouse.com/docs/en/operations/) diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index ffcdae16c4d..437122e106d 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: Monitoring +sidebar_position: 45 +sidebar_label: Monitoring --- # Monitoring {#monitoring} diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index ec27ecfd6b2..740537d88bc 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -1,14 +1,15 @@ --- -toc_priority: 62 -toc_title: OpenTelemetry Support +sidebar_position: 62 +sidebar_label: OpenTelemetry Support --- # [experimental] OpenTelemetry Support [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -!!! warning "Warning" - This is an experimental feature that will change in backwards-incompatible ways in future releases. +:::warning +This is an experimental feature that will change in backwards-incompatible ways in future releases. 
+::: ## Supplying Trace Context to ClickHouse diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md index 142d3b2f976..ef9c6a4b664 100644 --- a/docs/en/operations/optimizing-performance/index.md +++ b/docs/en/operations/optimizing-performance/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: Optimizing Performance -toc_hidden: true -toc_priority: 52 +sidebar_label: Optimizing Performance +sidebar_position: 52 --- # Optimizing Performance {#optimizing-performance} diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 72cfa59b8b2..35e0157df6b 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -1,6 +1,6 @@ --- -toc_priority: 54 -toc_title: Query Profiling +sidebar_position: 54 +sidebar_label: Query Profiling --- # Sampling Query Profiler {#sampling-query-profiler} diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index e410b1b2dfd..47827f331c7 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -1,6 +1,6 @@ --- -toc_priority: 54 -toc_title: Testing Hardware +sidebar_position: 54 +sidebar_label: Testing Hardware --- # How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse} diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index 6d22a5f2a33..77b0697d483 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -1,6 +1,6 @@ --- -toc_priority: 51 -toc_title: Quotas +sidebar_position: 51 +sidebar_label: Quotas --- # Quotas {#quotas} diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md index a3e61b1152b..698603dfb84 100644 --- a/docs/en/operations/requirements.md +++ b/docs/en/operations/requirements.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: Requirements +sidebar_position: 44 +sidebar_label: Requirements --- # Requirements {#requirements} diff --git a/docs/en/operations/server-configuration-parameters/index.md b/docs/en/operations/server-configuration-parameters/index.md index a95d198bd0d..1e4ddc6368e 100644 --- a/docs/en/operations/server-configuration-parameters/index.md +++ b/docs/en/operations/server-configuration-parameters/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Server Configuration Parameters -toc_priority: 54 -toc_title: Introduction +sidebar_position: 54 +sidebar_label: Server Configuration Parameters --- -# Server Configuration Parameters {#server-settings} +# Server Configuration Parameters This section contains descriptions of server settings that cannot be changed at the session or query level. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 266abadb087..985dc626ea4 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1,6 +1,6 @@ --- -toc_priority: 57 -toc_title: Server Settings +sidebar_position: 57 +sidebar_label: Server Settings --- # Server Settings {#server-settings} @@ -23,8 +23,9 @@ Default value: 3600. Data compression settings for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables. -!!! warning "Warning" - Don’t use it if you have just started using ClickHouse. 
+:::warning +Don’t use it if you have just started using ClickHouse. +::: Configuration template: @@ -87,8 +88,9 @@ Loading from config: ``` -!!! note "NOTE" - Storing keys in the configuration file is not recommended. It isn't secure. You can move the keys into a separate config file on a secure disk and put a symlink to that config file to `config.d/` folder. +:::note +Storing keys in the configuration file is not recommended. It isn't secure. You can move the keys into a separate config file on a secure disk and put a symlink to that config file to `config.d/` folder. +::: Loading from config, when the key is in hex: @@ -173,8 +175,9 @@ Possible values: Default value: `1073741824` (1 GB). -!!! info "Note" - Hard limit is configured via system tools +:::note +Hard limit is configured via system tools +::: **Example** @@ -439,11 +442,13 @@ A username and a password used to connect to other servers during [replication]( By default, if `interserver_http_credentials` section is omitted, authentication is not used during replication. -!!! note "Note" - `interserver_http_credentials` settings do not relate to a ClickHouse client credentials [configuration](../../interfaces/cli.md#configuration_files). +:::note +`interserver_http_credentials` settings do not relate to a ClickHouse client credentials [configuration](../../interfaces/cli.md#configuration_files). +::: -!!! note "Note" - These credentials are common for replication via `HTTP` and `HTTPS`. +:::note +These credentials are common for replication via `HTTP` and `HTTPS`. +::: The section contains the following parameters: @@ -675,8 +680,9 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa The maximum number of simultaneously processed queries related to MergeTree table. Queries may be limited by other settings: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). -!!! info "Note" - These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +:::note +These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +::: Possible values: @@ -695,8 +701,9 @@ Default value: `100`. The maximum number of simultaneously processed `INSERT` queries. -!!! info "Note" - These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +:::note +These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +::: Possible values: @@ -715,8 +722,9 @@ Default value: `0`. The maximum number of simultaneously processed `SELECT` queries. -!!! info "Note" - These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +:::note +These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +::: Possible values: @@ -1025,8 +1033,9 @@ Use the following parameters to configure logging: The path to the directory containing data. -!!! warning "Warning" - The trailing slash is mandatory. 
+:::note +The trailing slash is mandatory. +::: **Example** @@ -1306,8 +1315,9 @@ Example Path to temporary data for processing large queries. -!!! warning "Note" - The trailing slash is mandatory. +:::note +The trailing slash is mandatory. +::: **Example** @@ -1321,11 +1331,12 @@ Policy from [storage_configuration](../../engines/table-engines/mergetree-family If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored. -!!! note "Note" - - `move_factor` is ignored. - - `keep_free_space_bytes` is ignored. - - `max_data_part_size_bytes` is ignored. - - Уou must have exactly one volume in that policy. +:::note +- `move_factor` is ignored. +- `keep_free_space_bytes` is ignored. +- `max_data_part_size_bytes` is ignored. +- Уou must have exactly one volume in that policy. +::: ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} @@ -1442,10 +1453,11 @@ This setting only applies to the `MergeTree` family. It can be specified: If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../../engines/table-engines/mergetree-family/replication.md) tables store the headers of the data parts compactly using a single `znode`. If the table contains many columns, this storage method significantly reduces the volume of the data stored in Zookeeper. -!!! attention "Attention" - After applying `use_minimalistic_part_header_in_zookeeper = 1`, you can’t downgrade the ClickHouse server to a version that does not support this setting. Be careful when upgrading ClickHouse on servers in a cluster. Don’t upgrade all the servers at once. It is safer to test new versions of ClickHouse in a test environment, or on just a few servers of a cluster. +:::note +After applying `use_minimalistic_part_header_in_zookeeper = 1`, you can’t downgrade the ClickHouse server to a version that does not support this setting. Be careful when upgrading ClickHouse on servers in a cluster. Don’t upgrade all the servers at once. It is safer to test new versions of ClickHouse in a test environment, or on just a few servers of a cluster. - Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +::: **Default value:** 0. diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 338949c5a6a..5adde60a460 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -1,6 +1,6 @@ --- -toc_priority: 62 -toc_title: Constraints on Settings +sidebar_position: 62 +sidebar_label: Constraints on Settings --- # Constraints on Settings {#constraints-on-settings} diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index bca49690025..c371bb0c41a 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,10 +1,10 @@ --- -sidebar_label: Introduction -sidebar_position: 27 +sidebar_label: Settings +sidebar_position: 52 slug: index --- -# Settings {#session-settings-intro} +# Settings There are multiple ways to make all the settings described in this section of documentation. 
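For illustration, the two most common ways to apply a setting from this reference are a session-level `SET` statement and a per-query `SETTINGS` clause; the sketch below uses `max_threads` with an arbitrary example value:

``` sql
-- Session level: affects every subsequent query in this session.
SET max_threads = 8;

-- Query level: affects only this statement.
SELECT sum(number)
FROM numbers(1000000)
SETTINGS max_threads = 8;
```

Settings changed this way take precedence over the defaults coming from the user profile or the server configuration.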
diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md index 47551f288bb..ff63f524b7d 100644 --- a/docs/en/operations/settings/permissions-for-queries.md +++ b/docs/en/operations/settings/permissions-for-queries.md @@ -1,6 +1,6 @@ --- -toc_priority: 58 -toc_title: Permissions for Queries +sidebar_position: 58 +sidebar_label: Permissions for Queries --- # Permissions for Queries {#permissions_for_queries} diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 3287caacdf8..c0c77bc809a 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -1,6 +1,6 @@ --- -toc_priority: 59 -toc_title: Restrictions on Query Complexity +sidebar_position: 59 +sidebar_label: Restrictions on Query Complexity --- # Restrictions on Query Complexity {#restrictions-on-query-complexity} diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 1939b21bfc3..b8e1e3c21c4 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -1,14 +1,15 @@ --- -toc_priority: 61 -toc_title: Settings Profiles +sidebar_position: 61 +sidebar_label: Settings Profiles --- # Settings Profiles {#settings-profiles} A settings profile is a collection of settings grouped under the same name. -!!! note "Information" - ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it. +:::note +ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it. +::: The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 1a1d2e2a0fa..6a020be2afc 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -1,14 +1,15 @@ --- -toc_priority: 63 -toc_title: User Settings +sidebar_position: 63 +sidebar_label: User Settings --- # User Settings {#user-settings} The `users` section of the `user.xml` configuration file contains user settings. -!!! note "Information" - ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it. +:::note +ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it. +::: Structure of the `users` section: @@ -116,8 +117,9 @@ To open access for user from any network, specify: ::/0 ``` -!!! warning "Warning" - It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. +:::warning +It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. 
+::: To open access only from localhost, specify: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c04ca5822e6..30d7dd98ee7 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -401,8 +401,9 @@ Default value: 1. When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv), [TabSeparated](../../interfaces/formats.md#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. -!!! note "Note" - When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +:::note +When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +::: Possible values: @@ -690,8 +691,9 @@ When using `partial_merge` algorithm ClickHouse sorts the data and dumps it to t Changes behaviour of join operations with `ANY` strictness. -!!! warning "Attention" - This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. +:::warning +This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. +::: Possible values: @@ -762,8 +764,9 @@ Default value: 64. Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. -!!! note "Warning" - Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. +:::warning +Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. +::: When the legacy behaviour enabled: @@ -1137,8 +1140,9 @@ Higher values will lead to higher memory usage. The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. -!!! note "Warning" - This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. +:::warning +This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. +::: Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table). @@ -1154,8 +1158,9 @@ We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed. -!!! note "Warning" - This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. +:::warning +This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. +::: ## max_query_size {#settings-max_query_size} @@ -1243,8 +1248,9 @@ Default value: `0`. 
Could be used for throttling speed when replicating the data to add or replace new nodes. -!!! note "Note" - 60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). +:::note +60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). +::: ## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server} @@ -1263,8 +1269,9 @@ Default value: `0`. Could be used for throttling speed when replicating the data to add or replace new nodes. -!!! note "Note" - 60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). +:::note +60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). +::: ## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms} @@ -1426,8 +1433,9 @@ Possible values: Default value: 1. -!!! warning "Warning" - Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas). +:::warning +Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas). +::: ## totals_mode {#totals-mode} @@ -1458,8 +1466,9 @@ This setting is useful for replicated tables with a sampling key. A query may be - The sampling key is an expression that is expensive to calculate. - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency. -!!! warning "Warning" - This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details. +:::warning +This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details. +::: ## compile_expressions {#compile-expressions} @@ -2111,8 +2120,9 @@ See also: - [distributed_push_down_limit](#distributed-push-down-limit) - [optimize_skip_unused_shards](#optimize-skip-unused-shards) -!!! note "Note" - Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key). +:::note +Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key). +::: ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} @@ -2254,18 +2264,21 @@ Possible values: Default value: 0. -!!! note "Note" - This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). +:::note +This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). +::: -!!! warning "Warning" - You should not rely on automatic batch splitting, since this may hurt performance. 
+:::warning +You should not rely on automatic batch splitting, since this may hurt performance. +::: ## os_thread_priority {#setting-os-thread-priority} Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. -!!! warning "Warning" - To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start. +:::warning +To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start. +::: Possible values: @@ -2539,9 +2552,10 @@ Possible values: Default value: `1`. -!!! note "Note" - - with `use_compact_format_in_distributed_parts_names=0` changes from cluster definition will not be applied for async INSERT. - - with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware. +:::note +- with `use_compact_format_in_distributed_parts_names=0` changes from cluster definition will not be applied for async INSERT. +- with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware. +::: ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} @@ -3196,11 +3210,13 @@ Possible values: Default value: `0`. -!!! warning "Warning" - Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. +:::warning +Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. +::: -!!! warning "Warning" - Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to server crash. +:::warning +Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to server crash. +::: ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty} diff --git a/docs/en/operations/ssl-zookeeper.md b/docs/en/operations/ssl-zookeeper.md index fe899802348..d6043d521e7 100644 --- a/docs/en/operations/ssl-zookeeper.md +++ b/docs/en/operations/ssl-zookeeper.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: Secured communication with Zookeeper +sidebar_position: 45 +sidebar_label: Secured Communication with Zookeeper --- # Optional secured communication between ClickHouse and Zookeeper {#secured-communication-with-zookeeper} @@ -67,8 +67,7 @@ SELECT * FROM system.zookeeper WHERE path = '/'; On unencrypted connection you will see in `tcpdump` output something like this: ```text -..../zookeeper/q -uota. +..../zookeeper/quota. ``` On encrypted connection you should not see this. 
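As a quick end-to-end check after switching ClickHouse to the encrypted ZooKeeper endpoint, a query against `system.zookeeper` along the lines of the sketch below should still return the root znodes (only `name` and `value` are selected here for brevity; the full column set may vary between versions):

``` sql
-- If the TLS connection to ZooKeeper is configured correctly, this lists the children of the root znode.
SELECT name, value
FROM system.zookeeper
WHERE path = '/';
```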
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index beffd45bcbd..2162ae066dd 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -1,6 +1,6 @@ --- -toc_priority: 68 -toc_title: External Disks for Storing Data +sidebar_position: 68 +sidebar_label: External Disks for Storing Data --- # External Disks for Storing Data {#external-disks} diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 273f1f00d71..2233406162b 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -1,4 +1,4 @@ -## system.asynchronous_metric_log {#system-tables-async-log} +# asynchronous_metric_log {#system-tables-async-log} Contains the historical values for `system.asynchronous_metrics`, which are saved once per minute. Enabled by default. diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 11255a868cc..162048b06ee 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -1,4 +1,4 @@ -# system.asynchronous_metrics {#system_tables-asynchronous_metrics} +# asynchronous_metrics {#system_tables-asynchronous_metrics} Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use. diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 18a4152df70..776c90b9936 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -1,4 +1,4 @@ -# system.clusters {#system-clusters} +# clusters {#system-clusters} Contains information about clusters available in the config file and the servers in them. diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 55e4a8284a0..dd5674fe5b1 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -1,4 +1,4 @@ -# system.columns {#system-columns} +# columns {#system-columns} Contains information about columns in all the tables. diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index 0b6e977e0e3..3b76684b44b 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -1,4 +1,4 @@ -# system.contributors {#system-contributors} +# contributors {#system-contributors} Contains information about contributors. The order is random at query execution time. diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 404010afc05..be85cb78c9f 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -1,4 +1,4 @@ -# system.crash_log {#system-tables_crash_log} +# crash_log {#system-tables_crash_log} Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. 
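Because the table is created lazily, it can be convenient to check for its existence before querying it; a minimal sketch:

``` sql
-- Returns 1 if system.crash_log has been created (that is, at least one fatal error was recorded), 0 otherwise.
EXISTS TABLE system.crash_log;
```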
diff --git a/docs/en/operations/system-tables/current-roles.md b/docs/en/operations/system-tables/current-roles.md index c029f367998..81d4fad24a8 100644 --- a/docs/en/operations/system-tables/current-roles.md +++ b/docs/en/operations/system-tables/current-roles.md @@ -1,4 +1,4 @@ -# system.current_roles {#system_tables-current_roles} +# current_roles {#system_tables-current_roles} Contains active roles of a current user. `SET ROLE` changes the contents of this table. diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index add89ae9144..71dfb046dbb 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -1,4 +1,4 @@ -# system.data_skipping_indices {#system-data-skipping-indices} +# data_skipping_indices {#system-data-skipping-indices} Contains information about existing data skipping indices in all the tables. diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index 0d11b1cfefb..2e5e7b74c66 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -1,4 +1,4 @@ -# system.data_type_families {#system_tables-data_type_families} +# data_type_families {#system_tables-data_type_families} Contains information about supported [data types](../../sql-reference/data-types/index.md). diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 45eebf2ae85..7245ecdcdec 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -1,4 +1,4 @@ -# system.databases {#system-databases} +# databases {#system-databases} Contains information about the databases that are available to the current user. diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md index 7345aa4ba6c..2fe354a4471 100644 --- a/docs/en/operations/system-tables/detached_parts.md +++ b/docs/en/operations/system-tables/detached_parts.md @@ -1,4 +1,4 @@ -# system.detached_parts {#system_tables-detached_parts} +# detached_parts {#system_tables-detached_parts} Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached. diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 5fd326297c8..c41d506ff0a 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -1,4 +1,4 @@ -# system.dictionaries {#system_tables-dictionaries} +# dictionaries {#system_tables-dictionaries} Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index f643e3fcfe1..869c0f3cee5 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -1,4 +1,4 @@ -# system.disks {#system_tables-disks} +# disks {#system_tables-disks} Contains information about disks defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). 
diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 47899ae5628..0597972197d 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -1,4 +1,4 @@ -# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} +# distributed_ddl_queue {#system_tables-distributed_ddl_queue} Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster. diff --git a/docs/en/operations/system-tables/distribution_queue.md b/docs/en/operations/system-tables/distribution_queue.md index 2b0ca536119..231a06458c8 100644 --- a/docs/en/operations/system-tables/distribution_queue.md +++ b/docs/en/operations/system-tables/distribution_queue.md @@ -1,4 +1,4 @@ -# system.distribution_queue {#system_tables-distribution_queue} +# distribution_queue {#system_tables-distribution_queue} Contains information about local files that are in the queue to be sent to the shards. These local files contain new parts that are created by inserting new data into the Distributed table in asynchronous mode. diff --git a/docs/en/operations/system-tables/enabled-roles.md b/docs/en/operations/system-tables/enabled-roles.md index 54569ebbca6..832fc6aba42 100644 --- a/docs/en/operations/system-tables/enabled-roles.md +++ b/docs/en/operations/system-tables/enabled-roles.md @@ -1,4 +1,4 @@ -# system.enabled_roles {#system_tables-enabled_roles} +# enabled_roles {#system_tables-enabled_roles} Contains all active roles at the moment, including current role of the current user and granted roles for current role. diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md index 583cce88ca4..8e60cf93bfa 100644 --- a/docs/en/operations/system-tables/errors.md +++ b/docs/en/operations/system-tables/errors.md @@ -1,4 +1,4 @@ -# system.errors {#system_tables-errors} +# errors {#system_tables-errors} Contains error codes with the number of times they have been triggered. diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index 719216a54be..445573ec978 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -1,4 +1,4 @@ -# system.events {#system_tables-events} +# events {#system_tables-events} Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started. diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index 0f1a6184ae1..097b6ccd22a 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -1,4 +1,4 @@ -# system.functions {#system-functions} +# functions {#system-functions} Contains information about normal and aggregate functions. diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index bd0d8c3c5b8..c848972c2d8 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -1,4 +1,4 @@ -# system.grants {#system_tables-grants} +# grants {#system_tables-grants} Privileges granted to ClickHouse user accounts. 
diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index af35da1f6e5..10e265815f4 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -1,4 +1,4 @@ -# system.graphite_retentions {#system-graphite-retentions} +# graphite_retentions {#system-graphite-retentions} Contains information about parameters [graphite_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines. diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 5e8418d0af3..7b977ab4d51 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -1,6 +1,6 @@ --- -toc_priority: 52 -toc_title: System Tables +sidebar_position: 52 +sidebar_label: System Tables --- # System Tables {#system-tables} diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index caef97697a6..fad6e16fd8a 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -1,4 +1,4 @@ -# system.licenses {#system-tables_system.licenses} +# licenses Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources. diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 1f24bdbe0cf..0324d5c633d 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -1,4 +1,4 @@ -# system.merge_tree_settings {#system-merge_tree_settings} +# merge_tree_settings {#system-merge_tree_settings} Contains information about settings for `MergeTree` tables. diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index e9ca30d5f2c..f512e00fc89 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -1,4 +1,4 @@ -# system.merges {#system-merges} +# merges {#system-merges} Contains information about merges and part mutations currently in process for tables in the MergeTree family. diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 875e443d0a6..55b0d800ead 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -1,4 +1,4 @@ -# system.metric_log {#system_tables-metric_log} +# metric_log {#system_tables-metric_log} Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 21e5923e3a0..d4e06e1aca6 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -1,4 +1,4 @@ -# system.metrics {#system_tables-metrics} +# metrics {#system_tables-metrics} Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date. 
diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 66ce500f213..507146d93de 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -1,4 +1,4 @@ -# system.mutations {#system_tables-mutations} +# mutations {#system_tables-mutations} The table contains information about [mutations](../../sql-reference/statements/alter/index.md#mutations) of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. @@ -28,8 +28,9 @@ Columns: - `1` if the mutation is completed, - `0` if the mutation is still in process. -!!! info "Note" - Even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not completed yet because of a long-running `INSERT` query, that will create a new data part needed to be mutated. +:::note +Even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not completed yet because of a long-running `INSERT` query, that will create a new data part needed to be mutated. +::: If there were problems with mutating some data parts, the following columns contain additional information: diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index 774fdf86b76..29828bfe796 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -1,4 +1,4 @@ -# system.numbers {#system-numbers} +# numbers {#system-numbers} This table contains a single UInt64 column named `number` that contains almost all the natural numbers starting from zero. diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index 978a6565b71..02155db4711 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -1,4 +1,4 @@ -# system.numbers_mt {#system-numbers-mt} +# numbers_mt {#system-numbers-mt} The same as [system.numbers](../../operations/system-tables/numbers.md) but reads are parallelized. The numbers can be returned in any order. diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index 293f0412955..9b84c0bfcd6 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -1,4 +1,4 @@ -# system.one {#system-one} +# one {#system-one} This table contains a single row with a single `dummy` UInt8 column containing the value 0. diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index 521c155d0f7..89af72d6620 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -1,4 +1,4 @@ -# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} +# opentelemetry_span_log {#system_tables-opentelemetry_span_log} Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries. 
diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 1fb5b12e87e..00eaca23862 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -1,4 +1,4 @@ -# system.part_log {#system_tables-part-log} +# part_log {#system_tables-part-log} The `system.part_log` table is created only if the [part_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified. diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index cf0f93ecdd6..845c63e5626 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -1,4 +1,4 @@ -# system.parts {#system_tables-parts} +# parts {#system_tables-parts} Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. @@ -96,8 +96,9 @@ Columns: - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). - !!! note "Warning" - The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. +:::warning +The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. +::: - `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md index 0be4324bab5..e87be3fcd43 100644 --- a/docs/en/operations/system-tables/parts_columns.md +++ b/docs/en/operations/system-tables/parts_columns.md @@ -1,4 +1,4 @@ -# system.parts_columns {#system_tables-parts_columns} +# parts_columns {#system_tables-parts_columns} Contains information about parts and columns of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index ee8daf0e5bf..f261ee9b696 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -1,4 +1,4 @@ -# system.processes {#system_tables-processes} +# processes {#system_tables-processes} This system table is used for implementing the `SHOW PROCESSLIST` query. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index e3aab04f7dd..a8fda41f7c2 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -1,9 +1,10 @@ -# system.query_log {#system_tables-query_log} +# query_log {#system_tables-query_log} Contains information about executed queries, for example, start time, duration of processing, error messages. -!!! note "Note" - This table does not contain the ingested data for `INSERT` queries. 
+:::note +This table does not contain the ingested data for `INSERT` queries. +::: You can change settings of queries logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index aa064d675ce..072a311b7db 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -1,4 +1,4 @@ -# system.query_thread_log {#system_tables-query_thread_log} +# query_thread_log {#system_tables-query_thread_log} Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing. diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 6a6bbef45e2..5aa69522869 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -1,4 +1,4 @@ -# system.query_views_log {#system_tables-query_views_log} +# query_views_log {#system_tables-query_views_log} Contains information about the dependent views executed when running a query, for example, the view type or the execution time. diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 708c4e4e33e..e1873ecfa92 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -1,4 +1,4 @@ -# system.quota_limits {#system_tables-quota_limits} +# quota_limits {#system_tables-quota_limits} Contains information about maximums for all intervals of all quotas. Any number of rows or zero can correspond to one quota. diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 6f4d3c9c8ee..ad9f9b8c44f 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -1,4 +1,4 @@ -# system.quota_usage {#system_tables-quota_usage} +# quota_usage {#system_tables-quota_usage} Quota usage by the current user: how much is used and how much is left. diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index bdcc13340f0..0a435919b14 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -1,4 +1,4 @@ -# system.quotas {#system_tables-quotas} +# quotas {#system_tables-quotas} Contains information about [quotas](../../operations/system-tables/quotas.md). diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index 7d39af0f601..43811a75187 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -1,4 +1,4 @@ -# system.quotas_usage {#system_tables-quotas_usage} +# quotas_usage {#system_tables-quotas_usage} Quota usage by all users. diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index ff1057ca8b1..6ec0f184e15 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -1,4 +1,4 @@ -# system.replicas {#system_tables-replicas} +# replicas {#system_tables-replicas} Contains information and status for replicated tables residing on the local server. 
This table can be used for monitoring. The table contains a row for every Replicated\* table. diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md index 241bb609853..438d1572109 100644 --- a/docs/en/operations/system-tables/replicated_fetches.md +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -1,4 +1,4 @@ -# system.replicated_fetches {#system_tables-replicated_fetches} +# replicated_fetches {#system_tables-replicated_fetches} Contains information about currently running background fetches. diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index 0e1d7792996..a8a51162dae 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -1,4 +1,4 @@ -# system.replication_queue {#system_tables-replication_queue} +# replication_queue {#system_tables-replication_queue} Contains information about tasks from replication queues stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 6da221af1a4..cb0c5bf0b0b 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -1,4 +1,4 @@ -#system.role_grants {#system_tables-role_grants} +# role_grants Contains the role grants for users and roles. To add entries to this table, use `GRANT role TO user`. diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 7a71270b6c4..1f8fe349c7f 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -1,4 +1,4 @@ -# system.roles {#system_tables-roles} +# roles {#system_tables-roles} Contains information about configured [roles](../../operations/access-rights.md#role-management). diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 95a26efe952..2bff037751b 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -1,4 +1,4 @@ -# system.row_policies {#system_tables-row_policies} +# row_policies {#system_tables-row_policies} Contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index cf69fd8518e..9ee7e294bfd 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -1,4 +1,4 @@ -# system.session_log {#system_tables-session_log} +# session_log {#system_tables-session_log} Contains information about all successful and failed login and logout events. diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index 5d5eda2abc1..ce6f3cd4724 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -1,4 +1,4 @@ -# system.settings {#system-tables-system-settings} +# settings {#system-tables-system-settings} Contains information about session settings for current user. 
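To complement the `settings` page above, a short illustrative query; only the `name`, `value`, `changed` and `description` columns are relied on, which matches recent releases but is an assumption here.

```sql
-- Session settings that differ from their defaults.
SELECT name, value, description
FROM system.settings
WHERE changed;
```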
diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index d812d8f74e0..5a010d6239a 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -1,4 +1,4 @@ -# system.settings_profile_elements {#system_tables-settings_profile_elements} +# settings_profile_elements {#system_tables-settings_profile_elements} Describes the content of the settings profile: diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index f9b62cf5194..ab2020b375d 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -1,4 +1,4 @@ -# system.settings_profiles {#system_tables-settings_profiles} +# settings_profiles {#system_tables-settings_profiles} Contains properties of configured setting profiles. diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index e2135e4beb6..2aa1c5af125 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -1,4 +1,4 @@ -# system.stack_trace {#system-tables_stack_trace} +# stack_trace {#system-tables_stack_trace} Contains stack traces of all server threads. Allows developers to introspect the server state. diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index c9d2659c289..adbb2f8434e 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -1,4 +1,4 @@ -# system.storage_policies {#system_tables-storage_policies} +# storage_policies {#system_tables-storage_policies} Contains information about storage policies and volumes defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 57fb5b0ff37..d3ac8da1d70 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -1,4 +1,4 @@ -# system.table_engines {#system-table-engines} +# table_engines {#system-table-engines} Contains description of table engines supported by server and their feature support information. diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 0ccf69bc048..8286d51aed6 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -1,4 +1,4 @@ -# system.tables {#system-tables} +# tables {#system-tables} Contains metadata of each table that the server knows about. diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index 9ed0aa1ee5b..e4967dc8d0b 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -1,4 +1,4 @@ -# system.text_log {#system_tables-text_log} +# text_log {#system_tables-text_log} Contains logging entries. The logging level which goes to this table can be limited to the `text_log.level` server setting. 
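Several metadata tables renamed above (`tables`, `table_engines`, `storage_policies`) are commonly queried with a size or database filter; one possible sketch is below. The nullable `total_bytes` column is assumed from recent releases.

```sql
-- Largest tables known to the server, by stored bytes.
SELECT database, name, engine, formatReadableSize(total_bytes) AS size
FROM system.tables
WHERE total_bytes IS NOT NULL
ORDER BY total_bytes DESC
LIMIT 10;
```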
diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 68f16a665cc..899e115152f 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -1,4 +1,4 @@ -# system.time_zones {#system-time_zones} +# time_zones {#system-time_zones} Contains a list of time zones that are supported by the ClickHouse server. This list of timezones might vary depending on the version of ClickHouse. diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index ab08ef7415c..ace5662e919 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -1,4 +1,4 @@ -# system.trace_log {#system_tables-trace_log} +# trace_log {#system_tables-trace_log} Contains stack traces collected by the sampling query profiler. diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index e6ab63b9df5..95691f4497c 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -1,4 +1,4 @@ -# system.users {#system_tables-users} +# users {#system_tables-users} Contains a list of [user accounts](../../operations/access-rights.md#user-account-management) configured at the server. diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index 4be12309240..e8232483f6f 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -1,4 +1,4 @@ -# system.zookeeper {#system-zookeeper} +# zookeeper {#system-zookeeper} The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config. The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for. diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index f7d86c6689b..919c4245d5d 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -1,4 +1,4 @@ -# system.zookeeper_log {#system-zookeeper_log} +# zookeeper_log {#system-zookeeper_log} This table contains information about the parameters of the request to the ZooKeeper server and the response from it. diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index c676c54a223..c727c636579 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -1,6 +1,6 @@ --- -toc_priority: 58 -toc_title: Usage Recommendations +sidebar_position: 58 +sidebar_label: Usage Recommendations --- # Usage Recommendations {#usage-recommendations} @@ -33,8 +33,9 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -!!! warning "Attention" - If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. 
+:::warning +If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. +::: ## Storage Subsystem {#storage-subsystem} @@ -275,4 +276,4 @@ end script If you use antivirus software configure it to skip folders with Clickhouse datafiles (`/var/lib/clickhouse`) otherwise performance may be reduced and you may experience unexpected errors during data ingestion and background merges. -{## [Original article](https://clickhouse.com/docs/en/operations/tips/) ##} +[Original article](https://clickhouse.com/docs/en/operations/tips/) diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index f2695ce8437..e0efe4f57f5 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: Troubleshooting +sidebar_position: 46 +sidebar_label: Troubleshooting --- # Troubleshooting {#troubleshooting} diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index ffb646ffce2..fb5fb7803a9 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -1,6 +1,6 @@ --- -toc_priority: 47 -toc_title: ClickHouse Upgrade +sidebar_position: 47 +sidebar_label: ClickHouse Upgrade --- # ClickHouse Upgrade {#clickhouse-upgrade} @@ -15,8 +15,9 @@ $ sudo service clickhouse-server restart If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. -!!! note "Note" - You can update multiple servers at once as soon as there is no moment when all replicas of one shard are offline. +:::note +You can update multiple servers at once as soon as there is no moment when all replicas of one shard are offline. +::: The upgrade of older version of ClickHouse to specific version: diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 6aa5ea556fc..3a52ec92dc3 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -1,9 +1,9 @@ --- -toc_priority: 61 -toc_title: clickhouse-benchmark +sidebar_position: 61 +sidebar_label: clickhouse-benchmark --- -# clickhouse-benchmark {#clickhouse-benchmark} +# clickhouse-benchmark Connects to a ClickHouse server and repeatedly sends specified queries. diff --git a/docs/en/operations/utilities/clickhouse-compressor.md b/docs/en/operations/utilities/clickhouse-compressor.md index 44a1f052824..2f8f4794ba8 100644 --- a/docs/en/operations/utilities/clickhouse-compressor.md +++ b/docs/en/operations/utilities/clickhouse-compressor.md @@ -1,4 +1,5 @@ -## ClickHouse compressor + +# clickhouse-compressor Simple program for data compression and decompression. 
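The upgrade note above recommends never having every replica of a shard offline at the same time. Here is a hedged sketch of a pre-upgrade health check against `system.replicas`; the column names follow the existing `replicas` page and recent releases, and the thresholds are arbitrary.

```sql
-- Replicated tables that look unhealthy; ideally this returns no rows
-- before the next server in a rolling upgrade is restarted.
SELECT database, table, is_readonly, absolute_delay, queue_size, active_replicas, total_replicas
FROM system.replicas
WHERE is_readonly
   OR is_session_expired
   OR absolute_delay > 60
   OR active_replicas < total_replicas;
```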
diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index 6587d45abd9..f152c177992 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -1,14 +1,15 @@ --- -toc_priority: 59 -toc_title: clickhouse-copier +sidebar_position: 59 +sidebar_label: clickhouse-copier --- -# clickhouse-copier {#clickhouse-copier} +# clickhouse-copier Copies data from the tables in one cluster to tables in another (or the same) cluster. -!!! warning "Warning" - To get a consistent copy, the data in the source tables and partitions should not change during the entire process. +:::warning +To get a consistent copy, the data in the source tables and partitions should not change during the entire process. +::: You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ZooKeeper is used for syncing the processes. diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md index 333f127e125..219a170fc23 100644 --- a/docs/en/operations/utilities/clickhouse-format.md +++ b/docs/en/operations/utilities/clickhouse-format.md @@ -1,9 +1,4 @@ ---- -toc_priority: 65 -toc_title: clickhouse-format ---- - -# clickhouse-format {#clickhouse-format} +# clickhouse-format Allows formatting input queries. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 9d28dffbc16..3c35ab933e2 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -1,9 +1,9 @@ --- -toc_priority: 60 -toc_title: clickhouse-local +sidebar_position: 60 +sidebar_label: clickhouse-local --- -# clickhouse-local {#clickhouse-local} +# clickhouse-local The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. @@ -13,8 +13,9 @@ Accepts data that represent tables and queries them using [ClickHouse SQL dialec By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument. -!!! warning "Warning" - It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. +:::warning +It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. +::: For temporary data, a unique temporary data directory is created by default. diff --git a/docs/en/operations/utilities/clickhouse-obfuscator.md b/docs/en/operations/utilities/clickhouse-obfuscator.md index b01a7624b56..baa0f19dda9 100644 --- a/docs/en/operations/utilities/clickhouse-obfuscator.md +++ b/docs/en/operations/utilities/clickhouse-obfuscator.md @@ -1,4 +1,4 @@ -# ClickHouse obfuscator +# clickhouse-obfuscator A simple tool for table data obfuscation. 
diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index e307f9fde0c..7fdc783f9c4 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Utilities -toc_priority: 56 -toc_title: Overview +sidebar_position: 56 +sidebar_label: Utilities --- -# ClickHouse Utility {#clickhouse-utility} +# ClickHouse Utility - [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. - [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. From 560471f991a0231162c7892b0471d8eabbd967b6 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Tue, 29 Mar 2022 22:06:21 -0600 Subject: [PATCH 045/117] Update /sql-reference docs --- docs/en/development/architecture.md | 12 ++--- docs/en/development/build-osx.md | 5 ++- .../en/engines/database-engines/replicated.md | 5 ++- docs/en/example-datasets/nyc-taxi.md | 5 ++- docs/en/example-datasets/ontime.md | 5 ++- docs/en/example-datasets/star-schema.md | 5 ++- docs/en/install.md | 7 +-- docs/en/sql-reference/_category_.yml | 5 +-- .../aggregate-functions/combinators.md | 4 +- .../aggregate-functions/index.md | 7 ++- .../parametric-functions.md | 14 +++--- .../aggregate-functions/reference/any.md | 2 +- .../aggregate-functions/reference/anyheavy.md | 2 +- .../aggregate-functions/reference/anylast.md | 2 +- .../aggregate-functions/reference/argmax.md | 2 +- .../aggregate-functions/reference/argmin.md | 2 +- .../aggregate-functions/reference/avg.md | 2 +- .../reference/avgweighted.md | 2 +- .../reference/categoricalinformationvalue.md | 2 +- .../aggregate-functions/reference/corr.md | 7 +-- .../aggregate-functions/reference/count.md | 2 +- .../aggregate-functions/reference/covarpop.md | 7 +-- .../reference/covarsamp.md | 7 +-- .../aggregate-functions/reference/deltasum.md | 7 +-- .../reference/deltasumtimestamp.md | 2 +- .../aggregate-functions/reference/entropy.md | 2 +- .../reference/exponentialmovingaverage.md | 2 +- .../reference/grouparray.md | 2 +- .../reference/grouparrayinsertat.md | 2 +- .../reference/grouparraymovingavg.md | 2 +- .../reference/grouparraymovingsum.md | 2 +- .../reference/grouparraysample.md | 2 +- .../reference/groupbitand.md | 2 +- .../reference/groupbitmap.md | 2 +- .../reference/groupbitmapand.md | 2 +- .../reference/groupbitmapor.md | 2 +- .../reference/groupbitmapxor.md | 2 +- .../reference/groupbitor.md | 2 +- .../reference/groupbitxor.md | 2 +- .../reference/groupuniqarray.md | 2 +- .../aggregate-functions/reference/index.md | 2 +- .../reference/intervalLengthSum.md | 9 ++-- .../aggregate-functions/reference/kurtpop.md | 2 +- .../aggregate-functions/reference/kurtsamp.md | 2 +- .../reference/mannwhitneyutest.md | 4 +- .../aggregate-functions/reference/max.md | 2 +- .../aggregate-functions/reference/maxmap.md | 2 +- .../reference/meanztest.md | 4 +- .../aggregate-functions/reference/median.md | 2 +- .../aggregate-functions/reference/min.md | 2 +- .../aggregate-functions/reference/minmap.md | 2 +- .../aggregate-functions/reference/quantile.md | 2 +- .../reference/quantilebfloat16.md | 2 +- .../reference/quantiledeterministic.md | 2 +- .../reference/quantileexact.md | 2 +- .../reference/quantileexactweighted.md | 2 +- .../reference/quantiles.md | 2 +- .../reference/quantiletdigest.md | 2 +- .../reference/quantiletdigestweighted.md | 7 +-- 
.../reference/quantiletiming.md | 12 ++--- .../reference/quantiletimingweighted.md | 12 ++--- .../aggregate-functions/reference/rankCorr.md | 2 +- .../reference/simplelinearregression.md | 2 +- .../aggregate-functions/reference/skewpop.md | 2 +- .../aggregate-functions/reference/skewsamp.md | 2 +- .../aggregate-functions/reference/sparkbar.md | 4 +- .../reference/stddevpop.md | 7 +-- .../reference/stddevsamp.md | 7 +-- .../reference/stochasticlinearregression.md | 2 +- .../reference/stochasticlogisticregression.md | 2 +- .../reference/studentttest.md | 4 +- .../aggregate-functions/reference/sum.md | 2 +- .../aggregate-functions/reference/sumcount.md | 2 +- .../aggregate-functions/reference/sumkahan.md | 2 +- .../aggregate-functions/reference/summap.md | 2 +- .../reference/sumwithoverflow.md | 2 +- .../aggregate-functions/reference/topk.md | 2 +- .../reference/topkweighted.md | 2 +- .../aggregate-functions/reference/uniq.md | 2 +- .../reference/uniqcombined.md | 7 +-- .../reference/uniqcombined64.md | 2 +- .../reference/uniqexact.md | 2 +- .../reference/uniqhll12.md | 2 +- .../reference/uniqthetasketch.md | 2 +- .../aggregate-functions/reference/varpop.md | 7 +-- .../aggregate-functions/reference/varsamp.md | 7 +-- .../reference/welchttest.md | 4 +- docs/en/sql-reference/ansi.md | 9 ++-- .../data-types/aggregatefunction.md | 4 +- docs/en/sql-reference/data-types/array.md | 4 +- docs/en/sql-reference/data-types/boolean.md | 4 +- docs/en/sql-reference/data-types/date.md | 4 +- docs/en/sql-reference/data-types/date32.md | 4 +- docs/en/sql-reference/data-types/datetime.md | 4 +- .../en/sql-reference/data-types/datetime64.md | 4 +- docs/en/sql-reference/data-types/decimal.md | 4 +- .../sql-reference/data-types/domains/index.md | 5 +-- .../sql-reference/data-types/domains/ipv4.md | 4 +- .../sql-reference/data-types/domains/ipv6.md | 4 +- docs/en/sql-reference/data-types/enum.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- docs/en/sql-reference/data-types/float.md | 4 +- docs/en/sql-reference/data-types/geo.md | 9 ++-- docs/en/sql-reference/data-types/index.md | 7 ++- docs/en/sql-reference/data-types/int-uint.md | 4 +- .../data-types/lowcardinality.md | 4 +- docs/en/sql-reference/data-types/map.md | 4 +- .../data-types/multiword-types.md | 4 +- .../nested-data-structures/index.md | 6 +-- .../nested-data-structures/nested.md | 4 +- docs/en/sql-reference/data-types/nullable.md | 9 ++-- .../data-types/simpleaggregatefunction.md | 7 +-- .../special-data-types/expression.md | 4 +- .../data-types/special-data-types/index.md | 6 +-- .../data-types/special-data-types/interval.md | 9 ++-- .../data-types/special-data-types/nothing.md | 4 +- .../data-types/special-data-types/set.md | 4 +- docs/en/sql-reference/data-types/string.md | 4 +- docs/en/sql-reference/data-types/tuple.md | 4 +- docs/en/sql-reference/data-types/uuid.md | 4 +- .../external-dictionaries/_category_.yml | 7 +++ .../external-dicts-dict-hierarchical.md | 6 +-- .../external-dicts-dict-layout.md | 16 ++++--- .../external-dicts-dict-lifetime.md | 6 +-- .../external-dicts-dict-polygon.md | 6 +-- .../external-dicts-dict-sources.md | 36 ++++++++------- .../external-dicts-dict-structure.md | 18 ++++---- .../external-dicts-dict.md | 6 +-- .../external-dictionaries/external-dicts.md | 11 ++--- .../external-dictionaries/index.md | 6 --- docs/en/sql-reference/dictionaries/index.md | 5 +-- .../dictionaries/internal-dicts.md | 4 +- docs/en/sql-reference/distributed-ddl.md | 9 ++-- .../functions/arithmetic-functions.md | 4 +- 
.../functions/array-functions.md | 19 ++++---- docs/en/sql-reference/functions/array-join.md | 4 +- .../sql-reference/functions/bit-functions.md | 4 +- .../functions/bitmap-functions.md | 4 +- .../functions/comparison-functions.md | 4 +- .../functions/conditional-functions.md | 4 +- .../functions/date-time-functions.md | 14 +++--- .../functions/encoding-functions.md | 14 +++--- .../functions/encryption-functions.md | 4 +- .../functions/ext-dict-functions.md | 9 ++-- docs/en/sql-reference/functions/files.md | 4 +- .../functions/functions-for-nulls.md | 4 +- .../functions/geo/coordinates.md | 4 +- .../en/sql-reference/functions/geo/geohash.md | 12 ++--- docs/en/sql-reference/functions/geo/h3.md | 2 +- docs/en/sql-reference/functions/geo/index.md | 6 +-- docs/en/sql-reference/functions/geo/s2.md | 2 +- .../sql-reference/functions/hash-functions.md | 4 +- .../sql-reference/functions/in-functions.md | 4 +- docs/en/sql-reference/functions/index.md | 7 ++- .../sql-reference/functions/introspection.md | 9 ++-- .../functions/ip-address-functions.md | 4 +- .../sql-reference/functions/json-functions.md | 19 ++++---- .../functions/logical-functions.md | 4 +- .../functions/machine-learning-functions.md | 4 +- .../sql-reference/functions/math-functions.md | 4 +- .../sql-reference/functions/nlp-functions.md | 9 ++-- .../functions/other-functions.md | 30 +++++++------ .../functions/random-functions.md | 9 ++-- .../functions/rounding-functions.md | 4 +- .../functions/splitting-merging-functions.md | 4 +- .../functions/string-functions.md | 9 ++-- .../functions/string-replace-functions.md | 9 ++-- .../functions/string-search-functions.md | 44 +++++++++++-------- .../functions/time-window-functions.md | 4 +- .../functions/tuple-functions.md | 4 +- .../functions/tuple-map-functions.md | 4 +- .../functions/type-conversion-functions.md | 14 +++--- .../sql-reference/functions/url-functions.md | 4 +- .../sql-reference/functions/uuid-functions.md | 4 +- .../functions/ym-dict-functions.md | 4 +- docs/en/sql-reference/index.md | 6 +-- docs/en/sql-reference/operators/exists.md | 5 ++- docs/en/sql-reference/operators/in.md | 5 ++- docs/en/sql-reference/operators/index.md | 15 ++++--- .../sql-reference/statements/alter/column.md | 9 ++-- .../sql-reference/statements/alter/comment.md | 4 +- .../statements/alter/constraint.md | 9 ++-- .../sql-reference/statements/alter/delete.md | 9 ++-- .../sql-reference/statements/alter/index.md | 16 ++++--- .../statements/alter/index/index.md | 9 ++-- .../statements/alter/order-by.md | 9 ++-- .../statements/alter/partition.md | 14 +++--- .../statements/alter/projection.md | 9 ++-- .../sql-reference/statements/alter/quota.md | 4 +- .../en/sql-reference/statements/alter/role.md | 4 +- .../statements/alter/row-policy.md | 4 +- .../statements/alter/sample-by.md | 9 ++-- .../sql-reference/statements/alter/setting.md | 10 ++--- .../statements/alter/settings-profile.md | 4 +- docs/en/sql-reference/statements/alter/ttl.md | 4 +- .../sql-reference/statements/alter/update.md | 9 ++-- .../en/sql-reference/statements/alter/user.md | 4 +- .../en/sql-reference/statements/alter/view.md | 4 +- docs/en/sql-reference/statements/attach.md | 4 +- .../sql-reference/statements/check-table.md | 4 +- .../statements/create/database.md | 4 +- .../statements/create/dictionary.md | 4 +- .../statements/create/function.md | 4 +- .../sql-reference/statements/create/index.md | 7 ++- .../sql-reference/statements/create/quota.md | 4 +- .../sql-reference/statements/create/role.md | 4 +- 
.../statements/create/row-policy.md | 22 +++++----- .../statements/create/settings-profile.md | 4 +- .../sql-reference/statements/create/table.md | 39 +++++++++------- .../sql-reference/statements/create/user.md | 10 ++--- .../sql-reference/statements/create/view.md | 39 ++++++++-------- .../statements/describe-table.md | 4 +- docs/en/sql-reference/statements/detach.md | 4 +- docs/en/sql-reference/statements/drop.md | 4 +- docs/en/sql-reference/statements/exchange.md | 9 ++-- docs/en/sql-reference/statements/exists.md | 4 +- docs/en/sql-reference/statements/explain.md | 15 ++++--- docs/en/sql-reference/statements/grant.md | 4 +- docs/en/sql-reference/statements/index.md | 7 ++- .../sql-reference/statements/insert-into.md | 6 +-- docs/en/sql-reference/statements/kill.md | 4 +- docs/en/sql-reference/statements/misc.md | 2 +- docs/en/sql-reference/statements/optimize.md | 22 ++++++---- docs/en/sql-reference/statements/rename.md | 9 ++-- docs/en/sql-reference/statements/revoke.md | 4 +- .../en/sql-reference/statements/select/all.md | 2 +- .../statements/select/array-join.md | 2 +- .../statements/select/distinct.md | 2 +- .../sql-reference/statements/select/except.md | 2 +- .../sql-reference/statements/select/format.md | 2 +- .../sql-reference/statements/select/from.md | 2 +- .../statements/select/group-by.md | 22 ++++++---- .../sql-reference/statements/select/having.md | 2 +- .../sql-reference/statements/select/index.md | 8 ++-- .../statements/select/intersect.md | 2 +- .../statements/select/into-outfile.md | 2 +- .../sql-reference/statements/select/join.md | 17 ++++--- .../statements/select/limit-by.md | 7 +-- .../sql-reference/statements/select/limit.md | 7 +-- .../sql-reference/statements/select/offset.md | 12 ++--- .../statements/select/order-by.md | 2 +- .../statements/select/prewhere.md | 7 +-- .../sql-reference/statements/select/sample.md | 7 +-- .../sql-reference/statements/select/union.md | 2 +- .../sql-reference/statements/select/where.md | 7 +-- .../sql-reference/statements/select/with.md | 2 +- docs/en/sql-reference/statements/set-role.md | 4 +- docs/en/sql-reference/statements/set.md | 4 +- docs/en/sql-reference/statements/show.md | 9 ++-- docs/en/sql-reference/statements/system.md | 14 +++--- docs/en/sql-reference/statements/truncate.md | 9 ++-- docs/en/sql-reference/statements/use.md | 4 +- docs/en/sql-reference/statements/watch.md | 16 +++---- docs/en/sql-reference/syntax.md | 4 +- .../sql-reference/table-functions/cluster.md | 9 ++-- .../table-functions/dictionary.md | 4 +- docs/en/sql-reference/table-functions/file.md | 9 ++-- .../sql-reference/table-functions/generate.md | 4 +- docs/en/sql-reference/table-functions/hdfs.md | 9 ++-- .../table-functions/hdfsCluster.md | 9 ++-- .../en/sql-reference/table-functions/index.md | 12 ++--- .../en/sql-reference/table-functions/input.md | 4 +- docs/en/sql-reference/table-functions/jdbc.md | 4 +- .../en/sql-reference/table-functions/merge.md | 4 +- .../en/sql-reference/table-functions/mysql.md | 9 ++-- docs/en/sql-reference/table-functions/null.md | 4 +- .../sql-reference/table-functions/numbers.md | 4 +- docs/en/sql-reference/table-functions/odbc.md | 4 +- .../table-functions/postgresql.md | 14 +++--- .../sql-reference/table-functions/remote.md | 4 +- docs/en/sql-reference/table-functions/s3.md | 9 ++-- .../table-functions/s3Cluster.md | 9 ++-- .../sql-reference/table-functions/sqlite.md | 4 +- docs/en/sql-reference/table-functions/url.md | 4 +- docs/en/sql-reference/table-functions/view.md | 4 +- 
.../sql-reference/window-functions/index.md | 4 +- 276 files changed, 903 insertions(+), 787 deletions(-) create mode 100644 docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml delete mode 100644 docs/en/sql-reference/dictionaries/external-dictionaries/index.md diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index d824ace0c65..b5cb6c321ac 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -155,8 +155,9 @@ The server initializes the `Context` class with the necessary environment for qu We maintain full backward and forward compatibility for the server TCP protocol: old clients can talk to new servers, and new clients can talk to old servers. But we do not want to maintain it eternally, and we are removing support for old versions after about one year. -!!! note "Note" - For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven’t released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical. +:::note +For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven’t released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical. +::: ## Distributed Query Execution {#distributed-query-execution} @@ -194,7 +195,8 @@ Replication is physical: only compressed parts are transferred between nodes, no Besides, each replica stores its state in ZooKeeper as the set of parts and its checksums. When the state on the local filesystem diverges from the reference state in ZooKeeper, the replica restores its consistency by downloading missing and broken parts from other replicas. When there is some unexpected or broken data in the local filesystem, ClickHouse does not remove it, but moves it to a separate directory and forgets it. -!!! note "Note" - The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically. +:::note +The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. 
We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically. +::: -{## [Original article](https://clickhouse.com/docs/en/development/architecture/) ##} +[Original article](https://clickhouse.com/docs/en/development/architecture/) diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 5d5706f6e6b..05ef10ad020 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -94,8 +94,9 @@ cmake --build . --config RelWithDebInfo If you intend to run `clickhouse-server`, make sure to increase the system’s maxfiles variable. -!!! info "Note" - You’ll need to use sudo. +:::note +You’ll need to use sudo. +::: To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the following content: diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 07d6fcd9ece..63d955dc889 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -20,8 +20,9 @@ One ClickHouse server can have multiple replicated databases running and updatin - `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. - `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. -!!! note "Warning" - For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. +:::warning +For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. +::: ## Specifics and Recommendations {#specifics-and-recommendations} diff --git a/docs/en/example-datasets/nyc-taxi.md b/docs/en/example-datasets/nyc-taxi.md index da7be71d46b..270aeb4929c 100644 --- a/docs/en/example-datasets/nyc-taxi.md +++ b/docs/en/example-datasets/nyc-taxi.md @@ -290,8 +290,9 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "select count(*) from datasets.trips_mergetree" ``` -!!! info "Info" - If you will run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. 
+:::info +If you run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. +::: ## Results on Single Server {#results-on-single-server} diff --git a/docs/en/example-datasets/ontime.md b/docs/en/example-datasets/ontime.md index 51df6186bd5..bb3c3644972 100644 --- a/docs/en/example-datasets/ontime.md +++ b/docs/en/example-datasets/ontime.md @@ -156,8 +156,9 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "select count(*) from datasets.ontime" ``` -!!! info "Info" - If you will run the queries described below, you have to use the full table name, `datasets.ontime`. +:::note +If you run the queries described below, you have to use the full table name, `datasets.ontime`. +::: ## Queries {#queries} diff --git a/docs/en/example-datasets/star-schema.md b/docs/en/example-datasets/star-schema.md index a8949ef74b9..35ff492c360 100644 --- a/docs/en/example-datasets/star-schema.md +++ b/docs/en/example-datasets/star-schema.md @@ -17,8 +17,9 @@ $ make Generating data: -!!! warning "Attention" - With `-s 100` dbgen generates 600 million rows (67 GB), while while `-s 1000` it generates 6 billion rows (which takes a lot of time) +:::warning +With `-s 100` dbgen generates 600 million rows (67 GB), while with `-s 1000` it generates 6 billion rows (which takes a lot of time) +::: ``` bash $ ./dbgen -s 1000 -T c diff --git a/docs/en/install.md b/docs/en/install.md index 35021b5bb8d..a5405143d77 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -69,9 +69,10 @@ You can also download and install packages manually from [here](https://packages - `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools. and installs client configuration files. - `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info. -!!! 
attention "Attention" - If you need to install specific version of ClickHouse you have to install all packages with the same version: - `sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` +:::info +If you need to install specific version of ClickHouse you have to install all packages with the same version: +`sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` +::: ### From RPM Packages {#from-rpm-packages} diff --git a/docs/en/sql-reference/_category_.yml b/docs/en/sql-reference/_category_.yml index cfddcf46548..049ba20f1f5 100644 --- a/docs/en/sql-reference/_category_.yml +++ b/docs/en/sql-reference/_category_.yml @@ -1,7 +1,4 @@ position: 15 label: 'SQL Reference' collapsible: true -collapsed: true -link: - type: generated-index - title: SQL Reference \ No newline at end of file +collapsed: true \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index daa93adaaae..6a8c178919c 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: Combinators +sidebar_position: 37 +sidebar_label: Combinators --- # Aggregate Function Combinators {#aggregate_functions_combinators} diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index d2b46f6de53..1e6cc0f88c2 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Aggregate Functions -toc_priority: 33 -toc_title: Introduction +sidebar_label: Aggregate Functions +sidebar_position: 33 --- -# Aggregate Functions {#aggregate-functions} +# Aggregate Functions Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3adedd0ae70..7708bcb8129 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: Parametric +sidebar_position: 38 +sidebar_label: Parametric --- # Parametric Aggregate Functions {#aggregate_functions_parametric} @@ -89,8 +89,9 @@ Checks whether the sequence contains an event chain that matches the pattern. sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -!!! warning "Warning" - Events that occur at the same second may lay in the sequence in an undefined order affecting the result. +:::warning +Events that occur at the same second may lay in the sequence in an undefined order affecting the result. +::: **Arguments** @@ -174,8 +175,9 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. -!!! warning "Warning" - Events that occur at the same second may lay in the sequence in an undefined order affecting the result. +:::warning +Events that occur at the same second may lay in the sequence in an undefined order affecting the result. 
+::: ``` sql sequenceCount(pattern)(timestamp, cond1, cond2, ...) diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index 16306597983..3b5539c5b8d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -1,5 +1,5 @@ --- -toc_priority: 6 +sidebar_position: 6 --- # any {#agg_function-any} diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 5c9d6875c51..491754453e3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -1,5 +1,5 @@ --- -toc_priority: 103 +sidebar_position: 103 --- # anyHeavy {#anyheavyx} diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index 43ac72318f2..2a01a587f70 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -1,5 +1,5 @@ --- -toc_priority: 104 +sidebar_position: 104 --- ## anyLast {#anylastx} diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 0630e2f585e..f09bcd0bba2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -1,5 +1,5 @@ --- -toc_priority: 106 +sidebar_position: 106 --- # argMax {#agg-function-argmax} diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index a259a76b7d7..926fda5a512 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -1,5 +1,5 @@ --- -toc_priority: 105 +sidebar_position: 105 --- # argMin {#agg-function-argmin} diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index 9a22faedf7c..b7b5e9fbed4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -1,5 +1,5 @@ --- -toc_priority: 5 +sidebar_position: 5 --- # avg {#agg_function-avg} diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 5f4d846e81b..126c0c2f1d7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -1,5 +1,5 @@ --- -toc_priority: 107 +sidebar_position: 107 --- # avgWeighted {#avgweighted} diff --git a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md index 2e9001dec19..e836dbe868a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md +++ b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md @@ -1,5 +1,5 @@ --- -toc_priority: 250 +sidebar_position: 250 --- # categoricalInformationValue {#categoricalinformationvalue} diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md 
b/docs/en/sql-reference/aggregate-functions/reference/corr.md index 88f9295a8f2..c6d7fd5baed 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md @@ -1,5 +1,5 @@ --- -toc_priority: 107 +sidebar_position: 107 --- # corr {#corrx-y} @@ -8,5 +8,6 @@ Syntax: `corr(x, y)` Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index 073fd267c42..8df4aef9d03 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -1,5 +1,5 @@ --- -toc_priority: 1 +sidebar_position: 1 --- # count {#agg_function-count} diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md index 2a7d805763e..363a98c3f16 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -1,5 +1,5 @@ --- -toc_priority: 36 +sidebar_position: 36 --- # covarPop {#covarpop} @@ -8,5 +8,6 @@ Syntax: `covarPop(x, y)` Calculates the value of `Σ((x - x̅)(y - y̅)) / n`. -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index 4bdb1b02d40..977b3f3b5b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -1,5 +1,5 @@ --- -toc_priority: 37 +sidebar_position: 37 --- # covarSamp {#covarsamp} @@ -8,5 +8,6 @@ Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. Returns Float64. When `n <= 1`, returns +∞. -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. 
+::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index 2945084db77..ac35938e26d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -1,13 +1,14 @@ --- -toc_priority: 141 +sidebar_position: 141 --- # deltaSum {#agg_functions-deltasum} Sums the arithmetic difference between consecutive rows. If the difference is negative, it is ignored. -!!! info "Note" - The underlying data must be sorted for this function to work properly. If you would like to use this function in a [materialized view](../../../sql-reference/statements/create/view.md#materialized), you most likely want to use the [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp) method instead. +:::note +The underlying data must be sorted for this function to work properly. If you would like to use this function in a [materialized view](../../../sql-reference/statements/create/view.md#materialized), you most likely want to use the [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp) method instead. +::: **Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index 7238f73bc0d..e1024e58328 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -1,5 +1,5 @@ --- -toc_priority: 141 +sidebar_position: 141 --- # deltaSumTimestamp {#agg_functions-deltasumtimestamp} diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index 5ebb678e6b5..9f1576c3ed8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -1,5 +1,5 @@ --- -toc_priority: 302 +sidebar_position: 302 --- # entropy {#entropy} diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index cfc9b6cd58e..2337a0c8dab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -1,5 +1,5 @@ --- -toc_priority: 108 +sidebar_position: 108 --- ## exponentialMovingAverage {#exponential-moving-average} diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md index 86b7b83022b..348ac98c75b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -1,5 +1,5 @@ --- -toc_priority: 110 +sidebar_position: 110 --- # groupArray {#agg_function-grouparray} diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index d29550b007e..0699326725e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -1,5 +1,5 
@@ --- -toc_priority: 112 +sidebar_position: 112 --- # groupArrayInsertAt {#grouparrayinsertat} diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index c732efecf58..dc3cc74721e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -1,5 +1,5 @@ --- -toc_priority: 114 +sidebar_position: 114 --- # groupArrayMovingAvg {#agg_function-grouparraymovingavg} diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index c3dfeda850e..563280b7dec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -1,5 +1,5 @@ --- -toc_priority: 113 +sidebar_position: 113 --- # groupArrayMovingSum {#agg_function-grouparraymovingsum} diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index bd170ead577..f0406ddc93c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -1,5 +1,5 @@ --- -toc_priority: 114 +sidebar_position: 114 --- # groupArraySample {#grouparraysample} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 1275ad7536c..0ebb9aec495 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -1,5 +1,5 @@ --- -toc_priority: 125 +sidebar_position: 125 --- # groupBitAnd {#groupbitand} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 9317ef98783..7f1fee6a9f0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -1,5 +1,5 @@ --- -toc_priority: 128 +sidebar_position: 128 --- # groupBitmap {#groupbitmap} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index f59bb541a42..89c94547f8b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -1,5 +1,5 @@ --- -toc_priority: 129 +sidebar_position: 129 --- # groupBitmapAnd {#groupbitmapand} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index d3f40f63f65..172a3bb29ac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -1,5 +1,5 @@ --- -toc_priority: 130 +sidebar_position: 130 --- # groupBitmapOr {#groupbitmapor} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index cbe01e08145..52c45815cc5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -1,5 +1,5 @@ --- -toc_priority: 131 +sidebar_position: 131 --- # groupBitmapXor {#groupbitmapxor} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 24077de0adc..c1ee1c40894 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -1,5 +1,5 @@ --- -toc_priority: 126 +sidebar_position: 126 --- # groupBitOr {#groupbitor} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 4b8323f92db..472bcdf65c1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -1,5 +1,5 @@ --- -toc_priority: 127 +sidebar_position: 127 --- # groupBitXor {#groupbitxor} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md index 537212e5b94..9b5058032e5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md @@ -1,5 +1,5 @@ --- -toc_priority: 111 +sidebar_position: 111 --- # groupUniqArray {#groupuniqarray} diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 59befed8785..cd71bca2556 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: Reference -toc_priority: 36 +sidebar_position: 36 toc_hidden: true --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index 05adbb2ffe8..33c5686cbbc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -1,6 +1,6 @@ --- -toc_priority: 146 -toc_title: intervalLengthSum +sidebar_position: 146 +sidebar_label: intervalLengthSum --- # intervalLengthSum {#agg_function-intervallengthsum} @@ -18,8 +18,9 @@ intervalLengthSum(start, end) - `start` — The starting value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). - `end` — The ending value of the interval. 
[Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). -!!! info "Note" - Arguments must be of the same data type. Otherwise, an exception will be thrown. +:::note +Arguments must be of the same data type. Otherwise, an exception will be thrown. +::: **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index c51c4b92e74..5640e69ba7c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -1,5 +1,5 @@ --- -toc_priority: 153 +sidebar_position: 153 --- # kurtPop {#kurtpop} diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 0ee40138adc..c0768edaf2d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -1,5 +1,5 @@ --- -toc_priority: 154 +sidebar_position: 154 --- # kurtSamp {#kurtsamp} diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index fe97f7edbf8..32e56b8de10 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -1,6 +1,6 @@ --- -toc_priority: 310 -toc_title: mannWhitneyUTest +sidebar_position: 310 +sidebar_label: mannWhitneyUTest --- # mannWhitneyUTest {#mannwhitneyutest} diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md b/docs/en/sql-reference/aggregate-functions/reference/max.md index 25173a48906..845d0c5ecee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/max.md +++ b/docs/en/sql-reference/aggregate-functions/reference/max.md @@ -1,5 +1,5 @@ --- -toc_priority: 3 +sidebar_position: 3 --- # max {#agg_function-max} diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index c62502cf46e..243a3375552 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -1,5 +1,5 @@ --- -toc_priority: 143 +sidebar_position: 143 --- # maxMap {#agg_functions-maxmap} diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 7d016f42819..02b89b1b31d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -1,6 +1,6 @@ --- -toc_priority: 303 -toc_title: meanZTest +sidebar_position: 303 +sidebar_label: meanZTest --- # meanZTest {#meanztest} diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 619e9a5093e..3e84b4b169c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -1,5 +1,5 @@ --- -toc_priority: 212 +sidebar_position: 212 --- # median {#median} diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md index 64b155857f8..0525066e9f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/min.md +++ b/docs/en/sql-reference/aggregate-functions/reference/min.md @@ -1,5 +1,5 @@ --- -toc_priority: 2 +sidebar_position: 2 --- ## min {#agg_function-min} diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index 9408d0ddfff..8a4d50dd46c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -1,5 +1,5 @@ --- -toc_priority: 142 +sidebar_position: 142 --- # minMap {#agg_functions-minmap} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index b6f38e57342..6a0479da77f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -1,5 +1,5 @@ --- -toc_priority: 200 +sidebar_position: 200 --- # quantile {#quantile} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md index 728c200441d..f0bd51f0add 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -1,5 +1,5 @@ --- -toc_priority: 209 +sidebar_position: 209 --- # quantileBFloat16 {#quantilebfloat16} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index a20ac26f599..bb23ce63cea 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -1,5 +1,5 @@ --- -toc_priority: 206 +sidebar_position: 206 --- # quantileDeterministic {#quantiledeterministic} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index bfd9d1e5a55..b3a384b0cfd 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -1,5 +1,5 @@ --- -toc_priority: 202 +sidebar_position: 202 --- # quantileExact Functions {#quantileexact-functions} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 210f44e7587..4740d4a26f8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -1,5 +1,5 @@ --- -toc_priority: 203 +sidebar_position: 203 --- # quantileExactWeighted {#quantileexactweighted} diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 9777570be83..6d0cf37f25e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -1,5 +1,5 @@ --- -toc_priority: 201 +sidebar_position: 201 --- # quantiles Functions {#quantiles-functions} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index dd0d59978d1..f42c88b2aca 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -1,5 +1,5 @@ --- -toc_priority: 207 +sidebar_position: 207 --- # quantileTDigest {#quantiletdigest} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 70f30f3a480..684e438f0c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -1,5 +1,5 @@ --- -toc_priority: 208 +sidebar_position: 208 --- # quantileTDigestWeighted {#quantiletdigestweighted} @@ -12,8 +12,9 @@ The result depends on the order of running the query, and is nondeterministic. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. -!!! note "Note" - Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead. +:::note +Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider the possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead. +::: **Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index dd545c1a485..f282f7e2004 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -1,5 +1,5 @@ --- -toc_priority: 204 +sidebar_position: 204 --- # quantileTiming {#quantiletiming} @@ -36,8 +36,9 @@ The calculation is accurate if: Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +:::note +For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). 
+::: **Returned value** @@ -45,8 +46,9 @@ Otherwise, the result of the calculation is rounded to the nearest multiple of 1 Type: `Float32`. -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. +:::note +If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. +::: **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 25846cde636..c773f900764 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -1,5 +1,5 @@ --- -toc_priority: 205 +sidebar_position: 205 --- # quantileTimingWeighted {#quantiletimingweighted} @@ -38,8 +38,9 @@ The calculation is accurate if: Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +:::note +For calculating page loading time quantiles, this function is more effective and accurate than [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +::: **Returned value** @@ -47,8 +48,9 @@ Otherwise, the result of the calculation is rounded to the nearest multiple of 1 Type: `Float32`. -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. +:::note +If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. 
+::: **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index b364317c22b..399fd88cf0e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -1,5 +1,5 @@ --- -toc_priority: 145 +sidebar_position: 145 --- # rankCorr {#agg_function-rankcorr} diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index fee71cdeb49..8684cd4c3bb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -1,5 +1,5 @@ --- -toc_priority: 220 +sidebar_position: 220 --- # simpleLinearRegression {#simplelinearregression} diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index f84f8897a35..4cb3d58304f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -1,5 +1,5 @@ --- -toc_priority: 150 +sidebar_position: 150 --- # skewPop {#skewpop} diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index 48a049ca69d..92e807d2d7d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -1,5 +1,5 @@ --- -toc_priority: 151 +sidebar_position: 151 --- # skewSamp {#skewsamp} diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 47c696129c7..ebb9cccbd40 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,6 +1,6 @@ --- -toc_priority: 311 -toc_title: sparkbar +sidebar_position: 311 +sidebar_label: sparkbar --- # sparkbar {#sparkbar} diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index 58f8c27cd72..2b22320ae7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -1,10 +1,11 @@ --- -toc_priority: 30 +sidebar_position: 30 --- # stddevPop {#stddevpop} The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md). -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error. 
+::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md index 4ec72881ae5..3dcee821606 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -1,10 +1,11 @@ --- -toc_priority: 31 +sidebar_position: 31 --- # stddevSamp {#stddevsamp} The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md). -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 7a37ed83e17..e171629e90d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -1,5 +1,5 @@ --- -toc_priority: 221 +sidebar_position: 221 --- # stochasticLinearRegression {#agg_functions-stochasticlinearregression} diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 35d1e3899ac..a7d4c640126 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -1,5 +1,5 @@ --- -toc_priority: 222 +sidebar_position: 222 --- # stochasticLogisticRegression {#agg_functions-stochasticlogisticregression} diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index 7d8d255e15b..86207a35c04 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -1,6 +1,6 @@ --- -toc_priority: 300 -toc_title: studentTTest +sidebar_position: 300 +sidebar_label: studentTTest --- # studentTTest {#studentttest} diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md index 77d38a2c7b2..b72cb84e74f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -1,5 +1,5 @@ --- -toc_priority: 4 +sidebar_position: 4 --- # sum {#agg_function-sum} diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index 00a7a9fc9f1..dbc0601241e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -1,5 +1,5 @@ --- -toc_priority: 144 +sidebar_position: 144 --- # sumCount {#agg_function-sumCount} diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index d4d47fde1fa..8c96464dfd5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -1,5 +1,5 @@ --- -toc_priority: 145 +sidebar_position: 145 --- # sumKahan {#agg_function-sumKahan} diff --git a/docs/en/sql-reference/aggregate-functions/reference/summap.md b/docs/en/sql-reference/aggregate-functions/reference/summap.md index 4ccbc22de35..78ce6a9e835 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summap.md @@ -1,5 +1,5 @@ --- -toc_priority: 141 +sidebar_position: 141 --- # sumMap {#agg_functions-summap} diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md index 1b39e9d0eb1..0582eb5fb7b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md @@ -1,5 +1,5 @@ --- -toc_priority: 140 +sidebar_position: 140 --- # sumWithOverflow {#sumwithoverflowx} diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 7e6d0db4946..19e98262899 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -1,5 +1,5 @@ --- -toc_priority: 108 +sidebar_position: 108 --- # topK {#topk} diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index 694cbd1ad41..2d6e86667ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -1,5 +1,5 @@ --- -toc_priority: 109 +sidebar_position: 109 --- # topKWeighted {#topkweighted} diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 33bfe72548b..6e6791702ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -1,5 +1,5 @@ --- -toc_priority: 190 +sidebar_position: 190 --- # uniq {#agg_function-uniq} diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 623c43ae10c..79357cb14ce 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -1,5 +1,5 @@ --- -toc_priority: 192 +sidebar_position: 192 --- # uniqCombined {#agg_function-uniqcombined} @@ -34,8 +34,9 @@ Function: - Provides the result deterministically (it does not depend on the query processing order). -!!! 
note "Note" - Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +:::note +Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +::: Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`: diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index 6d060d82779..fb0be23c768 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -1,5 +1,5 @@ --- -toc_priority: 193 +sidebar_position: 193 --- # uniqCombined64 {#agg_function-uniqcombined64} diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index e446258fbf7..68e6bc562f9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -1,5 +1,5 @@ --- -toc_priority: 191 +sidebar_position: 191 --- # uniqExact {#agg_function-uniqexact} diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index 1d619ab7d93..1a13b365560 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -1,5 +1,5 @@ --- -toc_priority: 194 +sidebar_position: 194 --- # uniqHLL12 {#agg_function-uniqhll12} diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index b5161462442..9b9c16922b1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -1,5 +1,5 @@ --- -toc_priority: 195 +sidebar_position: 195 --- # uniqTheta {#agg_function-uniqthetasketch} diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index c08dcfd9bfd..f16cfcdc63f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,5 +1,5 @@ --- -toc_priority: 32 +sidebar_position: 32 --- # varPop(x) {#varpopx} @@ -8,5 +8,6 @@ Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x In other words, dispersion for a set of values. Returns `Float64`. -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. 
+:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 78bc545a5d0..b323f78fbd1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,5 +1,5 @@ --- -toc_priority: 33 +sidebar_position: 33 --- # varSamp {#varsamp} @@ -10,5 +10,6 @@ It represents an unbiased estimate of the variance of a random variable if passe Returns `Float64`. When `n <= 1`, returns `+∞`. -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 2e127f87f9f..0aff60e7bbf 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -1,6 +1,6 @@ --- -toc_priority: 301 -toc_title: welchTTest +sidebar_position: 301 +sidebar_label: welchTTest --- # welchTTest {#welchttest} diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md index 7a87ac2dcdb..5797c697255 100644 --- a/docs/en/sql-reference/ansi.md +++ b/docs/en/sql-reference/ansi.md @@ -1,12 +1,13 @@ --- -toc_priority: 40 -toc_title: ANSI Compatibility +sidebar_position: 40 +sidebar_label: ANSI Compatibility --- # ANSI SQL Compatibility of ClickHouse SQL Dialect {#ansi-sql-compatibility-of-clickhouse-sql-dialect} -!!! note "Note" - This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8). +:::note +This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8). 
+::: ## Differences in Behaviour {#differences-in-behaviour} diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index e483a20eed9..6dc89e2864f 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -1,6 +1,6 @@ --- -toc_priority: 53 -toc_title: AggregateFunction +sidebar_position: 53 +sidebar_label: AggregateFunction --- # AggregateFunction {#data-type-aggregatefunction} diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index a8dad7ba989..909df86ec2f 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -1,6 +1,6 @@ --- -toc_priority: 52 -toc_title: Array(T) +sidebar_position: 52 +sidebar_label: Array(T) --- # Array(t) {#data-type-array} diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index ca44238277c..a20e30777af 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: Boolean +sidebar_position: 43 +sidebar_label: Boolean --- # Boolean Values {#boolean-values} diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 828f9ee70f5..bc3fda4a9d0 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -1,6 +1,6 @@ --- -toc_priority: 47 -toc_title: Date +sidebar_position: 47 +sidebar_label: Date --- # Date {#data_type-date} diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 592f952e1be..46c9fe00b34 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -1,6 +1,6 @@ --- -toc_priority: 48 -toc_title: Date32 +sidebar_position: 48 +sidebar_label: Date32 --- # Date32 {#data_type-datetime32} diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 6fa4f8326fe..cae83ac9a31 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -1,6 +1,6 @@ --- -toc_priority: 48 -toc_title: DateTime +sidebar_position: 48 +sidebar_label: DateTime --- # Datetime {#data_type-datetime} diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 02d9efc0249..aefd7e4a18b 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -1,6 +1,6 @@ --- -toc_priority: 49 -toc_title: DateTime64 +sidebar_position: 49 +sidebar_label: DateTime64 --- # Datetime64 {#data_type-datetime64} diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index fae0bb6dbb9..33b4addb54f 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -1,6 +1,6 @@ --- -toc_priority: 42 -toc_title: Decimal +sidebar_position: 42 +sidebar_label: Decimal --- # Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) {#decimal} diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index 57db0c4263c..e27bf9a6d37 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: Domains 
-toc_priority: 56 -toc_title: Overview +sidebar_position: 56 +sidebar_label: Domains --- # Domains {#domains} diff --git a/docs/en/sql-reference/data-types/domains/ipv4.md b/docs/en/sql-reference/data-types/domains/ipv4.md index aafd46edef8..76d285fe34a 100644 --- a/docs/en/sql-reference/data-types/domains/ipv4.md +++ b/docs/en/sql-reference/data-types/domains/ipv4.md @@ -1,6 +1,6 @@ --- -toc_priority: 59 -toc_title: IPv4 +sidebar_position: 59 +sidebar_label: IPv4 --- ## IPv4 {#ipv4} diff --git a/docs/en/sql-reference/data-types/domains/ipv6.md b/docs/en/sql-reference/data-types/domains/ipv6.md index 30b3c8add69..c5745dcb80f 100644 --- a/docs/en/sql-reference/data-types/domains/ipv6.md +++ b/docs/en/sql-reference/data-types/domains/ipv6.md @@ -1,6 +1,6 @@ --- -toc_priority: 60 -toc_title: IPv6 +sidebar_position: 60 +sidebar_label: IPv6 --- ## IPv6 {#ipv6} diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index ae22e60a5f3..5dbec255da6 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: Enum +sidebar_position: 50 +sidebar_label: Enum --- # Enum {#enum} diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index 59ed123fb10..230792c19bb 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: FixedString(N) +sidebar_position: 45 +sidebar_label: FixedString(N) --- # Fixedstring {#fixedstring} diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index fcc071b9f9a..46076e29525 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: Float32, Float64 +sidebar_position: 41 +sidebar_label: Float32, Float64 --- # Float32, Float64 {#float32-float64} diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index e6d32ef3305..7ce863a5a10 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -1,14 +1,15 @@ --- -toc_priority: 62 -toc_title: Geo +sidebar_position: 62 +sidebar_label: Geo --- # Geo Data Types {#geo-data-types} ClickHouse supports data types for representing geographical objects — locations, lands, etc. -!!! warning "Warning" - Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`. +:::warning +Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`. +::: **See Also** - [Representing simple geographical features](https://en.wikipedia.org/wiki/GeoJSON). diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index 831b8d19d94..ca26b89ec87 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Data Types -toc_priority: 37 -toc_title: Introduction +sidebar_label: Data Types +sidebar_position: 37 --- -# Data Types {#data_types} +# Data Types ClickHouse can store various kinds of data in table cells. 
diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 4cc590d9fa5..86d587cfb55 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,6 +1,6 @@ --- -toc_priority: 40 -toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 +sidebar_position: 40 +sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- # UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 3a813103335..40105446cbe 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -1,6 +1,6 @@ --- -toc_priority: 51 -toc_title: LowCardinality +sidebar_position: 51 +sidebar_label: LowCardinality --- # LowCardinality Data Type {#lowcardinality-data-type} diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index cdc3c874043..56f4442fe5a 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -1,6 +1,6 @@ --- -toc_priority: 65 -toc_title: Map(key, value) +sidebar_position: 65 +sidebar_label: Map(key, value) --- # Map(key, value) {#data_type-map} diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md index bd91dd10ad6..ae57037b6e2 100644 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ b/docs/en/sql-reference/data-types/multiword-types.md @@ -1,6 +1,6 @@ --- -toc_priority: 61 -toc_title: Multiword Type Names +sidebar_position: 61 +sidebar_label: Multiword Type Names --- # Multiword Types {#multiword-types} diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index b383fc53464..c0f016ea41d 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -1,8 +1,6 @@ --- -toc_folder_title: Nested Data Structures -toc_hidden: true -toc_priority: 54 -toc_title: hidden +sidebar_label: Nested Data Structures +sidebar_position: 54 --- # Nested Data Structures {#nested-data-structures} diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md index e08b7e0de3e..8258d8bd8e5 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/nested.md @@ -1,6 +1,6 @@ --- -toc_priority: 57 -toc_title: Nested(Name1 Type1, Name2 Type2, ...) +sidebar_position: 57 +sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) --- # Nested {#nested} diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 2154315d269..f3c3dcd2326 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -1,6 +1,6 @@ --- -toc_priority: 55 -toc_title: Nullable +sidebar_position: 55 +sidebar_label: Nullable --- # Nullable(typename) {#data_type-nullable} @@ -17,8 +17,9 @@ A `Nullable` type field can’t be included in table indexes. 
To store `Nullable` type values in a table column, ClickHouse uses a separate file with `NULL` masks in addition to normal file with values. Entries in masks file allow ClickHouse to distinguish between `NULL` and a default value of corresponding data type for each table row. Because of an additional file, `Nullable` column consumes additional storage space compared to a similar normal one. -!!! info "Note" - Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases. +:::note +Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases. +::: ## Finding NULL {#finding-null} diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 7a4c4375541..1c04a71dedb 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -22,10 +22,11 @@ The following aggregate functions are supported: - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) -!!! note "Note" - Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. +:::note +Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. - `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. +`SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. +::: **Parameters** diff --git a/docs/en/sql-reference/data-types/special-data-types/expression.md b/docs/en/sql-reference/data-types/special-data-types/expression.md index e1ffba478e6..b6a2a2ebb9d 100644 --- a/docs/en/sql-reference/data-types/special-data-types/expression.md +++ b/docs/en/sql-reference/data-types/special-data-types/expression.md @@ -1,6 +1,6 @@ --- -toc_priority: 58 -toc_title: Expression +sidebar_position: 58 +sidebar_label: Expression --- # Expression {#expression} diff --git a/docs/en/sql-reference/data-types/special-data-types/index.md b/docs/en/sql-reference/data-types/special-data-types/index.md index 3398af94c70..5455d34a2a2 100644 --- a/docs/en/sql-reference/data-types/special-data-types/index.md +++ b/docs/en/sql-reference/data-types/special-data-types/index.md @@ -1,8 +1,6 @@ --- -toc_folder_title: Special Data Types -toc_hidden: true -toc_priority: 55 -toc_title: hidden +sidebar_label: Special Data Types +sidebar_position: 55 --- # Special Data Types {#special-data-types} diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index 7c0c5b00c0d..3ebeee01bf6 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -1,14 +1,15 @@ --- -toc_priority: 61 -toc_title: Interval +sidebar_position: 61 +sidebar_label: Interval --- # Interval {#data-type-interval} The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator. -!!! warning "Warning" - `Interval` data type values can’t be stored in tables. +:::warning +`Interval` data type values can’t be stored in tables. 
+::: Structure: diff --git a/docs/en/sql-reference/data-types/special-data-types/nothing.md b/docs/en/sql-reference/data-types/special-data-types/nothing.md index e69272a665e..f9f296f7dc4 100644 --- a/docs/en/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/en/sql-reference/data-types/special-data-types/nothing.md @@ -1,6 +1,6 @@ --- -toc_priority: 60 -toc_title: Nothing +sidebar_position: 60 +sidebar_label: Nothing --- # Nothing {#nothing} diff --git a/docs/en/sql-reference/data-types/special-data-types/set.md b/docs/en/sql-reference/data-types/special-data-types/set.md index 6babd047888..6d447b96f3b 100644 --- a/docs/en/sql-reference/data-types/special-data-types/set.md +++ b/docs/en/sql-reference/data-types/special-data-types/set.md @@ -1,6 +1,6 @@ --- -toc_priority: 59 -toc_title: Set +sidebar_position: 59 +sidebar_label: Set --- # Set {#set} diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md index 3d0f01e147f..e2903c7329d 100644 --- a/docs/en/sql-reference/data-types/string.md +++ b/docs/en/sql-reference/data-types/string.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: String +sidebar_position: 44 +sidebar_label: String --- # String {#string} diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index b28bef67af5..eea48ab37b4 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -1,6 +1,6 @@ --- -toc_priority: 54 -toc_title: Tuple(T1, T2, ...) +sidebar_position: 54 +sidebar_label: Tuple(T1, T2, ...) --- # Tuple(t1, T2, …) {#tuplet1-t2} diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index 528534de0a0..010fc0b5cf5 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: UUID +sidebar_position: 46 +sidebar_label: UUID --- # UUID {#uuid-data-type} diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml new file mode 100644 index 00000000000..77f42ba74d1 --- /dev/null +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml @@ -0,0 +1,7 @@ +position: 37 +label: 'External Dictionaries' +collapsible: true +collapsed: true +link: + type: generated-index + title: External Dictionaries \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md index 08d3b8d8ad0..c48ad217431 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md @@ -1,9 +1,9 @@ --- -toc_priority: 45 -toc_title: Hierarchical dictionaries +sidebar_position: 45 +sidebar_label: Hierarchical dictionaries --- -# Hierarchical Dictionaries {#hierarchical-dictionaries} +# Hierarchical Dictionaries ClickHouse supports hierarchical dictionaries with a [numeric key](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict-numeric-key). 
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index effcc614930..bd88a8b09f7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -1,9 +1,9 @@ --- -toc_priority: 41 -toc_title: Storing Dictionaries in Memory +sidebar_position: 41 +sidebar_label: Storing Dictionaries in Memory --- -# Storing Dictionaries in Memory {#dicts-external-dicts-dict-layout} +# Storing Dictionaries in Memory There are a variety of ways to store dictionaries in memory. @@ -238,8 +238,9 @@ Example: The table contains discounts for each advertiser in the format: To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). -!!! warning "Warning" - Values of `range_min` and `range_max` should fit in `Int64` type. +:::warning +Values of `range_min` and `range_max` should fit in `Int64` type. +::: Example: @@ -407,8 +408,9 @@ Set a large enough cache size. You need to experiment to select the number of ce 3. Assess memory consumption using the `system.dictionaries` table. 4. Increase or decrease the number of cells until the required memory consumption is reached. -!!! warning "Warning" - Do not use ClickHouse as a source, because it is slow to process queries with random reads. +:::warning +Do not use ClickHouse as a source, because it is slow to process queries with random reads. +::: ### complex_key_cache {#complex-key-cache} diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index afef6ae249d..83814781005 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -1,9 +1,9 @@ --- -toc_priority: 42 -toc_title: Dictionary Updates +sidebar_position: 42 +sidebar_label: Dictionary Updates --- -# Dictionary Updates {#dictionary-updates} +# Dictionary Updates ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `` tag in seconds. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index b49f384367d..7fcea84b55d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -1,9 +1,9 @@ --- -toc_priority: 46 -toc_title: Polygon Dictionaries With Grids +sidebar_position: 46 +sidebar_label: Polygon Dictionaries With Grids --- -# Polygon dictionaries {#polygon-dictionaries} +# Polygon dictionaries Polygon dictionaries allow you to efficiently search for the polygon containing specified points. For example: defining a city area by geographical coordinates. 
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index c3c4bbc6493..e5502a17a3a 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -1,9 +1,9 @@ --- -toc_priority: 43 -toc_title: Sources of External Dictionaries +sidebar_position: 43 +sidebar_label: Sources of External Dictionaries --- -# Sources of External Dictionaries {#dicts-external-dicts-dict-sources} +# Sources of External Dictionaries An external dictionary can be connected from many different sources. @@ -220,8 +220,9 @@ When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remot ### Known Vulnerability of the ODBC Dictionary Functionality {#known-vulnerability-of-the-odbc-dictionary-functionality} -!!! attention "Attention" - When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. +:::note +When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. +::: **Example of insecure use** @@ -471,8 +472,9 @@ Setting fields: - `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). - `query` – The custom query. Optional parameter. -!!! info "Note" - The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. +:::note +The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. +::: ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. @@ -549,8 +551,9 @@ Setting fields: - `query` – The custom query. Optional parameter. -!!! info "Note" - The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +:::note +The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +::: MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`. @@ -639,8 +642,9 @@ Setting fields: - `secure` - Use ssl for connection. - `query` – The custom query. Optional parameter. -!!! info "Note" - The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +:::note +The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +::: ### Mongodb {#dicts-external_dicts_dict_sources-mongodb} @@ -752,8 +756,9 @@ Setting fields: - `max_threads` – The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries. - `query` – The custom query. Optional parameter. -!!! 
info "Note" - The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. +:::note +The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. +::: ### PostgreSQL {#dicts-external_dicts_dict_sources-postgresql} @@ -808,5 +813,6 @@ Setting fields: - `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). - `query` – The custom query. Optional parameter. -!!! info "Note" - The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +:::note +The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 0d1c4535b28..2712bbf6911 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -1,11 +1,11 @@ --- -toc_priority: 44 -toc_title: Dictionary Key and Fields +sidebar_position: 44 +sidebar_label: Dictionary Key and Fields --- -# Dictionary Key and Fields {#dictionary-key-and-fields} +# Dictionary Key and Fields -The `` clause describes the dictionary key and fields available for queries. +The `structure` clause describes the dictionary key and fields available for queries. XML description: @@ -56,8 +56,9 @@ ClickHouse supports the following types of keys: An xml structure can contain either `` or ``. DDL-query must contain single `PRIMARY KEY`. -!!! warning "Warning" - You must not describe key as an attribute. +:::warning +You must not describe key as an attribute. +::: ### Numeric Key {#ext_dict-numeric-key} @@ -92,8 +93,9 @@ PRIMARY KEY Id The key can be a `tuple` from any types of fields. The [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`. -!!! tip "Tip" - A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. +:::tip +A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. +::: The key structure is set in the element ``. Key fields are specified in the same format as the dictionary [attributes](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). 
Example: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index e15d944130e..bb4fcdab51a 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -1,9 +1,9 @@ --- -toc_priority: 40 -toc_title: Configuring an External Dictionary +sidebar_position: 40 +sidebar_label: Configuring an External Dictionary --- -# Configuring an External Dictionary {#dicts-external-dicts-dict} +# Configuring an External Dictionary If dictionary is configured using xml file, than dictionary configuration has the following structure: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 00025c70c60..d816888f019 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -1,9 +1,9 @@ --- -toc_priority: 39 -toc_title: General Description +sidebar_position: 39 +sidebar_label: General Description --- -# External Dictionaries {#dicts-external-dicts} +# External Dictionaries You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. @@ -45,8 +45,9 @@ You can [configure](../../../sql-reference/dictionaries/external-dictionaries/ex [DDL queries for dictionaries](../../../sql-reference/statements/create/dictionary.md) does not require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views. -!!! attention "Attention" - You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to external dictionaries. +:::note +You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to external dictionaries. 
+::: ## See Also {#ext-dicts-see-also} diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/index.md b/docs/en/sql-reference/dictionaries/external-dictionaries/index.md deleted file mode 100644 index 4098ac38060..00000000000 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -toc_folder_title: External Dictionaries -toc_priority: 37 ---- - - diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 22f4182a1c0..8e54b70eab0 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1,7 +1,6 @@ --- -toc_folder_title: Dictionaries -toc_priority: 35 -toc_title: Introduction +sidebar_label: Dictionaries +sidebar_position: 35 --- # Dictionaries {#dictionaries} diff --git a/docs/en/sql-reference/dictionaries/internal-dicts.md b/docs/en/sql-reference/dictionaries/internal-dicts.md index a8976772aa5..1996c974412 100644 --- a/docs/en/sql-reference/dictionaries/internal-dicts.md +++ b/docs/en/sql-reference/dictionaries/internal-dicts.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: Internal Dictionaries +sidebar_position: 39 +sidebar_label: Internal Dictionaries --- # Internal Dictionaries {#internal_dicts} diff --git a/docs/en/sql-reference/distributed-ddl.md b/docs/en/sql-reference/distributed-ddl.md index c291c85fa7a..e0eae13672a 100644 --- a/docs/en/sql-reference/distributed-ddl.md +++ b/docs/en/sql-reference/distributed-ddl.md @@ -1,6 +1,6 @@ --- -toc_priority: 32 -toc_title: Distributed DDL +sidebar_position: 32 +sidebar_label: Distributed DDL --- # Distributed DDL Queries (ON CLUSTER Clause) {#distributed-ddl-queries-on-cluster-clause} @@ -17,5 +17,6 @@ In order to run these queries correctly, each host must have the same cluster de The local version of the query will eventually be executed on each host in the cluster, even if some hosts are currently not available. -!!! warning "Warning" - The order for executing queries within a single host is guaranteed. +:::warning +The order for executing queries within a single host is guaranteed. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 40fadf34eab..63c481c9ae6 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 34 -toc_title: Arithmetic +sidebar_position: 34 +sidebar_label: Arithmetic --- # Arithmetic Functions {#arithmetic-functions} diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8231cda4b77..3f96f75e7b8 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 35 -toc_title: Arrays +sidebar_position: 35 +sidebar_label: Arrays --- # Array Functions {#functions-for-working-with-arrays} @@ -17,8 +17,9 @@ empty([x]) An array is considered empty if it does not contain any elements. -!!! note "Note" - Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. 
The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +:::note +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -60,8 +61,9 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. -!!! note "Note" - Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +:::note +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +::: The function also works for [strings](string-functions.md#notempty) or [UUID](uuid-functions.md#notempty). @@ -733,8 +735,9 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; └─────────┘ ``` -!!! note "Note" - To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. +:::note +To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. 
+::: ## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} diff --git a/docs/en/sql-reference/functions/array-join.md b/docs/en/sql-reference/functions/array-join.md index e87d0bca4bb..24d9c2b08d8 100644 --- a/docs/en/sql-reference/functions/array-join.md +++ b/docs/en/sql-reference/functions/array-join.md @@ -1,6 +1,6 @@ --- -toc_priority: 61 -toc_title: arrayJoin +sidebar_position: 61 +sidebar_label: arrayJoin --- # arrayJoin function {#functions_arrayjoin} diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 24adb362c98..c23c5ac5431 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 48 -toc_title: Bit +sidebar_position: 48 +sidebar_label: Bit --- # Bit Functions {#bit-functions} diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a6104835469..68d1fc88a31 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 49 -toc_title: Bitmap +sidebar_position: 49 +sidebar_label: Bitmap --- # Bitmap Functions {#bitmap-functions} diff --git a/docs/en/sql-reference/functions/comparison-functions.md b/docs/en/sql-reference/functions/comparison-functions.md index edaf0a01c73..b5e842ddcad 100644 --- a/docs/en/sql-reference/functions/comparison-functions.md +++ b/docs/en/sql-reference/functions/comparison-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: Comparison +sidebar_position: 36 +sidebar_label: Comparison --- # Comparison Functions {#comparison-functions} diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index 241112f7f7f..21189bbb072 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: 'Conditional ' +sidebar_position: 43 +sidebar_label: 'Conditional ' --- # Conditional Functions {#conditional-functions} diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index d535a516b3a..5f783cf4149 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: Dates and Times +sidebar_position: 39 +sidebar_label: Dates and Times --- # Functions for Working with Dates and Times {#functions-for-working-with-dates-and-times} @@ -266,8 +266,9 @@ Result: └────────────────┘ ``` -!!! attention "Attention" - The return type `toStartOf*` functions described below is `Date` or `DateTime`. Though these functions can take `DateTime64` as an argument, passing them a `DateTime64` that is out of the normal range (years 1925 - 2283) will give an incorrect result. +:::note +The return type `toStartOf*` functions described below is `Date` or `DateTime`. Though these functions can take `DateTime64` as an argument, passing them a `DateTime64` that is out of the normal range (years 1925 - 2283) will give an incorrect result. +::: ## toStartOfYear {#tostartofyear} @@ -290,8 +291,9 @@ Returns the date. Rounds down a date or date with time to the first day of the month. Returns the date. -!!! 
attention "Attention" - The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow. +:::note +The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow. +::: ## toMonday {#tomonday} diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index e3d5a4b18db..6e25befcbc7 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 52 -toc_title: Encoding +sidebar_position: 52 +sidebar_label: Encoding --- # Encoding Functions {#encoding-functions} @@ -170,8 +170,9 @@ Performs the opposite operation of [hex](#hex). It interprets each pair of hexad If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions. -!!! note "Note" - If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8. +:::note +If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8. +::: Alias: `UNHEX`. @@ -328,8 +329,9 @@ Alias: `UNBIN`. For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions. -!!! note "Note" - If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8. +:::note +If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8. +::: Supports binary digits `0` and `1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn’t thrown). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index ea4d0f84488..942a63a48a8 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 67 -toc_title: Encryption +sidebar_position: 67 +sidebar_label: Encryption --- # Encryption functions {#encryption-functions} diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 84e1e5eca3b..1d3f5952c98 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -1,10 +1,11 @@ --- -toc_priority: 58 -toc_title: External Dictionaries +sidebar_position: 58 +sidebar_label: External Dictionaries --- -!!! attention "Attention" - For dictionaries, created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `.`. Otherwise, the current database is used. +:::note +For dictionaries created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `.`. Otherwise, the current database is used. 
+::: # Functions for Working with External Dictionaries {#ext_dict_functions} diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index 9cbf8932465..5bb77016039 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: Files +sidebar_position: 43 +sidebar_label: Files --- # Functions for Working with Files {#functions-for-working-with-files} diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 42307093dda..0ff93357208 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -1,6 +1,6 @@ --- -toc_priority: 63 -toc_title: Nullable +sidebar_position: 63 +sidebar_label: Nullable --- # Functions for Working with Nullable Values {#functions-for-working-with-nullable-aggregates} diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index b0862dded67..41ba409cbc1 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -1,6 +1,6 @@ --- -toc_title: Geographical Coordinates -toc_priority: 62 +sidebar_label: Geographical Coordinates +sidebar_position: 62 --- diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 1192ed5f56a..e65456d0c40 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -1,5 +1,5 @@ --- -toc_title: Geohash +sidebar_label: Geohash --- # Functions for Working with Geohash {#geohash} @@ -80,8 +80,9 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi - `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). - `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). -!!! info "Note" - All coordinate parameters must be of the same type: either `Float32` or `Float64`. +:::note +All coordinate parameters must be of the same type: either `Float32` or `Float64`. +::: **Returned values** @@ -90,8 +91,9 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). -!!! info "Note" - Function throws an exception if resulting array is over 10’000’000 items long. +:::note +Function throws an exception if resulting array is over 10’000’000 items long. 
+::: **Example** diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index ecbe00adfd7..50115dd4d75 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1,5 +1,5 @@ --- -toc_title: H3 Indexes +sidebar_label: H3 Indexes --- # Functions for Working with H3 Indexes {#h3index} diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md index 65bf2ab83cb..f76c3a3f731 100644 --- a/docs/en/sql-reference/functions/geo/index.md +++ b/docs/en/sql-reference/functions/geo/index.md @@ -1,8 +1,8 @@ --- -toc_title: hidden -toc_priority: 62 -toc_folder_title: Geo +sidebar_label: Geo +sidebar_position: 62 --- +# Geo Functions [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/) diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index f8736bcc61a..c3d95d2f0a9 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -1,5 +1,5 @@ --- -toc_title: S2 Geometry +sidebar_label: S2 Geometry --- # Functions for Working with S2 Index {#s2index} diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index c892b814957..e4b1fdd3bbb 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: Hash +sidebar_position: 50 +sidebar_label: Hash --- # Hash Functions {#hash-functions} diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index c8936e74954..ab8ba93daba 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 60 -toc_title: IN Operator +sidebar_position: 60 +sidebar_label: IN Operator --- # Functions for Implementing the IN Operator {#functions-for-implementing-the-in-operator} diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 7cceec889bd..261cf908e07 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Functions -toc_priority: 32 -toc_title: Introduction +sidebar_position: 32 +sidebar_label: Functions --- -# Functions {#functions} +# Functions There are at least\* two types of functions - regular functions (they are just called “functions”) and aggregate functions. These are completely different concepts. Regular functions work as if they are applied to each row separately (for each row, the result of the function does not depend on the other rows). Aggregate functions accumulate a set of values from various rows (i.e. they depend on the entire set of rows). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 1be68c6bdd4..694d07f18dc 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -1,14 +1,15 @@ --- -toc_priority: 65 -toc_title: Introspection +sidebar_position: 65 +sidebar_label: Introspection --- # Introspection Functions {#introspection-functions} You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. -!!! 
warning "Warning" - These functions are slow and may impose security considerations. +:::warning +These functions are slow and may impose security considerations. +::: For proper operation of introspection functions: diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 469a66d460f..c293c1ff317 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 55 -toc_title: IP Addresses +sidebar_position: 55 +sidebar_label: IP Addresses --- # Functions for Working with IPv4 and IPv6 Addresses {#functions-for-working-with-ip-addresses} diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index d5622ac5fdc..be69b7b4f2b 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 56 -toc_title: JSON +sidebar_position: 56 +sidebar_label: JSON --- # Functions for Working with JSON {#functions-for-working-with-json} @@ -359,8 +359,9 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); ``` -!!! note "Note" - before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json) +:::note +Before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json) +::: ## JSON_QUERY(json, path) {#json-query} @@ -385,8 +386,9 @@ Result: [2] String ``` -!!! note "Note" - before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json) +:::note +Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json) +::: ## JSON_VALUE(json, path) {#json-value} @@ -412,8 +414,9 @@ Result: String ``` -!!! note "Note" - before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, json) +:::note +Before version 21.11 the order of arguments was wrong, i.e. 
JSON_VALUE(path, json) +::: ## toJSONString {#tojsonstring} diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index dcdb01e2059..0055e253951 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: Logical +sidebar_position: 37 +sidebar_label: Logical --- # Logical Functions {#logical-functions} diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index b823340058e..5b3e8b87e34 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 64 -toc_title: Machine Learning +sidebar_position: 64 +sidebar_label: Machine Learning --- # Machine Learning Functions {#machine-learning-functions} diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index a5fc07cf687..645587b4f5c 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: Mathematical +sidebar_position: 44 +sidebar_label: Mathematical --- # Mathematical Functions {#mathematical-functions} diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 8a1a44cf079..5a00252f56c 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -1,12 +1,13 @@ --- -toc_priority: 67 -toc_title: NLP +sidebar_position: 67 +sidebar_label: NLP --- # [experimental] Natural Language Processing functions {#nlp-functions} -!!! warning "Warning" - This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. +:::warning +This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. +::: ## stem {#stem} diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index bce3f9144b1..45e9ef43c6a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 67 -toc_title: Other +sidebar_position: 67 +sidebar_label: Other --- # Other Functions {#other-functions} @@ -729,8 +729,9 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -!!! warning "Warning" - It can reach the neighbor rows only inside the currently processed data block. +:::warning +It can reach the neighbor rows only inside the currently processed data block. +::: The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. @@ -838,8 +839,9 @@ Result: Calculates the difference between successive row values ​​in the data block. 
Returns 0 for the first row and the difference from the previous row for each subsequent row. -!!! warning "Warning" - It can reach the previous row only inside the currently processed data block. +:::warning +It can reach the previous row only inside the currently processed data block. +::: The result of the function depends on the affected data blocks and the order of data in the block. @@ -921,9 +923,9 @@ Each event has a start time and an end time. The start time is included in the e The function calculates the total number of active (concurrent) events for each event start time. -!!! warning "Warning" - Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. - Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. +:::warning +Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. +::: **Syntax** @@ -1609,8 +1611,9 @@ Result: Accumulates states of an aggregate function for each row of a data block. -!!! warning "Warning" - The state is reset for each new data block. +:::warning +The state is reset for each new data block. +::: **Syntax** @@ -2068,8 +2071,9 @@ Number of digits. Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). - !!! note "Note" - For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). +:::note +For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). +::: **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index aab9483de45..5e20a93da1f 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -1,14 +1,15 @@ --- -toc_priority: 51 -toc_title: Pseudo-Random Numbers +sidebar_position: 51 +sidebar_label: Pseudo-Random Numbers --- # Functions for Generating Pseudo-Random Numbers {#functions-for-generating-pseudo-random-numbers} All the functions accept zero arguments or one argument. If an argument is passed, it can be any type, and its value is not used for anything. The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. -!!! note "Note" - Non-cryptographic generators of pseudo-random numbers are used. +:::note +Non-cryptographic generators of pseudo-random numbers are used. 
+::: ## rand, rand32 {#rand} diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index b224e7ab406..a469318e623 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: Rounding +sidebar_position: 45 +sidebar_label: Rounding --- # Rounding Functions {#rounding-functions} diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 7a4e04bbf6c..7e94c225f6b 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 47 -toc_title: Splitting and Merging Strings and Arrays +sidebar_position: 47 +sidebar_label: Splitting and Merging Strings and Arrays --- # Functions for Splitting and Merging Strings and Arrays {#functions-for-splitting-and-merging-strings-and-arrays} diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a30cacde519..d63e466a836 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1,12 +1,13 @@ --- -toc_priority: 40 -toc_title: Strings +sidebar_position: 40 +sidebar_label: Strings --- # Functions for Working with Strings {#functions-for-working-with-strings} -!!! note "Note" - Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. +:::note +Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. +::: ## empty {#empty} diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 144b4fbc1da..1df8bfd0c44 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -1,12 +1,13 @@ --- -toc_priority: 42 -toc_title: For Replacing in Strings +sidebar_position: 42 +sidebar_label: For Replacing in Strings --- # Functions for Searching and Replacing in Strings {#functions-for-searching-and-replacing-in-strings} -!!! note "Note" - Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. +:::note +Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. 
+::: ## replaceOne(haystack, pattern, replacement) {#replaceonehaystack-pattern-replacement} diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index a0c0116a058..985d9f1e63a 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,14 +1,15 @@ --- -toc_priority: 41 -toc_title: For Searching in Strings +sidebar_position: 41 +sidebar_label: For Searching in Strings --- # Functions for Searching in Strings {#functions-for-searching-strings} The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. -!!! note "Note" - Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. +:::note +Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. +::: ## position(haystack, needle), locate(haystack, needle) {#position} @@ -30,8 +31,9 @@ position(needle IN haystack) Alias: `locate(haystack, needle[, start_pos])`. -!!! note "Note" - Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. +:::note +Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. +::: **Arguments** @@ -342,8 +344,9 @@ Returns 1, if at least one string needlei matches the string `haystac For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. -!!! note "Note" - In all `multiSearch*` functions the number of needles should be less than 28 because of implementation specification. +:::note +In all `multiSearch*` functions the number of needles should be less than 28 because of implementation specification. +::: ## match(haystack, pattern) {#matchhaystack-pattern} @@ -358,8 +361,9 @@ For patterns to search for substrings in a string, it is better to use LIKE or The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) library. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster. -!!! note "Note" - The length of any of the `haystack` string must be less than 232 bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. +:::note +The length of any of the `haystack` string must be less than 232 bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. +::: ## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} @@ -381,11 +385,13 @@ The same as `multiFuzzyMatchAny`, but returns any index that matches the haystac The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance. -!!! note "Note" - `multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction. 
+:::note +`multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction. +::: -!!! note "Note" - To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`. +:::note +To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`. +::: ## extract(haystack, pattern) {#extracthaystack-pattern} @@ -399,8 +405,9 @@ Extracts all the fragments of a string using a regular expression. If ‘haystac Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc. -!!! note "Note" - `extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). +:::note +`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). +::: **Syntax** @@ -570,8 +577,9 @@ Same as `ngramDistance` but calculates the non-symmetric difference between `nee For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. -!!! note "Note" - For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters. +:::note +For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters. 
+::: ## countSubstrings {#countSubstrings} diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2ea44a6e585..b45866cf931 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 68 -toc_title: Time Window +sidebar_position: 68 +sidebar_label: Time Window --- # Time Window Functions {#time-window-functions} diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 96bceb8958c..cfce02f4d31 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 66 -toc_title: Tuples +sidebar_position: 66 +sidebar_label: Tuples --- # Functions for Working with Tuples {#tuple-functions} diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8ead8c58c7a..a0d62ff5ecb 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: Working with maps +sidebar_position: 46 +sidebar_label: Working with maps --- # Functions for maps {#functions-for-working-with-tuple-maps} diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 18cc3d98561..de6ca769589 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: Type Conversion +sidebar_position: 38 +sidebar_label: Type Conversion --- # Type Conversion Functions {#type-conversion-functions} @@ -689,8 +689,9 @@ x::t - Converted value. -!!! note "Note" - If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. +:::note +If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. +::: **Examples** @@ -1432,8 +1433,9 @@ Result: Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. -!!! info "Note" - The output value is a timestamp in UTC, not in the timezone of `DateTime64`. +:::note +The output value is a timestamp in UTC, not in the timezone of `DateTime64`. 
+::: **Syntax** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 5a305aa5033..c91029c4fce 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 54 -toc_title: URLs +sidebar_position: 54 +sidebar_label: URLs --- # Functions for Working with URLs {#functions-for-working-with-urls} diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 3616b587bf7..d23b505a93f 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 53 -toc_title: UUID +sidebar_position: 53 +sidebar_label: UUID --- # Functions for Working with UUID {#functions-for-working-with-uuid} diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 1e6c9cbd0b4..85215957443 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,6 +1,6 @@ --- -toc_priority: 59 -toc_title: Embedded Dictionaries +sidebar_position: 59 +sidebar_label: Embedded Dictionaries --- # Functions for Working with Embedded Dictionaries diff --git a/docs/en/sql-reference/index.md b/docs/en/sql-reference/index.md index e8fe092e622..1123c8533a9 100644 --- a/docs/en/sql-reference/index.md +++ b/docs/en/sql-reference/index.md @@ -1,8 +1,6 @@ --- -toc_folder_title: SQL Reference -toc_hidden: true -toc_priority: 28 -toc_title: hidden +sidebar_position: 28 +sidebar_label: SQL Reference --- # SQL Reference {#sql-reference} diff --git a/docs/en/sql-reference/operators/exists.md b/docs/en/sql-reference/operators/exists.md index ee0c7317637..25413790801 100644 --- a/docs/en/sql-reference/operators/exists.md +++ b/docs/en/sql-reference/operators/exists.md @@ -4,8 +4,9 @@ The `EXISTS` operator checks how many records are in the result of a subquery. I `EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. -!!! warning "Warning" - References to main query tables and columns are not supported in a subquery. +:::warning +References to main query tables and columns are not supported in a subquery. +::: **Syntax** diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index d8468370f3e..5dda097e799 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -119,8 +119,9 @@ FROM t_null There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. -!!! attention "Attention" - Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. +:::note +Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. +::: When using the regular IN, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. 
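For illustration, a minimal sketch of the difference, assuming hypothetical tables `distributed_visits` (a Distributed table) and `local_visits` (the local table it points to on each server):

```sql
-- Regular IN: each remote server runs the subquery itself over its own local data.
SELECT uniq(UserID)
FROM distributed_visits
WHERE CounterID = 101500
  AND UserID IN (SELECT UserID FROM local_visits WHERE CounterID = 34);

-- GLOBAL IN: the subquery runs once on the initiator, its result is placed into a
-- temporary table and shipped to every remote server together with the query.
SELECT uniq(UserID)
FROM distributed_visits
WHERE CounterID = 101500
  AND UserID GLOBAL IN (SELECT UserID FROM distributed_visits WHERE CounterID = 34);
```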
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index a64dcd70c6c..4761f46ec05 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: Operators +sidebar_position: 38 +sidebar_label: Operators --- # Operators {#operators} @@ -210,8 +210,9 @@ Types of intervals: You can also use a string literal when setting the `INTERVAL` value. For example, `INTERVAL 1 HOUR` is identical to the `INTERVAL '1 hour'` or `INTERVAL '1' hour`. -!!! warning "Warning" - Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. +:::warning +Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. +::: Examples: @@ -247,9 +248,9 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV You can work with dates without using `INTERVAL`, just by adding or subtracting seconds, minutes, and hours. For example, an interval of one day can be set by adding `60*60*24`. -!!! note "Note" - The `INTERVAL` syntax or `addDays` function are always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings. For example, daylight saving time. - +:::note +The `INTERVAL` syntax or `addDays` function are always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings. For example, daylight saving time. +::: Examples: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 6bb63ea06a6..3d22146a56b 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: COLUMN +sidebar_position: 37 +sidebar_label: COLUMN --- # Column Manipulations {#column-manipulations} @@ -75,8 +75,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -!!! warning "Warning" - You can’t delete a column if it is referenced by [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, it returns an error. +:::warning +You can’t delete a column if it is referenced by [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, it returns an error. 
+::: Example: diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index 67a17fc8974..af57adcf31c 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -1,6 +1,6 @@ --- -toc_priority: 51 -toc_title: COMMENT +sidebar_position: 51 +sidebar_label: COMMENT --- # ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 8f4ce57b905..c9517981ae7 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: CONSTRAINT +sidebar_position: 43 +sidebar_label: CONSTRAINT --- # Manipulating Constraints {#manipulations-with-constraints} @@ -16,7 +16,8 @@ See more on [constraints](../../../sql-reference/statements/create/table.md#cons Queries will add or remove metadata about constraints from table so they are processed immediately. -!!! warning "Warning" - Constraint check **will not be executed** on existing data if it was added. +:::warning +Constraint check **will not be executed** on existing data if it was added. +::: All changes on replicated tables are broadcasted to ZooKeeper and will be applied on other replicas as well. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 6c638c0a3ac..21ae091f9e7 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: DELETE +sidebar_position: 39 +sidebar_label: DELETE --- # ALTER TABLE … DELETE Statement {#alter-mutations} @@ -11,8 +11,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). -!!! note "Note" - The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. +:::note +The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. +::: The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value. diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 0d5909518ed..536da948218 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -1,9 +1,9 @@ --- -toc_priority: 35 -toc_title: ALTER +sidebar_position: 35 +sidebar_label: ALTER --- -## ALTER {#query_language_queries_alter} +# ALTER Most `ALTER TABLE` queries modify table settings or data: @@ -16,8 +16,9 @@ Most `ALTER TABLE` queries modify table settings or data: - [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) - [TTL](../../../sql-reference/statements/alter/ttl.md) -!!! 
note "Note" - Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md). +:::note +Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md). +::: These `ALTER` statements manipulate views: @@ -54,7 +55,8 @@ For all `ALTER` queries, you can use the [replication_alter_partitions_sync](../ You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -!!! info "Note" - For all `ALTER` queries, if `replication_alter_partitions_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. +:::note +For all `ALTER` queries, if `replication_alter_partitions_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. +::: For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](../../../operations/settings/settings.md#mutations_sync) setting. diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/index/index.md index 4e2943d37f3..92f55792a70 100644 --- a/docs/en/sql-reference/statements/alter/index/index.md +++ b/docs/en/sql-reference/statements/alter/index/index.md @@ -1,7 +1,7 @@ --- toc_hidden_folder: true -toc_priority: 42 -toc_title: INDEX +sidebar_position: 42 +sidebar_label: INDEX --- # Manipulating Data Skipping Indices {#manipulations-with-data-skipping-indices} @@ -18,5 +18,6 @@ The first two commands are lightweight in a sense that they only change metadata Also, they are replicated, syncing indices metadata via ZooKeeper. -!!! note "Note" - Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). +:::note +Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). +::: \ No newline at end of file diff --git a/docs/en/sql-reference/statements/alter/order-by.md b/docs/en/sql-reference/statements/alter/order-by.md index 16f9ace206d..84d29ae8e11 100644 --- a/docs/en/sql-reference/statements/alter/order-by.md +++ b/docs/en/sql-reference/statements/alter/order-by.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: ORDER BY +sidebar_position: 41 +sidebar_label: ORDER BY --- # Manipulating Key Expressions {#manipulations-with-key-expressions} @@ -13,5 +13,6 @@ The command changes the [sorting key](../../../engines/table-engines/mergetree-f The command is lightweight in a sense that it only changes metadata. 
To keep the property that data part rows are ordered by the sorting key expression you cannot add expressions containing existing columns to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query, without default column value). -!!! note "Note" - It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +:::note +It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +::: \ No newline at end of file diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 12737624ecb..453d1bd7bf6 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: PARTITION +sidebar_position: 38 +sidebar_label: PARTITION --- # Manipulating Partitions and Parts {#alter_manipulations-with-partitions} @@ -160,8 +160,9 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once. -!!! note "Note" - The entire backup process is performed without stopping the server. +:::note +The entire backup process is performed without stopping the server. +::: Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). @@ -171,8 +172,9 @@ At the time of execution, for a data snapshot, the query creates hardlinks to a - `N` is the incremental number of the backup. - if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. -!!! note "Note" - If you use [a set of disks for data storage in a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. +:::note +If you use [a set of disks for data storage in a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. +::: The same structure of directories is created inside the backup as inside `/var/lib/clickhouse/`. The query performs `chmod` for all files, forbidding writing into them. 
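As a hedged example of the `FREEZE` syntax above, assuming a hypothetical `visits` table partitioned by `toYYYYMM(date)`:

```sql
-- Snapshot one partition under an explicit backup name; hardlinked parts appear
-- under /var/lib/clickhouse/shadow/before_migration/ on every disk holding matching parts.
ALTER TABLE visits FREEZE PARTITION 202203 WITH NAME 'before_migration';

-- Snapshot all partitions at once; the incremental number N is used instead of a name.
ALTER TABLE visits FREEZE;
```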
diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index c7ebc83c496..5ccf33d2d2f 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -1,6 +1,6 @@ --- -toc_priority: 49 -toc_title: PROJECTION +sidebar_position: 49 +sidebar_label: PROJECTION --- # Manipulating Projections {#manipulations-with-projections} @@ -20,5 +20,6 @@ The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only Also, they are replicated, syncing projections metadata via ZooKeeper. -!!! note "Note" - Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +:::note +Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +::: \ No newline at end of file diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 05130a569ab..2398a57502c 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: QUOTA +sidebar_position: 46 +sidebar_label: QUOTA --- # ALTER QUOTA {#alter-quota-statement} diff --git a/docs/en/sql-reference/statements/alter/role.md b/docs/en/sql-reference/statements/alter/role.md index ea6d3c61820..d3cb28a1705 100644 --- a/docs/en/sql-reference/statements/alter/role.md +++ b/docs/en/sql-reference/statements/alter/role.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: ROLE +sidebar_position: 46 +sidebar_label: ROLE --- ## ALTER ROLE {#alter-role-statement} diff --git a/docs/en/sql-reference/statements/alter/row-policy.md b/docs/en/sql-reference/statements/alter/row-policy.md index bbf9f317737..47207d29287 100644 --- a/docs/en/sql-reference/statements/alter/row-policy.md +++ b/docs/en/sql-reference/statements/alter/row-policy.md @@ -1,6 +1,6 @@ --- -toc_priority: 47 -toc_title: ROW POLICY +sidebar_position: 47 +sidebar_label: ROW POLICY --- # ALTER ROW POLICY {#alter-row-policy-statement} diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index 21b20be8b78..08e4fe1066b 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: SAMPLE BY +sidebar_position: 41 +sidebar_label: SAMPLE BY --- # Manipulating Sampling-Key Expressions {#manipulations-with-sampling-key-expressions} @@ -15,5 +15,6 @@ The command changes the [sampling key](../../../engines/table-engines/mergetree- The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. -!!! note "Note" - It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +:::note +It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). 
+::: \ No newline at end of file diff --git a/docs/en/sql-reference/statements/alter/setting.md b/docs/en/sql-reference/statements/alter/setting.md index 90747bc1919..bb361e2ee6f 100644 --- a/docs/en/sql-reference/statements/alter/setting.md +++ b/docs/en/sql-reference/statements/alter/setting.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: SETTING +sidebar_position: 38 +sidebar_label: SETTING --- # Table Settings Manipulations {#table_settings_manipulations} @@ -14,9 +14,9 @@ If a setting with the specified name does not exist, then the query raises an ex ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ... ``` -!!! note "Note" - These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only. - +:::note +These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only. +::: ## MODIFY SETTING {#alter_modify_setting} diff --git a/docs/en/sql-reference/statements/alter/settings-profile.md b/docs/en/sql-reference/statements/alter/settings-profile.md index 57d12142c48..b1728f21c08 100644 --- a/docs/en/sql-reference/statements/alter/settings-profile.md +++ b/docs/en/sql-reference/statements/alter/settings-profile.md @@ -1,6 +1,6 @@ --- -toc_priority: 48 -toc_title: SETTINGS PROFILE +sidebar_position: 48 +sidebar_label: SETTINGS PROFILE --- ## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} diff --git a/docs/en/sql-reference/statements/alter/ttl.md b/docs/en/sql-reference/statements/alter/ttl.md index 9cd63d3b8fe..f2cf8724197 100644 --- a/docs/en/sql-reference/statements/alter/ttl.md +++ b/docs/en/sql-reference/statements/alter/ttl.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: TTL +sidebar_position: 44 +sidebar_label: TTL --- # Manipulations with Table TTL {#manipulations-with-table-ttl} diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index 13ea1b2a8db..aeff7cfa1b2 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -1,6 +1,6 @@ --- -toc_priority: 40 -toc_title: UPDATE +sidebar_position: 40 +sidebar_label: UPDATE --- # ALTER TABLE … UPDATE Statements {#alter-table-update-statements} @@ -11,8 +11,9 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). -!!! note "Note" - The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. +:::note +The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. +::: The `filter_expr` must be of type `UInt8`. This query updates values of specified columns to the values of corresponding expressions in rows for which the `filter_expr` takes a non-zero value. Values are casted to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary or the partition key is not supported. 
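A minimal sketch of the form described above, assuming a hypothetical `sessions` table with `is_expired` and `last_seen` columns that are not part of the primary or partition key:

```sql
-- Runs as an asynchronous mutation; the WHERE expression must evaluate to UInt8.
ALTER TABLE sessions
    UPDATE is_expired = 1
    WHERE last_seen < today() - 90;
```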
diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 4873982e2a1..f9b90349dab 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: USER +sidebar_position: 45 +sidebar_label: USER --- # ALTER USER {#alter-user-statement} diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 0fb1c4be0ff..71e89aaefe8 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: VIEW +sidebar_position: 50 +sidebar_label: VIEW --- # ALTER TABLE … MODIFY QUERY Statement {#alter-modify-query} diff --git a/docs/en/sql-reference/statements/attach.md b/docs/en/sql-reference/statements/attach.md index 2949ac6db38..bc7b2be333f 100644 --- a/docs/en/sql-reference/statements/attach.md +++ b/docs/en/sql-reference/statements/attach.md @@ -1,6 +1,6 @@ --- -toc_priority: 40 -toc_title: ATTACH +sidebar_position: 40 +sidebar_label: ATTACH --- # ATTACH Statement {#attach} diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index c9ad40860f7..1164a8b8be6 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: CHECK +sidebar_position: 41 +sidebar_label: CHECK --- # CHECK TABLE Statement {#check-table} diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 787bbc02346..18ed94bef79 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -1,6 +1,6 @@ --- -toc_priority: 35 -toc_title: DATABASE +sidebar_position: 35 +sidebar_label: DATABASE --- # CREATE DATABASE {#query-language-create-database} diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index 86ab8f977b0..246625cc901 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: DICTIONARY +sidebar_position: 38 +sidebar_label: DICTIONARY --- # CREATE DICTIONARY {#create-dictionary-query} diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index ddfcdfef521..a87d3d70e54 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: FUNCTION +sidebar_position: 38 +sidebar_label: FUNCTION --- # CREATE FUNCTION {#create-function} diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 3df62869e2b..666a2c66d2f 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: CREATE -toc_priority: 34 -toc_title: Overview +sidebar_position: 34 +sidebar_label: CREATE --- -# CREATE Queries {#create-queries} +# CREATE Queries Create queries make a new entity of one of the following kinds: diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 767846ead52..931da165a73 100644 --- 
a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -1,6 +1,6 @@ --- -toc_priority: 42 -toc_title: QUOTA +sidebar_position: 42 +sidebar_label: QUOTA --- # CREATE QUOTA {#create-quota-statement} diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index e0e58f7a0f6..5f7db960f27 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -1,6 +1,6 @@ --- -toc_priority: 40 -toc_title: ROLE +sidebar_position: 40 +sidebar_label: ROLE --- # CREATE ROLE {#create-role-statement} diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 3f88d794619..58b7b1e2cb9 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -1,14 +1,15 @@ --- -toc_priority: 41 -toc_title: ROW POLICY +sidebar_position: 41 +sidebar_label: ROW POLICY --- # CREATE ROW POLICY {#create-row-policy-statement} Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. -!!! note "Warning" - Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. +:::warning +Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. +::: Syntax: @@ -30,16 +31,17 @@ In the section `TO` you can provide a list of users and roles this policy should Keyword `ALL` means all the ClickHouse users including current user. Keyword `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` -!!! note "Note" - If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy +:::note +If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy - `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` +`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` - forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all. +forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all. 
- If that's not desirable it can't be fixed by adding one more row policy, like the following: +If that's not desirable it can't be fixed by adding one more row policy, like the following: - `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` +`CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` +::: ## AS Clause {#create-row-policy-as} diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 07bb54c9da3..0cc633d9770 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: SETTINGS PROFILE +sidebar_position: 43 +sidebar_label: SETTINGS PROFILE --- # CREATE SETTINGS PROFILE {#create-settings-profile-statement} diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 409ec422ade..82aad344117 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: TABLE +sidebar_position: 36 +sidebar_label: TABLE --- # CREATE TABLE {#create-table-query} @@ -159,8 +159,9 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -!!! warning "Warning" - You can't combine both ways in one query. +:::warning +You can't combine both ways in one query. +::: ## Constraints {#constraints} @@ -214,8 +215,9 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -!!! warning "Warning" - You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. +:::warning +You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. +::: Compression is supported for the following table engines: @@ -271,11 +273,13 @@ Encryption codecs: These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). -!!! attention "Attention" - Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. +:::warning +Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. +::: -!!! attention "Attention" - If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. 
+:::warning +If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. +::: **Example** @@ -287,8 +291,9 @@ CREATE TABLE mytable ENGINE = MergeTree ORDER BY x; ``` -!!!note "Note" - If compression needs to be applied, it must be explicitly specified. Otherwise, only encryption will be applied to data. +:::note +If compression needs to be applied, it must be explicitly specified. Otherwise, only encryption will be applied to data. +::: **Example** @@ -330,8 +335,9 @@ It’s possible to use tables with [ENGINE = Memory](../../../engines/table-engi 'REPLACE' query allows you to update the table atomically. -!!!note "Note" - This query is supported only for [Atomic](../../../engines/database-engines/atomic.md) database engine. +:::note +This query is supported only for [Atomic](../../../engines/database-engines/atomic.md) database engine. +::: If you need to delete some data from a table, you can create a new table and fill it with a `SELECT` statement that does not retrieve unwanted data, then drop the old table and rename the new one: @@ -405,8 +411,9 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. -!!!note "Note" - The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +:::note +The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +::: **Syntax** diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 5dfcf891439..0aad0961a8b 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: USER +sidebar_position: 39 +sidebar_label: USER --- # CREATE USER {#create-user-statement} @@ -52,9 +52,9 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -!!! info "Warning" - ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. - +:::warning +ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. 
+::: ## GRANTEES Clause {#grantees} diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index f7d3a6d697a..e31d1b4473f 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: VIEW +sidebar_position: 37 +sidebar_label: VIEW --- # CREATE VIEW {#create-view} @@ -49,10 +49,11 @@ When creating a materialized view with `TO [db].[table]`, you must not use `POPU A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view. -!!! important "Important" - Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. +:::note +Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. - Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. +Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. +::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. @@ -68,10 +69,9 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop ## Live View [Experimental] {#live-view} -!!! important "Important" - This is an experimental feature that may change in backwards-incompatible ways in the future releases. - Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. - +:::note +This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. +::: ```sql CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... 
@@ -83,14 +83,15 @@ Live views are triggered by insert into the innermost table specified in the que Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. -!!! info "Limitations" - - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. - - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. - - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. - - Does not work with replicated or distributed tables where inserts are performed on different nodes. - - Can't be triggered by multiple tables. +:::info +- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. +- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. +- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. +- Does not work with replicated or distributed tables where inserts are performed on different nodes. +- Can't be triggered by multiple tables. - See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. +See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. +::: ### Monitoring Live View Changes {#live-view-monitoring} @@ -246,9 +247,9 @@ Most common uses of live view tables include: ## Window View [Experimental] {#window-view} -!!! important "Important" - This is an experimental feature that may change in backwards-incompatible ways in the future releases. - Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. +:::info +This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. +::: ``` sql CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... 
GROUP BY time_window_function diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index 823a31ed313..7fbe5bd2790 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -1,6 +1,6 @@ --- -toc_priority: 42 -toc_title: DESCRIBE +sidebar_position: 42 +sidebar_label: DESCRIBE --- # DESCRIBE TABLE {#misc-describe-table} diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index b77bcbc00fb..bf20f7b3461 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: DETACH +sidebar_position: 43 +sidebar_label: DETACH --- # DETACH Statement {#detach} diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 552a7b5f1a9..0d3e1f7860d 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: DROP +sidebar_position: 44 +sidebar_label: DROP --- # DROP Statements {#drop} diff --git a/docs/en/sql-reference/statements/exchange.md b/docs/en/sql-reference/statements/exchange.md index 91b0c48ddcf..abe3d40950e 100644 --- a/docs/en/sql-reference/statements/exchange.md +++ b/docs/en/sql-reference/statements/exchange.md @@ -1,6 +1,6 @@ --- -toc_priority: 49 -toc_title: EXCHANGE +sidebar_position: 49 +sidebar_label: EXCHANGE --- # EXCHANGE Statement {#exchange} @@ -8,8 +8,9 @@ toc_title: EXCHANGE Exchanges the names of two tables or dictionaries atomically. This task can also be accomplished with a [RENAME](./rename.md) query using a temporary name, but the operation is not atomic in that case. -!!! note "Note" - The `EXCHANGE` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. +:::note +The `EXCHANGE` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. +::: **Syntax** diff --git a/docs/en/sql-reference/statements/exists.md b/docs/en/sql-reference/statements/exists.md index b7c4a487791..7c6cc812665 100644 --- a/docs/en/sql-reference/statements/exists.md +++ b/docs/en/sql-reference/statements/exists.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: EXISTS +sidebar_position: 45 +sidebar_label: EXISTS --- # EXISTS Statement {#exists-statement} diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 9c74c069f02..80f8961a3e9 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: EXPLAIN +sidebar_position: 39 +sidebar_label: EXPLAIN --- # EXPLAIN Statement {#explain} @@ -138,8 +138,9 @@ Union ReadFromStorage (SystemNumbers) ``` -!!! note "Note" - Step and query cost estimation is not supported. +:::note +Step and query cost estimation is not supported. +::: When `json = 1`, the query plan is represented in JSON format. Every node is a dictionary that always has the keys `Node Type` and `Plans`. `Node Type` is a string with a step name. `Plans` is an array with child step descriptions. Other optional keys may be added depending on node type and settings. @@ -446,8 +447,8 @@ Result: └─────────────────────────────────────────────────────────┘ ``` -!!! 
note "Note" - The validation is not complete, so a successfull query does not guarantee that the override would - not cause issues. +:::note +The validation is not complete, so a successfull query does not guarantee that the override would not cause issues. +::: [Оriginal article](https://clickhouse.com/docs/en/sql-reference/statements/explain/) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 1b2b63ba0e7..1ee330061b5 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: GRANT +sidebar_position: 38 +sidebar_label: GRANT --- # GRANT Statement {#grant} diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index a317e4a47de..ab51cbb330c 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Statements -toc_hidden: true -toc_priority: 31 +sidebar_position: 31 +sidebar_label: Statements --- -# ClickHouse SQL Statements {#clickhouse-sql-statements} +# ClickHouse SQL Statements Statements represent various kinds of action you can perform using SQL queries. Each kind of statement has it’s own syntax and usage details that are described separately: diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index f8eefad7051..17d6ce1809b 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -1,9 +1,9 @@ --- -toc_priority: 33 -toc_title: INSERT INTO +sidebar_position: 33 +sidebar_label: INSERT INTO --- -## INSERT INTO Statement {#insert} +# INSERT INTO Statement Inserts data into a table. diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index eab6f602c4a..9fe207f24b2 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: KILL +sidebar_position: 46 +sidebar_label: KILL --- # KILL Statements {#kill-statements} diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md index c553ef37f8d..2751c5296c2 100644 --- a/docs/en/sql-reference/statements/misc.md +++ b/docs/en/sql-reference/statements/misc.md @@ -1,6 +1,6 @@ --- toc_hidden: true -toc_priority: 41 +sidebar_position: 70 --- # Miscellaneous Statements {#miscellaneous-queries} diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 30899cc2940..773284a1b30 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -1,14 +1,15 @@ --- -toc_priority: 47 -toc_title: OPTIMIZE +sidebar_position: 47 +sidebar_label: OPTIMIZE --- # OPTIMIZE Statement {#misc_operations-optimize} This query tries to initialize an unscheduled merge of data parts for tables. -!!! warning "Warning" - `OPTIMIZE` can’t fix the `Too many parts` error. +:::warning +`OPTIMIZE` can’t fix the `Too many parts` error. +::: **Syntax** @@ -27,16 +28,19 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -!!! 
info "Note" - If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. +:::note +If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. +::: ## BY expression {#by-expression} If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key). -!!! note "Note" - Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion. - Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an `ALIAS` column. +:::note +Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion. + +Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an `ALIAS` column. +::: **Syntax** diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index c2192f1a6e1..b3bea3e3c37 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -1,6 +1,6 @@ --- -toc_priority: 48 -toc_title: RENAME +sidebar_position: 48 +sidebar_label: RENAME --- # RENAME Statement {#misc_operations-rename} @@ -8,8 +8,9 @@ toc_title: RENAME Renames databases, tables, or dictionaries. Several entities can be renamed in a single query. Note that the `RENAME` query with several entities is non-atomic operation. To swap entities names atomically, use the [EXCHANGE](./exchange.md) statement. -!!! note "Note" - The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. +:::note +The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. 
+::: **Syntax** diff --git a/docs/en/sql-reference/statements/revoke.md b/docs/en/sql-reference/statements/revoke.md index 75005260c4a..4ffa8a21027 100644 --- a/docs/en/sql-reference/statements/revoke.md +++ b/docs/en/sql-reference/statements/revoke.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: REVOKE +sidebar_position: 39 +sidebar_label: REVOKE --- # REVOKE Statement {#revoke} diff --git a/docs/en/sql-reference/statements/select/all.md b/docs/en/sql-reference/statements/select/all.md index ba66f63b447..6b35678fd92 100644 --- a/docs/en/sql-reference/statements/select/all.md +++ b/docs/en/sql-reference/statements/select/all.md @@ -1,5 +1,5 @@ --- -toc_title: ALL +sidebar_label: ALL --- # ALL Clause {#select-all} diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index f138bcc45c7..f7fc08ae9ba 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -1,5 +1,5 @@ --- -toc_title: ARRAY JOIN +sidebar_label: ARRAY JOIN --- # ARRAY JOIN Clause {#select-array-join-clause} diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md index 390afa46248..898de4730ae 100644 --- a/docs/en/sql-reference/statements/select/distinct.md +++ b/docs/en/sql-reference/statements/select/distinct.md @@ -1,5 +1,5 @@ --- -toc_title: DISTINCT +sidebar_label: DISTINCT --- # DISTINCT Clause {#select-distinct} diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md index e6d9b365a91..dcaefd67ca9 100644 --- a/docs/en/sql-reference/statements/select/except.md +++ b/docs/en/sql-reference/statements/select/except.md @@ -1,5 +1,5 @@ --- -toc_title: EXCEPT +sidebar_label: EXCEPT --- # EXCEPT Clause {#except-clause} diff --git a/docs/en/sql-reference/statements/select/format.md b/docs/en/sql-reference/statements/select/format.md index c3104bd12fe..a7936509ad5 100644 --- a/docs/en/sql-reference/statements/select/format.md +++ b/docs/en/sql-reference/statements/select/format.md @@ -1,5 +1,5 @@ --- -toc_title: FORMAT +sidebar_label: FORMAT --- # FORMAT Clause {#format-clause} diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index df30a0fb0d2..9d5147db13c 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -1,5 +1,5 @@ --- -toc_title: FROM +sidebar_label: FROM --- # FROM Clause {#select-from} diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 969a39ce51f..b08647271f1 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -1,5 +1,5 @@ --- -toc_title: GROUP BY +sidebar_label: GROUP BY --- # GROUP BY Clause {#select-group-by-clause} @@ -12,8 +12,9 @@ toc_title: GROUP BY When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). -!!! note "Note" - There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. 
+:::note +There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. +::: ## NULL Processing {#null-processing} @@ -55,8 +56,9 @@ The subtotals are calculated in the reverse order: at first subtotals are calcul In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line. -!!! note "Note" - Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. +:::note +Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. +::: **Example** @@ -114,8 +116,9 @@ As `GROUP BY` section has three key expressions, the result contains four tables In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. -!!! note "Note" - Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. +:::note +Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. +::: **Example** @@ -206,8 +209,9 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma - In `Pretty*` formats, the row is output as a separate table after the main result. - In the other formats it is not available. -!!! note "Note" - totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. +:::note +totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. +::: `WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting. diff --git a/docs/en/sql-reference/statements/select/having.md b/docs/en/sql-reference/statements/select/having.md index 93d56097b11..9aee0cf4d63 100644 --- a/docs/en/sql-reference/statements/select/having.md +++ b/docs/en/sql-reference/statements/select/having.md @@ -1,5 +1,5 @@ --- -toc_title: HAVING +sidebar_label: HAVING --- # HAVING Clause {#having-clause} diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 33644133153..50dd8fecf3a 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -1,11 +1,9 @@ --- -title: SELECT Query -toc_folder_title: SELECT -toc_priority: 32 -toc_title: Overview +sidebar_position: 32 +sidebar_label: SELECT --- -# SELECT Query {#select-queries-syntax} +# SELECT Query `SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. 
diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index 2243a35e4d8..ef9868daebb 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -1,5 +1,5 @@ --- -toc_title: INTERSECT +sidebar_label: INTERSECT --- # INTERSECT Clause {#intersect-clause} diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index b949b9c83c0..08f53348cd3 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -1,5 +1,5 @@ --- -toc_title: INTO OUTFILE +sidebar_label: INTO OUTFILE --- # INTO OUTFILE Clause {#into-outfile-clause} diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 3d302be561a..0cf58d0b90f 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -1,5 +1,5 @@ --- -toc_title: JOIN +sidebar_label: JOIN --- # JOIN Clause {#select-join} @@ -36,8 +36,9 @@ Additional join types available in ClickHouse: - `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. -!!! note "Note" - When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). +:::note +When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). +::: ## Settings {#join-settings} @@ -63,8 +64,9 @@ Rows are joined if the whole complex condition is met. If the conditions are not The `OR` operator inside the `ON` clause works using the hash join algorithm — for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with an increase in the number of expressions `OR` of the `ON` clause. -!!! note "Note" - If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far. +:::note +If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far. +::: **Example** @@ -197,8 +199,9 @@ For example, consider the following tables: `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined. -!!! note "Note" - `ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. 
+:::note +`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. +::: ## Distributed JOIN {#global-join} diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 68b459a46e8..913b7b40338 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -1,5 +1,5 @@ --- -toc_title: LIMIT BY +sidebar_label: LIMIT BY --- # LIMIT BY Clause {#limit-by-clause} @@ -13,8 +13,9 @@ ClickHouse supports the following syntax variants: During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. -!!! note "Note" - `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. +:::note +`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. +::: If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md index 6ed38b2dd64..6b1c90041fe 100644 --- a/docs/en/sql-reference/statements/select/limit.md +++ b/docs/en/sql-reference/statements/select/limit.md @@ -1,5 +1,5 @@ --- -toc_title: LIMIT +sidebar_label: LIMIT --- # LIMIT Clause {#limit-clause} @@ -12,8 +12,9 @@ toc_title: LIMIT If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause that explicitly sorts results, the choice of rows for the result may be arbitrary and non-deterministic. -!!! note "Note" - The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. +:::note +The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. +::: ## LIMIT … WITH TIES Modifier {#limit-with-ties} diff --git a/docs/en/sql-reference/statements/select/offset.md b/docs/en/sql-reference/statements/select/offset.md index 20ebd972a24..e120845dbc6 100644 --- a/docs/en/sql-reference/statements/select/offset.md +++ b/docs/en/sql-reference/statements/select/offset.md @@ -1,5 +1,5 @@ --- -toc_title: OFFSET +sidebar_label: OFFSET --- # OFFSET FETCH Clause {#offset-fetch} @@ -30,11 +30,13 @@ SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. 
For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows. -!!! note "Note" - According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present. +:::note +According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present. +::: -!!! note "Note" - The real offset can also depend on the [offset](../../../operations/settings/settings.md#offset) setting. +:::note +The real offset can also depend on the [offset](../../../operations/settings/settings.md#offset) setting. +::: ## Examples {#examples} diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index b24f0213e4e..46e483dddf4 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -1,5 +1,5 @@ --- -toc_title: ORDER BY +sidebar_label: ORDER BY --- # ORDER BY Clause {#select-order-by} diff --git a/docs/en/sql-reference/statements/select/prewhere.md b/docs/en/sql-reference/statements/select/prewhere.md index 646bb83e692..c3aa2e14384 100644 --- a/docs/en/sql-reference/statements/select/prewhere.md +++ b/docs/en/sql-reference/statements/select/prewhere.md @@ -1,5 +1,5 @@ --- -toc_title: PREWHERE +sidebar_label: PREWHERE --- # PREWHERE Clause {#prewhere-clause} @@ -18,8 +18,9 @@ If the [optimize_move_to_prewhere](../../../operations/settings/settings.md#opti If query has [FINAL](from.md#select-from-final) modifier, the `PREWHERE` optimization is not always correct. It is enabled only if both settings [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) and [optimize_move_to_prewhere_if_final](../../../operations/settings/settings.md#optimize_move_to_prewhere_if_final) are turned on. -!!! note "Attention" - The `PREWHERE` section is executed before `FINAL`, so the results of `FROM ... FINAL` queries may be skewed when using `PREWHERE` with fields not in the `ORDER BY` section of a table. +:::note +The `PREWHERE` section is executed before `FINAL`, so the results of `FROM ... FINAL` queries may be skewed when using `PREWHERE` with fields not in the `ORDER BY` section of a table. +::: ## Limitations {#limitations} diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index a587731e563..3673a49a9e9 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -1,5 +1,5 @@ --- -toc_title: SAMPLE +sidebar_label: SAMPLE --- # SAMPLE Clause {#select-sample-clause} @@ -14,8 +14,9 @@ Approximated query processing can be useful in the following cases: - When your raw data is not accurate, so approximation does not noticeably degrade the quality. - Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users). -!!! note "Note" - You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). 
+:::note +You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). +::: The features of data sampling are listed below: diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md index 6dfe554edf0..8a1c7a770c9 100644 --- a/docs/en/sql-reference/statements/select/union.md +++ b/docs/en/sql-reference/statements/select/union.md @@ -1,5 +1,5 @@ --- -toc_title: UNION +sidebar_label: UNION --- # UNION Clause {#union-clause} diff --git a/docs/en/sql-reference/statements/select/where.md b/docs/en/sql-reference/statements/select/where.md index 348b869e2db..c68f9d39d09 100644 --- a/docs/en/sql-reference/statements/select/where.md +++ b/docs/en/sql-reference/statements/select/where.md @@ -1,5 +1,5 @@ --- -toc_title: WHERE +sidebar_label: WHERE --- # WHERE Clause {#select-where} @@ -10,8 +10,9 @@ If there is a `WHERE` clause, it must contain an expression with the `UInt8` typ `WHERE` expression is evaluated on the ability to use indexes and partition pruning, if the underlying table engine supports that. -!!! note "Note" - There is a filtering optimization called [PREWHERE](../../../sql-reference/statements/select/prewhere.md). +:::note +There is a filtering optimization called [PREWHERE](../../../sql-reference/statements/select/prewhere.md). +::: If you need to test a value for [NULL](../../../sql-reference/syntax.md#null-literal), use [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators or [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions. Otherwise an expression with `NULL` never passes. diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index d6c8da261cb..39fcb752980 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -1,5 +1,5 @@ --- -toc_title: WITH +sidebar_label: WITH --- # WITH Clause {#with-clause} diff --git a/docs/en/sql-reference/statements/set-role.md b/docs/en/sql-reference/statements/set-role.md index cf14a9c6d75..cac7ca28b92 100644 --- a/docs/en/sql-reference/statements/set-role.md +++ b/docs/en/sql-reference/statements/set-role.md @@ -1,6 +1,6 @@ --- -toc_priority: 51 -toc_title: SET ROLE +sidebar_position: 51 +sidebar_label: SET ROLE --- # SET ROLE Statement {#set-role-statement} diff --git a/docs/en/sql-reference/statements/set.md b/docs/en/sql-reference/statements/set.md index e5de5c41284..d2a1d30c797 100644 --- a/docs/en/sql-reference/statements/set.md +++ b/docs/en/sql-reference/statements/set.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: SET +sidebar_position: 50 +sidebar_label: SET --- # SET Statement {#query-set} diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 96cbee0b04d..75c5c121946 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: SHOW +sidebar_position: 37 +sidebar_label: SHOW --- # SHOW Statements {#show-queries} @@ -361,8 +361,9 @@ SHOW ACCESS Returns a list of clusters. 
All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. -!!! info "Note" - `SHOW CLUSTER name` query displays the contents of system.clusters table for this cluster. +:::note +`SHOW CLUSTER name` query displays the contents of system.clusters table for this cluster. +::: ### Syntax {#show-cluster-syntax} diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index b71853f29dd..14eed981381 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -1,6 +1,6 @@ --- -toc_priority: 36 -toc_title: SYSTEM +sidebar_position: 36 +sidebar_label: SYSTEM --- # SYSTEM Statements {#query-language-system} @@ -191,8 +191,9 @@ Provides possibility to stop background merges for tables in the MergeTree famil SYSTEM STOP MERGES [ON VOLUME | [db.]merge_tree_family_table_name] ``` -!!! note "Note" - `DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. +:::note +`DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. +::: ### START MERGES {#query_language-system-start-merges} @@ -326,8 +327,9 @@ One may execute query after: Replica attaches locally found parts and sends info about them to Zookeeper. Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network). -!!! warning "Warning" - Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. +:::warning +Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. +::: **Syntax** diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index b5354196fa4..393ba82b3cd 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -1,6 +1,6 @@ --- -toc_priority: 52 -toc_title: TRUNCATE +sidebar_position: 52 +sidebar_label: TRUNCATE --- # TRUNCATE Statement {#truncate-statement} @@ -17,5 +17,6 @@ You can use the [replication_alter_partitions_sync](../../operations/settings/se You can specify how long (in seconds) to wait for inactive replicas to execute `TRUNCATE` queries with the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -!!! info "Note" - If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. +:::note +If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. 
+::: \ No newline at end of file diff --git a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md index 841c23d333d..869bf44fdeb 100644 --- a/docs/en/sql-reference/statements/use.md +++ b/docs/en/sql-reference/statements/use.md @@ -1,6 +1,6 @@ --- -toc_priority: 53 -toc_title: USE +sidebar_position: 53 +sidebar_label: USE --- # USE Statement {#use} diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md index be793d30f3d..688cf21e23c 100644 --- a/docs/en/sql-reference/statements/watch.md +++ b/docs/en/sql-reference/statements/watch.md @@ -1,14 +1,13 @@ --- -toc_priority: 53 -toc_title: WATCH +sidebar_position: 53 +sidebar_label: WATCH --- # WATCH Statement (Experimental) {#watch} -!!! important "Important" - This is an experimental feature that may change in backwards-incompatible ways in the future releases. - Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. - +:::warning +This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. +::: ``` sql WATCH [db.]live_view @@ -105,5 +104,6 @@ WATCH lv EVENTS LIMIT 1; The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause). -!!! info "Note" - The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. +:::note +The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 19efef3dc6a..10664549329 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -1,6 +1,6 @@ --- -toc_priority: 31 -toc_title: Syntax +sidebar_position: 31 +sidebar_label: Syntax --- # Syntax {#syntax} diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index a02c2a10fb7..5954ed1b439 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -1,6 +1,6 @@ --- -toc_priority: 50 -toc_title: cluster +sidebar_position: 50 +sidebar_label: cluster --- # cluster, clusterAllReplicas {#cluster-clusterallreplicas} @@ -9,8 +9,9 @@ Allows to access all shards in an existing cluster which configured in `remote_s `clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection. -!!! note "Note" - All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. 
+:::note +All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. +::: **Syntax** diff --git a/docs/en/sql-reference/table-functions/dictionary.md b/docs/en/sql-reference/table-functions/dictionary.md index ad30cb30adf..f04a4b6eb24 100644 --- a/docs/en/sql-reference/table-functions/dictionary.md +++ b/docs/en/sql-reference/table-functions/dictionary.md @@ -1,6 +1,6 @@ --- -toc_priority: 54 -toc_title: dictionary function +sidebar_position: 54 +sidebar_label: dictionary function --- # dictionary {#dictionary-function} diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f7c2a9e6d5b..4b72b0d84f5 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -1,6 +1,6 @@ --- -toc_priority: 37 -toc_title: file +sidebar_position: 37 +sidebar_label: file --- # file {#file} @@ -106,8 +106,9 @@ Query the number of rows in all files of these two directories: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -!!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: **Example** diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index ae22e1a1b88..bb9ad3f7551 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -1,6 +1,6 @@ --- -toc_priority: 47 -toc_title: generateRandom +sidebar_position: 47 +sidebar_label: generateRandom --- # generateRandom {#generaterandom} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index a7c3baca299..7f7dc53d27e 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: hdfs +sidebar_position: 45 +sidebar_label: hdfs --- # hdfs {#hdfs} @@ -78,8 +78,9 @@ SELECT count(*) FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -!!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: **Example** diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 6183fe83c38..b46b8e64a1a 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -1,6 +1,6 @@ --- -toc_priority: 55 -toc_title: hdfsCluster +sidebar_position: 55 +sidebar_label: hdfsCluster --- # hdfsCluster Table Function {#hdfsCluster-table-function} @@ -49,8 +49,9 @@ SELECT count(*) FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -!!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
+:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: **See Also** diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 24d67e31fa8..a51312324f0 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -1,10 +1,9 @@ --- -toc_folder_title: Table Functions -toc_priority: 34 -toc_title: Introduction +sidebar_label: Table Functions +sidebar_position: 34 --- -# Table Functions {#table-functions} +# Table Functions Table functions are methods for constructing tables. @@ -20,8 +19,9 @@ You can use table functions in: - [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. -!!! warning "Warning" - You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. +:::warning +You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. +::: | Function | Description | |------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/en/sql-reference/table-functions/input.md b/docs/en/sql-reference/table-functions/input.md index 17707b798d6..916abb890ff 100644 --- a/docs/en/sql-reference/table-functions/input.md +++ b/docs/en/sql-reference/table-functions/input.md @@ -1,6 +1,6 @@ --- -toc_priority: 46 -toc_title: input +sidebar_position: 46 +sidebar_label: input --- # input {#input} diff --git a/docs/en/sql-reference/table-functions/jdbc.md b/docs/en/sql-reference/table-functions/jdbc.md index 9fe1333fc94..57128f7d146 100644 --- a/docs/en/sql-reference/table-functions/jdbc.md +++ b/docs/en/sql-reference/table-functions/jdbc.md @@ -1,6 +1,6 @@ --- -toc_priority: 43 -toc_title: jdbc +sidebar_position: 43 +sidebar_label: jdbc --- # jdbc {#table-function-jdbc} diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md index c89f0f4cc5a..301f0a69caf 100644 --- a/docs/en/sql-reference/table-functions/merge.md +++ b/docs/en/sql-reference/table-functions/merge.md @@ -1,6 +1,6 @@ --- -toc_priority: 38 -toc_title: merge +sidebar_position: 38 +sidebar_label: merge --- # merge {#merge} diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index b45ab86f60f..c6983d8fba1 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -1,6 +1,6 @@ --- -toc_priority: 42 -toc_title: mysql +sidebar_position: 42 +sidebar_label: mysql --- # mysql {#mysql} @@ -55,8 +55,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', A table object with the same columns as the original MySQL table. -!!! info "Note" - In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. +:::note +In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. 
+::: **Examples** diff --git a/docs/en/sql-reference/table-functions/null.md b/docs/en/sql-reference/table-functions/null.md index 4a8d221d620..48df12bfece 100644 --- a/docs/en/sql-reference/table-functions/null.md +++ b/docs/en/sql-reference/table-functions/null.md @@ -1,6 +1,6 @@ --- -toc_priority: 53 -toc_title: null function +sidebar_position: 53 +sidebar_label: null function --- # null {#null-function} diff --git a/docs/en/sql-reference/table-functions/numbers.md b/docs/en/sql-reference/table-functions/numbers.md index f9735056b05..c15c47cf725 100644 --- a/docs/en/sql-reference/table-functions/numbers.md +++ b/docs/en/sql-reference/table-functions/numbers.md @@ -1,6 +1,6 @@ --- -toc_priority: 39 -toc_title: numbers +sidebar_position: 39 +sidebar_label: numbers --- # numbers {#numbers} diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md index a8481fbfd68..d2614337cdd 100644 --- a/docs/en/sql-reference/table-functions/odbc.md +++ b/docs/en/sql-reference/table-functions/odbc.md @@ -1,6 +1,6 @@ --- -toc_priority: 44 -toc_title: odbc +sidebar_position: 44 +sidebar_label: odbc --- # odbc {#table-functions-odbc} diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index b2bdc2495e5..6a30b1f3f0c 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -1,6 +1,6 @@ --- -toc_priority: 42 -toc_title: postgresql +sidebar_position: 42 +sidebar_label: postgresql --- # postgresql {#postgresql} @@ -26,8 +26,9 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) A table object with the same columns as the original PostgreSQL table. -!!! info "Note" - In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. +:::note +In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. +::: ## Implementation Details {#implementation-details} @@ -41,8 +42,9 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL Array types converts into ClickHouse arrays. -!!! info "Note" - Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows. +:::note +Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows. +::: Supports multiple replicas that must be listed by `|`. 
For example: diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 9effbb03553..0eae00564ba 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -1,6 +1,6 @@ --- -toc_priority: 40 -toc_title: remote +sidebar_position: 40 +sidebar_label: remote --- # remote, remoteSecure {#remote-remotesecure} diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 7dffd252dc9..61dda209ee6 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -1,6 +1,6 @@ --- -toc_priority: 45 -toc_title: s3 +sidebar_position: 45 +sidebar_label: s3 --- # s3 Table Function {#s3-table-function} @@ -95,8 +95,9 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ └─────────┘ ``` -!!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 65565aa92cb..dbd3538c692 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -1,6 +1,6 @@ --- -toc_priority: 55 -toc_title: s3Cluster +sidebar_position: 55 +sidebar_label: s3Cluster --- # s3Cluster Table Function {#s3Cluster-table-function} @@ -39,8 +39,9 @@ Count the total amount of rows in all files in the cluster `cluster_simple`: SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); ``` -!!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
+::: **See Also** diff --git a/docs/en/sql-reference/table-functions/sqlite.md b/docs/en/sql-reference/table-functions/sqlite.md index be7bd92d7e7..6058843ae61 100644 --- a/docs/en/sql-reference/table-functions/sqlite.md +++ b/docs/en/sql-reference/table-functions/sqlite.md @@ -1,6 +1,6 @@ --- -toc_priority: 55 -toc_title: sqlite +sidebar_position: 55 +sidebar_label: sqlite --- ## sqlite {#sqlite} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index bfad7a67e0d..3f2f9c6a710 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -1,6 +1,6 @@ --- -toc_priority: 41 -toc_title: url +sidebar_position: 41 +sidebar_label: url --- # url {#url} diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md index f78120c370e..727cc04e5a2 100644 --- a/docs/en/sql-reference/table-functions/view.md +++ b/docs/en/sql-reference/table-functions/view.md @@ -1,6 +1,6 @@ --- -toc_priority: 51 -toc_title: view +sidebar_position: 51 +sidebar_label: view --- ## view {#view} diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 0a55eafc7ab..e9a15995a16 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,6 +1,6 @@ --- -toc_priority: 62 -toc_title: Window Functions +sidebar_position: 62 +sidebar_label: Window Functions --- # Window Functions From 893b8a95131333a7c9031f460669248a41a2c668 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 28 Mar 2022 06:31:10 +0000 Subject: [PATCH 046/117] Initial implementation of is_secure --- src/Interpreters/ClientInfo.cpp | 8 ++++++++ src/Interpreters/ClientInfo.h | 1 + src/Interpreters/QueryLog.cpp | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 75af25e842e..0992c76b7b9 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -70,6 +70,8 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const writeVarUInt(client_version_patch, out); } + writeBinary(static_cast(is_secure), out); + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { if (client_trace_context.trace_id != UUID()) @@ -165,6 +167,12 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) client_version_patch = client_tcp_protocol_version; } + { + UInt8 value; + readBinary(value, in); + is_secure = value; + } + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { uint8_t have_trace_id = 0; diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 0b40c78becc..42c56d470af 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -69,6 +69,7 @@ public: /// All below are parameters related to initial query. 
Interface interface = Interface::TCP; + bool is_secure = false; /// For tcp String os_user; diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index b464d9c1ca5..b30914c1813 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -116,7 +116,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"used_formats", std::make_shared(std::make_shared())}, {"used_functions", std::make_shared(std::make_shared())}, {"used_storages", std::make_shared(std::make_shared())}, - {"used_table_functions", std::make_shared(std::make_shared())} + {"used_table_functions", std::make_shared(std::make_shared())}, + {"is_secure", std::make_shared()} }; } @@ -291,5 +292,7 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo columns[i++]->insert(client_info.quota_key); columns[i++]->insert(client_info.distributed_depth); + + columns[i++]->insert(static_cast(client_info.is_secure)); } } From b982959c4c9fc1a26b7c62c51428d2f55f35f7ed Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 28 Mar 2022 08:30:34 +0000 Subject: [PATCH 047/117] Fix writting is_secure --- src/Core/ProtocolDefines.h | 4 +++- src/Interpreters/ClientInfo.cpp | 17 ++++++++++------- src/Interpreters/QueryLog.cpp | 7 +++---- src/Interpreters/QueryThreadLog.cpp | 1 + 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 6ee491f3ab5..e5a07f96577 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -52,6 +52,8 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-#define DBMS_TCP_PROTOCOL_VERSION 54455 +#define DBMS_TCP_PROTOCOL_VERSION 54456 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 + +#define DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE 54456 diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 0992c76b7b9..07ff17d260e 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -37,6 +37,9 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const writeBinary(UInt8(interface), out); + if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE) + writeBinary(static_cast(is_secure), out); + if (interface == Interface::TCP) { writeBinary(os_user, out); @@ -70,8 +73,6 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const writeVarUInt(client_version_patch, out); } - writeBinary(static_cast(is_secure), out); - if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { if (client_trace_context.trace_id != UUID()) @@ -129,6 +130,13 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) readBinary(read_interface, in); interface = Interface(read_interface); + if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE) + { + UInt8 value; + readBinary(value, in); + is_secure = value; + } + if (interface == Interface::TCP) { readBinary(os_user, in); @@ -167,11 +175,6 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) client_version_patch = client_tcp_protocol_version; } - { - UInt8 value; - readBinary(value, in); - is_secure = value; - } if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index b30914c1813..5adca8f0e79 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -86,6 +86,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"initial_query_start_time", std::make_shared()}, {"initial_query_start_time_microseconds", std::make_shared(6)}, {"interface", std::make_shared()}, + {"is_secure", std::make_shared()}, {"os_user", std::make_shared()}, {"client_hostname", std::make_shared()}, {"client_name", std::make_shared()}, @@ -116,8 +117,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"used_formats", std::make_shared(std::make_shared())}, {"used_functions", std::make_shared(std::make_shared())}, {"used_storages", std::make_shared(std::make_shared())}, - {"used_table_functions", std::make_shared(std::make_shared())}, - {"is_secure", std::make_shared()} + {"used_table_functions", std::make_shared(std::make_shared())} }; } @@ -276,6 +276,7 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo columns[i++]->insert(client_info.initial_query_start_time_microseconds); columns[i++]->insert(UInt64(client_info.interface)); + columns[i++]->insert(static_cast(client_info.is_secure)); columns[i++]->insert(client_info.os_user); columns[i++]->insert(client_info.client_hostname); @@ -292,7 +293,5 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo columns[i++]->insert(client_info.quota_key); columns[i++]->insert(client_info.distributed_depth); - - columns[i++]->insert(static_cast(client_info.is_secure)); } } diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index d9feaf0a0c3..3b28df17999 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -56,6 +56,7 @@ NamesAndTypesList 
QueryThreadLogElement::getNamesAndTypes() {"initial_query_start_time", std::make_shared()}, {"initial_query_start_time_microseconds", std::make_shared(6)}, {"interface", std::make_shared()}, + {"is_secure", std::make_shared()}, {"os_user", std::make_shared()}, {"client_hostname", std::make_shared()}, {"client_name", std::make_shared()}, From d14ad227b1a1023a12794f6fdd54c8311b16d6e0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 28 Mar 2022 13:25:47 +0000 Subject: [PATCH 048/117] Polish TCP is_secure flag --- src/Core/ProtocolDefines.h | 4 +--- src/Interpreters/ClientInfo.cpp | 10 ---------- src/Interpreters/Session.cpp | 3 ++- src/Interpreters/Session.h | 2 +- src/Server/TCPHandler.cpp | 5 +++-- src/Server/TCPHandler.h | 4 +++- src/Server/TCPHandlerFactory.h | 4 +++- 7 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index e5a07f96577..6ee491f3ab5 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -52,8 +52,6 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). -#define DBMS_TCP_PROTOCOL_VERSION 54456 +#define DBMS_TCP_PROTOCOL_VERSION 54455 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 - -#define DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE 54456 diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 07ff17d260e..62bd006d857 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -37,9 +37,6 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const writeBinary(UInt8(interface), out); - if (server_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE) - writeBinary(static_cast(is_secure), out); - if (interface == Interface::TCP) { writeBinary(os_user, out); @@ -130,13 +127,6 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) readBinary(read_interface, in); interface = Interface(read_interface); - if (client_protocol_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_IS_SECURE) - { - UInt8 value; - readBinary(value, in); - is_secure = value; - } - if (interface == Interface::TCP) { readBinary(os_user, in); diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 2af9a2b6bbc..42ccd45f4de 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -243,7 +243,7 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } -Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_) +Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), interface(interface_), @@ -251,6 +251,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter { prepared_client_info.emplace(); prepared_client_info->interface = interface_; + prepared_client_info->is_secure = is_secure; } Session::~Session() diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index f937c73d1a8..d210f38fb88 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -32,7 +32,7 @@ public: /// Stops using named sessions. The method must be called at the server shutdown. 
static void shutdownNamedSessions(); - Session(const ContextPtr & global_context_, ClientInfo::Interface interface_); + Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false); ~Session(); Session(const Session &&) = delete; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f4592a8b2c9..e33fc244772 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -81,12 +81,13 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOCOL; } -TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, bool is_secure_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get("TCPHandler")) + , is_secure(is_secure_) , server_display_name(std::move(server_display_name_)) { } @@ -110,7 +111,7 @@ void TCPHandler::runImpl() setThreadName("TCPHandler"); ThreadStatus thread_status; - session = std::make_unique(server.context(), ClientInfo::Interface::TCP); + session = std::make_unique(server.context(), ClientInfo::Interface::TCP, is_secure); extractConnectionSettingsFromContext(server.context()); socket().setReceiveTimeout(receive_timeout); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 153b8c35ea4..8c63e8ad0ee 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -133,7 +133,7 @@ public: * because it allows to check the IP ranges of the trusted proxy. * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. */ - TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, bool is_secure_ = false); ~TCPHandler() override; void run() override; @@ -147,6 +147,8 @@ private: bool parse_proxy_protocol = false; Poco::Logger * log; + bool is_secure = false; + String client_name; UInt64 client_version_major = 0; UInt64 client_version_minor = 0; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 6e27dfc93bd..be5efccc030 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -19,6 +19,7 @@ private: bool parse_proxy_protocol = false; Poco::Logger * log; std::string server_display_name; + bool is_secure = false; class DummyTCPHandler : public Poco::Net::TCPServerConnection { @@ -35,6 +36,7 @@ public: TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_) : server(server_), parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) + , is_secure(secure_) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } @@ -45,7 +47,7 @@ public: { LOG_TRACE(log, "TCP Request. 
Address: {}", socket.peerAddress().toString()); - return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name); + return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name, is_secure); } catch (const Poco::Net::NetException &) { From e15fccd31b70406d51e38d3c2b39018cb892c5af Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 28 Mar 2022 14:01:22 +0000 Subject: [PATCH 049/117] Add is_secure info to HTTP --- src/Server/HTTPHandler.cpp | 2 +- src/Server/TCPHandler.cpp | 5 ++--- src/Server/TCPHandler.h | 4 +--- src/Server/TCPHandlerFactory.h | 4 +--- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 9218c75c390..8e9ce395f83 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -922,7 +922,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse setThreadName("HTTPHandler"); ThreadStatus thread_status; - session = std::make_unique(server.context(), ClientInfo::Interface::HTTP); + session = std::make_unique(server.context(), ClientInfo::Interface::HTTP, request.isSecure()); SCOPE_EXIT({ session.reset(); }); std::optional query_scope; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e33fc244772..119f601563a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -81,13 +81,12 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOCOL; } -TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, bool is_secure_) +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get("TCPHandler")) - , is_secure(is_secure_) , server_display_name(std::move(server_display_name_)) { } @@ -111,7 +110,7 @@ void TCPHandler::runImpl() setThreadName("TCPHandler"); ThreadStatus thread_status; - session = std::make_unique(server.context(), ClientInfo::Interface::TCP, is_secure); + session = std::make_unique(server.context(), ClientInfo::Interface::TCP, socket().secure()); extractConnectionSettingsFromContext(server.context()); socket().setReceiveTimeout(receive_timeout); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 8c63e8ad0ee..153b8c35ea4 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -133,7 +133,7 @@ public: * because it allows to check the IP ranges of the trusted proxy. * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. 
*/ - TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, bool is_secure_ = false); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); ~TCPHandler() override; void run() override; @@ -147,8 +147,6 @@ private: bool parse_proxy_protocol = false; Poco::Logger * log; - bool is_secure = false; - String client_name; UInt64 client_version_major = 0; UInt64 client_version_minor = 0; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index be5efccc030..6e27dfc93bd 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -19,7 +19,6 @@ private: bool parse_proxy_protocol = false; Poco::Logger * log; std::string server_display_name; - bool is_secure = false; class DummyTCPHandler : public Poco::Net::TCPServerConnection { @@ -36,7 +35,6 @@ public: TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_) : server(server_), parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) - , is_secure(secure_) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } @@ -47,7 +45,7 @@ public: { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name, is_secure); + return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name); } catch (const Poco::Net::NetException &) { From b70456879e597dc781df5e7461cd5aebea84caeb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 29 Mar 2022 08:01:15 +0000 Subject: [PATCH 050/117] Add tests for is_secure in query_log --- .../02246_is_secure_query_log.reference | 4 ++++ .../0_stateless/02246_is_secure_query_log.sh | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/queries/0_stateless/02246_is_secure_query_log.reference create mode 100755 tests/queries/0_stateless/02246_is_secure_query_log.sh diff --git a/tests/queries/0_stateless/02246_is_secure_query_log.reference b/tests/queries/0_stateless/02246_is_secure_query_log.reference new file mode 100644 index 00000000000..1e8c0bbc9cf --- /dev/null +++ b/tests/queries/0_stateless/02246_is_secure_query_log.reference @@ -0,0 +1,4 @@ +1 0 +1 1 +2 0 +2 1 diff --git a/tests/queries/0_stateless/02246_is_secure_query_log.sh b/tests/queries/0_stateless/02246_is_secure_query_log.sh new file mode 100755 index 00000000000..6b064cedfdf --- /dev/null +++ b/tests/queries/0_stateless/02246_is_secure_query_log.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --log_queries=1 --query_id "2246_${CLICKHOUSE_DATABASE}_client_nonsecure" -q "select 1 Format Null" +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select interface, is_secure from system.query_log where query_id = '2246_${CLICKHOUSE_DATABASE}_client_nonsecure' and type = 'QueryFinish' and current_database = currentDatabase()" + +${CLICKHOUSE_CLIENT_SECURE} --log_queries=1 --query_id "2246_${CLICKHOUSE_DATABASE}_client_secure" -q "select 1 Format Null" +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select interface, is_secure from system.query_log where query_id = '2246_${CLICKHOUSE_DATABASE}_client_secure' and type = 'QueryFinish' and current_database = currentDatabase()" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&log_queries=1&query_id=2246_${CLICKHOUSE_DATABASE}_http_nonsecure" -d "select 1 Format Null" +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select interface, is_secure from system.query_log where query_id = '2246_${CLICKHOUSE_DATABASE}_http_nonsecure' and type = 'QueryFinish' and current_database = currentDatabase()" + +${CLICKHOUSE_CURL} -sSk "${CLICKHOUSE_URL_HTTPS}&log_queries=1&query_id=2246_${CLICKHOUSE_DATABASE}_http_secure" -d "select 1 Format Null" +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select interface, is_secure from system.query_log where query_id = '2246_${CLICKHOUSE_DATABASE}_http_secure' and type = 'QueryFinish' and current_database = currentDatabase()" From 87bab4f2eac3d9c9e807d8382d0db25ac1537b40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 30 Mar 2022 11:25:58 +0000 Subject: [PATCH 051/117] Fix style --- tests/ci/clickhouse_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 218aaca8b91..d52b6262a78 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -15,7 +15,7 @@ class ClickHouseHelper: self.url = url self.auth = { "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), - "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password") + "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), } @staticmethod From d53858758d8be429d18d74b9b3ceccc99f14945f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 30 Mar 2022 11:32:08 +0000 Subject: [PATCH 052/117] fix tests --- tests/queries/0_stateless/01297_create_quota.reference | 8 ++++---- .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index 375d67346be..f3f833d7619 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -57,10 +57,10 @@ q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] [] q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] [] q4_01297 local directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits -q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 -q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N -q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N -q4_01297 604800 0 \N \N \N \N \N \N \N \N \N +q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 \N +q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N \N +q3_01297 15778476 0 100 \N \N 11 \N \N 
\N \N \N \N +q4_01297 604800 0 \N \N \N \N \N \N \N \N \N \N -- query_selects query_inserts CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297 CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index a6a184b3d22..0aadc509137 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -39,10 +39,10 @@ CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0 CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `distributed_depth` UInt64,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` 
Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.quota_limits\n(\n `quota_name` String,\n `duration` UInt32,\n `is_randomized_interval` UInt8,\n `max_queries` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `max_execution_time` Nullable(Float64)\n)\nENGINE = SystemQuotaLimits()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.quota_usage\n(\n `quota_name` String,\n `quota_key` String,\n `start_time` Nullable(DateTime),\n `end_time` Nullable(DateTime),\n `duration` Nullable(UInt32),\n `queries` Nullable(UInt64),\n `max_queries` Nullable(UInt64),\n `query_selects` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `query_inserts` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `errors` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `result_rows` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `result_bytes` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `read_rows` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `read_bytes` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `execution_time` Nullable(Float64),\n `max_execution_time` Nullable(Float64)\n)\nENGINE = SystemQuotaUsage()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.quota_limits\n(\n `quota_name` String,\n `duration` UInt32,\n `is_randomized_interval` UInt8,\n `max_queries` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `max_read_rows` 
Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `max_execution_time` Nullable(Float64),\n `max_written_bytes` Nullable(UInt64)\n)\nENGINE = SystemQuotaLimits()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.quota_usage\n(\n `quota_name` String,\n `quota_key` String,\n `start_time` Nullable(DateTime),\n `end_time` Nullable(DateTime),\n `duration` Nullable(UInt32),\n `queries` Nullable(UInt64),\n `max_queries` Nullable(UInt64),\n `query_selects` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `query_inserts` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `errors` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `result_rows` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `result_bytes` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `read_rows` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `read_bytes` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `execution_time` Nullable(Float64),\n `max_execution_time` Nullable(Float64),\n `written_bytes` Nullable(UInt64),\n `max_written_bytes` Nullable(UInt64)\n)\nENGINE = SystemQuotaUsage()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.quotas\n(\n `name` String,\n `id` UUID,\n `storage` String,\n `keys` Array(Enum8(\'user_name\' = 1, \'ip_address\' = 2, \'forwarded_ip_address\' = 3, \'client_key\' = 4)),\n `durations` Array(UInt32),\n `apply_to_all` UInt8,\n `apply_to_list` Array(String),\n `apply_to_except` Array(String)\n)\nENGINE = SystemQuotas()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.quotas_usage\n(\n `quota_name` String,\n `quota_key` String,\n `is_current` UInt8,\n `start_time` Nullable(DateTime),\n `end_time` Nullable(DateTime),\n `duration` Nullable(UInt32),\n `queries` Nullable(UInt64),\n `max_queries` Nullable(UInt64),\n `query_selects` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `query_inserts` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `errors` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `result_rows` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `result_bytes` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `read_rows` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `read_bytes` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `execution_time` Nullable(Float64),\n `max_execution_time` Nullable(Float64)\n)\nENGINE = SystemQuotasUsage()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.quotas_usage\n(\n `quota_name` String,\n `quota_key` String,\n `is_current` UInt8,\n `start_time` Nullable(DateTime),\n `end_time` Nullable(DateTime),\n `duration` Nullable(UInt32),\n `queries` Nullable(UInt64),\n `max_queries` Nullable(UInt64),\n `query_selects` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `query_inserts` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `errors` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `result_rows` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `result_bytes` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `read_rows` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `read_bytes` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `execution_time` Nullable(Float64),\n `max_execution_time` Nullable(Float64),\n `written_bytes` Nullable(UInt64),\n `max_written_bytes` Nullable(UInt64)\n)\nENGINE = SystemQuotasUsage()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.replicas\n(\n `database` 
String,\n `table` String,\n `engine` String,\n `is_leader` UInt8,\n `can_become_leader` UInt8,\n `is_readonly` UInt8,\n `is_session_expired` UInt8,\n `future_parts` UInt32,\n `parts_to_check` UInt32,\n `zookeeper_path` String,\n `replica_name` String,\n `replica_path` String,\n `columns_version` Int32,\n `queue_size` UInt32,\n `inserts_in_queue` UInt32,\n `merges_in_queue` UInt32,\n `part_mutations_in_queue` UInt32,\n `queue_oldest_time` DateTime,\n `inserts_oldest_time` DateTime,\n `merges_oldest_time` DateTime,\n `part_mutations_oldest_time` DateTime,\n `oldest_part_to_get` String,\n `oldest_part_to_merge_to` String,\n `oldest_part_to_mutate_to` String,\n `log_max_index` UInt64,\n `log_pointer` UInt64,\n `last_queue_update` DateTime,\n `absolute_delay` UInt64,\n `total_replicas` UInt8,\n `active_replicas` UInt8,\n `last_queue_update_exception` String,\n `zookeeper_exception` String,\n `replica_is_active` Map(String, UInt8)\n)\nENGINE = SystemReplicas()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.replicated_fetches\n(\n `database` String,\n `table` String,\n `elapsed` Float64,\n `progress` Float64,\n `result_part_name` String,\n `result_part_path` String,\n `partition_id` String,\n `total_size_bytes_compressed` UInt64,\n `bytes_read_compressed` UInt64,\n `source_replica_path` String,\n `source_replica_hostname` String,\n `source_replica_port` UInt16,\n `interserver_scheme` String,\n `URI` String,\n `to_detached` UInt8,\n `thread_id` UInt64\n)\nENGINE = SystemReplicatedFetches()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.replicated_merge_tree_settings\n(\n `name` String,\n `value` String,\n `changed` UInt8,\n `description` String,\n `type` String\n)\nENGINE = SystemReplicatedMergeTreeSettings()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' From 5cb2301e398552ba9f3ccabde0f1563481e5c3df Mon Sep 17 00:00:00 2001 From: chen9t Date: Wed, 30 Mar 2022 20:40:20 +0800 Subject: [PATCH 053/117] Clean dirty meta cache when part is broken during part loading phase --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 69ed238d78e..76d42e4b785 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -635,24 +635,32 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks /// Motivation: memory for index is shared between queries - not belong to the query itself. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global); - loadUUID(); - loadColumns(require_columns_checksums); - loadChecksums(require_columns_checksums); - loadIndexGranularity(); - calculateColumnsAndSecondaryIndicesSizesOnDisk(); - loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` - loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
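The patch around this hunk wraps the loading steps in a try/catch (added just below) and, on failure, calls metadata_manager->deleteAll so a broken part does not leave partially written metadata behind. The same guarantee can be expressed as a small commit/rollback guard; the sketch below is illustrative only, with a hypothetical Metadata type rather than the real metadata_manager API.

    #include <iostream>
    #include <stdexcept>

    // Hypothetical stand-in for the per-part metadata store.
    struct Metadata
    {
        void write(const char * what) { std::cout << "write " << what << '\n'; }
        void deleteAll() { std::cout << "rollback: delete all metadata\n"; }
    };

    // Rolls the metadata back in its destructor unless commit() was reached.
    class MetadataRollbackGuard
    {
    public:
        explicit MetadataRollbackGuard(Metadata & metadata_) : metadata(metadata_) {}
        ~MetadataRollbackGuard() { if (!committed) metadata.deleteAll(); }
        void commit() { committed = true; }
    private:
        Metadata & metadata;
        bool committed = false;
    };

    void loadPart(Metadata & metadata, bool fail_in_the_middle)
    {
        MetadataRollbackGuard guard(metadata);

        metadata.write("columns");
        metadata.write("checksums");
        if (fail_in_the_middle)
            throw std::runtime_error("broken part");
        metadata.write("index");

        guard.commit(); // every step succeeded, keep what was written
    }

    int main()
    {
        Metadata metadata;
        try { loadPart(metadata, true); } catch (const std::exception & e) { std::cout << e.what() << '\n'; }
        loadPart(metadata, false);
    }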
- loadPartitionAndMinMaxIndex(); - if (!parent_part) - { - loadTTLInfos(); - loadProjections(require_columns_checksums, check_consistency); + try { + loadUUID(); + loadColumns(require_columns_checksums); + loadChecksums(require_columns_checksums); + loadIndexGranularity(); + calculateColumnsAndSecondaryIndicesSizesOnDisk(); + loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` + loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. + loadPartitionAndMinMaxIndex(); + if (!parent_part) + { + loadTTLInfos(); + loadProjections(require_columns_checksums, check_consistency); + } + + if (check_consistency) + checkConsistency(require_columns_checksums); + + loadDefaultCompressionCodec(); + } catch (...) { + // There could be conditions that data part to be loaded is broken, but some of meta infos are already written + // into meta data before exception, need to clean them all. + metadata_manager->deleteAll(/*include_projection*/true); + metadata_manager->assertAllDeleted(/*include_projection*/true); + throw; } - - if (check_consistency) - checkConsistency(require_columns_checksums); - - loadDefaultCompressionCodec(); } void IMergeTreeDataPart::appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection) const From 849596c7a299b9d6905b539bc6ec6f322de7fd6d Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 30 Mar 2022 12:44:53 +0000 Subject: [PATCH 054/117] Improve schema inference for insert select queries --- src/Interpreters/Context.cpp | 12 ++++++++++++ src/Interpreters/Context.h | 1 + src/TableFunctions/TableFunctionInput.cpp | 17 +++++++++++++++++ src/TableFunctions/TableFunctionInput.h | 3 +++ ...select_from_input_schema_inference.reference | 2 ++ ...nsert_select_from_input_schema_inference.sql | 5 +++++ ..._select_from_file_schema_inference.reference | 1 + ...insert_select_from_file_schema_inference.sql | 6 ++++++ 8 files changed, 47 insertions(+) create mode 100644 tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.reference create mode 100644 tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql create mode 100644 tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.reference create mode 100644 tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 4c688d0f901..07724258213 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1092,6 +1093,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (table_function_ptr->needStructureHint()) + { + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); /// Since ITableFunction::parseArguments() may change table_expression, i.e.: diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 26fc5816ddf..ad68f2a2245 100644 --- 
a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "config_core.h" diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index cba145ee87b..9c434444314 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; } void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -29,6 +30,12 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr auto args = function->arguments->children; + if (args.empty()) + { + structure = "auto"; + return; + } + if (args.size() != 1) throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -38,6 +45,16 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const { + if (structure == "auto") + { + if (structure_hint.empty()) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Table function '{}' was used without structure argument but structure could not be determined automatically. Please, " + "provide structure manually", + getName()); + return structure_hint; + } return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionInput.h b/src/TableFunctions/TableFunctionInput.h index 5953693e711..8e7b34cb829 100644 --- a/src/TableFunctions/TableFunctionInput.h +++ b/src/TableFunctions/TableFunctionInput.h @@ -16,6 +16,8 @@ public: static constexpr auto name = "input"; std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } + bool needStructureHint() const override { return true; } + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; @@ -25,6 +27,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure; + ColumnsDescription structure_hint; }; } diff --git a/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.reference b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.reference new file mode 100644 index 00000000000..7fd75453a3c --- /dev/null +++ b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.reference @@ -0,0 +1,2 @@ +1 string1 +2 string2 diff --git a/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql new file mode 100644 index 00000000000..da3bccfd745 --- /dev/null +++ b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql @@ -0,0 +1,5 @@ +drop table if exists test_02249; +create table test_02249 (x UInt32, y String) engine=Memory(); +insert into test_02249 select * from input() format JSONEachRow {"x" : 1, "y" : "string1"}, {"y" : "string2", "x" : 2}; +select * from test_02249; +drop table test_02249; diff --git 
a/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.reference b/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.sql b/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.sql new file mode 100644 index 00000000000..2c04045463e --- /dev/null +++ b/tests/queries/0_stateless/02250_insert_select_from_file_schema_inference.sql @@ -0,0 +1,6 @@ +insert into table function file('data_02250.jsonl') select NULL as x settings engine_file_truncate_on_insert=1; +drop table if exists test_02250; +create table test_02250 (x Nullable(UInt32)) engine=Memory(); +insert into test_02250 select * from file('data_02250.jsonl'); +select * from test_02250; +drop table test_02250; From b30948dd5374447e3427dbb120d5d1d30003b402 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 30 Mar 2022 17:12:23 +0200 Subject: [PATCH 055/117] Relax test --- src/Common/FileCache.cpp | 4 ++-- tests/queries/0_stateless/02235_remote_fs_cache_stress.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 2a64b64ee63..d648267b95d 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -531,8 +531,8 @@ void LRUFileCache::loadCacheInfoIntoMemory() std::lock_guard cache_lock(mutex); Key key; - UInt64 offset; - size_t size; + UInt64 offset = 0; + size_t size = 0; std::vector cells; /// cache_base_path / key_prefix / key / offset diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index 7f1af5e854c..a5c0ee6ecff 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -18,7 +18,7 @@ CREATE TABLE t_01411( ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(2000000); +INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(100000); CREATE TABLE t_01411_num( num UInt8, @@ -26,10 +26,10 @@ CREATE TABLE t_01411_num( ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(200000); +INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000); create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; -insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; +insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 100000; """ function go() From d810853b783837569ff3d0bc7d7415ea8a35c9c4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 30 Mar 2022 17:34:07 +0200 Subject: [PATCH 056/117] Fix path in test --- tests/integration/test_storage_hdfs/test.py | 4 ++-- tests/queries/0_stateless/02245_s3_virtual_columns.reference | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 1f4b61c1030..a69d4c7ebc3 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ 
-558,12 +558,12 @@ def test_virtual_column(started_cluster): hdfs_api = started_cluster.hdfs_api table_function = ( - f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')" + f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "parquet" + assert result.strip() == "parquet_2" if __name__ == "__main__": diff --git a/tests/queries/0_stateless/02245_s3_virtual_columns.reference b/tests/queries/0_stateless/02245_s3_virtual_columns.reference index c876207357f..09383c51888 100644 --- a/tests/queries/0_stateless/02245_s3_virtual_columns.reference +++ b/tests/queries/0_stateless/02245_s3_virtual_columns.reference @@ -5,7 +5,7 @@ insert into test_02245 select 1 settings s3_truncate_on_insert=1; select * from test_02245; 1 select _path from test_02245; -clickhouse-experiment-kseniia-eu-west-1.clickhouse-dev.com/stateless/test_02245 +test/test_02245 drop table if exists test_02245_2; create table test_02245_2 (a UInt64, _path Int32) engine = S3(s3_conn, filename='test_02245_2', format=Parquet); insert into test_02245_2 select 1, 2 settings s3_truncate_on_insert=1; From dd7ae2b9aaaa747bcd8b67bd4d0955f90f543e72 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 30 Mar 2022 17:43:52 +0200 Subject: [PATCH 057/117] Fix enable LLVM for JIT compilation in CMake --- contrib/llvm-cmake/CMakeLists.txt | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index 6ff07f0e016..87c8a65510f 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -1,12 +1,9 @@ -# During cross-compilation in our CI we have to use llvm-tblgen and other building tools -# tools to be build for host architecture and everything else for target architecture (e.g. AArch64) -# Possible workaround is to use llvm-tblgen from some package... 
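The insert-select schema inference patch a little further up introduces a "structure hint": a table function that was written without an explicit structure (input(), file()) reports needStructureHint(), the interpreter fills the hint in from the columns of the INSERT target, and the function falls back to the hint when its own structure is "auto". A compressed sketch of that contract, using toy types rather than the real ColumnsDescription and ITableFunction interfaces:

    #include <iostream>
    #include <optional>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    // Toy column description: name + type name.
    using Columns = std::vector<std::pair<std::string, std::string>>;

    // Stand-in for a table function such as input(): it may or may not know
    // its own structure, and it can accept a hint from the outside.
    struct TableFunction
    {
        std::optional<Columns> declared;   // set when the user wrote input('x UInt32, ...')
        std::optional<Columns> hint;       // set from the INSERT target, if any

        bool needStructureHint() const { return !declared.has_value(); }
        void setStructureHint(Columns columns) { hint = std::move(columns); }

        Columns structure() const
        {
            if (declared) return *declared;
            if (hint) return *hint;
            throw std::runtime_error("structure cannot be determined, please provide it explicitly");
        }
    };

    int main()
    {
        // INSERT INTO t SELECT * FROM input(): no declared structure,
        // so the columns of `t` are offered as the hint.
        TableFunction input_fn;
        Columns target = {{"x", "UInt32"}, {"y", "String"}};
        if (input_fn.needStructureHint())
            input_fn.setStructureHint(target);

        for (const auto & [name, type] : input_fn.structure())
            std::cout << name << ' ' << type << '\n';
    }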
-# But lets just enable LLVM for native builds -if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined") - set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) +if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") + set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() - set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) + set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() + option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) From 98e85dd68e9b80eeaeca8046c4b714e5e5b0f0a8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 30 Mar 2022 20:27:04 +0200 Subject: [PATCH 058/117] Fix build --- src/Storages/StorageS3.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index f8745e4bbe2..cbf18e240f6 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -101,7 +101,6 @@ private: std::unique_ptr reader; /// onCancel and generate can be called concurrently std::mutex reader_mutex; - bool initialized = false; std::vector requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; From b40a69f59acbb53464df12952a3aca1e8d8ad9f9 Mon Sep 17 00:00:00 2001 From: chen9t Date: Thu, 31 Mar 2022 10:10:05 +0800 Subject: [PATCH 059/117] Code style --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 76d42e4b785..0ed96f5dda4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -635,31 +635,34 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks /// Motivation: memory for index is shared between queries - not belong to the query itself. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global); - try { - loadUUID(); - loadColumns(require_columns_checksums); - loadChecksums(require_columns_checksums); - loadIndexGranularity(); - calculateColumnsAndSecondaryIndicesSizesOnDisk(); - loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` - loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. - loadPartitionAndMinMaxIndex(); - if (!parent_part) - { - loadTTLInfos(); - loadProjections(require_columns_checksums, check_consistency); - } + try + { + loadUUID(); + loadColumns(require_columns_checksums); + loadChecksums(require_columns_checksums); + loadIndexGranularity(); + calculateColumnsAndSecondaryIndicesSizesOnDisk(); + loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` + loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. + loadPartitionAndMinMaxIndex(); + if (!parent_part) + { + loadTTLInfos(); + loadProjections(require_columns_checksums, check_consistency); + } - if (check_consistency) - checkConsistency(require_columns_checksums); + if (check_consistency) + checkConsistency(require_columns_checksums); - loadDefaultCompressionCodec(); - } catch (...) { - // There could be conditions that data part to be loaded is broken, but some of meta infos are already written - // into meta data before exception, need to clean them all. 
- metadata_manager->deleteAll(/*include_projection*/true); - metadata_manager->assertAllDeleted(/*include_projection*/true); - throw; + loadDefaultCompressionCodec(); + } + catch (...) + { + // There could be conditions that data part to be loaded is broken, but some of meta infos are already written + // into meta data before exception, need to clean them all. + metadata_manager->deleteAll(/*include_projection*/ true); + metadata_manager->assertAllDeleted(/*include_projection*/ true); + throw; } } From 3b40b56e56709fb1c2afca9378c131192e4ba7ac Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 30 Mar 2022 23:36:49 -0400 Subject: [PATCH 060/117] refactored to remove parsers rependency on datatypes --- src/Interpreters/InterpreterCreateQuery.cpp | 10 ++++++++-- src/Parsers/ASTColumnDeclaration.cpp | 9 +++++++-- src/Parsers/ParserCreateQuery.h | 10 ++++++---- tests/queries/0_stateless/02205_ephemeral_1.sql | 4 ++++ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d8923b3cc42..14bf9df2347 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -508,7 +508,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( default_expr_list->children.emplace_back( setAlias( - col_decl.default_expression->clone(), + col_decl.default_specifier == "EPHEMERAL" ? /// can be ASTLiteral::value NULL + std::make_shared(data_type_ptr->getDefault()) : + col_decl.default_expression->clone(), tmp_column_name)); } else @@ -536,7 +538,11 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_expression) { - ASTPtr default_expr = col_decl.default_expression->clone(); + ASTPtr default_expr = + col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as()->value.isNull() ? + std::make_shared(DataTypeFactory::instance().get(col_decl.type)->getDefault()) : + col_decl.default_expression->clone(); + if (col_decl.type) column.type = name_type_it->type; else diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 4c14230e926..888cd639fb0 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -71,8 +72,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta if (default_expression) { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; - default_expression->formatImpl(settings, state, frame); + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? 
hilite_none : ""); + if (default_specifier != "EPHEMERAL" || !default_expression->as()->value.isNull()) + { + settings.ostr << ' '; + default_expression->formatImpl(settings, state, frame); + } } if (comment) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 2f2c927c63b..7f02b685ffc 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,7 +10,6 @@ #include #include #include -#include namespace DB @@ -197,9 +196,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E } else if (s_ephemeral.ignore(pos, expected)) { - default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); - if (!expr_parser.parse(pos, default_expression, expected) && type) - default_expression = std::make_shared(DataTypeFactory::instance().get(type)->getDefault()); + if (s_ephemeral.ignore(pos, expected)) + { + default_specifier = "EPHEMERAL"; + if (!expr_parser.parse(pos, default_expression, expected) && type) + default_expression = std::make_shared(Field()); + } } if (require_type && !type && !default_expression) diff --git a/tests/queries/0_stateless/02205_ephemeral_1.sql b/tests/queries/0_stateless/02205_ephemeral_1.sql index eecb074d42c..d7df2e61cfb 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.sql +++ b/tests/queries/0_stateless/02205_ephemeral_1.sql @@ -77,3 +77,7 @@ INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (21, 7); SELECT * FROM t_ephemeral_02205_1; DROP TABLE IF EXISTS t_ephemeral_02205_1; +<<<<<<< HEAD +======= + +>>>>>>> refactored to remove parsers rependency on datatypes From 8e00cc2aca107b3de5c8c5ee4ed222d9300eb663 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 30 Mar 2022 23:52:48 -0400 Subject: [PATCH 061/117] fix messed commit --- tests/queries/0_stateless/02205_ephemeral_1.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/queries/0_stateless/02205_ephemeral_1.sql b/tests/queries/0_stateless/02205_ephemeral_1.sql index d7df2e61cfb..7a996ee3a8f 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.sql +++ b/tests/queries/0_stateless/02205_ephemeral_1.sql @@ -77,7 +77,4 @@ INSERT INTO t_ephemeral_02205_1 (x, y) VALUES (21, 7); SELECT * FROM t_ephemeral_02205_1; DROP TABLE IF EXISTS t_ephemeral_02205_1; -<<<<<<< HEAD -======= ->>>>>>> refactored to remove parsers rependency on datatypes From 049559d510fc6c9f2ca060eb604591f716d951ed Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 31 Mar 2022 06:57:59 +0000 Subject: [PATCH 062/117] Remove extra newline --- src/Interpreters/ClientInfo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 62bd006d857..75af25e842e 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -165,7 +165,6 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) client_version_patch = client_tcp_protocol_version; } - if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { uint8_t have_trace_id = 0; From f17a32063577a00d955ae1523d72a486ed8c11a2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 31 Mar 2022 11:13:38 +0200 Subject: [PATCH 063/117] Fix test --- tests/integration/test_storage_hdfs/test.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index a69d4c7ebc3..5e8a96d2c05 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ 
b/tests/integration/test_storage_hdfs/test.py @@ -554,7 +554,7 @@ def test_insert_select_schema_inference(started_cluster): assert int(result) == 1 -def test_virtual_column(started_cluster): +def test_virtual_columns_2(started_cluster): hdfs_api = started_cluster.hdfs_api table_function = ( @@ -563,7 +563,15 @@ def test_virtual_column(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "parquet_2" + assert result.strip() == "hdfs://hdfs1:9000/parquet_2" + + table_function = ( + f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" + ) + node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") + + result = node1.query(f"SELECT _path FROM {table_function}") + assert result.strip() == "kek" if __name__ == "__main__": From 4c66c2e10fb993087f2ce81c6702deea5d7be37b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 31 Mar 2022 11:42:38 +0200 Subject: [PATCH 064/117] Update src/Disks/IO/CachedReadBufferFromRemoteFS.cpp --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 7cd4f4b491a..e3f7bc669c7 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -298,7 +298,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File { case ReadType::CACHED: { -#ifdef NDEBUG +#ifndef NDEBUG auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); size_t file_size = file_reader->size(); From e12860f646418f617b8a7aafd56309675011105e Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 31 Mar 2022 11:42:42 +0200 Subject: [PATCH 065/117] Update src/Disks/IO/CachedReadBufferFromRemoteFS.cpp --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index e3f7bc669c7..d9e9f53187d 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -634,7 +634,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (!result) { -#ifdef NDEBUG +#ifndef NDEBUG if (auto * cache_file_reader = typeid_cast(implementation_buffer.get())) { auto cache_file_size = cache_file_reader->size(); From 12449caf8917afd074d0f431f72b7cc0bcbf95ed Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 31 Mar 2022 16:08:57 +0800 Subject: [PATCH 066/117] Refactoring QueryPipeline --- src/QueryPipeline/Pipe.cpp | 38 -------- src/QueryPipeline/Pipe.h | 1 - src/QueryPipeline/QueryPipelineBuilder.cpp | 103 --------------------- src/QueryPipeline/QueryPipelineBuilder.h | 9 -- 4 files changed, 151 deletions(-) diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 6cef7cc28bd..551841524b3 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -759,44 +759,6 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) header.clear(); } -void Pipe::setOutputFormat(ProcessorPtr output) -{ - if (output_ports.empty()) - throw Exception("Cannot set output format to empty Pipe.", ErrorCodes::LOGICAL_ERROR); - - if (output_ports.size() != 1) - throw Exception("Cannot set output format to Pipe because single output port is expected, " - "but it has " + std::to_string(output_ports.size()) + " 
ports", ErrorCodes::LOGICAL_ERROR); - - auto * format = dynamic_cast(output.get()); - - if (!format) - throw Exception("IOutputFormat processor expected for QueryPipelineBuilder::setOutputFormat.", - ErrorCodes::LOGICAL_ERROR); - - auto & main = format->getPort(IOutputFormat::PortKind::Main); - auto & totals = format->getPort(IOutputFormat::PortKind::Totals); - auto & extremes = format->getPort(IOutputFormat::PortKind::Extremes); - - if (!totals_port) - addTotalsSource(std::make_shared(totals.getHeader())); - - if (!extremes_port) - addExtremesSource(std::make_shared(extremes.getHeader())); - - if (collected_processors) - collected_processors->emplace_back(output); - - processors.emplace_back(std::move(output)); - - connect(*output_ports.front(), main); - connect(*totals_port, totals); - connect(*extremes_port, extremes); - - output_ports.clear(); - header.clear(); -} - void Pipe::transform(const Transformer & transformer) { if (output_ports.empty()) diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 613e92a782d..bc19b8389b3 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -141,7 +141,6 @@ private: bool isCompleted() const { return !empty() && output_ports.empty(); } static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header); void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); - void setOutputFormat(ProcessorPtr output); friend class QueryPipelineBuilder; friend class QueryPipeline; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index dba7c7cb8f7..fcd3105a422 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -247,21 +246,6 @@ void QueryPipelineBuilder::addExtremesTransform() pipe.addTransform(std::move(transform), nullptr, port); } -void QueryPipelineBuilder::setOutputFormat(ProcessorPtr output) -{ - checkInitializedAndNotCompleted(); - - if (output_format) - throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR); - - resize(1); - - output_format = dynamic_cast(output.get()); - pipe.setOutputFormat(std::move(output)); - - initRowsBeforeLimit(); -} - QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( std::vector> pipelines, size_t max_threads_limit, @@ -461,93 +445,6 @@ void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem) } } -void QueryPipelineBuilder::initRowsBeforeLimit() -{ - RowsBeforeLimitCounterPtr rows_before_limit_at_least; - - /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
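The function being removed here attached one shared rows-before-limit counter to every limit transform and remote source it discovered, and the output format only read the accumulated total. A simplified sketch of that shared-counter idea, with a plain shared atomic standing in for the real RowsBeforeLimitCounter type:

    #include <atomic>
    #include <iostream>
    #include <memory>

    using RowsBeforeLimitCounterPtr = std::shared_ptr<std::atomic<size_t>>;

    // Stand-in for a processor that contributes to the shared counter.
    struct LimitLikeProcessor
    {
        RowsBeforeLimitCounterPtr rows_before_limit;

        void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit = std::move(counter); }

        void onChunk(size_t rows)
        {
            if (rows_before_limit)
                rows_before_limit->fetch_add(rows); // several processors add to the same total
        }
    };

    int main()
    {
        auto counter = std::make_shared<std::atomic<size_t>>(0);

        LimitLikeProcessor a, b;
        a.setRowsBeforeLimitCounter(counter);
        b.setRowsBeforeLimitCounter(counter);

        a.onChunk(100);
        b.onChunk(20);

        std::cout << counter->load() << '\n'; // prints 120, read once by the output format
    }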
- std::vector limits; - std::vector remote_sources; - - std::unordered_set visited; - - struct QueuedEntry - { - IProcessor * processor; - bool visited_limit; - }; - - std::queue queue; - - queue.push({ output_format, false }); - visited.emplace(output_format); - - while (!queue.empty()) - { - auto * processor = queue.front().processor; - auto visited_limit = queue.front().visited_limit; - queue.pop(); - - if (!visited_limit) - { - if (auto * limit = typeid_cast(processor)) - { - visited_limit = true; - limits.emplace_back(limit); - } - - if (auto * source = typeid_cast(processor)) - remote_sources.emplace_back(source); - } - else if (auto * sorting = typeid_cast(processor)) - { - if (!rows_before_limit_at_least) - rows_before_limit_at_least = std::make_shared(); - - sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); - - /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. - continue; - } - - /// Skip totals and extremes port for output format. - if (auto * format = dynamic_cast(processor)) - { - auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor(); - if (visited.emplace(child_processor).second) - queue.push({ child_processor, visited_limit }); - - continue; - } - - for (auto & child_port : processor->getInputs()) - { - auto * child_processor = &child_port.getOutputPort().getProcessor(); - if (visited.emplace(child_processor).second) - queue.push({ child_processor, visited_limit }); - } - } - - if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) - { - rows_before_limit_at_least = std::make_shared(); - - for (auto & limit : limits) - limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - - for (auto & source : remote_sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); - } - - /// If there is a limit, then enable rows_before_limit_at_least - /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. - if (!limits.empty()) - rows_before_limit_at_least->add(0); - - if (rows_before_limit_at_least) - output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); -} - PipelineExecutorPtr QueryPipelineBuilder::execute() { if (!isCompleted()) diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 9e198f45e98..ac84191cf34 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -10,8 +10,6 @@ namespace DB { -class IOutputFormat; - class QueryPipelineProcessorsCollector; struct AggregatingTransformParams; @@ -71,10 +69,6 @@ public: void addTotalsHavingTransform(ProcessorPtr transform); /// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number. void addExtremesTransform(); - /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. - void setOutputFormat(ProcessorPtr output); - /// Get current OutputFormat. - IOutputFormat * getOutputFormat() const { return output_format; } /// Sink is a processor with single input port and no output ports. Creates sink for each output port. /// Pipeline will be completed after this transformation. void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); @@ -163,7 +157,6 @@ public: private: Pipe pipe; - IOutputFormat * output_format = nullptr; /// Limit on the number of threads. Zero means no limit. 
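The initRowsBeforeLimit() body removed above walked the processor graph breadth-first, starting from the output format and following input ports back toward the sources, collecting the processors it cared about along the way. A generic sketch of that traversal pattern on a toy graph, not the IProcessor/port interfaces:

    #include <iostream>
    #include <queue>
    #include <string>
    #include <unordered_set>
    #include <vector>

    // Toy processor graph node: a kind tag plus the processors feeding into it.
    struct Node
    {
        std::string kind;             // e.g. "LimitTransform", "RemoteSource", ...
        std::vector<Node *> inputs;
    };

    // Breadth-first walk from the output node towards the sources,
    // collecting every node of the requested kind.
    std::vector<Node *> collectByKind(Node * output, const std::string & kind)
    {
        std::vector<Node *> found;
        std::unordered_set<Node *> visited;
        std::queue<Node *> queue;

        queue.push(output);
        visited.insert(output);

        while (!queue.empty())
        {
            Node * node = queue.front();
            queue.pop();

            if (node->kind == kind)
                found.push_back(node);

            for (Node * input : node->inputs)
                if (visited.insert(input).second) // enqueue each processor only once
                    queue.push(input);
        }
        return found;
    }

    int main()
    {
        Node source{"Source", {}};
        Node limit{"LimitTransform", {&source}};
        Node format{"OutputFormat", {&limit}};

        std::cout << collectByKind(&format, "LimitTransform").size() << '\n'; // prints 1
    }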
/// Sometimes, more streams are created then the number of threads for more optimal execution. @@ -174,8 +167,6 @@ private: void checkInitialized(); void checkInitializedAndNotCompleted(); - void initRowsBeforeLimit(); - void setCollectedProcessors(Processors * processors); friend class QueryPipelineProcessorsCollector; From 74f09886571715ddd7ac95f4b2a64f5a8b08a04e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 31 Mar 2022 11:34:06 +0000 Subject: [PATCH 067/117] Fix tests --- .../0_stateless/01825_type_json_schema_inference.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.reference b/tests/queries/0_stateless/01825_type_json_schema_inference.reference index c2c18b5a2ff..6f1a65c6af3 100644 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.reference +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.reference @@ -5,4 +5,4 @@ Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8) {"id":"2","obj":"bbb","s":"bar"} {"map":{"k1":1,"k2":2},"obj":{"k1":1,"k2.k3":2},"map_type":"Map(String, Nullable(Float64))","obj_type":"Object('json')"} {"obj":{"k1":1,"k2":2},"map":{"k1":"1","k2":"2"}} -Tuple(k1 Float64, k2 Float64) +Tuple(k1 Int8, k2 Int8) From d141dbc388c5870f4147cff164f20fc648773d78 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Thu, 31 Mar 2022 19:34:30 +0800 Subject: [PATCH 068/117] Delete duplicate code Delete duplicate code --- src/Storages/MergeTree/MergeTreeData.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6192306e2d8..b433c6e4591 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1314,9 +1314,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (!parts_from_wal.empty()) loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock); - for (auto & part : duplicate_parts_to_remove) - part->remove(); - for (auto & part : broken_parts_to_detach) part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes From e022f8527b8b442bcddd871d0e16dbf10e3fb4cb Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 31 Mar 2022 11:39:29 +0000 Subject: [PATCH 069/117] Fix extract parser --- src/Parsers/ExpressionElementParsers.cpp | 42 +++++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index c51201750c5..ef236388a04 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -505,32 +505,34 @@ namespace bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr expr; - + IParser::Pos begin = pos; IntervalKind interval_kind; - if (!parseIntervalKind(pos, expected, interval_kind)) - { - ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) - return false; - auto res = std::make_shared(); - res->name = "extract"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; + if (parseIntervalKind(pos, expected, interval_kind)) + { + ASTPtr expr; + + ParserKeyword s_from("FROM"); + ParserExpression elem_parser; + + if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected)) + { + node = 
makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); + return true; + } } - ParserKeyword s_from("FROM"); - if (!s_from.ignore(pos, expected)) + pos = begin; + + ASTPtr expr_list; + if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) return false; - ParserExpression elem_parser; - if (!elem_parser.parse(pos, expr, expected)) - return false; - - node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); + auto res = std::make_shared(); + res->name = "extract"; + res->arguments = expr_list; + res->children.push_back(res->arguments); + node = std::move(res); return true; } From e2e4b02d13ef042c75b5384c7a69e83cabcb8fe3 Mon Sep 17 00:00:00 2001 From: fenglv Date: Thu, 31 Mar 2022 12:01:10 +0000 Subject: [PATCH 070/117] remove unused variable --- src/Storages/WindowView/StorageWindowView.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index ef552262378..863eac275a2 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -182,7 +182,6 @@ private: /// Mutex for the blocks and ready condition std::mutex mutex; - std::mutex flush_table_mutex; std::shared_mutex fire_signal_mutex; mutable std::mutex sample_block_lock; /// Mutex to protect access to sample block and inner_blocks_query From eb88a5f29e63d28e81fc6f25b84d60f5623ab5b9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 31 Mar 2022 12:20:48 +0000 Subject: [PATCH 071/117] Add test --- tests/queries/0_stateless/02247_fix_extract_parser.reference | 3 +++ tests/queries/0_stateless/02247_fix_extract_parser.sql | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02247_fix_extract_parser.reference create mode 100644 tests/queries/0_stateless/02247_fix_extract_parser.sql diff --git a/tests/queries/0_stateless/02247_fix_extract_parser.reference b/tests/queries/0_stateless/02247_fix_extract_parser.reference new file mode 100644 index 00000000000..01e79c32a8c --- /dev/null +++ b/tests/queries/0_stateless/02247_fix_extract_parser.reference @@ -0,0 +1,3 @@ +1 +2 +3 diff --git a/tests/queries/0_stateless/02247_fix_extract_parser.sql b/tests/queries/0_stateless/02247_fix_extract_parser.sql new file mode 100644 index 00000000000..9b721a6e830 --- /dev/null +++ b/tests/queries/0_stateless/02247_fix_extract_parser.sql @@ -0,0 +1,3 @@ +WITH 'number: 1' as year SELECT extract(year, '\\d+'); +WITH 'number: 2' as mm SELECT extract(mm, '\\d+'); +WITH 'number: 3' as s SELECT extract(s, '\\d+'); From 836e7dae67ce34cd406ae0629607d4af365263eb Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 31 Mar 2022 12:24:40 +0000 Subject: [PATCH 072/117] Fix bug in indexes of not presented columns in -WithNames formats --- src/Processors/Formats/IInputFormat.h | 2 +- .../RowInputFormatWithNamesAndTypes.cpp | 2 - ...ots_of_columns_in_csv_with_names.reference | 1 + ...02250_lots_of_columns_in_csv_with_names.sh | 274 ++++++++++++++++++ 4 files changed, 276 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.reference create mode 100755 tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.sh diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index 99d25d87b73..9774f630f15 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -17,7 +17,7 @@ struct ColumnMapping OptionalIndexes 
column_indexes_for_input_fields; /// The list of column indexes that are not presented in input data. - std::vector not_presented_columns; + std::vector not_presented_columns; /// The list of column names in input data. Needed for better exception messages. std::vector names_of_columns; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 7720b01dc74..0735f927c6a 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -98,8 +98,6 @@ void RowInputFormatWithNamesAndTypes::readPrefix() /// Skip prefix before names and types. format_reader->skipPrefixBeforeHeader(); - /// This is a bit of abstraction leakage, but we need it in parallel parsing: - /// we check if this InputFormat is working with the "real" beginning of the data. if (with_names) { if (format_settings.with_names_use_header) diff --git a/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.reference b/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.reference new file mode 100644 index 00000000000..9cd40e0d748 --- /dev/null +++ b/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.reference @@ -0,0 +1 @@ +42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.sh b/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.sh new file mode 100755 index 00000000000..9366d41af9a --- /dev/null +++ b/tests/queries/0_stateless/02250_lots_of_columns_in_csv_with_names.sh @@ -0,0 +1,274 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02250" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02250 +( + field_1 Int32, + field_2 Int32, + field_3 Int32, + field_4 Int32, + field_5 Int32, + field_6 Int32, + field_7 Int32, + field_8 Int32, + field_9 Int32, + field_10 Int32, + field_11 Int32, + field_12 Int32, + field_13 Int32, + field_14 Int32, + field_15 Int32, + field_16 Int32, + field_17 Int32, + field_18 Int32, + field_19 Int32, + field_20 Int32, + field_21 Int32, + field_22 Int32, + field_23 Int32, + field_24 Int32, + field_25 Int32, + field_26 Int32, + field_27 Int32, + field_28 Int32, + field_29 Int32, + field_30 Int32, + field_31 Int32, + field_32 Int32, + field_33 Int32, + field_34 Int32, + field_35 Int32, + field_36 Int32, + field_37 Int32, + field_38 Int32, + field_39 Int32, + field_40 Int32, + field_41 Int32, + field_42 Int32, + field_43 Int32, + field_44 Int32, + field_45 Int32, + field_46 Int32, + field_47 Int32, + field_48 Int32, + field_49 Int32, + field_50 Int32, + field_51 Int32, + field_52 Int32, + field_53 Int32, + field_54 Int32, + field_55 Int32, + field_56 Int32, + field_57 Int32, + field_58 Int32, + field_59 Int32, + field_60 Int32, + field_61 Int32, + field_62 Int32, + field_63 Int32, + field_64 Int32, + field_65 Int32, + field_66 Int32, + field_67 Int32, + field_68 Int32, + field_69 Int32, + field_70 Int32, + field_71 Int32, + field_72 Int32, + field_73 Int32, + field_74 Int32, + field_75 Int32, + field_76 Int32, + field_77 Int32, + field_78 Int32, + field_79 Int32, + field_80 Int32, + field_81 Int32, + field_82 Int32, + field_83 Int32, + field_84 Int32, + field_85 Int32, + field_86 Int32, + field_87 Int32, + field_88 Int32, + field_89 Int32, + field_90 Int32, + field_91 Int32, + field_92 Int32, + field_93 Int32, + field_94 Int32, + field_95 Int32, + field_96 Int32, + field_97 Int32, + field_98 Int32, + field_99 Int32, + field_100 Int32, + field_101 Int32, + field_102 Int32, + field_103 Int32, + field_104 Int32, + field_105 Int32, + field_106 Int32, + field_107 Int32, + field_108 Int32, + field_109 Int32, + field_110 Int32, + field_111 Int32, + field_112 Int32, + field_113 Int32, + field_114 Int32, + field_115 Int32, + field_116 Int32, + field_117 Int32, + field_118 Int32, + field_119 Int32, + field_120 Int32, + field_121 Int32, + field_122 Int32, + field_123 Int32, + field_124 Int32, + field_125 Int32, + field_126 Int32, + field_127 Int32, + field_128 Int32, + field_129 Int32, + field_130 Int32, + field_131 Int32, + field_132 Int32, + field_133 Int32, + field_134 Int32, + field_135 Int32, + field_136 Int32, + field_137 Int32, + field_138 Int32, + field_139 Int32, + field_140 Int32, + field_141 Int32, + field_142 Int32, + field_143 Int32, + field_144 Int32, + field_145 Int32, + field_146 Int32, + field_147 Int32, + field_148 Int32, + field_149 Int32, + field_150 Int32, + field_151 Int32, + field_152 Int32, + field_153 Int32, + field_154 Int32, + field_155 Int32, + field_156 Int32, + field_157 Int32, + field_158 Int32, + field_159 Int32, + field_160 Int32, + field_161 Int32, + field_162 Int32, + field_163 Int32, + field_164 Int32, + field_165 Int32, + field_166 Int32, + field_167 Int32, + field_168 Int32, + field_169 Int32, + field_170 Int32, + field_171 Int32, + field_172 Int32, + field_173 Int32, + field_174 Int32, + field_175 Int32, + field_176 Int32, + field_177 Int32, + field_178 Int32, + field_179 Int32, + field_180 Int32, + field_181 Int32, + field_182 Int32, + field_183 Int32, + field_184 Int32, + field_185 Int32, + 
field_186 Int32, + field_187 Int32, + field_188 Int32, + field_189 Int32, + field_190 Int32, + field_191 Int32, + field_192 Int32, + field_193 Int32, + field_194 Int32, + field_195 Int32, + field_196 Int32, + field_197 Int32, + field_198 Int32, + field_199 Int32, + field_200 Int32, + field_201 Int32, + field_202 Int32, + field_203 Int32, + field_204 Int32, + field_205 Int32, + field_206 Int32, + field_207 Int32, + field_208 Int32, + field_209 Int32, + field_210 Int32, + field_211 Int32, + field_212 Int32, + field_213 Int32, + field_214 Int32, + field_215 Int32, + field_216 Int32, + field_217 Int32, + field_218 Int32, + field_219 Int32, + field_220 Int32, + field_221 Int32, + field_222 Int32, + field_223 Int32, + field_224 Int32, + field_225 Int32, + field_226 Int32, + field_227 Int32, + field_228 Int32, + field_229 Int32, + field_230 Int32, + field_231 Int32, + field_232 Int32, + field_233 Int32, + field_234 Int32, + field_235 Int32, + field_236 Int32, + field_237 Int32, + field_238 Int32, + field_239 Int32, + field_240 Int32, + field_241 Int32, + field_242 Int32, + field_243 Int32, + field_244 Int32, + field_245 Int32, + field_246 Int32, + field_247 Int32, + field_248 Int32, + field_249 Int32, + field_250 Int32, + field_251 Int32, + field_252 Int32, + field_253 Int32, + field_254 Int32, + field_255 Int32, + field_256 Int32, + field_257 Int32 +) +ENGINE = MergeTree +ORDER BY tuple()" + +echo -e "field_1\n42" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_02250 FORMAT CSVWithNames" --input_format_with_names_use_header 1 +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02250" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02250" From a217e9e9d0dd47e8ddd4392febc7a7e906e3ca43 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 31 Mar 2022 14:27:43 +0200 Subject: [PATCH 073/117] Remove unused header --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 07724258213..e837ce5dae1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include From 0367cbad3cd99beccb47bd3373a347093708b77b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 31 Mar 2022 09:35:19 -0400 Subject: [PATCH 074/117] messed merge fixed --- src/Parsers/ParserCreateQuery.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 7f02b685ffc..a6d3476e3f7 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -196,12 +196,9 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E } else if (s_ephemeral.ignore(pos, expected)) { - if (s_ephemeral.ignore(pos, expected)) - { - default_specifier = "EPHEMERAL"; - if (!expr_parser.parse(pos, default_expression, expected) && type) - default_expression = std::make_shared(Field()); - } + default_specifier = "EPHEMERAL"; + if (!expr_parser.parse(pos, default_expression, expected) && type) + default_expression = std::make_shared(Field()); } if (require_type && !type && !default_expression) From 4ba7cf6d1b81003f6339f8e5c30fabb3b90e26c7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 31 Mar 2022 15:40:32 +0200 Subject: [PATCH 075/117] Fix latest_error referenced before assignment --- tests/ci/docker_pull_helper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index ee7f3337cd9..717327ded41 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -56,18 +56,20 @@ def get_images_with_versions( for i in range(10): try: logging.info("Pulling image %s", docker_image) - latest_error = subprocess.check_output( + subprocess.check_output( f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True, ) break except Exception as ex: + latest_error = ex time.sleep(i * 3) logging.info("Got execption pulling docker %s", ex) else: raise Exception( - f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}" + "Cannot pull dockerhub for image docker pull " + f"{docker_image} because of {latest_error}" ) return docker_images From 418070c012ac64ae3e1e8854ac6ffc68bc847da8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 31 Mar 2022 15:56:07 +0200 Subject: [PATCH 076/117] try to fix some integration tests --- tests/integration/test_dotnet_client/test.py | 2 +- tests/integration/test_mysql_protocol/test.py | 8 ++++---- tests/integration/test_postgresql_protocol/test.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_dotnet_client/test.py b/tests/integration/test_dotnet_client/test.py index b147688c099..2af9b80f720 100644 --- a/tests/integration/test_dotnet_client/test.py +++ b/tests/integration/test_dotnet_client/test.py @@ -44,7 +44,7 @@ def dotnet_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--no-build", ] diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 78049e0f123..6e61675563f 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -55,7 +55,7 @@ def golang_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--no-build", ] @@ -82,7 +82,7 @@ def php_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--no-build", ] @@ -109,7 +109,7 @@ def nodejs_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--no-build", ] @@ -136,7 +136,7 @@ def java_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--no-build", ] diff --git a/tests/integration/test_postgresql_protocol/test.py b/tests/integration/test_postgresql_protocol/test.py index 5c270fd9ca7..43528c13c4d 100644 --- a/tests/integration/test_postgresql_protocol/test.py +++ b/tests/integration/test_postgresql_protocol/test.py @@ -56,7 +56,7 @@ def psql_client(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--build", ] @@ -99,7 +99,7 @@ def java_container(): "-f", docker_compose, "up", - "--no-recreate", + "--force-recreate", "-d", "--build", ] From 97523a33298d31dcdbdd8a430cd37d1e36e6caf3 Mon Sep 17 00:00:00 2001 From: fenglv Date: Fri, 1 Apr 2022 09:58:23 +0000 Subject: [PATCH 077/117] Try remove unneed variable --- src/Storages/WindowView/StorageWindowView.cpp | 28 +++++++++---------- src/Storages/WindowView/StorageWindowView.h | 1 - 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp 
b/src/Storages/WindowView/StorageWindowView.cpp index 644ab5d57c2..3fca34ed511 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -467,7 +467,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) InterpreterSelectQuery fetch( getFetchColumnQuery(w_start, watermark), - window_view_context, + getContext(), getInnerStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::FetchColumns)); @@ -509,11 +509,11 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState); }; - TemporaryTableHolder blocks_storage(window_view_context, creator); + TemporaryTableHolder blocks_storage(getContext(), creator); InterpreterSelectQuery select( getFinalQuery(), - window_view_context, + getContext(), blocks_storage.getTable(), blocks_storage.getTable()->getInMemoryMetadataPtr(), SelectQueryOptions(QueryProcessingStage::Complete)); @@ -617,8 +617,8 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto t_sample_block = InterpreterSelectQuery( - inner_select_query, window_view_context, getParentStorage(), nullptr, - SelectQueryOptions(QueryProcessingStage::WithMergeableState)) .getSampleBlock(); + inner_select_query, getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState)) + .getSampleBlock(); auto columns_list = std::make_shared(); @@ -891,7 +891,7 @@ void StorageWindowView::updateMaxWatermark(UInt32 watermark) inline void StorageWindowView::cleanup() { - InterpreterAlterQuery alter_query(getCleanupQuery(), window_view_context); + InterpreterAlterQuery alter_query(getCleanupQuery(), getContext()); alter_query.execute(); std::lock_guard lock(fire_signal_mutex); @@ -999,9 +999,6 @@ StorageWindowView::StorageWindowView( , WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) { - window_view_context = Context::createCopy(getContext()); - window_view_context->makeQueryContext(); - StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); @@ -1089,11 +1086,11 @@ StorageWindowView::StorageWindowView( clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds(); next_fire_signal = getWindowUpperBound(std::time(nullptr)); - clean_cache_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); if (is_proctime) - fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); }); + fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); }); else - fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); }); + fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); }); clean_cache_task->deactivate(); fire_task->deactivate(); } @@ -1424,9 +1421,10 @@ Block & StorageWindowView::getHeader() const std::lock_guard lock(sample_block_lock); if (!sample_block) { - sample_block = InterpreterSelectQuery( - 
select_query->clone(), window_view_context, getParentStorage(), nullptr, - SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock(); + sample_block + = InterpreterSelectQuery( + select_query->clone(), getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)) + .getSampleBlock(); /// convert all columns to full columns /// in case some of them are constant for (size_t i = 0; i < sample_block.columns(); ++i) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 863eac275a2..4e1ca87a8da 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -157,7 +157,6 @@ private: /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; - ContextMutablePtr window_view_context; bool is_proctime{true}; bool is_time_column_func_now; bool is_tumble; // false if is hop From 4cc37ae9f3b655a0d4ec65c5b938ec9f5ff4c0e3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 11 Mar 2022 12:24:48 +0100 Subject: [PATCH 078/117] Clean stale code --- docker/server/local.Dockerfile | 47 ----------- docker/test/test_runner.sh | 86 --------------------- docker/test/test_runner_docker_compose.yaml | 34 -------- 3 files changed, 167 deletions(-) delete mode 100644 docker/server/local.Dockerfile delete mode 100755 docker/test/test_runner.sh delete mode 100644 docker/test/test_runner_docker_compose.yaml diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile deleted file mode 100644 index 0d86c9ce45a..00000000000 --- a/docker/server/local.Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -# Since right now we can't set volumes to the docker during build, we split building container in stages: -# 1. build base container -# 2. run base conatiner with mounted volumes -# 3. commit container as image -# 4. build final container atop that image -# Middle steps are performed by the bash script. - -FROM ubuntu:18.04 as clickhouse-server-base -ARG gosu_ver=1.14 - -VOLUME /packages/ - -# update to allow installing dependencies of clickhouse automatically -RUN apt update; \ - DEBIAN_FRONTEND=noninteractive \ - apt install -y locales; - -ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically. -CMD DEBIAN_FRONTEND=noninteractive \ - apt install -y \ - /packages/clickhouse-common-static_*.deb \ - /packages/clickhouse-server_*.deb ; - -FROM clickhouse-server-base:postinstall as clickhouse-server - -RUN mkdir /docker-entrypoint-initdb.d - -COPY docker_related_config.xml /etc/clickhouse-server/config.d/ -COPY entrypoint.sh /entrypoint.sh - -RUN chmod +x \ - /entrypoint.sh \ - /bin/gosu - -EXPOSE 9000 8123 9009 -VOLUME /var/lib/clickhouse - -ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh deleted file mode 100755 index 0c99c8c2b32..00000000000 --- a/docker/test/test_runner.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh - -set -e -x - -# Not sure why shellcheck complains that rc is not assigned before it is referenced. 
-# shellcheck disable=SC2154 -trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT - -# CLI option to prevent rebuilding images, just re-run tests with images leftover from previuos time -readonly NO_REBUILD_FLAG="--no-rebuild" - -readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")" -readonly CLICKHOUSE_PACKAGES_ARG="${2}" -CLICKHOUSE_SERVER_IMAGE="${3}" - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild -fi - - -# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon, -# all images are built in multiple stages: -# 1. build base image, install dependencies -# 2. run image with volume mounted, install what needed from those volumes -# 3. tag container as image -# 4. [optional] build another image atop of tagged. - -# TODO: optionally mount most recent clickhouse-test and queries directory from local machine - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ - --target clickhouse-test-runner-base \ - -t clickhouse-test-runner-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/test/stateless" - - docker rm -f clickhouse-test-runner-installing-packages || true - docker run --network=host \ - -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse-test-runner-installing-packages \ - clickhouse-test-runner-base:preinstall - docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local - docker rm -f clickhouse-test-runner-installing-packages || true -fi - -# # Create a bind-volume to the clickhouse-test script file -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume - -# Build server image (optional) from local packages -if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local" - - if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server-base \ - -t clickhouse-server-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/server" - - docker rm -f clickhouse_server_base_installing_server || true - docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse_server_base_installing_server \ - clickhouse-server-base:preinstall - docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall - - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server \ - -t "${CLICKHOUSE_SERVER_IMAGE}" \ - "${CLICKHOUSE_DOCKER_DIR}/server" - fi -fi - -docker rm -f test-runner || true -docker-compose down -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - create \ - --build --force-recreate - -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - run \ - --name test-runner \ - test-runner diff --git a/docker/test/test_runner_docker_compose.yaml 
b/docker/test/test_runner_docker_compose.yaml deleted file mode 100644 index 2aef6a48d77..00000000000 --- a/docker/test/test_runner_docker_compose.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: "2" - -services: - clickhouse-server: - image: ${CLICKHOUSE_SERVER_IMAGE} - expose: - - "8123" # HTTP - - "9000" # TCP - - "9009" # HTTP-interserver - restart: "no" - - test-runner: - image: clickhouse-statelest-test-runner:local - - restart: "no" - depends_on: - - clickhouse-server - environment: - # these are used by clickhouse-test to point clickhouse-client to the right server - - CLICKHOUSE_HOST=clickhouse-server - - CLICKHOUSE_PORT=9009 - - CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234 - expose: - # port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646), - # should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above - - "51234" - - # NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container. - # volumes: - # - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro - # - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro - - # String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}" - entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}" From fbb6787a45e6dc7c0e6c52f4407e504f16b93c51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 11 Mar 2022 17:23:20 +0100 Subject: [PATCH 079/117] Rename Dockerfile to Dockerfile.ubuntu --- docker/server/{Dockerfile => Dockerfile.ubuntu} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docker/server/{Dockerfile => Dockerfile.ubuntu} (100%) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile.ubuntu similarity index 100% rename from docker/server/Dockerfile rename to docker/server/Dockerfile.ubuntu From 1b53993560f4142123e53a9936e5bc54b0e2241d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 11 Mar 2022 17:23:45 +0100 Subject: [PATCH 080/117] Make Dockerfile.ubuntu a default image definition --- docker/server/Dockerfile | 1 + 1 file changed, 1 insertion(+) create mode 120000 docker/server/Dockerfile diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile new file mode 120000 index 00000000000..fd45f0f7c7c --- /dev/null +++ b/docker/server/Dockerfile @@ -0,0 +1 @@ +Dockerfile.ubuntu \ No newline at end of file From c053f1cb1dd3154364d7861e0a87781ee6bf8b51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 14 Mar 2022 12:59:09 +0100 Subject: [PATCH 081/117] Rewrite alpine building to pure Dockerfile --- docker/server/.gitignore | 2 -- docker/server/Dockerfile.alpine | 22 ++++++++++-- docker/server/alpine-build.sh | 63 --------------------------------- 3 files changed, 19 insertions(+), 68 deletions(-) delete mode 100644 docker/server/.gitignore delete mode 100755 docker/server/alpine-build.sh diff --git a/docker/server/.gitignore b/docker/server/.gitignore deleted file mode 100644 index 692758d55aa..00000000000 --- a/docker/server/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -alpine-root/* -tgz-packages/* diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index cd192c0c9da..7aa06bf87cd 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -6,7 +6,17 @@ ENV LANG=en_US.UTF-8 \ TZ=UTC \ CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml -COPY alpine-root/ / +COPY --from=ubuntu:20.04 /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libdl.so.2 /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/libpthread.so.0 /lib/x86_64-linux-gnu/librt.so.1 /lib/x86_64-linux-gnu/libnss_dns.so.2 /lib/x86_64-linux-gnu/libnss_files.so.2 /lib/x86_64-linux-gnu/libresolv.so.2 /lib/ +COPY --from=ubuntu:20.04 /lib64/ld-linux-x86-64.so.2 /lib64/ +COPY --from=ubuntu:20.04 /etc/nsswitch.conf /etc/ +COPY docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY entrypoint.sh /entrypoint.sh + +# lts / testing / prestable / etc +ARG REPO_CHANNEL="stable" +ARG REPO_URL="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" +ARG VERSION="20.9.3.45" +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint @@ -15,9 +25,15 @@ COPY alpine-root/ / # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. 
-RUN addgroup -S -g 101 clickhouse \ +RUN for package in ${PACKAGES}; do \ + echo "${REPO_URL}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPO_URL}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \ && chown clickhouse:clickhouse /var/lib/clickhouse \ && chown root:clickhouse /var/log/clickhouse-server \ && chmod +x /entrypoint.sh \ diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh deleted file mode 100755 index 1b448c61fbb..00000000000 --- a/docker/server/alpine-build.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -set -x - -REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc -REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" -VERSION="${VERSION:-20.9.3.45}" -DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}" - -# where original files live -DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" - -# we will create root for our image here -CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" - -# clean up the root from old runs, it's reconstructed each time -rm -rf "$CONTAINER_ROOT_FOLDER" -mkdir -p "$CONTAINER_ROOT_FOLDER" - -# where to put downloaded tgz -TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages" -mkdir -p "$TGZ_PACKAGES_FOLDER" - -PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) - -# download tars from the repo -for package in "${PACKAGES[@]}" -do - wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" -done - -# unpack tars -for package in "${PACKAGES[@]}" -do - tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER" -done - -# prepare few more folders -mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \ - "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \ - "${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \ - "${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \ - "${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \ - "${CONTAINER_ROOT_FOLDER}/lib64" - -cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" -cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" - -## get glibc components from ubuntu 20.04 and put them to expected place -docker pull ubuntu:20.04 -ubuntu20image=$(docker create --rm ubuntu:20.04) -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 
"${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" -docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" - -docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull -rm -rf "$CONTAINER_ROOT_FOLDER" From 90be03c750e1053bbd3634602b34455bf9ea7423 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 23 Mar 2022 11:53:32 +0100 Subject: [PATCH 082/117] Rework env_helper to return correct paths --- tests/ci/env_helper.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index c34162ba51a..242d6bf5e40 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -1,7 +1,11 @@ import os +from os import path as p + +module_dir = p.abspath(p.dirname(__file__)) +git_root = p.abspath(p.join(module_dir, "..", "..")) CI = bool(os.getenv("CI")) -TEMP_PATH = os.getenv("TEMP_PATH", os.path.abspath(".")) +TEMP_PATH = os.getenv("TEMP_PATH", module_dir) CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") @@ -9,11 +13,11 @@ GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") -GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) +GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", git_root) GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" IMAGES_PATH = os.getenv("IMAGES_PATH") -REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports") -REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../")) -RUNNER_TEMP = os.getenv("RUNNER_TEMP", os.path.abspath("./tmp")) +REPORTS_PATH = os.getenv("REPORTS_PATH", p.abspath(p.join(module_dir, "./reports"))) +REPO_COPY = os.getenv("REPO_COPY", git_root) +RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp"))) S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports") From 5212ff0146cff19feb5f6503caf38ded919dd4d1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 29 Mar 2022 00:23:07 +0200 Subject: [PATCH 083/117] Improve docker_images_check logging --- tests/ci/docker_images_check.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 30aebb6aaeb..aaac652c80a 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -407,7 +407,10 @@ def main(): pr_info = PRInfo(need_changed_files=True) changed_images = get_changed_docker_images(pr_info, images_dict) - logging.info("Has changed images %s", ", ".join([im.path for im in changed_images])) + if changed_images: + logging.info( + "Has changed images: %s", ", ".join([im.path for im in changed_images]) + ) image_versions, result_version = gen_versions(pr_info, args.suffix) From 93cfc4d590b90c56acf3094fcab966d14f944815 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 31 Mar 2022 00:11:25 +0200 Subject: [PATCH 084/117] Add validate_version function to version_helper.py --- tests/ci/version_helper.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 3bb547333e7..57d3c34961f 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -138,6 +138,14 @@ class VersionType: VALID = (TESTING, PRESTABLE, STABLE, LTS) +def validate_version(version: str): + parts = version.split(".") + if len(parts) != 4: + raise ValueError(f"{version} does not contain 4 parts") + for part in parts: + int(part) + + def get_abs_path(path: str) -> str: return p.abspath(p.join(git.root, path)) From 3e7c46075ab600bebbde09d6d032091271b5e8d8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 31 Mar 2022 00:12:35 +0200 Subject: [PATCH 085/117] Clean out dead code from version_helper --- tests/ci/build_check.py | 2 +- tests/ci/version_helper.py | 32 +------------------------------- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 2a079a60367..8ef723454d5 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -264,7 +264,7 @@ def main(): version_type = "stable" official_flag = True - update_version_local(REPO_COPY, version, version_type) + update_version_local(version, version_type) logging.info("Updated local files with version") diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 57d3c34961f..fc6c20a86b6 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 -import datetime import logging import os.path as p -import subprocess from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from typing import Dict, Tuple, Union @@ -193,22 +191,6 @@ def update_cmake_version( f.write(VERSIONS_TEMPLATE.format_map(version.as_dict())) -def _update_changelog(repo_path: str, version: ClickHouseVersion): - cmd = """sed \ - -e "s/[@]VERSION_STRING[@]/{version_str}/g" \ - -e "s/[@]DATE[@]/{date}/g" \ - -e "s/[@]AUTHOR[@]/clickhouse-release/g" \ - -e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \ - < {in_path} > {changelog_path} - """.format( - version_str=version.string, - date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300", - in_path=p.join(repo_path, CHANGELOG_IN_PATH), - changelog_path=p.join(repo_path, CHANGELOG_PATH), - ) - subprocess.check_call(cmd, shell=True) - - def update_contributors( relative_contributors_path: str = GENERATED_CONTRIBUTORS, force: bool = False ): @@ -233,22 +215,10 @@ def update_contributors( cfd.write(content) -def _update_dockerfile(repo_path: str, version: ClickHouseVersion): - version_str_for_docker = ".".join( - [str(version.major), str(version.minor), str(version.patch), "*"] - ) - cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format( - path=repo_path, ver=version_str_for_docker - ) - subprocess.check_call(cmd, shell=True) - - -def update_version_local(repo_path, version, version_type="testing"): +def update_version_local(version, version_type="testing"): update_contributors() version.with_description(version_type) update_cmake_version(version) - _update_changelog(repo_path, version) - _update_dockerfile(repo_path, version) def main(): From b950b531564ad3a4cff6f087bc52bb095fdd913e Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 31 Mar 2022 00:14:28 +0200 Subject: [PATCH 086/117] Make multiarch clickhouse-server Dockerfiles --- docker/server/Dockerfile.alpine | 42 +++++++++--- docker/server/Dockerfile.ubuntu | 110 +++++++++++++++++--------------- 2 files changed, 92 insertions(+), 60 deletions(-) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7aa06bf87cd..5aaf5dd5511 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -1,3 +1,14 @@ +FROM ubuntu:20.04 AS glibc-donor +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + FROM alpine ENV LANG=en_US.UTF-8 \ @@ -6,15 +17,22 @@ ENV LANG=en_US.UTF-8 \ TZ=UTC \ CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml -COPY --from=ubuntu:20.04 /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libdl.so.2 /lib/x86_64-linux-gnu/libm.so.6 /lib/x86_64-linux-gnu/libpthread.so.0 /lib/x86_64-linux-gnu/librt.so.1 /lib/x86_64-linux-gnu/libnss_dns.so.2 /lib/x86_64-linux-gnu/libnss_files.so.2 /lib/x86_64-linux-gnu/libresolv.so.2 /lib/ -COPY --from=ubuntu:20.04 /lib64/ld-linux-x86-64.so.2 /lib64/ -COPY --from=ubuntu:20.04 /etc/nsswitch.conf /etc/ +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" -ARG REPO_URL="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" +ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" ARG VERSION="20.9.3.45" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" @@ -25,10 +43,18 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. 
-RUN for package in ${PACKAGES}; do \ - echo "${REPO_URL}/${package}-${VERSION}.tgz" \ - && wget -c -q "${REPO_URL}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ - && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / || exit 1 \ +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ ; done \ && rm /tmp/*.tgz /install -r \ && addgroup -S -g 101 clickhouse \ diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5b7990ab030..cc198772251 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -1,11 +1,36 @@ FROM ubuntu:20.04 +# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 +ARG DEBIAN_FRONTEND=noninteractive + +COPY su-exec.c /su-exec.c + # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list +RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \ + && groupadd -r clickhouse --gid=101 \ + && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ + && apt-get update \ + && apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + dirmngr \ + gnupg \ + locales \ + wget \ + tzdata \ + && apt-get install -y --no-install-recommends tcc libc-dev && \ + tcc /su-exec.c -o /bin/su-exec && \ + chown root:root /bin/su-exec && \ + chmod 0755 /bin/su-exec && \ + rm /su-exec.c && \ + apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ + && apt-get clean -ARG repository="deb https://packages.clickhouse.com/deb stable main" -ARG version=22.1.1.* +ARG REPO_CHANNEL="stable" +ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" +ARG VERSION=22.1.1.* +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image # from debs created by CI build, for example: @@ -19,9 +44,6 @@ ARG deb_location_url="" # note: clickhouse-odbc-bridge is not supported there. ARG single_binary_location_url="" -# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 -ARG DEBIAN_FRONTEND=noninteractive - # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint # can't do chown and owners of mounted volumes should be configured externally. @@ -44,58 +66,42 @@ ARG DEBIAN_FRONTEND=noninteractive # 1. Dependency on some foreign code in yet another programming language - does not sound alright. # 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners. 
-COPY su-exec.c /su-exec.c +ARG TARGETARCH -RUN groupadd -r clickhouse --gid=101 \ - && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ - && apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - locales \ - wget \ - tzdata \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && if [ -n "$deb_location_url" ]; then \ - echo "installing from custom url with deb packages: $deb_location_url" \ - rm -rf /tmp/clickhouse_debs \ - && mkdir -p /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \ - && dpkg -i /tmp/clickhouse_debs/*.deb ; \ - elif [ -n "$single_binary_location_url" ]; then \ - echo "installing from single binary url: $single_binary_location_url" \ - && rm -rf /tmp/clickhouse_binary \ - && mkdir -p /tmp/clickhouse_binary \ - && wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \ - && chmod +x /tmp/clickhouse_binary/clickhouse \ - && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ - else \ - echo "installing from repository: $repository" \ - && apt-get update \ - && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ - && apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-common-static=$version \ - clickhouse-client=$version \ - clickhouse-server=$version ; \ - fi \ - && apt-get install -y --no-install-recommends tcc libc-dev && \ - tcc /su-exec.c -o /bin/su-exec && \ - chown root:root /bin/su-exec && \ - chmod 0755 /bin/su-exec && \ - rm /su-exec.c && \ - apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ +RUN arch=${TARGETARCH:-amd64} \ + && if [ -n "${deb_location_url}" ]; then \ + echo "installing from custom url with deb packages: ${deb_location_url}" \ + rm -rf /tmp/clickhouse_debs \ + && mkdir -p /tmp/clickhouse_debs \ + && for package in ${PACKAGES}; do \ + { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \ + wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \ + || exit 1 \ + ; done \ + && dpkg -i /tmp/clickhouse_debs/*.deb ; \ + elif [ -n "${single_binary_location_url}" ]; then \ + echo "installing from single binary url: ${single_binary_location_url}" \ + && rm -rf /tmp/clickhouse_binary \ + && mkdir -p /tmp/clickhouse_binary \ + && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \ + && chmod +x /tmp/clickhouse_binary/clickhouse \ + && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ + else \ + mkdir -p /etc/apt/sources.list.d \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ + && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ + && echo "installing from repository: ${REPOSITORY}" \ + && apt-get 
update \ + && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ + && for package in ${PACKAGES}; do \ + apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \ + ; done \ + ; fi \ && clickhouse-local -q 'SELECT * FROM system.build_options' \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ - && apt-get clean \ && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client From 9aab7e991047ca2dc1025d423296aa3c9307f5eb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 31 Mar 2022 00:15:01 +0200 Subject: [PATCH 087/117] Create a script to build and push server images --- tests/ci/docker_server.py | 356 ++++++++++++++++++++++++++++++++++++++ tests/ci/docker_test.py | 46 ++++- 2 files changed, 401 insertions(+), 1 deletion(-) create mode 100644 tests/ci/docker_server.py diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py new file mode 100644 index 00000000000..0d22724fcb7 --- /dev/null +++ b/tests/ci/docker_server.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python + +# here +import argparse +import json +import logging +import subprocess +from os import path as p, makedirs +from typing import List, Tuple + +from github import Github + +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_images_check import DockerImage +from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET +from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from pr_info import PRInfo +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results +from version_helper import ( + get_tagged_versions, + get_version_from_repo, + get_version_from_string, + validate_version, +) + +TEMP_PATH = p.join(RUNNER_TEMP, "docker_images_check") +BUCKETS = {"amd64": "package_release", "arm64": "package_aarch64"} + + +class DelOS(argparse.Action): + def __call__(self, _, namespace, __, option_string=None): + no_build = self.dest[3:] if self.dest.startswith("no_") else self.dest + if no_build in namespace.os: + namespace.os.remove(no_build) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="A program to build clickhouse-server image, both alpine and " + "ubuntu versions", + ) + + parser.add_argument( + "--version", + type=version_arg, + default=get_version_from_repo().string, + help="a version to build", + ) + parser.add_argument( + "--release-type", + type=str, + choices=("auto", "latest", "major", "minor", "patch", "head"), + default="head", + help="version part that will be updated when '--version' is set; " + "'auto' is a special case, it will get versions from github and detect the " + "release type (latest, major, minor or patch) automatically", + ) + parser.add_argument( + "--image-path", + type=str, + default="docker/server", + help="a path to docker context directory", + ) + parser.add_argument( + "--image-repo", + type=str, + default="clickhouse/clickhouse-server", + help="image name on docker hub", + ) + parser.add_argument( + "--bucket-prefix", + help="if set, then is used as source for deb and tgz files", + ) + parser.add_argument("--reports", default=True, 
help=argparse.SUPPRESS) + parser.add_argument( + "--no-reports", + action="store_false", + dest="reports", + default=argparse.SUPPRESS, + help="don't push reports to S3 and github", + ) + parser.add_argument("--push", default=True, help=argparse.SUPPRESS) + parser.add_argument( + "--no-push-images", + action="store_false", + dest="push", + default=argparse.SUPPRESS, + help="don't push images to docker hub", + ) + parser.add_argument("--os", default=["ubuntu", "alpine"], help=argparse.SUPPRESS) + parser.add_argument( + "--no-ubuntu", + action=DelOS, + nargs=0, + default=argparse.SUPPRESS, + help="don't build ubuntu image", + ) + parser.add_argument( + "--no-alpine", + action=DelOS, + nargs=0, + default=argparse.SUPPRESS, + help="don't build alpine image", + ) + + return parser.parse_args() + + +def version_arg(version: str) -> str: + try: + validate_version(version) + return version + except ValueError as e: + raise argparse.ArgumentTypeError(e) + + +def auto_release_type(version: str, release_type: str) -> str: + if release_type != "auto": + return release_type + current_version = get_version_from_string(version) + + git_versions = get_tagged_versions() + reference_version = git_versions[0] + for i in reversed(range(len(git_versions))): + if git_versions[i] < current_version: + if i == len(git_versions) - 1: + return "latest" + reference_version = git_versions[i + 1] + break + + if current_version.major < reference_version.major: + return "major" + if current_version.minor < reference_version.minor: + return "minor" + if current_version.patch < reference_version.patch: + return "patch" + + raise ValueError( + "Release type 'tweak' is not supported for " + f"{current_version.string} < {reference_version.string}" + ) + + +def gen_tags(version: str, release_type: str) -> List[str]: + """ + 22.2.2.2 + latest: + - latest + - 22 + - 22.2 + - 22.2.2 + - 22.2.2.2 + 22.2.2.2 + major: + - 22 + - 22.2 + - 22.2.2 + - 22.2.2.2 + 22.2.2.2 + minor: + - 22.2 + - 22.2.2 + - 22.2.2.2 + 22.2.2.2 + patch: + - 22.2.2 + - 22.2.2.2 + 22.2.2.2 + head: + - head + """ + validate_version(version) + parts = version.split(".") + tags = [] + if release_type == "latest": + tags.append(release_type) + for i in range(len(parts)): + tags.append(".".join(parts[: i + 1])) + elif release_type == "major": + for i in range(len(parts)): + tags.append(".".join(parts[: i + 1])) + elif release_type == "minor": + for i in range(1, len(parts)): + tags.append(".".join(parts[: i + 1])) + elif release_type == "patch": + for i in range(2, len(parts)): + tags.append(".".join(parts[: i + 1])) + elif release_type == "head": + tags.append(release_type) + else: + raise ValueError(f"{release_type} is not valid release part") + return tags + + +def buildx_args(bucket_prefix: str, arch: str) -> List[str]: + args = [f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}"] + if bucket_prefix: + url = p.join(bucket_prefix, BUCKETS[arch]) # to prevent a double // + args.append(f"--build-arg=REPOSITORY='{url}'") + args.append(f"--build-arg=deb_location_url='{url}'") + return args + + +def build_and_push_image( + image: DockerImage, push: bool, bucket_prefix: str, os: str, tag: str, version: str +) -> List[Tuple[str, str]]: + result = [] + if os != "ubuntu": + tag += f"-{os}" + init_args = ["docker", "buildx", "build"] + if push: + init_args.append("--push") + init_args.append("--output=type=image,push-by-digest=true") + init_args.append(f"--tag={image.repo}") + else: + init_args.append("--output=type=docker") + + # `docker buildx build 
--load` does not support multiple images currently + # images must be built separately and merged together with `docker manifest` + digests = [] + for arch in BUCKETS: + arch_tag = f"{tag}-{arch}" + metadata_path = p.join(TEMP_PATH, arch_tag) + dockerfile = p.join(image.full_path, f"Dockerfile.{os}") + cmd_args = list(init_args) + cmd_args.extend(buildx_args(bucket_prefix, arch)) + if not push: + cmd_args.append(f"--tag={image.repo}:{arch_tag}") + cmd_args.extend( + [ + f"--metadata-file={metadata_path}", + f"--build-arg=VERSION='{version}'", + "--progress=plain", + f"--file={dockerfile}", + image.full_path, + ] + ) + cmd = " ".join(cmd_args) + logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) + with subprocess.Popen( + cmd, + shell=True, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + universal_newlines=True, + ) as process: + for line in process.stdout: # type: ignore + print(line, end="") + retcode = process.wait() + if retcode != 0: + result.append((f"{image.repo}:{tag}-{arch}", "FAIL")) + return result + result.append((f"{image.repo}:{tag}-{arch}", "OK")) + with open(metadata_path, "rb") as m: + metadata = json.load(m) + digests.append(metadata["containerimage.digest"]) + if push: + cmd = ( + "docker buildx imagetools create " + f"--tag {image.repo}:{tag} {' '.join(digests)}" + ) + logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) + with subprocess.Popen( + cmd, + shell=True, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + universal_newlines=True, + ) as process: + for line in process.stdout: # type: ignore + print(line, end="") + retcode = process.wait() + if retcode != 0: + result.append((f"{image.repo}:{tag}", "FAIL")) + else: + logging.info( + "Merging is available only on push, separate %s images are created", + f"{image.repo}:{tag}-$arch", + ) + + return result + + +def main(): + logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() + makedirs(TEMP_PATH, exist_ok=True) + + args = parse_args() + image = DockerImage(args.image_path, args.image_repo, False) + args.release_type = auto_release_type(args.version, args.release_type) + tags = gen_tags(args.version, args.release_type) + NAME = f"Docker image {image.repo} building check (actions)" + pr_info = None + if CI: + pr_info = PRInfo() + args.bucket_prefix = ( + f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/" + f"{pr_info.number}/{pr_info.sha}" + ) + + if args.push: + subprocess.check_output( # pylint: disable=unexpected-keyword-arg + "docker login --username 'robotclickhouse' --password-stdin", + input=get_parameter_from_ssm("dockerhub_robot_password"), + encoding="utf-8", + shell=True, + ) + NAME = f"Docker image {image.repo} build and push (actions)" + + logging.info("Following tags will be created: %s", ", ".join(tags)) + status = "success" + test_results = [] # type: List[Tuple[str, str]] + for os in args.os: + for tag in tags: + test_results.extend( + build_and_push_image( + image, args.push, args.bucket_prefix, os, tag, args.version + ) + ) + if test_results[-1][1] != "OK": + status = "failure" + + pr_info = pr_info or PRInfo() + s3_helper = S3Helper("https://s3.amazonaws.com") + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) + + print(f"::notice ::Report url: {url}") + print(f'::set-output name=url_output::"{url}"') + + if not args.reports: + return + + description = f"Processed tags: {', '.join(tags)}" + + if len(description) >= 140: + description = description[:136] + "..." 
+ + gh = Github(get_best_robot_token()) + post_commit_status(gh, pr_info.sha, NAME, description, status, url) + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + url, + NAME, + ) + ch_helper = ClickHouseHelper() + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 2b864b6b94c..e2dc8874a7a 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -2,12 +2,16 @@ import os import unittest -from unittest.mock import patch +from unittest.mock import patch, MagicMock from env_helper import GITHUB_RUN_URL from pr_info import PRInfo import docker_images_check as di +with patch("git_helper.Git"): + from version_helper import get_version_from_string, get_tagged_versions + import docker_server as ds + # di.logging.basicConfig(level=di.logging.INFO) @@ -221,5 +225,45 @@ class TestDockerImageCheck(unittest.TestCase): self.assertEqual(results, expected) +class TestDockerServer(unittest.TestCase): + def test_gen_tags(self): + cases = ( + (("22.2.2.2", "latest"), ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]), + (("22.2.2.2", "major"), ["22", "22.2", "22.2.2", "22.2.2.2"]), + (("22.2.2.2", "minor"), ["22.2", "22.2.2", "22.2.2.2"]), + (("22.2.2.2", "patch"), ["22.2.2", "22.2.2.2"]), + (("22.2.2.2", "head"), ["head"]), + ) + for case in cases: + version, release_type = case[0] + self.assertEqual(case[1], ds.gen_tags(version, release_type)) + + with self.assertRaises(ValueError): + ds.gen_tags("22.2.2.2", "auto") + + @patch("docker_server.get_tagged_versions") + def test_auto_release_type(self, mock_tagged_versions: MagicMock): + mock_tagged_versions.return_value = [ + get_version_from_string("1.1.1.1"), + get_version_from_string("1.2.1.1"), + get_version_from_string("2.1.1.1"), + get_version_from_string("2.2.1.1"), + get_version_from_string("2.2.2.1"), + ] + cases = ( + ("1.0.1.1", "minor"), + ("1.1.2.1", "minor"), + ("1.3.1.1", "major"), + ("2.1.2.1", "minor"), + ("2.2.1.3", "patch"), + ("2.2.3.1", "latest"), + ("2.3.1.1", "latest"), + ) + _ = get_tagged_versions() + for case in cases: + release = ds.auto_release_type(case[0], "auto") + self.assertEqual(case[1], release) + + if __name__ == "__main__": unittest.main() From 00a373407821bd5201d58409b61a400543f5d5f1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 31 Mar 2022 10:33:57 +0200 Subject: [PATCH 088/117] Cosmetic improvement --- tests/ci/docker_images_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index aaac652c80a..43671e3a2f7 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -397,14 +397,13 @@ def main(): images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json") + pr_info = PRInfo() if args.all: - pr_info = PRInfo() pr_info.changed_files = set(images_dict.keys()) elif args.image_path: - pr_info = PRInfo() pr_info.changed_files = set(i for i in args.image_path) else: - pr_info = PRInfo(need_changed_files=True) + pr_info.fetch_changed_files() changed_images = get_changed_docker_images(pr_info, images_dict) if changed_images: From 310f8e67dce6c6fb7be2ede26d98415bf5422abf Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 31 Mar 2022 13:11:49 +0200 Subject: [PATCH 089/117] Add docker/server images check --- .github/workflows/pull_request.yml | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 87a31b9683c..c01d1821d0f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 -on: # yamllint disable-line rule:truthy +on: # yamllint disable-line rule:truthy pull_request: types: - synchronize @@ -998,6 +998,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### +############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: @@ -3138,6 +3166,7 @@ jobs: needs: - StyleCheck - DockerHubPush + - DockerServerImages - CheckLabels - BuilderReport - FastTest From 66851b28b792cda965506ec0d8326fd461595ece Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 31 Mar 2022 22:18:14 +0200 Subject: [PATCH 090/117] Make _check_tag public --- tests/ci/git_helper.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 2d28c693087..5c02ea53cf8 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -3,7 +3,7 @@ import argparse import os.path as p import re import subprocess -from typing import Optional +from typing import List, Optional # ^ and $ match subline in `multiple\nlines` # \A and \Z match only start and end of the whole string @@ -89,7 +89,7 @@ class Git: self.run(f"git rev-list {self.latest_tag}..HEAD --count") ) - def _check_tag(self, value: str): + def check_tag(self, value: str): if value == "": return if not self._tag_pattern.match(value): @@ -101,7 +101,7 @@ class Git: @latest_tag.setter def latest_tag(self, value: str): - self._check_tag(value) + self.check_tag(value) self._latest_tag = value @property @@ -110,7 +110,7 @@ class Git: @new_tag.setter def new_tag(self, value: str): - self._check_tag(value) + self.check_tag(value) self._new_tag = value @property @@ -122,3 +122,6 @@ class Git: version = self.latest_tag.split("-", maxsplit=1)[0] return int(version.split(".")[-1]) + self.commits_since_tag + + def get_tags(self) -> List[str]: + return self.run("git tag").split() From 98aa8bf5794c0571b4a09f59c2916644a530a173 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 31 Mar 2022 22:19:14 +0200 Subject: [PATCH 091/117] Improve version_helper.py - Add helper functions - Add __lt__ and __eq__ to compare versions between each other --- tests/ci/version_helper.py | 50 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index fc6c20a86b6..7db96cfde7c 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -2,7 +2,7 @@ import logging import os.path as p from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from typing import Dict, Tuple, Union +from typing import Dict, List, Tuple, Union from git_helper import Git, removeprefix @@ -47,12 +47,16 @@ class ClickHouseVersion: patch: Union[int, str], revision: Union[int, str], git: Git, + tweak: str = None, ): self._major = int(major) self._minor = int(minor) self._patch = int(patch) self._revision = int(revision) self._git = git + self._tweak = None + if tweak is not None: + self._tweak = int(tweak) self._describe = "" def update(self, part: str) -> "ClickHouseVersion": @@ -87,7 +91,7 @@ class ClickHouseVersion: @property def tweak(self) -> int: - return self._git.tweak + return self._tweak or self._git.tweak @property def revision(self) -> int: @@ -127,6 +131,25 @@ class ClickHouseVersion: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") self._describe = f"v{self.string}-{version_type}" + def __eq__(self, other) -> bool: + if not isinstance(self, type(other)): + return NotImplemented + return ( + self.major == other.major + and self.minor == other.minor + and self.patch == other.patch + and self.tweak == other.tweak + ) + + def __lt__(self, other: "ClickHouseVersion") -> bool: + for part in ("major", "minor", "patch", "tweak"): + if getattr(self, part) < getattr(other, part): + return True + elif getattr(self, part) > getattr(other, part): + return False + + return False + class VersionType: LTS = "lts" @@ -182,6 +205,29 @@ def get_version_from_repo( ) +def get_version_from_string(version: str) -> 
ClickHouseVersion: + validate_version(version) + parts = version.split(".") + return ClickHouseVersion(parts[0], parts[1], parts[2], -1, git, parts[3]) + + +def get_version_from_tag(tag: str) -> ClickHouseVersion: + git.check_tag(tag) + tag = tag[1:].split("-")[0] + return get_version_from_string(tag) + + +def get_tagged_versions() -> List[ClickHouseVersion]: + versions = [] + for tag in git.get_tags(): + try: + version = get_version_from_tag(tag) + versions.append(version) + except Exception: + continue + return sorted(versions) + + def update_cmake_version( version: ClickHouseVersion, versions_path: str = FILE_WITH_VERSION_PATH, From 279defbe9f04d8732a3ba275b769a1fba61a3267 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 31 Mar 2022 23:47:11 +0200 Subject: [PATCH 092/117] Add clickhouse-keeper Dockerfile --- docker/keeper/Dockerfile | 72 +++++++++++++++++++++++++ docker/keeper/Dockerfile.alpine | 1 + docker/keeper/entrypoint.sh | 93 +++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 docker/keeper/Dockerfile create mode 120000 docker/keeper/Dockerfile.alpine create mode 100644 docker/keeper/entrypoint.sh diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile new file mode 100644 index 00000000000..207dddce1bb --- /dev/null +++ b/docker/keeper/Dockerfile @@ -0,0 +1,72 @@ +FROM ubuntu:20.04 AS glibc-donor + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + +FROM alpine + +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en \ + LC_ALL=en_US.UTF-8 \ + TZ=UTC \ + CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml + +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ +COPY entrypoint.sh /entrypoint.sh +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + +ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" +ARG VERSION="22.4.1.917" +ARG PACKAGES="clickhouse-keeper" + +# user/group precreated explicitly with fixed uid/gid on purpose. +# It is especially important for rootless containers: in that case entrypoint +# can't do chown and owners of mounted volumes should be configured externally. +# We do that in advance at the begining of Dockerfile before any packages will be +# installed to prevent picking those uid / gid by some unrelated software. +# The same uid / gid (101) is used both for alpine and ubuntu. 
+ + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ + && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \ + && chown clickhouse:clickhouse /var/lib/clickhouse \ + && chown root:clickhouse /var/log/clickhouse-keeper \ + && chmod +x /entrypoint.sh \ + && apk add --no-cache su-exec bash tzdata \ + && cp /usr/share/zoneinfo/UTC /etc/localtime \ + && echo "UTC" > /etc/timezone \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + + +EXPOSE 2181 10181 44444 + +VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/keeper/Dockerfile.alpine b/docker/keeper/Dockerfile.alpine new file mode 120000 index 00000000000..1d1fe94df49 --- /dev/null +++ b/docker/keeper/Dockerfile.alpine @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/docker/keeper/entrypoint.sh b/docker/keeper/entrypoint.sh new file mode 100644 index 00000000000..3aacf655c28 --- /dev/null +++ b/docker/keeper/entrypoint.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +set +x +set -eo pipefail +shopt -s nullglob + +DO_CHOWN=1 +if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then + DO_CHOWN=0 +fi + +CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" +CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" + +# support --user +if [ "$(id -u)" = "0" ]; then + USER=$CLICKHOUSE_UID + GROUP=$CLICKHOUSE_GID + if command -v gosu &> /dev/null; then + gosu="gosu $USER:$GROUP" + elif command -v su-exec &> /dev/null; then + gosu="su-exec $USER:$GROUP" + else + echo "No gosu/su-exec detected!" + exit 1 + fi +else + USER="$(id -u)" + GROUP="$(id -g)" + gosu="" + DO_CHOWN=0 +fi + +KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}" + +if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then + echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'" + exit 1 +fi + +DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}" +LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}" +LOG_PATH="${LOG_DIR}/clickhouse-keeper.log" +ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log" +COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log" +COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots" +CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0} + +for dir in "$DATA_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$COORDINATION_LOG_DIR" \ + "$COORDINATION_SNAPSHOT_DIR" +do + # check if variable not empty + [ -z "$dir" ] && continue + # ensure directories exist + if ! 
mkdir -p "$dir"; then + echo "Couldn't create necessary directory: $dir" + exit 1 + fi + + if [ "$DO_CHOWN" = "1" ]; then + # ensure proper directories permissions + # but skip it for if directory already has proper premissions, cause recursive chown may be slow + if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then + chown -R "$USER:$GROUP" "$dir" + fi + elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then + echo "Necessary directory '$dir' isn't accessible by user with id '$USER'" + exit 1 + fi +done + +# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments +if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then + # Watchdog is launched by default, but does not send SIGINT to the main process, + # so the container can't be finished by ctrl+c + export CLICKHOUSE_WATCHDOG_ENABLE + + cd /var/lib/clickhouse + + # There is a config file. It is already tested with gosu (if it is readably by keeper user) + if [ -f "$KEEPER_CONFIG" ]; then + exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" + fi + + # There is no config file. Will use embedded one + exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" +fi + +# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image +exec "$@" From 1078b1d31eba360a88183080e2080267cb0e304b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 1 Apr 2022 11:59:12 +0200 Subject: [PATCH 093/117] Build server and keeper images for each master commit --- .github/workflows/master.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index cfa95b84ee5..081fa165c68 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -947,6 +947,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### +############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head + python3 docker_server.py --release-type head --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: From b49d94c9aaa0d63a5cb9b2b4bac70701dda5bd93 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 1 Apr 2022 12:35:52 +0200 Subject: [PATCH 094/117] Add building images to ReleaseCI --- .github/workflows/release.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bd62e64409f..29e3d0c4358 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,3 +36,28 @@ jobs: overwrite: true tag: ${{ github.ref }} file_glob: true + ############################################################################################ + ##################################### Docker images ####################################### + ############################################################################################ + DockerServerImages: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type auto + python3 docker_server.py --release-type auto --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" From cb698c4cc16d0317f4c032cd31f79ec4809dd54b Mon Sep 17 00:00:00 2001 From: tcoyvwac <53616399+tcoyvwac@users.noreply.github.com> Date: Thu, 31 Mar 2022 18:13:58 +0200 Subject: [PATCH 095/117] Replace throw in debug mode in destructor FileSegmentsHolder: - Replaced throw in destructor with assert for all #ifdef modes. --- src/Common/FileSegment.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 748074c75c2..ac89721683e 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -532,11 +532,8 @@ FileSegmentsHolder::~FileSegmentsHolder() } catch (...) 
{ -#ifdef NDEBUG tryLogCurrentException(__PRETTY_FUNCTION__); -#else - throw; -#endif + assert(false); } } } From 687942ce70ad644b495c12a20336e788da175e68 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Apr 2022 15:02:49 +0000 Subject: [PATCH 096/117] more strict quota for written bytes --- src/Access/EnabledQuota.cpp | 4 +-- src/Common/ErrorCodes.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 10 +++--- .../Executors/ExecutionThreadContext.cpp | 4 +-- .../Transforms/CountingTransform.cpp | 7 +--- src/Processors/Transforms/CountingTransform.h | 2 ++ tests/integration/test_quota/test.py | 32 ++++++++++++++----- .../02246_async_insert_quota.reference | 2 +- .../0_stateless/02246_async_insert_quota.sh | 2 +- .../02247_written_bytes_quota.reference | 8 ++--- .../0_stateless/02247_written_bytes_quota.sh | 17 ++++------ 11 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 78dd3c7022a..f2354a3837c 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -13,7 +13,7 @@ namespace DB { namespace ErrorCodes { - extern const int QUOTA_EXPIRED; + extern const int QUOTA_EXCEEDED; } @@ -33,7 +33,7 @@ struct EnabledQuota::Impl "Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: " + type_info.valueToStringWithName(used) + "/" + type_info.valueToString(max) + ". " + "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name), - ErrorCodes::QUOTA_EXPIRED); + ErrorCodes::QUOTA_EXCEEDED); } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b6c67478b26..2e60e125d73 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -208,7 +208,7 @@ M(198, DNS_ERROR) \ M(199, UNKNOWN_QUOTA) \ M(200, QUOTA_DOESNT_ALLOW_KEYS) \ - M(201, QUOTA_EXPIRED) \ + M(201, QUOTA_EXCEEDED) \ M(202, TOO_MANY_SIMULTANEOUS_QUERIES) \ M(203, NO_FREE_CONNECTION) \ M(204, CANNOT_FSYNC) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c6f63c3c36e..c218af556f0 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -198,13 +198,11 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) copyData(*read_buf, write_buf); } + std::cerr << "bytes.size: " << bytes.size() << "\n"; + std::cerr << "bytes: " << bytes << "\n"; + if (auto quota = query_context->getQuota()) - { - /// Do not throw if quota exceded right now, because - /// bytes are not written now actually. 
- quota->checkExceeded(QuotaType::WRITTEN_BYTES); - quota->used(QuotaType::WRITTEN_BYTES, bytes.size(), /*check_exceeded=*/ false); - } + quota->used(QuotaType::WRITTEN_BYTES, bytes.size()); auto entry = std::make_shared(std::move(bytes), query_context->getCurrentQueryId()); InsertQuery key{query, settings}; diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index acf702a5c7e..e0a336c6b6a 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -7,7 +7,7 @@ namespace DB namespace ErrorCodes { extern const int TOO_MANY_ROWS_OR_BYTES; - extern const int QUOTA_EXPIRED; + extern const int QUOTA_EXCEEDED; extern const int QUERY_WAS_CANCELLED; } @@ -34,7 +34,7 @@ static bool checkCanAddAdditionalInfoToException(const DB::Exception & exception { /// Don't add additional info to limits and quota exceptions, and in case of kill query (to pass tests). return exception.code() != ErrorCodes::TOO_MANY_ROWS_OR_BYTES - && exception.code() != ErrorCodes::QUOTA_EXPIRED + && exception.code() != ErrorCodes::QUOTA_EXCEEDED && exception.code() != ErrorCodes::QUERY_WAS_CANCELLED; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index daf154d28ca..646256d60c0 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -19,12 +19,7 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { if (quota) - { - /// Do not throw if quota exceded right now, because - /// bytes are not written now actually. - quota->checkExceeded(QuotaType::WRITTEN_BYTES); - quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes(), /*check_exceeded=*/ false); - } + quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); Progress local_progress{WriteProgress(chunk.getNumRows(), chunk.bytes())}; progress.incrementPiecewiseAtomically(local_progress); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 0386a7f71af..bd2ec58a27f 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -52,6 +52,8 @@ protected: ProgressCallback progress_callback; QueryStatus * process_elem = nullptr; ThreadStatus * thread_status = nullptr; + + /// Quota is used to limit amount of written bytes. 
std::shared_ptr quota; Chunk cur_chunk; }; diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 651726f30c0..fd5a6599a59 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -129,6 +129,7 @@ def test_quota_from_users_xml(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -349,6 +350,7 @@ def test_tracking_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -454,7 +456,7 @@ def test_exceed_quota(): ] ) system_quota_limits( - [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]] + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]] ) system_quota_usage( [ @@ -545,6 +547,7 @@ def test_exceed_quota(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -634,6 +637,7 @@ def test_add_remove_interval(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -695,6 +699,7 @@ def test_add_remove_interval(): 1000, "\\N", "\\N", + "\\N", ], [ "myQuota", @@ -709,6 +714,7 @@ def test_add_remove_interval(): "\\N", 20000, 120, + "\\N", ], ] ) @@ -842,6 +848,7 @@ def test_add_remove_interval(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1003,6 +1010,7 @@ def test_add_remove_interval(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1064,6 +1072,7 @@ def test_add_remove_quota(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1136,6 +1145,7 @@ def test_add_remove_quota(): 1000, "\\N", "\\N", + "\\N", ], [ "myQuota2", @@ -1150,6 +1160,7 @@ def test_add_remove_quota(): 4000, 400000, 60, + "\\N", ], [ "myQuota2", @@ -1164,6 +1175,7 @@ def test_add_remove_quota(): "\\N", "\\N", 1800, + "\\N", ], ] ) @@ -1226,6 +1238,7 @@ def test_add_remove_quota(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1294,6 +1307,7 @@ def test_add_remove_quota(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1356,6 +1370,7 @@ def test_reload_users_xml_by_timer(): 1000, "\\N", "\\N", + "\\N", ] ] ) @@ -1382,7 +1397,7 @@ def test_reload_users_xml_by_timer(): assert_eq_with_retry( instance, "SELECT * FROM system.quota_limits", - [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]], + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]], ) @@ -1481,15 +1496,15 @@ def test_dcl_management(): == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n" ) assert re.match( - "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n" + "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n" + "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", 
instance.query("SHOW QUOTA"), ) @@ -1503,7 +1518,7 @@ def test_dcl_management(): instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1519,7 +1534,7 @@ def test_dcl_management(): instance.query("SELECT * from test_table") assert re.match( - "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1563,6 +1578,7 @@ def test_query_inserts(): 1000, "\\N", "\\N", + "\\N", ] ] ) diff --git a/tests/queries/0_stateless/02246_async_insert_quota.reference b/tests/queries/0_stateless/02246_async_insert_quota.reference index 99b8e471635..9d384c1aaf1 100644 --- a/tests/queries/0_stateless/02246_async_insert_quota.reference +++ b/tests/queries/0_stateless/02246_async_insert_quota.reference @@ -1,2 +1,2 @@ -QUOTA_EXPIRED +QUOTA_EXCEEDED 2 diff --git a/tests/queries/0_stateless/02246_async_insert_quota.sh b/tests/queries/0_stateless/02246_async_insert_quota.sh index 9fc4df4660c..92e6818438c 100755 --- a/tests/queries/0_stateless/02246_async_insert_quota.sh +++ b/tests/queries/0_stateless/02246_async_insert_quota.sh @@ -20,7 +20,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 1 HOUR MAX QUERY INSER ${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (1, 'a')" ${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (2, 'b')" -${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (3, 'c')" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (3, 'c')" 2>&1 | grep -m1 -o QUOTA_EXCEEDED sleep 1.0 diff --git a/tests/queries/0_stateless/02247_written_bytes_quota.reference b/tests/queries/0_stateless/02247_written_bytes_quota.reference index 37ec8358721..f6e9710e282 100644 --- a/tests/queries/0_stateless/02247_written_bytes_quota.reference +++ b/tests/queries/0_stateless/02247_written_bytes_quota.reference @@ -1,9 +1,7 @@ -QUOTA_EXPIRED -QUOTA_EXPIRED +QUOTA_EXCEEDED +QUOTA_EXCEEDED 1 2 -QUOTA_EXPIRED -QUOTA_EXPIRED -QUOTA_EXPIRED +QUOTA_EXCEEDED 1 50 diff --git a/tests/queries/0_stateless/02247_written_bytes_quota.sh b/tests/queries/0_stateless/02247_written_bytes_quota.sh index 17e66a6bc75..072626f41ac 100755 --- a/tests/queries/0_stateless/02247_written_bytes_quota.sh +++ b/tests/queries/0_stateless/02247_written_bytes_quota.sh @@ -16,25 +16,22 @@ ${CLICKHOUSE_CLIENT} -q "CREATE ROLE r02247" ${CLICKHOUSE_CLIENT} -q "CREATE USER u02247" ${CLICKHOUSE_CLIENT} -q "GRANT ALL ON *.* TO r02247" ${CLICKHOUSE_CLIENT} -q "GRANT r02247 to u02247" -${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 10 TO r02247" +${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 25 TO r02247" -${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" -${CLICKHOUSE_CLIENT} --user u02247 
--async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" -${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED -${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqw')" +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqw')" +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqw')" 2>&1 | grep -m1 -o QUOTA_EXCEEDED +${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqw')" 2>&1 | grep -m1 -o QUOTA_EXCEEDED ${CLICKHOUSE_CLIENT} -q "SELECT written_bytes > 10 FROM system.quotas_usage WHERE quota_name = 'q02247'" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM written_bytes_02247" ${CLICKHOUSE_CLIENT} -q "DROP QUOTA q02247" -${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 100 TO r02247" +${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02247 FOR INTERVAL 100 YEAR MAX WRITTEN BYTES = 1000 TO r02247" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE written_bytes_02247" ${CLICKHOUSE_CLIENT} --user u02247 -q "INSERT INTO written_bytes_02247 SELECT toString(number) FROM numbers(50)" - -${CLICKHOUSE_CLIENT} --user u02247 -q "INSERT INTO written_bytes_02247 SELECT toString(number) FROM numbers(1)" 2>&1 | grep -m1 -o QUOTA_EXPIRED -${CLICKHOUSE_CLIENT} --user u02247 --async_insert 1 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED -${CLICKHOUSE_CLIENT} --user u02247 --async_insert 0 -q "INSERT INTO written_bytes_02247 VALUES ('qwqwqw')" 2>&1 | grep -m1 -o QUOTA_EXPIRED +${CLICKHOUSE_CLIENT} --user u02247 -q "INSERT INTO written_bytes_02247 SELECT toString(number) FROM numbers(100)" 2>&1 | grep -m1 -o QUOTA_EXCEEDED ${CLICKHOUSE_CLIENT} -q "SELECT written_bytes > 100 FROM system.quotas_usage WHERE quota_name = 'q02247'" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM written_bytes_02247" From 6b83e2b2a7477c80730a2a3967a92c6a33c69ddd Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 1 Apr 2022 17:53:43 +0200 Subject: [PATCH 097/117] Fix automatic bucket prefix for master --- tests/ci/docker_server.py | 38 +++++++++++++++++++++----------------- tests/ci/docker_test.py | 29 +++++++++++++++-------------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 0d22724fcb7..789abc30b0c 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -10,6 +10,7 @@ from typing import List, Tuple from github import Github +from build_check import get_release_or_pr from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import post_commit_status from docker_images_check import DockerImage @@ -20,10 +21,10 @@ from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results from version_helper import ( + ClickHouseVersion, get_tagged_versions, get_version_from_repo, get_version_from_string, - validate_version, ) TEMP_PATH = p.join(RUNNER_TEMP, "docker_images_check") @@ -110,42 +111,40 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def version_arg(version: str) -> str: +def version_arg(version: str) -> ClickHouseVersion: try: - validate_version(version) - return version + return get_version_from_string(version) except ValueError as e: raise argparse.ArgumentTypeError(e) -def auto_release_type(version: str, release_type: str) -> str: +def auto_release_type(version: ClickHouseVersion, release_type: str) -> str: if release_type != "auto": return release_type - current_version = get_version_from_string(version) git_versions = get_tagged_versions() reference_version = git_versions[0] for i in reversed(range(len(git_versions))): - if git_versions[i] < current_version: + if git_versions[i] < version: if i == len(git_versions) - 1: return "latest" reference_version = git_versions[i + 1] break - if current_version.major < reference_version.major: + if version.major < reference_version.major: return "major" - if current_version.minor < reference_version.minor: + if version.minor < reference_version.minor: return "minor" - if current_version.patch < reference_version.patch: + if version.patch < reference_version.patch: return "patch" raise ValueError( "Release type 'tweak' is not supported for " - f"{current_version.string} < {reference_version.string}" + f"{version.string} < {reference_version.string}" ) -def gen_tags(version: str, release_type: str) -> List[str]: +def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]: """ 22.2.2.2 + latest: - latest @@ -168,8 +167,7 @@ def gen_tags(version: str, release_type: str) -> List[str]: 22.2.2.2 + head: - head """ - validate_version(version) - parts = version.split(".") + parts = version.string.split(".") tags = [] if release_type == "latest": tags.append(release_type) @@ -201,7 +199,12 @@ def buildx_args(bucket_prefix: str, arch: str) -> List[str]: def build_and_push_image( - image: DockerImage, push: bool, bucket_prefix: str, os: str, tag: str, version: str + image: DockerImage, + push: bool, + bucket_prefix: str, + os: str, + tag: str, + version: ClickHouseVersion, ) -> List[Tuple[str, str]]: result = [] if os != "ubuntu": @@ -228,7 +231,7 @@ def build_and_push_image( cmd_args.extend( [ f"--metadata-file={metadata_path}", - f"--build-arg=VERSION='{version}'", + f"--build-arg=VERSION='{version.string}'", "--progress=plain", f"--file={dockerfile}", image.full_path, @@ -293,9 +296,10 @@ def main(): pr_info = 
None if CI: pr_info = PRInfo() + release_or_pr = get_release_or_pr(pr_info, {"package_type": ""}, args.version) args.bucket_prefix = ( f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/" - f"{pr_info.number}/{pr_info.sha}" + f"{release_or_pr}/{pr_info.sha}" ) if args.push: diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index e2dc8874a7a..0d1c554026f 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -227,19 +227,20 @@ class TestDockerImageCheck(unittest.TestCase): class TestDockerServer(unittest.TestCase): def test_gen_tags(self): + version = get_version_from_string("22.2.2.2") cases = ( - (("22.2.2.2", "latest"), ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]), - (("22.2.2.2", "major"), ["22", "22.2", "22.2.2", "22.2.2.2"]), - (("22.2.2.2", "minor"), ["22.2", "22.2.2", "22.2.2.2"]), - (("22.2.2.2", "patch"), ["22.2.2", "22.2.2.2"]), - (("22.2.2.2", "head"), ["head"]), + ("latest", ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]), + ("major", ["22", "22.2", "22.2.2", "22.2.2.2"]), + ("minor", ["22.2", "22.2.2", "22.2.2.2"]), + ("patch", ["22.2.2", "22.2.2.2"]), + ("head", ["head"]), ) for case in cases: - version, release_type = case[0] + release_type = case[0] self.assertEqual(case[1], ds.gen_tags(version, release_type)) with self.assertRaises(ValueError): - ds.gen_tags("22.2.2.2", "auto") + ds.gen_tags(version, "auto") @patch("docker_server.get_tagged_versions") def test_auto_release_type(self, mock_tagged_versions: MagicMock): @@ -251,13 +252,13 @@ class TestDockerServer(unittest.TestCase): get_version_from_string("2.2.2.1"), ] cases = ( - ("1.0.1.1", "minor"), - ("1.1.2.1", "minor"), - ("1.3.1.1", "major"), - ("2.1.2.1", "minor"), - ("2.2.1.3", "patch"), - ("2.2.3.1", "latest"), - ("2.3.1.1", "latest"), + (get_version_from_string("1.0.1.1"), "minor"), + (get_version_from_string("1.1.2.1"), "minor"), + (get_version_from_string("1.3.1.1"), "major"), + (get_version_from_string("2.1.2.1"), "minor"), + (get_version_from_string("2.2.1.3"), "patch"), + (get_version_from_string("2.2.3.1"), "latest"), + (get_version_from_string("2.3.1.1"), "latest"), ) _ = get_tagged_versions() for case in cases: From 3c14d00aea9a0e926d128da588c6356a7300b50c Mon Sep 17 00:00:00 2001 From: rfraposa Date: Fri, 1 Apr 2022 10:18:17 -0600 Subject: [PATCH 098/117] Delete docs_release.yml --- .github/workflows/docs_release.yml | 121 ----------------------------- 1 file changed, 121 deletions(-) delete mode 100644 .github/workflows/docs_release.yml diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml deleted file mode 100644 index 66838a05552..00000000000 --- a/.github/workflows/docs_release.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: DocsReleaseChecks - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -concurrency: - group: master-release - cancel-in-progress: true -on: # yamllint disable-line rule:truthy - push: - branches: - - master - paths: - - 'docs/**' - - 'website/**' - - 'benchmark/**' - - 'docker/**' - - '.github/**' - workflow_dispatch: -jobs: - DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Images check - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 - with: - name: changed_images_aarch64 
- path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json - DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] - steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Images check - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix amd64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 - with: - name: changed_images_amd64 - path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json - DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64] - runs-on: [self-hosted, style-checker] - steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Download changed aarch64 images - uses: actions/download-artifact@v2 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }} - - name: Download changed amd64 images - uses: actions/download-artifact@v2 - with: - name: changed_images_amd64 - path: ${{ runner.temp }} - - name: Images check - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/changed_images.json - DocsRelease: - needs: DockerHubPush - runs-on: [self-hosted, func-tester] - steps: - - name: Set envs - # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/docs_release - REPO_COPY=${{runner.temp}}/docs_release/ClickHouse - CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}} - ROBOT_CLICKHOUSE_SSH_KEY< Date: Fri, 1 Apr 2022 11:15:41 -0600 Subject: [PATCH 099/117] Fixing conflicts with source repo --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- docs/en/sql-reference/aggregate-functions/reference/index.md | 1 - .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 095adc32505..1195ee55dc7 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -693,7 +693,7 @@ Tags: - `volume_name_N` — Volume name. Volume names must be unique. - `disk` — a disk within a volume. - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. -- `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move on the next volume if any (by default, 0.1). +- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). 
ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. Cofiguration examples: diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 4bbd00043a8..cd71bca2556 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -35,7 +35,6 @@ ClickHouse-specific aggregate functions: - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) -- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 83814781005..ab83017f263 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -5,7 +5,7 @@ sidebar_label: Dictionary Updates # Dictionary Updates -ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `` tag in seconds. +ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `lifetime` tag in seconds. Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. 
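The `move_factor` selection rule described in the mergetree.md hunk above (sort the existing parts from largest to smallest and pick parts until their total size is sufficient, moving everything if even that is not enough) can be sketched as follows. This is only an illustrative sketch, not ClickHouse code: the function name, the plain byte-size inputs, and the assumption that the amount to free is the gap between the `move_factor` share of the volume and the currently available space are all made up for the example.

```python
# Minimal, self-contained sketch of the part-selection rule quoted above.
# Assumptions (not taken from the patch): sizes are integers in bytes, and the
# amount that has to be freed is the gap between the move_factor share of the
# volume and the space that is currently available on it.

def select_parts_to_move(part_sizes, volume_size, free_space, move_factor=0.1):
    """Return the sizes of the parts that would be moved to the next volume."""
    need_to_free = move_factor * volume_size - free_space
    if need_to_free <= 0:
        return []  # enough space is available, nothing is moved

    selected = []
    freed = 0
    # Largest parts are considered first, as the documentation states.
    for size in sorted(part_sizes, reverse=True):
        if freed >= need_to_free:
            break
        selected.append(size)
        freed += size
    # If the total size of all parts is insufficient, every part ends up selected.
    return selected


if __name__ == "__main__":
    parts = [700, 300, 120, 50]  # sizes of existing parts on the volume
    print(select_parts_to_move(parts, volume_size=10_000, free_space=400))
    # -> [700]: one large part already covers the shortfall for move_factor=0.1
```

Running the snippet shows that a single sufficiently large part can satisfy the condition on its own, while a volume holding only small parts would have all of them selected, which matches the "all parts will be moved" clause in the documentation change.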
From 59e1ef9577083f369d847e0dda9421274bf7ab17 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Fri, 1 Apr 2022 11:18:06 -0600 Subject: [PATCH 100/117] Update grouparraysorted.md --- .../aggregate-functions/reference/grouparraysorted.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index 0237885bcb6..e34fcbc5788 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -1,5 +1,5 @@ --- -toc_priority: 108 +sidebar_position: 108 --- # groupArraySorted {#groupArraySorted} From e5ee39541e94cd5b3dff881904cdaf03f217435a Mon Sep 17 00:00:00 2001 From: rfraposa Date: Fri, 1 Apr 2022 11:34:35 -0600 Subject: [PATCH 101/117] Fixing conflicts with source repo --- docs/en/operations/named-collections.md | 230 ++++++++++++++++++ .../sql-reference/statements/create/table.md | 2 +- 2 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 docs/en/operations/named-collections.md diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md new file mode 100644 index 00000000000..52520ba76b7 --- /dev/null +++ b/docs/en/operations/named-collections.md @@ -0,0 +1,230 @@ +--- +sidebar_position: 69 +sidebar_label: "Named connections" +--- + +# Storing details for connecting to external sources in configuration files {#named-collections} + +Details for connecting to external sources (dictionaries, tables, table functions) can be saved +in configuration files and thus simplify the creation of objects and hide credentials +from users with only SQL access. + +Parameters can be set in XML `CSV` and overridden in SQL `, format = 'TSV'`. +The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`. + +Named connections are stored in the `config.xml` file of the ClickHouse server in the `` section and are applied when ClickHouse starts. + +Example of configuration: +```xml +$ cat /etc/clickhouse-server/config.d/named_collections.xml + + + ... + + +``` + +## Named connections for accessing S3. + +The description of parameters see [s3 Table Function](../sql-reference/table-functions/s3.md). + +Example of configuration: +```xml + + + + AKIAIOSFODNN7EXAMPLE + wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + CSV + https://s3.us-east-1.amazonaws.com/yourbucket/mydata/ + + + +``` + +### Example of using named connections with the s3 function + +```sql +INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz', + format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') +SELECT * FROM numbers(10000); + +SELECT count() +FROM s3(s3_mydata, filename = 'test_file.tsv.gz') + +┌─count()─┐ +│ 10000 │ +└─────────┘ +1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.) +``` + +### Example of using named connections with an S3 table + +```sql +CREATE TABLE s3_engine_table (number Int64) +ENGINE=S3(s3_mydata, url='https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', format = 'TSV') +SETTINGS input_format_with_names_use_header = 0; + +SELECT * FROM s3_engine_table LIMIT 3; +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +└────────┘ +``` + +## Named connections for accessing MySQL database + +The description of parameters see [mysql](../sql-reference/table-functions/mysql.md). 
+ +Example of configuration: +```xml + + + + myuser + mypass + 127.0.0.1 + 3306 + test + 8 + 1 + 1 + + + +``` + +### Example of using named connections with the mysql function + +```sql +SELECT count() FROM mysql(mymysql, table = 'test'); + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Example of using named connections with an MySQL table + +```sql +CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0); +SELECT count() FROM mytable; + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Example of using named connections with database with engine MySQL + +```sql +CREATE DATABASE mydatabase ENGINE = MySQL(mymysql); + +SHOW TABLES FROM mydatabase; + +┌─name───┐ +│ source │ +│ test │ +└────────┘ +``` + +### Example of using named connections with an external dictionary with source MySQL + +```sql +CREATE DICTIONARY dict (A Int64, B String) +PRIMARY KEY A +SOURCE(MYSQL(NAME mymysql TABLE 'source')) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'B', 2); + +┌─dictGet('dict', 'B', 2)─┐ +│ two │ +└─────────────────────────┘ +``` + +## Named connections for accessing PostgreSQL database + +The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). + +Example of configuration: +```xml + + + + pguser + jw8s0F4 + 127.0.0.1 + 5432 + test + test_schema + 8 + + + +``` + +### Example of using named connections with the postgresql function + +```sql +SELECT * FROM postgresql(mypg, table = 'test'); + +┌─a─┬─b───┐ +│ 2 │ two │ +│ 1 │ one │ +└───┴─────┘ + + +SELECT * FROM postgresql(mypg, table = 'test', schema = 'public'); + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + + +### Example of using named connections with database with engine PostgreSQL + +```sql +CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public'); + +SELECT * FROM mypgtable; + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + +### Example of using named connections with database with engine PostgreSQL + +```sql +CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg); + +SHOW TABLES FROM mydatabase + +┌─name─┐ +│ test │ +└──────┘ +``` + +### Example of using named connections with an external dictionary with source POSTGRESQL + +```sql +CREATE DICTIONARY dict (a Int64, b String) +PRIMARY KEY a +SOURCE(POSTGRESQL(NAME mypg TABLE test)) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'b', 2); + +┌─dictGet('dict', 'b', 2)─┐ +│ two │ +└─────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 82aad344117..ea98796427e 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -116,7 +116,7 @@ In addition, this column is not substituted when using an asterisk in a SELECT q `EPHEMERAL expr` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. +Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. 
### ALIAS {#alias} From a10bf550c465ff5002a8ae428525553bfb8f37ed Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Apr 2022 20:57:41 +0200 Subject: [PATCH 102/117] Update AsynchronousInsertQueue.cpp --- src/Interpreters/AsynchronousInsertQueue.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c218af556f0..72898dc1a07 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -198,9 +198,6 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) copyData(*read_buf, write_buf); } - std::cerr << "bytes.size: " << bytes.size() << "\n"; - std::cerr << "bytes: " << bytes << "\n"; - if (auto quota = query_context->getQuota()) quota->used(QuotaType::WRITTEN_BYTES, bytes.size()); From a565a937409120993cdc450884ff1ac8915086d5 Mon Sep 17 00:00:00 2001 From: shuchaome Date: Mon, 28 Mar 2022 12:23:51 +0800 Subject: [PATCH 103/117] reduce mutex scope when setenv LIBHDFS3_CONF --- src/Storages/HDFS/HDFSCommon.cpp | 23 ++++++++++++++--------- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 8 -------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index b186808a2db..da08f429457 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -25,6 +25,8 @@ namespace ErrorCodes const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs"; const String HDFS_URL_REGEXP = "^hdfs://[^/]*/.*"; +std::once_flag init_libhdfs3_conf_flag; + void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const String & prefix, bool isUser) { @@ -123,19 +125,22 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS); // Shall set env LIBHDFS3_CONF *before* HDFSBuilderWrapper construction. 
- String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", ""); - if (!libhdfs3_conf.empty()) + std::call_once(init_libhdfs3_conf_flag, [&config]() { - if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf)) + String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", ""); + if (!libhdfs3_conf.empty()) { - const String config_path = config.getString("config-file", "config.xml"); - const auto config_dir = std::filesystem::path{config_path}.remove_filename(); - if (std::filesystem::exists(config_dir / libhdfs3_conf)) - libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf); + if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf)) + { + const String config_path = config.getString("config-file", "config.xml"); + const auto config_dir = std::filesystem::path{config_path}.remove_filename(); + if (std::filesystem::exists(config_dir / libhdfs3_conf)) + libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf); + } + setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1); } + }); - setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1); - } HDFSBuilderWrapper builder; if (builder.get() == nullptr) throw Exception("Unable to create builder to connect to HDFS: " + diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 902307fc828..1bafa49e55b 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -22,8 +22,6 @@ ReadBufferFromHDFS::~ReadBufferFromHDFS() = default; struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory { - /// HDFS create/open functions are not thread safe - static std::mutex hdfs_init_mutex; String hdfs_uri; String hdfs_file_path; @@ -46,8 +44,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory Date: Fri, 1 Apr 2022 19:24:14 -0400 Subject: [PATCH 104/117] Added an animation to the play ui hourglass when a query is running --- programs/server/play.html | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/programs/server/play.html b/programs/server/play.html index 08934196f67..37579bcb5f9 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -266,12 +266,25 @@ color: var(--null-color); } + @keyframes hourglass-animation { + 0% { + transform: rotate(-180deg); + } + 50% { + transform: rotate(-180deg); + } + 100% { + transform: none; + } + } + #hourglass { display: none; - padding-left: 1rem; + margin-left: 1rem; font-size: 110%; color: #888; + animation: hourglass-animation 1s linear infinite; } #check-mark From 409eccec84556ced5a0e26ea7beb350b2c6fa600 Mon Sep 17 00:00:00 2001 From: Nir Peled Date: Fri, 1 Apr 2022 19:29:13 -0400 Subject: [PATCH 105/117] When showing the animation, set display to inline-block instead of inline, otherwise the animation won't start --- programs/server/play.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/play.html b/programs/server/play.html index 37579bcb5f9..ef45ee1476b 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -470,7 +470,7 @@ } document.getElementById('check-mark').style.display = 'none'; - document.getElementById('hourglass').style.display = 'inline'; + document.getElementById('hourglass').style.display = 'inline-block'; xhr.send(query); } From 017f5671327905a123e28a9ae779b7d3d0230860 Mon Sep 17 00:00:00 2001 
From: snyk-bot Date: Sat, 2 Apr 2022 04:13:37 +0000 Subject: [PATCH 106/117] fix: docs/tools/requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-MKDOCS-2438396 --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index c48a70b0909..dd641c13629 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -16,7 +16,7 @@ jsmin==3.0.0 livereload==2.6.3 Markdown==3.3.2 MarkupSafe==2.1.0 -mkdocs==1.1.2 +mkdocs==1.3.0 mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.20 nltk==3.7 From c33a6ced7b2a656dc7b76446ce9c54cf02c32e05 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Sat, 2 Apr 2022 10:16:20 -0600 Subject: [PATCH 107/117] Fixed category links --- docs/en/engines/_category_.yml | 3 ++- docs/en/example-datasets/_category_.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/_category_.yml b/docs/en/engines/_category_.yml index f8554057fdc..0c462323df4 100644 --- a/docs/en/engines/_category_.yml +++ b/docs/en/engines/_category_.yml @@ -4,4 +4,5 @@ collapsible: true collapsed: true link: type: generated-index - title: Database & Table Engines \ No newline at end of file + title: Database & Table Engines + slug: /en/table-engines \ No newline at end of file diff --git a/docs/en/example-datasets/_category_.yml b/docs/en/example-datasets/_category_.yml index 5824de77e1d..310ce834a92 100644 --- a/docs/en/example-datasets/_category_.yml +++ b/docs/en/example-datasets/_category_.yml @@ -4,4 +4,5 @@ collapsible: true collapsed: true link: type: generated-index - title: Example Datasets \ No newline at end of file + title: Example Datasets + slug: /en/example-datasets \ No newline at end of file From 192ff2fcf519e3b328b82f14eceef84a73ca0c76 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 2 Apr 2022 21:32:17 +0200 Subject: [PATCH 108/117] Drop modernize-replace-auto-ptr from .clang-tidy - std::auto_ptr was removed from the C++17 standard - ClickHouse is compiled with C++20 - thus, the clang-tidy check is obsolete by now --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index ddd0ee6d911..aef3f975d18 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -16,7 +16,6 @@ Checks: '-*, modernize-make-unique, modernize-raw-string-literal, modernize-redundant-void-arg, - modernize-replace-auto-ptr, modernize-replace-random-shuffle, modernize-use-bool-literals, modernize-use-nullptr, From afe0563856589c1c1daaa74813746744b41a6dc8 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Sat, 2 Apr 2022 15:39:11 -0600 Subject: [PATCH 109/117] Adding the playground back in --- docs/en/playground.md | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 docs/en/playground.md diff --git a/docs/en/playground.md b/docs/en/playground.md new file mode 100644 index 00000000000..6b42107af3a --- /dev/null +++ b/docs/en/playground.md @@ -0,0 +1,45 @@ +--- +sidebar_label: Playground +sidebar_position: 2 +keywords: [clickhouse, playground, getting, started, docs] +description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. 
+--- + +# ClickHouse Playground {#clickhouse-playground} + +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. + +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). + +## Credentials {#credentials} + +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | + +## Limitations {#limitations} + +The queries are executed as a read-only user. It implies some limitations: + +- DDL queries are not allowed +- INSERT queries are not allowed + +The service also have quotas on its usage. + +## Examples {#examples} + +HTTPS endpoint example with `curl`: + +``` bash +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" +``` + +TCP endpoint example with [CLI](../interfaces/cli.md): + +``` bash +clickhouse client --secure --host play.clickhouse.com --user explorer +``` From 5d4a87778583560b56189a326a050332035fc423 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Sat, 2 Apr 2022 15:44:53 -0600 Subject: [PATCH 110/117] Fixed slug for playground and install pages --- docs/en/install.md | 1 + docs/en/playground.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/en/install.md b/docs/en/install.md index a5405143d77..036452f3697 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -3,6 +3,7 @@ sidebar_label: Installation sidebar_position: 1 keywords: [clickhouse, install, installation, docs] description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. +slug: /en/getting-started/install --- # Installation {#installation} diff --git a/docs/en/playground.md b/docs/en/playground.md index 6b42107af3a..fee687dd856 100644 --- a/docs/en/playground.md +++ b/docs/en/playground.md @@ -3,6 +3,7 @@ sidebar_label: Playground sidebar_position: 2 keywords: [clickhouse, playground, getting, started, docs] description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. 
+slug: /en/getting-started/playground --- # ClickHouse Playground {#clickhouse-playground} From 4debd3b609a11a30f118f45834b22ec32f0d830c Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 2 Apr 2022 19:26:41 -0300 Subject: [PATCH 111/117] fix service start with systemd --- debian/clickhouse-server.service | 2 +- packages/clickhouse-server.service | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service index a9400b24270..028b4fbf8ab 100644 --- a/debian/clickhouse-server.service +++ b/debian/clickhouse-server.service @@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 -CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE +CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index a9400b24270..028b4fbf8ab 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 -CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE +CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). From 17107cb1dc35eb918bc918f6a781dc93278ea7bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Apr 2022 01:21:48 +0200 Subject: [PATCH 112/117] Update examples and remove obsolete embedded dictionaries from configs --- programs/server/config.xml | 21 +++++---------------- programs/server/config.yaml.example | 16 +++------------- programs/server/users.xml | 4 ++-- programs/server/users.yaml.example | 4 ++-- 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 1de379b0b2a..4e4cabdb03b 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -148,13 +148,13 @@ - - - - - - - - - + + diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 82cd9273680..03121880555 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,10 +1,9 @@ --- -sidebar_position: 69 -sidebar_label: C++ Guide -description: A list of recommendations regarding coding style, naming convention, formatting and more +toc_priority: 69 +toc_title: C++ Guide --- -# How to Write C++ Code +# How to Write C++ Code {#how-to-write-c-code} ## General Recommendations {#general-recommendations} diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 29b69f0b697..be9fc7907af 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,12 +1,11 @@ --- -sidebar_position: 70 -sidebar_label: Testing -description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. +toc_priority: 70 +toc_title: Testing --- -# ClickHouse Testing +# ClickHouse Testing {#clickhouse-testing} -## Functional Tests +## Functional Tests {#functional-tests} Functional tests are the most simple and convenient to use. 
Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. diff --git a/docs/en/engines/_category_.yml b/docs/en/engines/_category_.yml deleted file mode 100644 index 0c462323df4..00000000000 --- a/docs/en/engines/_category_.yml +++ /dev/null @@ -1,8 +0,0 @@ -position: 30 -label: 'Database & Table Engines' -collapsible: true -collapsed: true -link: - type: generated-index - title: Database & Table Engines - slug: /en/table-engines \ No newline at end of file diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md index 878307121aa..1e555a0a502 100644 --- a/docs/en/engines/database-engines/atomic.md +++ b/docs/en/engines/database-engines/atomic.md @@ -1,9 +1,9 @@ --- -sidebar_label: Atomic -sidebar_position: 10 +toc_priority: 32 +toc_title: Atomic --- -# Atomic +# Atomic {#atomic} It supports non-blocking [DROP TABLE](#drop-detach-table) and [RENAME TABLE](#rename-table) queries and atomic [EXCHANGE TABLES](#exchange-tables) queries. `Atomic` database engine is used by default. @@ -18,21 +18,14 @@ CREATE DATABASE test [ENGINE = Atomic]; ### Table UUID {#table-uuid} All tables in database `Atomic` have persistent [UUID](../../sql-reference/data-types/uuid.md) and store data in directory `/clickhouse_path/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`, where `xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy` is UUID of the table. -Usually, the UUID is generated automatically, but the user can also explicitly specify the UUID in the same way when creating the table (this is not recommended). - -For example: +Usually, the UUID is generated automatically, but the user can also explicitly specify the UUID in the same way when creating the table (this is not recommended). To display the `SHOW CREATE` query with the UUID you can use setting [show_table_uuid_in_table_create_query_if_not_nil](../../operations/settings/settings.md#show_table_uuid_in_table_create_query_if_not_nil). For example: ```sql CREATE TABLE name UUID '28f1c61c-2970-457a-bffe-454156ddcfef' (n UInt64) ENGINE = ...; ``` - -:::note -You can use the [show_table_uuid_in_table_create_query_if_not_nil](../../operations/settings/settings.md#show_table_uuid_in_table_create_query_if_not_nil) setting to display the UUID with the `SHOW CREATE` query. -::: - ### RENAME TABLE {#rename-table} -[RENAME](../../sql-reference/statements/rename.md) queries are performed without changing the UUID or moving table data. These queries do not wait for the completion of queries using the table and are executed instantly. +[RENAME](../../sql-reference/statements/rename.md) queries are performed without changing UUID and moving table data. These queries do not wait for the completion of queries using the table and are executed instantly. ### DROP/DETACH TABLE {#drop-detach-table} diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index 0cee580abcd..dd8959d2700 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -6,11 +6,11 @@ toc_title: Introduction # Database Engines {#database-engines} -Database engines allow you to work with tables. By default, ClickHouse uses the [Atomic](../../engines/database-engines/atomic.md) database engine, which provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md). +Database engines allow you to work with tables. 
-Here is a complete list of available database engines. Follow the links for more details: +By default, ClickHouse uses database engine [Atomic](../../engines/database-engines/atomic.md). It provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md). -- [Atomic](../../engines/database-engines/atomic.md) +You can also use the following database engines: - [MySQL](../../engines/database-engines/mysql.md) @@ -18,6 +18,8 @@ Here is a complete list of available database engines. Follow the links for more - [Lazy](../../engines/database-engines/lazy.md) +- [Atomic](../../engines/database-engines/atomic.md) + - [PostgreSQL](../../engines/database-engines/postgresql.md) - [Replicated](../../engines/database-engines/replicated.md) diff --git a/docs/en/engines/database-engines/lazy.md b/docs/en/engines/database-engines/lazy.md index b95ade19df4..ecd4b94f579 100644 --- a/docs/en/engines/database-engines/lazy.md +++ b/docs/en/engines/database-engines/lazy.md @@ -1,6 +1,6 @@ --- -sidebar_label: Lazy -sidebar_position: 20 +toc_priority: 31 +toc_title: Lazy --- # Lazy {#lazy} diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index df072682097..d7dcf21cb02 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -1,15 +1,16 @@ --- -sidebar_label: MaterializedMySQL -sidebar_position: 70 +toc_priority: 29 +toc_title: MaterializedMySQL --- -# [experimental] MaterializedMySQL +# [experimental] MaterializedMySQL {#materialized-mysql} -:::warning -This is an experimental feature that should not be used in production. -::: +!!! warning "Warning" + This is an experimental feature that should not be used in production. -Creates a ClickHouse database with all the tables existing in MySQL, and all the data in those tables. The ClickHouse server works as MySQL replica. It reads `binlog` and performs DDL and DML queries. +Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables. + +ClickHouse server works as MySQL replica. It reads binlog and performs DDL and DML queries. ## Creating a Database {#creating-a-database} @@ -30,6 +31,8 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo - `max_rows_in_buffer` — Maximum number of rows that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. - `max_bytes_in_buffer` — Maximum number of bytes that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. +- `max_rows_in_buffers` — Maximum number of rows that data is allowed to cache in memory (for database and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. +- `max_bytes_in_buffers` — Maximum number of bytes that data is allowed to cache in memory (for database and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. - `max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. 
- `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. - `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). @@ -49,9 +52,8 @@ For the correct work of `MaterializedMySQL`, there are few mandatory `MySQL`-sid - `default_authentication_plugin = mysql_native_password` since `MaterializedMySQL` can only authorize with this method. - `gtid_mode = on` since GTID based logging is a mandatory for providing correct `MaterializedMySQL` replication. -:::note -While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`. -::: +!!! attention "Attention" + While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`. ## Virtual Columns {#virtual-columns} @@ -74,7 +76,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | FLOAT | [Float32](../../sql-reference/data-types/float.md) | | DOUBLE | [Float64](../../sql-reference/data-types/float.md) | | DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | -| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | +| DATE, NEWDATE | [Date32](../../sql-reference/data-types/date32.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | | YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | @@ -218,14 +220,13 @@ extra care needs to be taken. You may specify overrides for tables that do not exist yet. -:::warning -It is easy to break replication with table overrides if not used with care. For example: +!!! warning "Warning" + It is easy to break replication with table overrides if not used with care. For example: -* If an ALIAS column is added with a table override, and a column with the same name is later added to the source - MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops. -* It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in - `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop. -::: + * If an ALIAS column is added with a table override, and a column with the same name is later added to the source + MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops. + * It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in + `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop. 
## Examples of Use {#examples-of-use} diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index ff8f7b192e0..56793435fac 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -1,6 +1,6 @@ --- -sidebar_label: MaterializedPostgreSQL -sidebar_position: 60 +toc_priority: 30 +toc_title: MaterializedPostgreSQL --- # [experimental] MaterializedPostgreSQL {#materialize-postgresql} @@ -46,9 +46,7 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de ATTACH TABLE postgres_database.new_table; ``` -:::warning -Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. -::: +Warning: before version 22.1 adding table to replication left unremoved temprorary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in clickhouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. Issue is fixed in 22.1. ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication} @@ -137,70 +135,69 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::warning -Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. -::: +!!! warning "Warning" + Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ## Settings {#settings} -1. `materialized_postgresql_tables_list` {#materialized-postgresql-tables-list} +1. materialized_postgresql_tables_list {#materialized-postgresql-tables-list} - Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. +Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. - Default value: empty list — means whole PostgreSQL database will be replicated. +Default value: empty list — means whole PostgreSQL database will be replicated. -2. `materialized_postgresql_schema` {#materialized-postgresql-schema} +2. materialized_postgresql_schema {#materialized-postgresql-schema} - Default value: empty string. (Default schema is used) +Default value: empty string. (Default schema is used) -3. `materialized_postgresql_schema_list` {#materialized-postgresql-schema-list} +3. materialized_postgresql_schema_list {#materialized-postgresql-schema-list} - Default value: empty list. (Default schema is used) +Default value: empty list. (Default schema is used) -4. `materialized_postgresql_allow_automatic_update` {#materialized-postgresql-allow-automatic-update} +4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update} - Do not use this setting before 22.1 version. +Do not use this setting before 22.1 version. 
- Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. +Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. - Possible values: +Possible values: - - 0 — The table is not automatically updated in the background, when schema changes are detected. - - 1 — The table is automatically updated in the background, when schema changes are detected. +- 0 — The table is not automatically updated in the background, when schema changes are detected. +- 1 — The table is automatically updated in the background, when schema changes are detected. - Default value: `0`. +Default value: `0`. -5. `materialized_postgresql_max_block_size` {#materialized-postgresql-max-block-size} +5. materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size} - Sets the number of rows collected in memory before flushing data into PostgreSQL database table. +Sets the number of rows collected in memory before flushing data into PostgreSQL database table. - Possible values: +Possible values: - - Positive integer. +- Positive integer. - Default value: `65536`. +Default value: `65536`. -6. `materialized_postgresql_replication_slot` {#materialized-postgresql-replication-slot} +6. materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot} - A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. +A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. -7. `materialized_postgresql_snapshot` {#materialized-postgresql-snapshot} +7. materialized_postgresql_snapshot {#materialized-postgresql-snapshot} - A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`. +A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`. 
- ``` sql - CREATE DATABASE database1 - ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') - SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; +``` sql +CREATE DATABASE database1 +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') +SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; - SELECT * FROM database1.table1; - ``` +SELECT * FROM database1.table1; +``` - The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. +The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. - ``` sql - ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; - ``` +``` sql +ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; +``` ## Notes {#notes} @@ -216,47 +213,47 @@ Please note that this should be used only if it is actually needed. If there is 1. Configure replication slot in PostgreSQL. - ```yaml - apiVersion: "acid.zalan.do/v1" - kind: postgresql - metadata: - name: acid-demo-cluster - spec: - numberOfInstances: 2 - postgresql: - parameters: - wal_level: logical - patroni: - slots: - clickhouse_sync: - type: logical - database: demodb - plugin: pgoutput - ``` +```yaml +apiVersion: "acid.zalan.do/v1" +kind: postgresql +metadata: + name: acid-demo-cluster +spec: + numberOfInstances: 2 + postgresql: + parameters: + wal_level: logical + patroni: + slots: + clickhouse_sync: + type: logical + database: demodb + plugin: pgoutput +``` 2. Wait for replication slot to be ready, then begin a transaction and export the transaction snapshot identifier: - ```sql - BEGIN; - SELECT pg_export_snapshot(); - ``` +```sql +BEGIN; +SELECT pg_export_snapshot(); +``` 3. In ClickHouse create database: - ```sql - CREATE DATABASE demodb - ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') - SETTINGS - materialized_postgresql_replication_slot = 'clickhouse_sync', - materialized_postgresql_snapshot = '0000000A-0000023F-3', - materialized_postgresql_tables_list = 'table1,table2,table3'; - ``` +```sql +CREATE DATABASE demodb +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') +SETTINGS + materialized_postgresql_replication_slot = 'clickhouse_sync', + materialized_postgresql_snapshot = '0000000A-0000023F-3', + materialized_postgresql_tables_list = 'table1,table2,table3'; +``` 4. End the PostgreSQL transaction once replication to ClickHouse DB is confirmed. 
Verify that replication continues after failover: - ```bash - kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force' - ``` +```bash +kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force' +``` ### Required permissions diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index 89a0786a9ec..df4965b1f8c 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -1,9 +1,9 @@ --- -sidebar_position: 50 -sidebar_label: MySQL +toc_priority: 30 +toc_title: MySQL --- -# MySQL +# MySQL {#mysql} Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL. @@ -49,6 +49,8 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md). +Because of the ClickHouse date type has a different range from the MySQL date range,If the MySQL date type is out of the range of ClickHouse date, you can use the setting mysql_datatypes_support_level to modify the mapping from the MySQL date type to the Clickhouse date type: date2Date32 (convert MySQL's date type to ClickHouse Date32) or date2String(convert MySQL's date type to ClickHouse String,this is usually used when your mysql data is less than 1925) or default(convert MySQL's date type to ClickHouse Date). + [Nullable](../../sql-reference/data-types/nullable.md) is supported. ## Global Variables Support {#global-variables-support} @@ -59,9 +61,8 @@ These variables are supported: - `version` - `max_allowed_packet` -:::warning -By now these variables are stubs and don't correspond to anything. -::: +!!! warning "Warning" + By now these variables are stubs and don't correspond to anything. Example: diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index bc5e93d0923..76ef484e773 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: PostgreSQL +toc_priority: 35 +toc_title: PostgreSQL --- # PostgreSQL {#postgresql} diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 63d955dc889..bdc17d32393 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -1,6 +1,6 @@ --- -sidebar_position: 30 -sidebar_label: Replicated +toc_priority: 36 +toc_title: Replicated --- # [experimental] Replicated {#replicated} @@ -20,9 +20,8 @@ One ClickHouse server can have multiple replicated databases running and updatin - `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. - `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. -:::warning -For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. 
These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. -::: +!!! note "Warning" + For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. ## Specifics and Recommendations {#specifics-and-recommendations} diff --git a/docs/en/engines/database-engines/sqlite.md b/docs/en/engines/database-engines/sqlite.md index 2f8b44c9a09..ee9db90859f 100644 --- a/docs/en/engines/database-engines/sqlite.md +++ b/docs/en/engines/database-engines/sqlite.md @@ -1,6 +1,6 @@ --- -sidebar_position: 55 -sidebar_label: SQLite +toc_priority: 32 +toc_title: SQLite --- # SQLite {#sqlite} diff --git a/docs/en/engines/index.md b/docs/en/engines/index.md new file mode 100644 index 00000000000..b3f4a4f7b69 --- /dev/null +++ b/docs/en/engines/index.md @@ -0,0 +1,15 @@ +--- +toc_folder_title: Engines +toc_hidden: true +toc_priority: 25 +toc_title: hidden +--- + +# ClickHouse Engines {#clickhouse-engines} + +There are two key engine kinds in ClickHouse: + +- [Table engines](../engines/table-engines/index.md) +- [Database engines](../engines/database-engines/index.md) + +{## [Original article](https://clickhouse.com/docs/en/engines/) ##} diff --git a/docs/en/engines/table-engines/integrations/ExternalDistributed.md b/docs/en/engines/table-engines/integrations/ExternalDistributed.md index c9aae1934db..0ecbc5383e1 100644 --- a/docs/en/engines/table-engines/integrations/ExternalDistributed.md +++ b/docs/en/engines/table-engines/integrations/ExternalDistributed.md @@ -1,6 +1,6 @@ --- -sidebar_position: 12 -sidebar_label: ExternalDistributed +toc_priority: 12 +toc_title: ExternalDistributed --- # ExternalDistributed {#externaldistributed} @@ -51,6 +51,3 @@ You can specify any number of shards and any number of replicas for each shard. 
- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) - [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) - [Distributed table engine](../../../engines/table-engines/special/distributed.md) - - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 701d190f022..385abeb83ad 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -1,6 +1,6 @@ --- -sidebar_position: 9 -sidebar_label: EmbeddedRocksDB +toc_priority: 9 +toc_title: EmbeddedRocksDB --- # EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine} diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 503bd779abf..0d6d90f9d31 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -1,6 +1,6 @@ --- -sidebar_position: 6 -sidebar_label: HDFS +toc_priority: 6 +toc_title: HDFS --- # HDFS {#table_engines-hdfs} @@ -98,9 +98,8 @@ Table consists of all the files in both directories (all files should satisfy fo CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::warning -If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
**Example** diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index 6731f0e7559..61147467690 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -1,6 +1,6 @@ --- -sidebar_position: 4 -sidebar_label: Hive +toc_priority: 4 +toc_title: Hive --- # Hive {#hive} @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` @@ -406,5 +406,3 @@ f_char: hello world f_bool: true day: 2021-09-18 ``` - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/hive/) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index 9230ad624ba..a06b4c78394 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: Integrations +toc_folder_title: Integrations +toc_priority: 1 --- # Table Engines for Integrations {#table-engines-for-integrations} diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 0ce31f36070..2f442fd7753 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -1,6 +1,6 @@ --- -sidebar_position: 3 -sidebar_label: JDBC +toc_priority: 3 +toc_title: JDBC --- # JDBC {#table-engine-jdbc} diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 3a8d98e1ca9..1d80f143098 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -1,6 +1,6 @@ --- -sidebar_position: 8 -sidebar_label: Kafka +toc_priority: 8 +toc_title: Kafka --- # Kafka {#kafka} @@ -87,9 +87,8 @@ Examples: Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects. If possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects. If possible, switch old projects to the method described above. ``` sql Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format @@ -134,7 +133,7 @@ Example: SELECT level, sum(total) FROM daily GROUP BY level; ``` -To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. +To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings/#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. 
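For illustration only, the two settings mentioned above can be tuned. In practice they are usually applied through a settings profile rather than per session; the values below are arbitrary assumptions, shown only to make the units (milliseconds and rows) concrete:

```sql
-- Flush Kafka-consumed data at least once per second, or as soon as a block
-- of 65536 rows has been formed, whichever happens first.
SET stream_flush_interval_ms = 1000;
SET max_insert_block_size = 65536;
```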
To stop receiving topic data or to change the conversion logic, detach the materialized view: diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 61f97961ddb..fa349e49af5 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 12 -sidebar_label: MaterializedPostgreSQL +toc_priority: 12 +toc_title: MaterializedPostgreSQL --- # MaterializedPostgreSQL {#materialize-postgresql} @@ -52,8 +52,5 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::warning -Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. -::: - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/materialized-postgresql) +!!! warning "Warning" + Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index d212ab4720f..475416ffb94 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -1,6 +1,6 @@ --- -sidebar_position: 5 -sidebar_label: MongoDB +toc_priority: 5 +toc_title: MongoDB --- # MongoDB {#mongodb} diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index e962db58873..7f28f16aa27 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 4 -sidebar_label: MySQL +toc_priority: 4 +toc_title: MySQL --- # MySQL {#mysql} @@ -148,5 +148,3 @@ Default value: `16`. - [The mysql table function](../../../sql-reference/table-functions/mysql.md) - [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index ed2b77d7ca3..0ef21d8565a 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -1,6 +1,6 @@ --- -sidebar_position: 2 -sidebar_label: ODBC +toc_priority: 2 +toc_title: ODBC --- # ODBC {#table-engine-odbc} diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index d6826000a1a..789759ec521 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 11 -sidebar_label: PostgreSQL +toc_priority: 11 +toc_title: PostgreSQL --- # PostgreSQL {#postgresql} @@ -73,9 +73,8 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL `Array` types are converted into ClickHouse arrays. 
-:::warning -Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. -::: +!!! info "Note" + Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. Supports multiple replicas that must be listed by `|`. For example: diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 6653b76594a..78c144ac76f 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -1,6 +1,6 @@ --- -sidebar_position: 10 -sidebar_label: RabbitMQ +toc_priority: 10 +toc_title: RabbitMQ --- # RabbitMQ Engine {#rabbitmq-engine} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 42abc2a0b1e..c7301a55bf0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -1,6 +1,6 @@ --- -sidebar_position: 7 -sidebar_label: S3 +toc_priority: 7 +toc_title: S3 --- # S3 Table Engine {#table-engine-s3} @@ -66,9 +66,8 @@ For more information about virtual columns see [here](../../../engines/table-eng Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -:::warning -If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
**Example with wildcards 1** @@ -159,5 +158,3 @@ The following settings can be specified in configuration file for given endpoint ## See also - [s3 table function](../../../sql-reference/table-functions/s3.md) - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/s3/) diff --git a/docs/en/engines/table-engines/integrations/sqlite.md b/docs/en/engines/table-engines/integrations/sqlite.md index 45cc1cfc28a..391f1696291 100644 --- a/docs/en/engines/table-engines/integrations/sqlite.md +++ b/docs/en/engines/table-engines/integrations/sqlite.md @@ -1,6 +1,6 @@ --- -sidebar_position: 7 -sidebar_label: SQLite +toc_priority: 7 +toc_title: SQLite --- # SQLite {#sqlite} @@ -56,7 +56,4 @@ SELECT * FROM sqlite_db.table2 ORDER BY col1; **See Also** - [SQLite](../../../engines/database-engines/sqlite.md) engine -- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function - - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/sqlite/) +- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function \ No newline at end of file diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 89eb08ad7b9..910df09e67f 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -1,6 +1,7 @@ --- -sidebar_position: 20 -sidebar_label: Log Family +toc_folder_title: Log Family +toc_priority: 29 +toc_title: Introduction --- # Log Engine Family {#log-engine-family} diff --git a/docs/en/engines/table-engines/log-family/log.md b/docs/en/engines/table-engines/log-family/log.md index 8858699f045..2aeef171128 100644 --- a/docs/en/engines/table-engines/log-family/log.md +++ b/docs/en/engines/table-engines/log-family/log.md @@ -10,6 +10,3 @@ The engine belongs to the family of `Log` engines. See the common properties of `Log` differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. The `Log` engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The `Log` engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. 
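A minimal, hypothetical sketch of the `Log` engine behaviour described above (the table name and columns are made up for illustration):

```sql
CREATE TABLE log_demo
(
    ts DateTime,
    message String
)
ENGINE = Log;

INSERT INTO log_demo VALUES (now(), 'first run');

-- Reads can run concurrently; writes block reads and other writes.
SELECT * FROM log_demo;
```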
- -[Original article](https://clickhouse.com/docs/en/engines/table-engines/log-family/log/) - diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 7be10cec2f5..8c9f8dd8ce3 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 60 -sidebar_label: AggregatingMergeTree +toc_priority: 35 +toc_title: AggregatingMergeTree --- # AggregatingMergeTree {#aggregatingmergetree} @@ -42,9 +42,8 @@ When creating a `AggregatingMergeTree` table the same [clauses](../../../engines Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects and, if possible, switch the old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects and, if possible, switch the old projects to the method described above. ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 22863611e79..271b8b20fdb 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 70 -sidebar_label: CollapsingMergeTree +toc_priority: 36 +toc_title: CollapsingMergeTree --- # CollapsingMergeTree {#table_engine-collapsingmergetree} @@ -42,9 +42,8 @@ When creating a `CollapsingMergeTree` table, the same [query clauses](../../../e Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects and, if possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects and, if possible, switch the old projects to the method described above. ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 716528f8d77..b58e90a3d92 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -1,15 +1,12 @@ --- -sidebar_position: 30 -sidebar_label: Custom Partitioning Key +toc_priority: 32 +toc_title: Custom Partitioning Key --- # Custom Partitioning Key {#custom-partitioning-key} -:::warning -In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). - -You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. -::: +!!! warning "Warning" + In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). 
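As a sketch of the recommendation above, with hypothetical table and column names: partition by month, and put the client identifier first in the sorting key rather than in the partition key:

```sql
CREATE TABLE visits_demo
(
    CounterID UInt32,
    VisitDate Date,
    Duration  UInt32
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(VisitDate)    -- coarse, monthly partitions
ORDER BY (CounterID, VisitDate);    -- client identifier leads the sorting key instead
```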
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. @@ -43,9 +40,8 @@ By default, the floating-point partition key is not supported. To use it enable When inserting new data to a table, this data is stored as a separate part (chunk) sorted by the primary key. In 10-15 minutes after inserting, the parts of the same partition are merged into the entire part. -:::info -A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. -::: +!!! info "Info" + A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table: @@ -82,9 +78,8 @@ Let’s break down the name of the part: `201901_1_9_2_11`: - `2` is the chunk level (the depth of the merge tree it is formed from). - `11` is the mutation version (if a part mutated) -:::info -The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). -::: +!!! info "Info" + The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive. diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index 35f3f99d5a9..e1d571c909c 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 90 -sidebar_label: GraphiteMergeTree +toc_priority: 38 +toc_title: GraphiteMergeTree --- # GraphiteMergeTree {#graphitemergetree} @@ -54,9 +54,8 @@ When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects and, if possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects and, if possible, switch the old projects to the method described above. ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -120,13 +119,12 @@ default ... ``` -:::warning -Patterns must be strictly ordered: +!!! 
warning "Attention" + Patterns must be strictly ordered: -1. Patterns without `function` or `retention`. -1. Patterns with both `function` and `retention`. -1. Pattern `default`. -::: + 1. Patterns without `function` or `retention`. + 1. Patterns with both `function` and `retention`. + 1. Pattern `default`. When processing a row, ClickHouse checks the rules in the `pattern` sections. Each of `pattern` (including `default`) sections can contain `function` parameter for aggregation, `retention` parameters or both. If the metric name matches the `regexp`, the rules from the `pattern` section (or sections) are applied; otherwise, the rules from the `default` section are used. @@ -255,6 +253,7 @@ Valid values: ``` -:::warning -Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). -::: +!!! warning "Warning" + Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). + +[Original article](https://clickhouse.com/docs/en/operations/table_engines/graphitemergetree/) diff --git a/docs/en/engines/table-engines/mergetree-family/index.md b/docs/en/engines/table-engines/mergetree-family/index.md index 37e7bf5b589..32796a252ac 100644 --- a/docs/en/engines/table-engines/mergetree-family/index.md +++ b/docs/en/engines/table-engines/mergetree-family/index.md @@ -1,6 +1,7 @@ --- -sidebar_position: 10 -sidebar_label: MergeTree Family +toc_folder_title: MergeTree Family +toc_priority: 28 +toc_title: Introduction --- # MergeTree Engine Family {#mergetree-engine-family} diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 1195ee55dc7..b70cd225cdd 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 11 -sidebar_label: MergeTree +toc_priority: 30 +toc_title: MergeTree --- # MergeTree {#table_engines-mergetree} @@ -27,9 +27,8 @@ Main features: If necessary, you can set the data sampling method in the table. -:::info -The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family. -::: +!!! info "Info" + The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family. ## Creating a Table {#table_engine-mergetree-creating-a-table} @@ -128,9 +127,8 @@ The `index_granularity` setting can be omitted because 8192 is the default value Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects. If possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects. If possible, switch old projects to the method described above. 
``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -306,8 +304,8 @@ CREATE TABLE table_name Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries: ``` sql -SELECT count() FROM table WHERE s < 'z' -SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +SELECT count() FROM table WHERE s < 'z' +SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` #### Available Types of Indices {#available-types-of-indices} @@ -366,7 +364,7 @@ The `set` index can be used with all functions. Function subsets for other index | Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | |------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| | [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | | [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | | [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | @@ -384,10 +382,8 @@ The `set` index can be used with all functions. Function subsets for other index Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. -:::note -Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false. - -For example: +!!! note "Note" + Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: - Can be optimized: - `s LIKE '%test%'` @@ -395,13 +391,12 @@ For example: - `s = 1` - `NOT s != 1` - `startsWith(s, 'test')` -- Can not be optimized: +- Can’t be optimized: - `NOT s LIKE '%test%'` - `s NOT LIKE '%test%'` - `NOT s = 1` - `s != 1` - `NOT startsWith(s, 'test')` -::: ## Projections {#projections} Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined in part-level. It provides consistency guarantees along with automatic usage in queries. diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 47651527f99..ca0db24e640 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: ReplacingMergeTree +toc_priority: 33 +toc_title: ReplacingMergeTree --- # ReplacingMergeTree {#replacingmergetree} @@ -29,9 +29,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). 
-:::warning -Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. -::: +!!! note "Attention" + Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. **ReplacingMergeTree Parameters** @@ -50,9 +49,8 @@ When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects and, if possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects and, if possible, switch the old projects to the method described above. ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 67c503854a9..d574bd9449e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -1,6 +1,6 @@ --- -sidebar_position: 20 -sidebar_label: Data Replication +toc_priority: 31 +toc_title: Data Replication --- # Data Replication {#table_engines-replication} @@ -31,9 +31,8 @@ ClickHouse uses [Apache ZooKeeper](https://zookeeper.apache.org) for storing rep To use replication, set parameters in the [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) server configuration section. -:::warning -Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. -::: +!!! attention "Attention" + Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. Example of setting the addresses of the ZooKeeper cluster: diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index 5d180782ed3..5726acf000e 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: SummingMergeTree +toc_priority: 34 +toc_title: SummingMergeTree --- # SummingMergeTree {#summingmergetree} @@ -41,9 +41,8 @@ When creating a `SummingMergeTree` table the same [clauses](../../../engines/tab Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects and, if possible, switch the old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects and, if possible, switch the old projects to the method described above. 
``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 77cf192dcda..8266bf34876 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -1,6 +1,6 @@ --- -sidebar_position: 80 -sidebar_label: VersionedCollapsingMergeTree +toc_priority: 37 +toc_title: VersionedCollapsingMergeTree --- # VersionedCollapsingMergeTree {#versionedcollapsingmergetree} @@ -53,9 +53,8 @@ When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../. Deprecated Method for Creating a Table -:::warning -Do not use this method in new projects. If possible, switch old projects to the method described above. -::: +!!! attention "Attention" + Do not use this method in new projects. If possible, switch the old projects to the method described above. ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index a0aff2ec813..d1f92d347a4 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -1,6 +1,6 @@ --- -sidebar_position: 120 -sidebar_label: Buffer +toc_priority: 45 +toc_title: Buffer --- # Buffer Table Engine {#buffer} @@ -54,9 +54,8 @@ If the set of columns in the Buffer table does not match the set of columns in a If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared. The same thing happens if the subordinate table does not exist when the buffer is flushed. -:::warning -Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. -::: +!!! attention "Attention" + Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. If the server is restarted abnormally, the data in the buffer is lost. @@ -74,4 +73,4 @@ A Buffer table is used when too many INSERTs are received from a large number of Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second, while inserting larger blocks of data can produce over a million rows per second (see the section “Performance”). 
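+
+As a minimal sketch of this batching pattern (the `metrics` table in the `default` database is hypothetical and the thresholds are illustrative, not recommendations): frequent small INSERTs target the Buffer table, which flushes them into the underlying MergeTree table in larger blocks:
+
+``` sql
+-- Destination table that stores the data permanently.
+CREATE TABLE default.metrics
+(
+    ts DateTime,
+    value Float64
+)
+ENGINE = MergeTree
+ORDER BY ts;
+
+-- Buffer table in front of it: 16 buffer layers; a flush happens when all min
+-- thresholds (10 s, 10000 rows, 10 MB) are met or any max threshold
+-- (100 s, 1000000 rows, 100 MB) is reached.
+CREATE TABLE default.metrics_buffer AS default.metrics
+ENGINE = Buffer(default, metrics, 16, 10, 100, 10000, 1000000, 10000000, 100000000);
+
+-- Small INSERTs go to the buffer; SELECTs from metrics_buffer see both buffered and flushed rows.
+INSERT INTO default.metrics_buffer VALUES (now(), 1.0);
+```
+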
-[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/buffer/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/buffer/) diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md index 67b97e37d44..d76adebe01e 100644 --- a/docs/en/engines/table-engines/special/dictionary.md +++ b/docs/en/engines/table-engines/special/dictionary.md @@ -1,6 +1,6 @@ --- -sidebar_position: 20 -sidebar_label: Dictionary +toc_priority: 35 +toc_title: Dictionary --- # Dictionary Table Engine {#dictionary} @@ -97,5 +97,3 @@ select * from products limit 1; **See Also** - [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function) - -[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/dictionary/) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index db89175e4d9..5072465687e 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -1,6 +1,6 @@ --- -sidebar_position: 10 -sidebar_label: Distributed +toc_priority: 33 +toc_title: Distributed --- # Distributed Table Engine {#distributed} @@ -64,19 +64,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 - `monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) -:::note -**Durability settings** (`fsync_...`): +!!! note "Note" -- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards. -- May significantly decrease the inserts' performance -- Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings` + **Durability settings** (`fsync_...`): -For **Insert limit settings** (`..._insert`) see also: + - Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards. + - May significantly decrease the inserts' performance + - Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. 
If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings` -- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting -- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting -- `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` -::: + For **Insert limit settings** (`..._insert`) see also: + + - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting + - [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting + - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` **Example** @@ -215,9 +215,8 @@ To learn more about how distibuted `in` and `global in` queries are processed, r - `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md). -:::note -Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.md) table functions internally create temporary Distributed table, `_shard_num` is available there too. -::: +!!! note "Note" + Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.md) table functions internally create temporary Distributed table, `_shard_num` is available there too. **See Also** @@ -226,4 +225,3 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](. - [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions -[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/distributed/) diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 1f4336c74fe..4ec90905fe5 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -1,6 +1,6 @@ --- -sidebar_position: 130 -sidebar_label: External Data +toc_priority: 45 +toc_title: External Data --- # External Data for Query Processing {#external-data-for-query-processing} @@ -63,3 +63,4 @@ $ curl -F 'passwd=@passwd.tsv;' 'http://localhost:8123/?query=SELECT+shell,+coun For distributed query processing, the temporary tables are sent to all the remote servers. +[Original article](https://clickhouse.com/docs/en/operations/table_engines/external_data/) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 6e4449bf1a9..7673f45ca8d 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: File +toc_priority: 37 +toc_title: File --- # File Table Engine {#table_engines-file} @@ -30,9 +30,8 @@ When creating table using `File(Format)` it creates empty subdirectory in that f You may manually create this subfolder and file in server filesystem and then [ATTACH](../../../sql-reference/statements/attach.md) it to table information with matching name, so you can query data from that file. 
-:::warning -Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. -::: +!!! warning "Warning" + Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. ## Example {#example} @@ -86,4 +85,4 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 - Indices - Replication -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/file/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/file/) diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index 453f3b5db0b..fabe31897bb 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -1,6 +1,6 @@ --- -sidebar_position: 140 -sidebar_label: GenerateRandom +toc_priority: 46 +toc_title: GenerateRandom --- # GenerateRandom Table Engine {#table_engines-generate} @@ -56,4 +56,4 @@ SELECT * FROM generate_engine_table LIMIT 3 - Indices - Replication -[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/generate/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/generate/) diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md index f87cd86c891..872c01385e0 100644 --- a/docs/en/engines/table-engines/special/index.md +++ b/docs/en/engines/table-engines/special/index.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: Special +toc_folder_title: Special +toc_priority: 31 --- # Special Table Engines {#special-table-engines} diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index 7d6f6e99b9f..4e4a5e9fc03 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -1,15 +1,14 @@ --- -sidebar_position: 70 -sidebar_label: Join +toc_priority: 40 +toc_title: Join --- # Join Table Engine {#join} Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations. -:::note -This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself. -::: +!!! note "Note" + This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself. ## Creating a Table {#creating-a-table} @@ -126,5 +125,3 @@ ALTER TABLE id_val_join DELETE WHERE id = 3; │ 1 │ 21 │ └────┴─────┘ ``` - -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/join/) diff --git a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md index 6c9a5e84f60..75161829a7e 100644 --- a/docs/en/engines/table-engines/special/materializedview.md +++ b/docs/en/engines/table-engines/special/materializedview.md @@ -1,10 +1,10 @@ --- -sidebar_position: 100 -sidebar_label: MaterializedView +toc_priority: 43 +toc_title: MaterializedView --- # MaterializedView Table Engine {#materializedview} Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). 
For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. -[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/materializedview/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/materializedview/) diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 1e154a323d1..eb557d36c50 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -1,6 +1,6 @@ --- -sidebar_position: 110 -sidebar_label: Memory +toc_priority: 44 +toc_title: Memory --- # Memory Table Engine {#memory} @@ -15,4 +15,4 @@ Normally, using this table engine is not justified. However, it can be used for The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”). -[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/memory/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/memory/) diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index bcad7a0c1f6..27f783a3cea 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -1,6 +1,6 @@ --- -sidebar_position: 30 -sidebar_label: Merge +toc_priority: 36 +toc_title: Merge --- # Merge Table Engine {#merge} @@ -12,7 +12,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When ## Creating a Table {#creating-a-table} ``` sql -CREATE TABLE ... Engine=Merge(db_name, tables_regexp) + CREATE TABLE ... Engine=Merge(db_name, tables_regexp) ``` **Engine Parameters** @@ -81,5 +81,3 @@ SELECT * FROM WatchLog; - [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) - [merge](../../../sql-reference/table-functions/merge.md) table function - -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/merge/) diff --git a/docs/en/engines/table-engines/special/null.md b/docs/en/engines/table-engines/special/null.md index 309b09ba779..39ed9c1c1a6 100644 --- a/docs/en/engines/table-engines/special/null.md +++ b/docs/en/engines/table-engines/special/null.md @@ -1,15 +1,13 @@ --- -sidebar_position: 50 -sidebar_label: 'Null' +toc_priority: 38 +toc_title: 'Null' --- # Null Table Engine {#null} When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty. -:::note -If you are wondering why this is useful, note that you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. -::: +!!! info "Hint" + However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. 
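+
+A minimal sketch of that pattern, with hypothetical table names: rows inserted into the `Null` table are discarded, while the materialized view forwards an aggregate of them into a real table:
+
+``` sql
+CREATE TABLE raw_events (ts DateTime, user_id UInt64) ENGINE = Null;
+
+CREATE TABLE daily_events (day Date, events UInt64) ENGINE = SummingMergeTree ORDER BY day;
+
+-- Each INSERT into raw_events triggers the view; the raw rows themselves are discarded.
+CREATE MATERIALIZED VIEW daily_events_mv TO daily_events AS
+    SELECT toDate(ts) AS day, count() AS events
+    FROM raw_events
+    GROUP BY day;
+```
+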
- -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/null/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/null/) diff --git a/docs/en/engines/table-engines/special/set.md b/docs/en/engines/table-engines/special/set.md index 5fd80ba55fe..c38c2418093 100644 --- a/docs/en/engines/table-engines/special/set.md +++ b/docs/en/engines/table-engines/special/set.md @@ -1,6 +1,6 @@ --- -sidebar_position: 60 -sidebar_label: Set +toc_priority: 39 +toc_title: Set --- # Set Table Engine {#set} @@ -20,4 +20,4 @@ When creating a table, the following settings are applied: - [persistent](../../../operations/settings/settings.md#persistent) -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/set/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/set/) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 64642623f88..26d928085ce 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -1,6 +1,6 @@ --- -sidebar_position: 80 -sidebar_label: URL +toc_priority: 41 +toc_title: URL --- # URL Table Engine {#table_engines-url} @@ -89,4 +89,4 @@ SELECT * FROM url_engine_table - Indexes. - Replication. -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/url/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/url/) diff --git a/docs/en/engines/table-engines/special/view.md b/docs/en/engines/table-engines/special/view.md index 455c301fb01..9b847a0e2d5 100644 --- a/docs/en/engines/table-engines/special/view.md +++ b/docs/en/engines/table-engines/special/view.md @@ -1,10 +1,10 @@ --- -sidebar_position: 90 -sidebar_label: View +toc_priority: 42 +toc_title: View --- # View Table Engine {#table_engines-view} Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/view/) +[Original article](https://clickhouse.com/docs/en/operations/table_engines/view/) diff --git a/docs/en/example-datasets/_category_.yml b/docs/en/example-datasets/_category_.yml deleted file mode 100644 index 310ce834a92..00000000000 --- a/docs/en/example-datasets/_category_.yml +++ /dev/null @@ -1,8 +0,0 @@ -position: 10 -label: 'Example Datasets' -collapsible: true -collapsed: true -link: - type: generated-index - title: Example Datasets - slug: /en/example-datasets \ No newline at end of file diff --git a/docs/en/faq/general/columnar-database.md b/docs/en/faq/general/columnar-database.md new file mode 100644 index 00000000000..11bbd2e63f6 --- /dev/null +++ b/docs/en/faq/general/columnar-database.md @@ -0,0 +1,25 @@ +--- +title: What is a columnar database? +toc_hidden: true +toc_priority: 101 +--- + +# What Is a Columnar Database? {#what-is-a-columnar-database} + +A columnar database stores data of each column independently. This allows to read data from disks only for those columns that are used in any given query. The cost is that operations that affect whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system. 
+ +Key columnar database advantages are: + +- Queries that use only a few columns out of many. +- Aggregating queries against large volumes of data. +- Column-wise data compression. + +Here is the illustration of the difference between traditional row-oriented systems and columnar databases when building reports: + +**Traditional row-oriented** +![Traditional row-oriented](https://clickhouse.com/docs/en/images/row-oriented.gif#) + +**Columnar** +![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#) + +A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing and data warehousing, because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. diff --git a/docs/en/faq/general/dbms-naming.md b/docs/en/faq/general/dbms-naming.md new file mode 100644 index 00000000000..d4e87ff450a --- /dev/null +++ b/docs/en/faq/general/dbms-naming.md @@ -0,0 +1,17 @@ +--- +title: "What does \u201CClickHouse\u201D mean?" +toc_hidden: true +toc_priority: 10 +--- + +# What Does “ClickHouse” Mean? {#what-does-clickhouse-mean} + +It’s a combination of “**Click**stream” and “Data ware**House**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet, and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page. + +This two-part meaning has two consequences: + +- The only correct way to write Click**H**ouse is with capital H. +- If you need to abbreviate it, use **CH**. For some historical reasons, abbreviating as CK is also popular in China, mostly because one of the first talks about ClickHouse in Chinese used this form. + +!!! info "Fun fact" + Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres. diff --git a/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md new file mode 100644 index 00000000000..731dc9dface --- /dev/null +++ b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md @@ -0,0 +1,15 @@ +--- +title: How do I contribute code to ClickHouse? +toc_hidden: true +toc_priority: 120 +--- + +# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse} + +ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse). + +As customary, contribution instructions are published in [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository. 
+ +If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, to discuss it with maintainers and community first. [Examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc). + +If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too. diff --git a/docs/en/faq/general/index.md b/docs/en/faq/general/index.md new file mode 100644 index 00000000000..51fff9a53ae --- /dev/null +++ b/docs/en/faq/general/index.md @@ -0,0 +1,25 @@ +--- +title: General questions about ClickHouse +toc_hidden_folder: true +toc_priority: 1 +toc_title: General +--- + +# General Questions About ClickHouse {#general-questions} + +Questions: + +- [What is ClickHouse?](../../index.md#what-is-clickhouse) +- [Why ClickHouse is so fast?](../../faq/general/why-clickhouse-is-so-fast.md) +- [Who is using ClickHouse?](../../faq/general/who-is-using-clickhouse.md) +- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md) +- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md) +- [What is OLAP?](../../faq/general/olap.md) +- [What is a columnar database?](../../faq/general/columnar-database.md) +- [Why not use something like MapReduce?](../../faq/general/mapreduce.md) +- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md) + +!!! info "Don’t see what you were looking for?" + Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. + +{## [Original article](https://clickhouse.com/docs/en/faq/general/) ##} diff --git a/docs/en/faq/general/mapreduce.md b/docs/en/faq/general/mapreduce.md new file mode 100644 index 00000000000..30cae65cba2 --- /dev/null +++ b/docs/en/faq/general/mapreduce.md @@ -0,0 +1,13 @@ +--- +title: Why not use something like MapReduce? +toc_hidden: true +toc_priority: 110 +--- + +# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} + +We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Large IT companies often have proprietary in-house solutions. + +These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. + +Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. 
Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. diff --git a/docs/en/faq/general/ne-tormozit.md b/docs/en/faq/general/ne-tormozit.md new file mode 100644 index 00000000000..e8dc7388eff --- /dev/null +++ b/docs/en/faq/general/ne-tormozit.md @@ -0,0 +1,26 @@ +--- +title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ + \u201D mean?" +toc_hidden: true +toc_priority: 11 +--- + +# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean} + +This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. + +Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, Yandex. That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. + +One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. + +So, what does it mean? Here are some ways to translate *“не тормозит”*: + +- If you translate it literally, it’d be something like *“ClickHouse does not press the brake pedal”*. +- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*. +- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse does not lag”* or just *“ClickHouse is fast”*. + +If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one: + +![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) + +P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.com/#meet), usually for best questions or other forms of active participation. diff --git a/docs/en/faq/general/olap.md b/docs/en/faq/general/olap.md new file mode 100644 index 00000000000..1f6df183f8c --- /dev/null +++ b/docs/en/faq/general/olap.md @@ -0,0 +1,39 @@ +--- +title: What is OLAP? +toc_hidden: true +toc_priority: 100 +--- + +# What Is OLAP? {#what-is-olap} + +[OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: + +Processing +: Some source data is processed… + +Analytical +: …to produce some analytical reports and insights… + +Online +: …in real-time. 
+ +## OLAP from the Business Perspective {#olap-from-the-business-perspective} + +In recent years, business people started to realize the value of data. Companies who make their decisions blindly, more often than not fail to keep up with the competition. The data-driven approach of successful companies forces them to collect all data that might be remotely useful for making business decisions and need mechanisms to timely analyze them. Here’s where OLAP database management systems (DBMS) come in. + +In a business sense, OLAP allows companies to continuously plan, analyze, and report operational activities, thus maximizing efficiency, reducing expenses, and ultimately conquering the market share. It could be done either in an in-house system or outsourced to SaaS providers like web/mobile analytics services, CRM services, etc. OLAP is the technology behind many BI applications (Business Intelligence). + +ClickHouse is an OLAP database management system that is pretty often used as a backend for those SaaS solutions for analyzing domain-specific data. However, some businesses are still reluctant to share their data with third-party providers and an in-house data warehouse scenario is also viable. + +## OLAP from the Technical Perspective {#olap-from-the-technical-perspective} + +All database management systems could be classified into two groups: OLAP (Online **Analytical** Processing) and OLTP (Online **Transactional** Processing). Former focuses on building reports, each based on large volumes of historical data, but doing it not so frequently. While the latter usually handle a continuous stream of transactions, constantly modifying the current state of data. + +In practice OLAP and OLTP are not categories, it’s more like a spectrum. Most real systems usually focus on one of them but provide some solutions or workarounds if the opposite kind of workload is also desired. This situation often forces businesses to operate multiple storage systems integrated, which might be not so big deal but having more systems make it more expensive to maintain. So the trend of recent years is HTAP (**Hybrid Transactional/Analytical Processing**) when both kinds of the workload are handled equally well by a single database management system. + +Even if a DBMS started as a pure OLAP or pure OLTP, they are forced to move towards that HTAP direction to keep up with their competition. And ClickHouse is no exception, initially, it has been designed as [fast-as-possible OLAP system](../../faq/general/why-clickhouse-is-so-fast.md) and it still does not have full-fledged transaction support, but some features like consistent read/writes and mutations for updating/deleting data had to be added. + +The fundamental trade-off between OLAP and OLTP systems remains: + +- To build analytical reports efficiently it’s crucial to be able to read columns separately, thus most OLAP databases are [columnar](../../faq/general/columnar-database.md), +- While storing columns separately increases costs of operations on rows, like append or in-place modification, proportionally to the number of columns (which can be huge if the systems try to collect all details of an event just in case). Thus, most OLTP systems store data arranged by rows. diff --git a/docs/en/faq/general/who-is-using-clickhouse.md b/docs/en/faq/general/who-is-using-clickhouse.md new file mode 100644 index 00000000000..b7ff867d726 --- /dev/null +++ b/docs/en/faq/general/who-is-using-clickhouse.md @@ -0,0 +1,19 @@ +--- +title: Who is using ClickHouse? 
+toc_hidden: true +toc_priority: 9 +--- + +# Who Is Using ClickHouse? {#who-is-using-clickhouse} + +Being an open-source product makes this question not so straightforward to answer. You do not have to tell anyone if you want to start using ClickHouse, you just go grab source code or pre-compiled packages. There’s no contract to sign and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution. + +Also, the technology stack is often in a grey zone of what’s covered by an NDA. Some companies consider technologies they use as a competitive advantage even if they are open-source and do not allow employees to share any details publicly. Some see some PR risks and allow employees to share implementation details only with their PR department approval. + +So how to tell who is using ClickHouse? + +One way is to **ask around**. If it’s not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. We’re talking with users regularly on [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, that’s not reproducible and we try to treat such stories as if they were told under NDA to avoid any potential troubles. But you can come to any of our future meetups and talk with other users on your own. There are multiple ways how meetups are announced, for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/). + +The second way is to look for companies **publicly saying** that they use ClickHouse. It’s more substantial because there’s usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect the collection of links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links you’ve stumbled upon (but try not to violate your NDA in the process). + +You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe. diff --git a/docs/en/faq/general/why-clickhouse-is-so-fast.md b/docs/en/faq/general/why-clickhouse-is-so-fast.md new file mode 100644 index 00000000000..1ccf2595768 --- /dev/null +++ b/docs/en/faq/general/why-clickhouse-is-so-fast.md @@ -0,0 +1,63 @@ +--- +title: Why ClickHouse is so fast? +toc_hidden: true +toc_priority: 8 +--- + +# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast} + +It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system. 
+ +ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: + +Column-oriented storage +: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted. + +Indexes +: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns. + +Data compression +: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs) that can make data even more compact. + +Vectorized query execution +: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage. + +Scalability +: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well. + +But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider: + +- Which hash function to choose? +- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? +- Memory layout: one array for keys and values or separate arrays? Will it store small or large values? +- Fill factor: when and how to resize? How to move values around on resize? +- Will values be removed and which algorithm will work better if they will? +- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching? + +Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query. + +The same goes for algorithms, for example, in sorting you might consider: + +- What will be sorted: an array of numbers, tuples, strings, or structures? +- Is all data available completely in RAM? +- Do we need a stable sort? +- Do we need a full sort? Maybe partial sort or n-th element will suffice? +- How to implement comparisons? +- Are we sorting data that has already been partially sorted? + +Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. 
If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/). + +Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time you’ll indeed find a gem. + +!!! info "Tips for building your own high-performance software" + + + - Keep in mind low-level details when designing your system. + - Design based on hardware capabilities. + - Choose data structures and abstractions based on the needs of the task. + - Provide specializations for special cases. + - Try new, “best” algorithms, that you read about yesterday. + - Choose an algorithm in runtime based on statistics. + - Benchmark on real datasets. + - Test for performance regressions in CI. + - Measure and observe everything. diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md new file mode 100644 index 00000000000..891e1ea464e --- /dev/null +++ b/docs/en/faq/index.md @@ -0,0 +1,47 @@ +--- +toc_folder_title: F.A.Q. +toc_hidden: true +toc_priority: 76 +--- + +# ClickHouse F.A.Q {#clickhouse-f-a-q} + +This section of the documentation is a place to collect answers to ClickHouse-related questions that arise often. + +Categories: + +- **[General](../faq/general/index.md)** + - [What is ClickHouse?](../index.md#what-is-clickhouse) + - [Why ClickHouse is so fast?](../faq/general/why-clickhouse-is-so-fast.md) + - [Who is using ClickHouse?](../faq/general/who-is-using-clickhouse.md) + - [What does “ClickHouse” mean?](../faq/general/dbms-naming.md) + - [What does “Не тормозит” mean?](../faq/general/ne-tormozit.md) + - [What is OLAP?](../faq/general/olap.md) + - [What is a columnar database?](../faq/general/columnar-database.md) + - [Why not use something like MapReduce?](../faq/general/mapreduce.md) +- **[Use Cases](../faq/use-cases/index.md)** + - [Can I use ClickHouse as a time-series database?](../faq/use-cases/time-series.md) + - [Can I use ClickHouse as a key-value storage?](../faq/use-cases/key-value.md) +- **[Operations](../faq/operations/index.md)** + - [Which ClickHouse version to use in production?](../faq/operations/production.md) + - [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md) + - [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md) +- **[Integration](../faq/integration/index.md)** + - [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md) + - [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md) + +{## TODO +Question candidates: +- How to choose a primary key? +- How to add a column in ClickHouse? +- Too many parts +- How to filter ClickHouse table by an array column contents? +- How to insert all rows from one table to another of identical structure? +- How to kill a process (query) in ClickHouse? +- How to implement pivot (like in pandas)? +- How to remove the default ClickHouse user through users.d? 
+- Importing MySQL dump to ClickHouse +- Window function workarounds (row_number, lag/lead, running diff/sum/average) +##} + +{## [Original article](https://clickhouse.com/docs/en/faq) ##} diff --git a/docs/en/faq/integration/file-export.md b/docs/en/faq/integration/file-export.md new file mode 100644 index 00000000000..f8f458929f9 --- /dev/null +++ b/docs/en/faq/integration/file-export.md @@ -0,0 +1,37 @@ +--- +title: How do I export data from ClickHouse to a file? +toc_hidden: true +toc_priority: 10 +--- + +# How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} + +## Using INTO OUTFILE Clause {#using-into-outfile-clause} + +Add an [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query. + +For example: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' +``` + +By default, ClickHouse uses the [TabSeparated](../../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../../interfaces/formats.md), use the [FORMAT clause](../../sql-reference/statements/select/format.md#format-clause). + +For example: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV +``` + +## Using a File-Engine Table {#using-a-file-engine-table} + +See [File](../../engines/table-engines/special/file.md) table engine. + +## Using Command-Line Redirection {#using-command-line-redirection} + +``` bash +$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt +``` + +See [clickhouse-client](../../interfaces/cli.md). diff --git a/docs/en/faq/integration/index.md b/docs/en/faq/integration/index.md new file mode 100644 index 00000000000..51a2593b751 --- /dev/null +++ b/docs/en/faq/integration/index.md @@ -0,0 +1,19 @@ +--- +title: Questions about integrating ClickHouse and other systems +toc_hidden_folder: true +toc_priority: 4 +toc_title: Integration +--- + +# Questions About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems} + +Questions: + +- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md) +- [How to import JSON into ClickHouse?](../../faq/integration/json-import.md) +- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md) + +!!! info "Don’t see what you were looking for?" + Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. + +{## [Original article](https://clickhouse.com/docs/en/faq/integration/) ##} diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md new file mode 100644 index 00000000000..3fa026c794a --- /dev/null +++ b/docs/en/faq/integration/json-import.md @@ -0,0 +1,33 @@ +--- +title: How to import JSON into ClickHouse? +toc_hidden: true +toc_priority: 11 +--- + +# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse} + +ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline. 
+
+## Examples {#examples}
+
+Using the [HTTP interface](../../interfaces/http.md):
+
+``` bash
+$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @-
+```
+
+Using the [CLI](../../interfaces/cli.md):
+
+``` bash
+$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
+```
+
+Instead of inserting data manually, you might consider using one of the [client libraries](../../interfaces/index.md).
+
+## Useful Settings {#useful-settings}
+
+- `input_format_skip_unknown_fields` allows inserting JSON even if it contains additional fields that are not present in the table schema (they are discarded).
+- `input_format_import_nested_json` allows inserting nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
+
+!!! note "Note"
+    Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
diff --git a/docs/en/faq/integration/oracle-odbc.md b/docs/en/faq/integration/oracle-odbc.md
new file mode 100644
index 00000000000..91265a3daa2
--- /dev/null
+++ b/docs/en/faq/integration/oracle-odbc.md
@@ -0,0 +1,15 @@
+---
+title: What if I have a problem with encodings when using Oracle via ODBC?
+toc_hidden: true
+toc_priority: 20
+---
+
+# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
+
+If you use Oracle as a source of ClickHouse external dictionaries via the Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
+
+**Example**
+
+``` bash
+NLS_LANG=RUSSIAN_RUSSIA.UTF8
+```
diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md
new file mode 100644
index 00000000000..32fc485e98a
--- /dev/null
+++ b/docs/en/faq/operations/delete-old-data.md
@@ -0,0 +1,42 @@
+---
+title: Is it possible to delete old records from a ClickHouse table?
+toc_hidden: true
+toc_priority: 20
+---
+
+# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table}
+
+The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed at different scenarios.
+
+## TTL {#ttl}
+
+ClickHouse allows automatically dropping values when some condition is met. This condition is configured as an expression based on any columns, usually just a static offset for a timestamp column.
+
+The key advantage of this approach is that it does not need any external system to trigger it: once TTL is configured, data removal happens automatically in the background.
+
+!!! note "Note"
+    TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD.
+
+More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+
+## ALTER DELETE {#alter-delete}
+
+ClickHouse does not have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them is mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish them from normal `DELETE` or `UPDATE`, as they are asynchronous batch operations, not immediate modifications. The rest of the syntax after the `ALTER TABLE` prefix is similar.
+
+`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations, since mutations rewrite complete parts even if there is only a single row to be deleted.
+
+This is the most common approach to making a system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant.
+
+More details on [mutations](../../sql-reference/statements/alter/index.md#alter-mutations).
+
+## DROP PARTITION {#drop-partition}
+
+`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It is not as flexible and needs a proper partitioning scheme configured on table creation, but it still covers most common cases. Like mutations, it needs to be executed from an external system for regular use.
+
+More details on [manipulating partitions](../../sql-reference/statements/alter/partition.md#alter_drop-partition).
+
+## TRUNCATE {#truncate}
+
+It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.
+
+More details on [table truncation](../../sql-reference/statements/truncate.md).
diff --git a/docs/en/faq/operations/index.md b/docs/en/faq/operations/index.md
new file mode 100644
index 00000000000..81aec18b9cf
--- /dev/null
+++ b/docs/en/faq/operations/index.md
@@ -0,0 +1,19 @@
+---
+title: Questions about operating ClickHouse servers and clusters
+toc_hidden_folder: true
+toc_priority: 3
+toc_title: Operations
+---
+
+# Questions About Operating ClickHouse Servers and Clusters {#question-about-operating-clickhouse-servers-and-clusters}
+
+Questions:
+
+- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
+- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
+- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md)
+
+!!! info "Don’t see what you were looking for?"
+    Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
+
+{## [Original article](https://clickhouse.com/docs/en/faq/production/) ##}
diff --git a/docs/en/faq/operations/multi-region-replication.md b/docs/en/faq/operations/multi-region-replication.md
new file mode 100644
index 00000000000..7d78737544a
--- /dev/null
+++ b/docs/en/faq/operations/multi-region-replication.md
@@ -0,0 +1,13 @@
+---
+title: Does ClickHouse support multi-region replication?
+toc_hidden: true
+toc_priority: 30
+---
+
+# Does ClickHouse Support Multi-Region Replication? {#does-clickhouse-support-multi-region-replication}
+
+The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in the two-digit millisecond range; otherwise, write performance will suffer as writes go through a distributed consensus protocol. For example, replication between US coasts will likely work fine, but replication between the US and Europe won't.
+
+Configuration-wise, there is no difference compared to single-region replication: simply use hosts that are located in different regions as replicas.
+
+For more information, see the [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).
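+
+As an illustration of the configuration point above, a replicated table is created the same way regardless of where the replicas live. The sketch below is only an example: it assumes the standard `{shard}` and `{replica}` macros are defined in each server's configuration, and the table and ZooKeeper path names are made up.
+
+``` sql
+-- Run on every replica (in any region); the ZooKeeper/Keeper path and
+-- replica name come from per-server macros, not from anything region-specific.
+CREATE TABLE events
+(
+    `event_time` DateTime,
+    `user_id` UInt64,
+    `payload` String
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}')
+ORDER BY (user_id, event_time);
+```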
diff --git a/docs/en/faq/operations/production.md b/docs/en/faq/operations/production.md
new file mode 100644
index 00000000000..52ca300ced0
--- /dev/null
+++ b/docs/en/faq/operations/production.md
@@ -0,0 +1,70 @@
+---
+title: Which ClickHouse version to use in production?
+toc_hidden: true
+toc_priority: 10
+---
+
+# Which ClickHouse Version to Use in Production? {#which-clickhouse-version-to-use-in-production}
+
+First of all, let’s discuss why people ask this question in the first place. There are two key reasons:
+
+1. ClickHouse is developed with pretty high velocity, and usually there are 10+ stable releases per year. That makes for a wide range of releases to choose from, which is not so trivial a choice.
+2. Some users want to avoid spending time figuring out which version works best for their use case and just follow someone else’s advice.
+
+The second reason is more fundamental, so we’ll start with it and then get back to navigating through various ClickHouse releases.
+
+## Which ClickHouse Version Do You Recommend? {#which-clickhouse-version-do-you-recommend}
+
+It’s tempting to hire consultants or trust some known experts to get rid of responsibility for your production environment. You install the specific ClickHouse version that someone else recommended, and if there’s some issue with it, it’s not your fault but someone else’s. This line of reasoning is a big trap. No external person knows better what’s going on in your company’s production environment.
+
+So how do you properly choose which ClickHouse version to upgrade to? Or how do you choose your first ClickHouse version? First of all, you need to invest in setting up a **realistic pre-production environment**. In an ideal world, it could be a completely identical shadow copy, but that’s usually expensive.
+
+Here are some key points for getting reasonable fidelity in a pre-production environment without very high costs:
+
+- The pre-production environment needs to run a set of queries as close as possible to what you intend to run in production:
+    - Don’t make it read-only with some frozen data.
+    - Don’t make it write-only with just copying data without building some typical reports.
+    - Don’t wipe it clean instead of applying schema migrations.
+- Use a sample of real production data and queries. Try to choose a sample that’s still representative and makes `SELECT` queries return reasonable results. Use obfuscation if your data is sensitive and internal policies do not allow it to leave the production environment.
+- Make sure that pre-production is covered by your monitoring and alerting software the same way as your production environment is.
+- If your production spans multiple datacenters or regions, make sure your pre-production does the same.
+- If your production uses complex features like replication, distributed tables, or cascading materialized views, make sure they are configured similarly in pre-production.
+- There’s a trade-off between using roughly the same number of servers or VMs in pre-production as in production but of smaller size, or using many fewer of them but of the same size. The first option might catch extra network-related issues, while the latter is easier to manage.
+
+The second area to invest in is **automated testing infrastructure**. Don’t assume that if some kind of query has executed successfully once, it’ll continue to do so forever. It’s OK to have some unit tests where ClickHouse is mocked, but make sure your product has a reasonable set of automated tests that are run against a real ClickHouse and check that all important use cases still work as expected.
+
+An extra step forward could be contributing those automated tests to [ClickHouse’s open-source test infrastructure](https://github.com/ClickHouse/ClickHouse/tree/master/tests) that’s continuously used in its day-to-day development. It will definitely take some additional time and effort to learn [how to run it](../../development/tests.md) and how to adapt your tests to this framework, but it’ll pay off by ensuring that ClickHouse releases are already tested against them when they are announced stable, instead of repeatedly losing time reporting an issue after the fact and then waiting for a bugfix to be implemented, backported, and released. Some companies even have contributing such tests to the infrastructure they use as an internal policy; most notably, it’s called [Beyonce’s Rule](https://www.oreilly.com/library/view/software-engineering-at/9781492082781/ch01.html#policies_that_scale_well) at Google.
+
+When you have your pre-production environment and testing infrastructure in place, choosing the best version is straightforward:
+
+1. Routinely run your automated tests against new ClickHouse releases. You can do it even for ClickHouse releases that are marked as `testing`, but going forward to the next steps with them is not recommended.
+2. Deploy the ClickHouse release that passed the tests to pre-production and check that all processes are running as expected.
+3. Report any issues you discovered to [ClickHouse GitHub Issues](https://github.com/ClickHouse/ClickHouse/issues).
+4. If there were no major issues, it should be safe to start deploying the ClickHouse release to your production environment. Investing in gradual release automation that implements an approach similar to [canary releases](https://martinfowler.com/bliki/CanaryRelease.html) or [blue-green deployments](https://martinfowler.com/bliki/BlueGreenDeployment.html) might further reduce the risk of issues in production.
+
+As you might have noticed, there’s nothing specific to ClickHouse in the approach described above; people do that for any piece of infrastructure they rely on if they take their production environment seriously.
+
+## How to Choose Between ClickHouse Releases? {#how-to-choose-between-clickhouse-releases}
+
+If you look into the contents of the ClickHouse package repository, you’ll see four kinds of packages:
+
+1. `testing`
+2. `prestable`
+3. `stable`
+4. `lts` (long-term support)
+
+As was mentioned earlier, `testing` releases are good mostly for noticing issues early; running them in production is not recommended because they are not tested as thoroughly as the other kinds of packages.
+
+`prestable` is a release candidate which generally looks promising and is likely to be announced as `stable` soon. You can try them out in pre-production and report issues if you see any.
+
+For production use, there are two key options: `stable` and `lts`. Here is some guidance on how to choose between them:
+
+- `stable` is the kind of package we recommend by default. They are released roughly monthly (and thus provide new features with a reasonable delay), and the three latest stable releases are supported in terms of diagnostics and backporting of bugfixes.
+- `lts` are released twice a year and are supported for a year after their initial release. You might prefer them over `stable` in the following cases:
+    - Your company has some internal policies that do not allow for frequent upgrades or using non-LTS software.
+    - You are using ClickHouse in some secondary products that either do not require any complex ClickHouse features or do not have enough resources to keep them updated.
+
+Many teams who initially thought that `lts` was the way to go often switch to `stable` anyway because of some recent feature that’s important for their product.
+
+!!! warning "Important"
+    One more thing to keep in mind when upgrading ClickHouse: we’re always keeping an eye on compatibility across releases, but sometimes it’s not reasonable to preserve it, and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes.
diff --git a/docs/en/faq/use-cases/index.md b/docs/en/faq/use-cases/index.md
new file mode 100644
index 00000000000..aac5493b105
--- /dev/null
+++ b/docs/en/faq/use-cases/index.md
@@ -0,0 +1,18 @@
+---
+title: Questions about ClickHouse use cases
+toc_hidden_folder: true
+toc_priority: 2
+toc_title: Use Cases
+---
+
+# Questions About ClickHouse Use Cases {#questions-about-clickhouse-use-cases}
+
+Questions:
+
+- [Can I use ClickHouse as a time-series database?](../../faq/use-cases/time-series.md)
+- [Can I use ClickHouse as a key-value storage?](../../faq/use-cases/key-value.md)
+
+!!! info "Don’t see what you were looking for?"
+    Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
+
+{## [Original article](https://clickhouse.com/docs/en/faq/use-cases/) ##}
diff --git a/docs/en/faq/use-cases/key-value.md b/docs/en/faq/use-cases/key-value.md
new file mode 100644
index 00000000000..2827dd2fa58
--- /dev/null
+++ b/docs/en/faq/use-cases/key-value.md
@@ -0,0 +1,17 @@
+---
+title: Can I use ClickHouse as a key-value storage?
+toc_hidden: true
+toc_priority: 101
+---
+
+# Can I Use ClickHouse As a Key-Value Storage? {#can-i-use-clickhouse-as-a-key-value-storage}
+
+The short answer is **“no”**. The key-value workload is among the top positions in the list of cases when **NOT**{.text-danger} to use ClickHouse. It’s an [OLAP](../../faq/general/olap.md) system after all, while there are many excellent key-value storage systems out there.
+
+However, there might be situations where it still makes sense to use ClickHouse for key-value-like queries. Usually, these are low-budget products where the main workload is analytical in nature and fits ClickHouse well, but there is also some secondary process that needs a key-value pattern with not-so-high request throughput and without strict latency requirements. If you had an unlimited budget, you would have installed a secondary key-value database for this secondary workload, but in reality there’s an additional cost of maintaining one more storage system (monitoring, backups, etc.), which might be desirable to avoid.
+
+If you decide to go against recommendations and run some key-value-like queries against ClickHouse, here are some tips:
+
+- The key reason why point queries are expensive in ClickHouse is the sparse primary index of the main [MergeTree table engine family](../../engines/table-engines/mergetree-family/mergetree.md). This index can’t point to each specific row of data; instead, it points to every N-th row, and the system has to scan from the neighboring N-th row to the desired one, reading excessive data along the way. In a key-value scenario, it might be useful to reduce the value of N with the `index_granularity` setting.
+- ClickHouse keeps each column in a separate set of files, so to assemble one complete row it needs to go through each of those files. Their count increases linearly with the number of columns, so in the key-value scenario it might be worth avoiding many columns and putting all your payload in a single `String` column encoded in some serialization format like JSON, Protobuf, or whatever makes sense.
+- There’s an alternative approach that uses the [Join](../../engines/table-engines/special/join.md) table engine instead of normal `MergeTree` tables and the [joinGet](../../sql-reference/functions/other-functions.md#joinget) function to retrieve the data. It can provide better query performance but might have some usability and reliability issues. Here’s a [usage example](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51).
diff --git a/docs/en/faq/use-cases/time-series.md b/docs/en/faq/use-cases/time-series.md
new file mode 100644
index 00000000000..bf97ac4b1e2
--- /dev/null
+++ b/docs/en/faq/use-cases/time-series.md
@@ -0,0 +1,15 @@
+---
+title: Can I use ClickHouse as a time-series database?
+toc_hidden: true
+toc_priority: 101
+---
+
+# Can I Use ClickHouse As a Time-Series Database? {#can-i-use-clickhouse-as-a-time-series-database}
+
+ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouse’s [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there, so we’re not going to conduct one here. Instead, let’s focus on ClickHouse features that are important to use if that’s your use case.
+
+First of all, there are **[specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)** which make typical time-series data much more compact: either common algorithms like `DoubleDelta` and `Gorilla`, or codecs specific to ClickHouse like `T64`.
+
+Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast NVMe/SSD drives and high-capacity HDD drives. The ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature makes it possible to keep fresh, hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it.
+
+Even though it’s against the ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create/view.md) to fit even tighter latency or cost requirements.
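+
+To tie the points above together, here is a sketch of a time-series table that combines specialized codecs with a TTL-based move from fast to slow storage. It is only an illustration: the column names are made up, and the `TTL ... TO VOLUME 'cold'` clause assumes you have configured a storage policy with a volume named `cold`.
+
+``` sql
+-- Illustrative only: assumes a storage policy "hot_and_cold" with a volume named "cold".
+CREATE TABLE metrics
+(
+    `ts` DateTime CODEC(DoubleDelta, LZ4),   -- timestamps compress well with DoubleDelta
+    `value` Float64 CODEC(Gorilla, LZ4)      -- gauge-like values compress well with Gorilla
+)
+ENGINE = MergeTree
+ORDER BY ts
+TTL ts + INTERVAL 7 DAY TO VOLUME 'cold'
+SETTINGS storage_policy = 'hot_and_cold';
+```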
diff --git a/docs/en/example-datasets/amplab-benchmark.md b/docs/en/getting-started/example-datasets/amplab-benchmark.md similarity index 96% rename from docs/en/example-datasets/amplab-benchmark.md rename to docs/en/getting-started/example-datasets/amplab-benchmark.md index a87ac53e2e3..b410a3595ec 100644 --- a/docs/en/example-datasets/amplab-benchmark.md +++ b/docs/en/getting-started/example-datasets/amplab-benchmark.md @@ -1,6 +1,6 @@ --- -sidebar_label: AMPLab Big Data Benchmark -description: A benchmark dataset used for comparing the performance of data warehousing solutions. +toc_priority: 19 +toc_title: AMPLab Big Data Benchmark --- # AMPLab Big Data Benchmark {#amplab-big-data-benchmark} diff --git a/docs/en/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md similarity index 99% rename from docs/en/example-datasets/brown-benchmark.md rename to docs/en/getting-started/example-datasets/brown-benchmark.md index 0960756dbe9..93049d1f76a 100644 --- a/docs/en/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -1,6 +1,6 @@ --- -sidebar_label: Brown University Benchmark -description: A new analytical benchmark for machine-generated log data +toc_priority: 20 +toc_title: Brown University Benchmark --- # Brown University Benchmark diff --git a/docs/en/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md similarity index 98% rename from docs/en/example-datasets/cell-towers.md rename to docs/en/getting-started/example-datasets/cell-towers.md index 6c3201ff2b2..1f681fc32d8 100644 --- a/docs/en/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -1,8 +1,9 @@ --- -sidebar_label: Cell Towers +toc_priority: 21 +toc_title: Cell Towers --- -# Cell Towers +# Cell Towers {#cell-towers} This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. @@ -95,7 +96,7 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 So, the top countries are: the USA, Germany, and Russia. -You may want to create an [External Dictionary](../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. +You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. 
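+
+For example, a minimal sketch of such a dictionary could look like the following (the dictionary name and the CSV file with `mcc` to country mappings are hypothetical and only illustrate the idea; the file is assumed to be placed in the server's `user_files` directory):
+
+```sql
+-- Assumes a CSV file with columns "mcc" and "country".
+CREATE DICTIONARY mcc_countries
+(
+    mcc UInt64,
+    country String
+)
+PRIMARY KEY mcc
+SOURCE(FILE(path '/var/lib/clickhouse/user_files/mcc_countries.csv' format 'CSVWithNames'))
+LAYOUT(HASHED())
+LIFETIME(3600);
+
+-- Decode mcc values on the fly while aggregating.
+SELECT dictGet('mcc_countries', 'country', toUInt64(mcc)) AS country, count()
+FROM cell_towers
+GROUP BY country
+ORDER BY count() DESC
+LIMIT 10;
+```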
## Use case {#use-case} diff --git a/docs/en/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md similarity index 96% rename from docs/en/example-datasets/criteo.md rename to docs/en/getting-started/example-datasets/criteo.md index 2d1c700d15c..08298172c70 100644 --- a/docs/en/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -1,8 +1,9 @@ --- -sidebar_label: Terabyte Click Logs from Criteo +toc_priority: 18 +toc_title: Terabyte Click Logs from Criteo --- -# Terabyte of Click Logs from Criteo +# Terabyte of Click Logs from Criteo {#terabyte-of-click-logs-from-criteo} Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/ diff --git a/docs/en/example-datasets/github-events.md b/docs/en/getting-started/example-datasets/github-events.md similarity index 89% rename from docs/en/example-datasets/github-events.md rename to docs/en/getting-started/example-datasets/github-events.md index 3a0cbc3324d..e470e88b182 100644 --- a/docs/en/example-datasets/github-events.md +++ b/docs/en/getting-started/example-datasets/github-events.md @@ -1,5 +1,6 @@ --- -sidebar_label: GitHub Events +toc_priority: 11 +toc_title: GitHub Events --- # GitHub Events Dataset diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md new file mode 100644 index 00000000000..d4c9bab2441 --- /dev/null +++ b/docs/en/getting-started/example-datasets/index.md @@ -0,0 +1,28 @@ +--- +toc_folder_title: Example Datasets +toc_priority: 10 +toc_title: Introduction +--- + +# Example Datasets {#example-datasets} + +This section describes how to obtain example datasets and import them into ClickHouse. For some datasets example queries are also available. + +The list of documented datasets: + +- [GitHub Events](../../getting-started/example-datasets/github-events.md) +- [Anonymized Web Analytics Dataset](../../getting-started/example-datasets/metrica.md) +- [Recipes](../../getting-started/example-datasets/recipes.md) +- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) +- [WikiStat](../../getting-started/example-datasets/wikistat.md) +- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) +- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) +- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) +- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) +- [OpenSky](../../getting-started/example-datasets/opensky.md) +- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md) +- [Cell Towers](../../getting-started/example-datasets/cell-towers.md) +- [What's on the Menu?](../../getting-started/example-datasets/menus.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) + +[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets) diff --git a/docs/en/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md similarity index 93% rename from docs/en/example-datasets/menus.md rename to docs/en/getting-started/example-datasets/menus.md index c41195223a2..665944b3e6f 100644 --- a/docs/en/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -1,8 +1,9 @@ --- -sidebar_label: New York Public Library "What's on the Menu?" Dataset +toc_priority: 21 +toc_title: Menus --- -# New York Public Library "What's on the Menu?" 
Dataset +# New York Public Library "What's on the Menu?" Dataset {#menus-dataset} The dataset is created by the New York Public Library. It contains historical data on the menus of hotels, restaurants and cafes with the dishes along with their prices. @@ -39,7 +40,7 @@ The data is normalized consisted of four tables: ## Create the Tables {#create-tables} -We use [Decimal](../sql-reference/data-types/decimal.md) data type to store prices. +We use [Decimal](../../sql-reference/data-types/decimal.md) data type to store prices. ```sql CREATE TABLE dish @@ -115,17 +116,17 @@ clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_defa clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv ``` -We use [CSVWithNames](../interfaces/formats.md#csvwithnames) format as the data is represented by CSV with header. +We use [CSVWithNames](../../interfaces/formats.md#csvwithnames) format as the data is represented by CSV with header. We disable `format_csv_allow_single_quotes` as only double quotes are used for data fields and single quotes can be inside the values and should not confuse the CSV parser. -We disable [input_format_null_as_default](../operations/settings/settings.md#settings-input-format-null-as-default) as our data does not have [NULL](../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data. +We disable [input_format_null_as_default](../../operations/settings/settings.md#settings-input-format-null-as-default) as our data does not have [NULL](../../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data. -The setting [date_time_input_format best_effort](../operations/settings/settings.md#settings-date_time_input_format) allows to parse [DateTime](../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed. +The setting [date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format) allows to parse [DateTime](../../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed. ## Denormalize the Data {#denormalize-data} -Data is presented in multiple tables in [normalized form](https://en.wikipedia.org/wiki/Database_normalization#Normal_forms). It means you have to perform [JOIN](../sql-reference/statements/select/join.md#select-join) if you want to query, e.g. dish names from menu items. +Data is presented in multiple tables in [normalized form](https://en.wikipedia.org/wiki/Database_normalization#Normal_forms). It means you have to perform [JOIN](../../sql-reference/statements/select/join.md#select-join) if you want to query, e.g. dish names from menu items. For typical analytical tasks it is way more efficient to deal with pre-JOINed data to avoid doing `JOIN` every time. It is called "denormalized" data. 
We will create a table `menu_item_denorm` where will contain all the data JOINed together: diff --git a/docs/en/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md similarity index 97% rename from docs/en/example-datasets/metrica.md rename to docs/en/getting-started/example-datasets/metrica.md index c5ef74750a6..d9d8beb0181 100644 --- a/docs/en/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -1,9 +1,9 @@ --- -sidebar_label: Web Analytics Data -description: Dataset consists of two tables containing anonymized web analytics data with hits and visits +toc_priority: 15 +toc_title: Web Analytics Data --- -# Anonymized Web Analytics Data +# Anonymized Web Analytics Data {#anonymized-web-analytics-data} Dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`). @@ -73,6 +73,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" ## Example Queries {#example-queries} -[The ClickHouse tutorial](../../tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial. +[The ClickHouse tutorial](../../getting-started/tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial. Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there). diff --git a/docs/en/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md similarity index 99% rename from docs/en/example-datasets/nyc-taxi.md rename to docs/en/getting-started/example-datasets/nyc-taxi.md index 270aeb4929c..a7825988695 100644 --- a/docs/en/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -1,9 +1,9 @@ --- -sidebar_label: New York Taxi Data -description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 +toc_priority: 20 +toc_title: New York Taxi Data --- -# New York Taxi Data +# New York Taxi Data {#new-york-taxi-data} This dataset can be obtained in two ways: @@ -290,9 +290,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "select count(*) from datasets.trips_mergetree" ``` -:::info -If you will run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. -::: +!!! info "Info" + If you will run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. 
## Results on Single Server {#results-on-single-server} diff --git a/docs/en/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md similarity index 97% rename from docs/en/example-datasets/ontime.md rename to docs/en/getting-started/example-datasets/ontime.md index bb3c3644972..efc807b75fa 100644 --- a/docs/en/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -1,9 +1,9 @@ --- -sidebar_label: OnTime Airline Flight Data -description: Dataset containing the on-time performance of airline flights +toc_priority: 21 +toc_title: OnTime --- -# OnTime +# OnTime {#ontime} This dataset can be obtained in two ways: @@ -156,9 +156,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "select count(*) from datasets.ontime" ``` -:::note -If you will run the queries described below, you have to use the full table name, `datasets.ontime`. -::: +!!! info "Info" + If you will run the queries described below, you have to use the full table name, `datasets.ontime`. ## Queries {#queries} diff --git a/docs/en/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md similarity index 98% rename from docs/en/example-datasets/opensky.md rename to docs/en/getting-started/example-datasets/opensky.md index 719f32d7c3e..2d901397cb2 100644 --- a/docs/en/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -1,11 +1,11 @@ --- -sidebar_label: Air Traffic Data -description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. +toc_priority: 20 +toc_title: OpenSky --- -# Crowdsourced air traffic data from The OpenSky Network 2020 +# Crowdsourced air traffic data from The OpenSky Network 2020 {#opensky} -The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic. +"The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic". Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd @@ -60,9 +60,9 @@ ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhou `xargs -P100` specifies to use up to 100 parallel workers but as we only have 30 files, the number of workers will be only 30. - For every file, `xargs` will run a script with `bash -c`. The script has substitution in form of `{}` and the `xargs` command will substitute the filename to it (we have asked it for `xargs` with `-I{}`). - The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`. -- We also asked to parse [DateTime](../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. 
+- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. -Finally, `clickhouse-client` will do insertion. It will read input data in [CSVWithNames](../interfaces/formats.md#csvwithnames) format. +Finally, `clickhouse-client` will do insertion. It will read input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format. Parallel upload takes 24 seconds. diff --git a/docs/en/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md similarity index 98% rename from docs/en/example-datasets/recipes.md rename to docs/en/getting-started/example-datasets/recipes.md index b01efc8de26..70a56a0547f 100644 --- a/docs/en/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -1,5 +1,6 @@ --- -sidebar_label: Recipes Dataset +toc_priority: 16 +toc_title: Recipes Dataset --- # Recipes Dataset @@ -50,13 +51,13 @@ clickhouse-client --query " This is a showcase how to parse custom CSV, as it requires multiple tunes. Explanation: -- The dataset is in CSV format, but it requires some preprocessing on insertion; we use table function [input](../sql-reference/table-functions/input.md) to perform preprocessing; +- The dataset is in CSV format, but it requires some preprocessing on insertion; we use table function [input](../../sql-reference/table-functions/input.md) to perform preprocessing; - The structure of CSV file is specified in the argument of the table function `input`; - The field `num` (row number) is unneeded - we parse it from file and ignore; - We use `FORMAT CSVWithNames` but the header in CSV will be ignored (by command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field; - File is using only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and single quote must not be parsed as the string enclosing - that's why we also add the `--format_csv_allow_single_quote 0` parameter; - Some strings from CSV cannot parse, because they contain `\M/` sequence at the beginning of the value; the only value starting with backslash in CSV can be `\N` that is parsed as SQL NULL. We add `--input_format_allow_errors_num 10` parameter and up to ten malformed records can be skipped; -- There are arrays for ingredients, directions and NER fields; these arrays are represented in unusual form: they are serialized into string as JSON and then placed in CSV - we parse them as String and then use [JSONExtract](../sql-reference/functions/json-functions/) function to transform it to Array. +- There are arrays for ingredients, directions and NER fields; these arrays are represented in unusual form: they are serialized into string as JSON and then placed in CSV - we parse them as String and then use [JSONExtract](../../sql-reference/functions/json-functions/) function to transform it to Array. ## Validate the Inserted Data @@ -80,7 +81,7 @@ Result: ### Top Components by the Number of Recipes: -In this example we learn how to use [arrayJoin](../sql-reference/functions/array-join/) function to expand an array into a set of rows. +In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows. Query: @@ -185,7 +186,7 @@ Result: 10 rows in set. Elapsed: 0.215 sec. 
Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.) ``` -In this example, we involve [has](../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions. +In this example, we involve [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions. There is a wedding cake that requires the whole 126 steps to produce! Show that directions: diff --git a/docs/en/example-datasets/star-schema.md b/docs/en/getting-started/example-datasets/star-schema.md similarity index 96% rename from docs/en/example-datasets/star-schema.md rename to docs/en/getting-started/example-datasets/star-schema.md index 35ff492c360..14fa7cef654 100644 --- a/docs/en/example-datasets/star-schema.md +++ b/docs/en/getting-started/example-datasets/star-schema.md @@ -1,11 +1,9 @@ --- -sidebar_label: Star Schema Benchmark -description: "Dataset based on the TPC-H dbgen source. The coding style and architecture -follows the TPCH dbgen." +toc_priority: 16 +toc_title: Star Schema Benchmark --- -# Star Schema Benchmark - +# Star Schema Benchmark {#star-schema-benchmark} Compiling dbgen: @@ -17,9 +15,8 @@ $ make Generating data: -:::warning -With `-s 100` dbgen generates 600 million rows (67 GB), while while `-s 1000` it generates 6 billion rows (which takes a lot of time) -::: +!!! warning "Attention" + With `-s 100` dbgen generates 600 million rows (67 GB), while while `-s 1000` it generates 6 billion rows (which takes a lot of time) ``` bash $ ./dbgen -s 1000 -T c diff --git a/docs/en/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md similarity index 98% rename from docs/en/example-datasets/uk-price-paid.md rename to docs/en/getting-started/example-datasets/uk-price-paid.md index e19e801dcf9..4b0ba25907d 100644 --- a/docs/en/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -1,8 +1,9 @@ --- -sidebar_label: UK Property Price Paid +toc_priority: 20 +toc_title: UK Property Price Paid --- -# UK Property Price Paid +# UK Property Price Paid {#uk-property-price-paid} The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995. The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse. @@ -54,9 +55,9 @@ In this example, we define the structure of source data from the CSV file and sp The preprocessing is: - splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries; - coverting the `time` field to date as it only contains 00:00 time; -- ignoring the [UUid](../sql-reference/data-types/uuid.md) field because we don't need it for analysis; -- transforming `type` and `duration` to more readable Enum fields with function [transform](../sql-reference/functions/other-functions.md#transform); -- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1. 
+- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis; +- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform); +- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1. Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion. @@ -352,7 +353,7 @@ Result: ## Let's Speed Up Queries Using Projections {#speedup-with-projections} -[Projections](../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data. +[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data. ### Build a Projection {#build-projection} @@ -388,7 +389,7 @@ SETTINGS mutations_sync = 1; Let's run the same 3 queries. -[Enable](../operations/settings/settings.md#allow-experimental-projection-optimization) projections for selects: +[Enable](../../operations/settings/settings.md#allow-experimental-projection-optimization) projections for selects: ```sql SET allow_experimental_projection_optimization = 1; diff --git a/docs/en/example-datasets/wikistat.md b/docs/en/getting-started/example-datasets/wikistat.md similarity index 91% rename from docs/en/example-datasets/wikistat.md rename to docs/en/getting-started/example-datasets/wikistat.md index 1185338a1da..3e3f7b164ce 100644 --- a/docs/en/example-datasets/wikistat.md +++ b/docs/en/getting-started/example-datasets/wikistat.md @@ -1,10 +1,11 @@ --- -sidebar_label: WikiStat +toc_priority: 17 +toc_title: WikiStat --- -# WikiStat +# WikiStat {#wikistat} -See http://dumps.wikimedia.org/other/pagecounts-raw/ for details. +See: http://dumps.wikimedia.org/other/pagecounts-raw/ Creating a table: diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md new file mode 100644 index 00000000000..372e8d7bd64 --- /dev/null +++ b/docs/en/getting-started/index.md @@ -0,0 +1,15 @@ +--- +toc_folder_title: Getting Started +toc_hidden: true +toc_priority: 8 +toc_title: hidden +--- + +# Getting Started {#getting-started} + +If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can: + +- [Go through detailed tutorial](../getting-started/tutorial.md) +- [Experiment with example datasets](../getting-started/example-datasets/ontime.md) + +[Original article](https://clickhouse.com/docs/en/getting_started/) diff --git a/docs/en/install.md b/docs/en/getting-started/install.md similarity index 58% rename from docs/en/install.md rename to docs/en/getting-started/install.md index 37cb113bc4a..cd734d4dc8b 100644 --- a/docs/en/install.md +++ b/docs/en/getting-started/install.md @@ -1,9 +1,6 @@ --- -sidebar_label: Installation -sidebar_position: 1 -keywords: [clickhouse, install, installation, docs] -description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. 
-slug: /en/getting-started/install +toc_priority: 11 +toc_title: Installation --- # Installation {#installation} @@ -27,36 +24,15 @@ To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or P It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. Run these commands to install packages: ``` bash -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +{% include 'install/deb.sh' %} ``` -
+
+ Deprecated Method for installing deb-packages - ``` bash -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +{% include 'install/deb_repo.sh' %} ``` -
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. @@ -70,10 +46,9 @@ You can also download and install packages manually from [here](https://packages - `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools. and installs client configuration files. - `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info. -:::info -If you need to install specific version of ClickHouse you have to install all packages with the same version: -`sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` -::: +!!! attention "Attention" + If you need to install specific version of ClickHouse you have to install all packages with the same version: + `sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` ### From RPM Packages {#from-rpm-packages} @@ -82,28 +57,15 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat First, you need to add the official repository: ``` bash -sudo yum install -y yum-utils -sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo -sudo yum install -y clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +{% include 'install/rpm.sh' %} ```
Deprecated Method for installing rpm-packages - ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +{% include 'install/rpm_repo.sh' %} ``` -
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. @@ -124,52 +86,14 @@ The required version can be downloaded with `curl` or `wget` from repository htt After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" - -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" -sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" - -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" - -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" -sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" -sudo /etc/init.d/clickhouse-server start - -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" -sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" +{% include 'install/tgz.sh' %} ```
Deprecated Method for installing tgz archives - ``` bash -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz_repo.sh' %} ```
@@ -190,33 +114,22 @@ sudo ./clickhouse install ### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux} -For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). +For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). +- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse` +- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` +- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse` +- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse` -- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse - ``` -- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse - ``` -- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse - ``` -- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse - ``` +After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. -Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. +Run `sudo ./clickhouse install` if you want to install clickhouse system-wide (also with needed configuration files, configuring users etc.). After that run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. -Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. +These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so on your own risk. They also have only a subset of ClickHouse features available. ### From Sources {#from-sources} -To manually compile ClickHouse, follow the instructions for [Linux](./development/build.md) or [Mac OS X](./development/build-osx.md). +To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md). You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs. 
@@ -271,7 +184,7 @@ If the configuration file is in the current directory, you do not need to specif ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`). By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`. -For more information, see the section [“Configuration Files”](./operations/configuration-files.md). +For more information, see the section [“Configuration Files”](../operations/configuration-files.md). After launching server, you can use the command-line client to connect to it: @@ -282,7 +195,7 @@ $ clickhouse-client By default, it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument. The terminal must use UTF-8 encoding. -For more information, see the section [“Command-line client”](./interfaces/cli.md). +For more information, see the section [“Command-line client”](../interfaces/cli.md). Example: diff --git a/docs/en/playground.md b/docs/en/getting-started/playground.md similarity index 69% rename from docs/en/playground.md rename to docs/en/getting-started/playground.md index ea7b2ccf2c5..01d7dd5b69f 100644 --- a/docs/en/playground.md +++ b/docs/en/getting-started/playground.md @@ -1,9 +1,6 @@ --- -sidebar_label: Playground -sidebar_position: 2 -keywords: [clickhouse, playground, getting, started, docs] -description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -slug: /en/getting-started/playground +toc_priority: 14 +toc_title: Playground --- # ClickHouse Playground {#clickhouse-playground} @@ -11,7 +8,7 @@ slug: /en/getting-started/playground [ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. Several example datasets are available in Playground. -You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](./interfaces/jdbc.md) or [ODBC](./interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](./interfaces/index.md). +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} @@ -39,7 +36,7 @@ HTTPS endpoint example with `curl`: curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -TCP endpoint example with [CLI](./interfaces/cli.md): +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash clickhouse client --secure --host play.clickhouse.com --user explorer diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md new file mode 100644 index 00000000000..9f43cc8769d --- /dev/null +++ b/docs/en/getting-started/tutorial.md @@ -0,0 +1,662 @@ +--- +toc_priority: 12 +toc_title: Tutorial +--- + +# ClickHouse Tutorial {#clickhouse-tutorial} + +## What to Expect from This Tutorial? 
{#what-to-expect-from-this-tutorial} + +By going through this tutorial, you’ll learn how to set up a simple ClickHouse cluster. It’ll be small, but fault-tolerant and scalable. Then we will use one of the example datasets to fill it with data and execute some demo queries. + +## Single Node Setup {#single-node-setup} + +To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them. + +For example, you have chosen `deb` packages and executed: + +``` bash +{% include 'install/deb.sh' %} +``` + +What do we have in the packages that got installed: + +- `clickhouse-client` package contains [clickhouse-client](../interfaces/cli.md) application, interactive ClickHouse console client. +- `clickhouse-common` package contains a ClickHouse executable file. +- `clickhouse-server` package contains configuration files to run ClickHouse as a server. + +Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity; the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration, it’s not handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in config.d directory](../operations/configuration-files.md) which serve as “patches” to config.xml. + +As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won’t be automatically restarted after updates, either. The way you start the server depends on your init system, usually, it is: + +``` bash +sudo service clickhouse-server start +``` + +or + +``` bash +sudo /etc/init.d/clickhouse-server start +``` + +The default location for server logs is `/var/log/clickhouse-server/`. The server is ready to handle client connections once it logs the `Ready for connections` message. + +Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`. + +
+ +Quick tips for clickhouse-client + +Interactive mode: + +``` bash +clickhouse-client +clickhouse-client --host=... --port=... --user=... --password=... +``` + +Enable multiline queries: + +``` bash +clickhouse-client -m +clickhouse-client --multiline +``` + +Run queries in batch-mode: + +``` bash +clickhouse-client --query='SELECT 1' +echo 'SELECT 1' | clickhouse-client +clickhouse-client <<< 'SELECT 1' +``` + +Insert data from a file in specified format: + +``` bash +clickhouse-client --query='INSERT INTO table VALUES' < data.txt +clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv +``` + +
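+
+For instance, once connected you can run a few trivial queries to confirm that the server responds (any valid query works; these are only illustrative):
+
+``` sql
+SELECT 'Hello, world!';
+SELECT version();
+SHOW DATABASES;
+```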
+ +## Import Sample Dataset {#import-sample-dataset} + +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized web analytics data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. + +### Download and Extract Table Data {#download-and-extract-table-data} + +``` bash +curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv +curl https://datasets.clickhouse.com/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv +``` + +The extracted files are about 10GB in size. + +### Create Tables {#create-tables} + +As in most databases management systems, ClickHouse logically groups tables into “databases”. There’s a `default` database, but we’ll create a new one named `tutorial`: + +``` bash +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial" +``` + +Syntax for creating tables is way more complicated compared to databases (see [reference](../sql-reference/statements/create/table.md). In general `CREATE TABLE` statement has to specify three key things: + +1. Name of table to create. +2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). +3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. + +There are two tables to create: + +- `hits` is a table with each action done by all users on all websites covered by the service. +- `visits` is a table that contains pre-built sessions instead of individual actions. + +Let’s see and execute the real create table queries for these tables: + +``` sql +CREATE TABLE tutorial.hits_v1 +( + `WatchID` UInt64, + `JavaEnable` UInt8, + `Title` String, + `GoodEvent` Int16, + `EventTime` DateTime, + `EventDate` Date, + `CounterID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RegionID` UInt32, + `UserID` UInt64, + `CounterClass` Int8, + `OS` UInt8, + `UserAgent` UInt8, + `URL` String, + `Referer` String, + `URLDomain` String, + `RefererDomain` String, + `Refresh` UInt8, + `IsRobot` UInt8, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `FlashMinor2` String, + `NetMajor` UInt8, + `NetMinor` UInt8, + `UserAgentMajor` UInt16, + `UserAgentMinor` FixedString(2), + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `MobilePhone` UInt8, + `MobilePhoneModel` String, + `Params` String, + `IPNetworkID` UInt32, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `IsArtifical` UInt8, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `ClientTimeZone` Int16, + `ClientEventTime` DateTime, + `SilverlightVersion1` UInt8, + `SilverlightVersion2` UInt8, + `SilverlightVersion3` UInt32, + `SilverlightVersion4` UInt16, + `PageCharset` String, + `CodeVersion` UInt32, + `IsLink` UInt8, + `IsDownload` UInt8, + `IsNotBounce` UInt8, + `FUniqID` UInt64, + `HID` UInt32, + `IsOldCounter` UInt8, + `IsEvent` UInt8, + `IsParameter` UInt8, + `DontCountHits` UInt8, + `WithHash` UInt8, + `HitColor` FixedString(1), + `UTCEventTime` DateTime, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `Interests` 
UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `WindowName` Int32, + `OpenerName` Int32, + `HistoryLength` Int16, + `BrowserLanguage` FixedString(2), + `BrowserCountry` FixedString(2), + `SocialNetwork` String, + `SocialAction` String, + `HTTPError` UInt16, + `SendTiming` Int32, + `DNSTiming` Int32, + `ConnectTiming` Int32, + `ResponseStartTiming` Int32, + `ResponseEndTiming` Int32, + `FetchTiming` Int32, + `RedirectTiming` Int32, + `DOMInteractiveTiming` Int32, + `DOMContentLoadedTiming` Int32, + `DOMCompleteTiming` Int32, + `LoadEventStartTiming` Int32, + `LoadEventEndTiming` Int32, + `NSToDOMContentLoadedTiming` Int32, + `FirstPaintTiming` Int32, + `RedirectCount` Int8, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `ParamPrice` Int64, + `ParamOrderID` String, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `GoalsReached` Array(UInt32), + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `RefererHash` UInt64, + `URLHash` UInt64, + `CLID` UInt32, + `YCLID` UInt64, + `ShareService` String, + `ShareURL` String, + `ShareTitle` String, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `IslandID` FixedString(16), + `RequestNum` UInt32, + `RequestTry` UInt8 +) +ENGINE = MergeTree() +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +``` + +``` sql +CREATE TABLE tutorial.visits_v1 +( + `CounterID` UInt32, + `StartDate` Date, + `Sign` Int8, + `IsNew` UInt8, + `VisitID` UInt64, + `UserID` UInt64, + `StartTime` DateTime, + `Duration` UInt32, + `UTCStartTime` DateTime, + `PageViews` Int32, + `Hits` Int32, + `IsBounce` UInt8, + `Referer` String, + `StartURL` String, + `RefererDomain` String, + `StartURLDomain` String, + `EndURL` String, + `LinkURL` String, + `IsDownload` UInt8, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `PlaceID` Int32, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `IsYandex` UInt8, + `GoalReachesDepth` Int32, + `GoalReachesURL` Int32, + `GoalReachesAny` Int32, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `MobilePhoneModel` String, + `ClientEventTime` DateTime, + `RegionID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `IPNetworkID` UInt32, + `SilverlightVersion3` UInt32, + `CodeVersion` UInt32, + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `UserAgentMajor` UInt16, + `UserAgentMinor` UInt16, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `SilverlightVersion2` UInt8, + `SilverlightVersion4` UInt16, + `FlashVersion3` UInt16, + `FlashVersion4` UInt16, + `ClientTimeZone` Int16, + `OS` UInt8, + `UserAgent` UInt8, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `NetMajor` UInt8, + `NetMinor` UInt8, + `MobilePhone` UInt8, + `SilverlightVersion1` UInt8, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `JavaEnable` UInt8, + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `BrowserLanguage` UInt16, + `BrowserCountry` UInt16, + 
`Interests` UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `Params` Array(String), + `Goals` Nested( + ID UInt32, + Serial UInt32, + EventTime DateTime, + Price Int64, + OrderID String, + CurrencyID UInt32), + `WatchIDs` Array(UInt64), + `ParamSumPrice` Int64, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `ClickLogID` UInt64, + `ClickEventID` Int32, + `ClickGoodEvent` Int32, + `ClickEventTime` DateTime, + `ClickPriorityID` Int32, + `ClickPhraseID` Int32, + `ClickPageID` Int32, + `ClickPlaceID` Int32, + `ClickTypeID` Int32, + `ClickResourceID` Int32, + `ClickCost` UInt32, + `ClickClientIP` UInt32, + `ClickDomainID` UInt32, + `ClickURL` String, + `ClickAttempt` UInt8, + `ClickOrderID` UInt32, + `ClickBannerID` UInt32, + `ClickMarketCategoryID` UInt32, + `ClickMarketPP` UInt32, + `ClickMarketCategoryName` String, + `ClickMarketPPName` String, + `ClickAWAPSCampaignName` String, + `ClickPageName` String, + `ClickTargetType` UInt16, + `ClickTargetPhraseID` UInt64, + `ClickContextType` UInt8, + `ClickSelectType` Int8, + `ClickOptions` String, + `ClickGroupBannerID` Int32, + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `FirstVisit` DateTime, + `PredLastVisit` Date, + `LastVisit` Date, + `TotalVisits` UInt32, + `TraficSource` Nested( + ID Int8, + SearchEngineID UInt16, + AdvEngineID UInt8, + PlaceID UInt16, + SocialSourceNetworkID UInt8, + Domain String, + SearchPhrase String, + SocialSourcePage String), + `Attendance` FixedString(16), + `CLID` UInt32, + `YCLID` UInt64, + `NormalizedRefererHash` UInt64, + `SearchPhraseHash` UInt64, + `RefererDomainHash` UInt64, + `NormalizedStartURLHash` UInt64, + `StartURLDomainHash` UInt64, + `NormalizedEndURLHash` UInt64, + `TopLevelDomain` UInt64, + `URLScheme` UInt64, + `OpenstatServiceNameHash` UInt64, + `OpenstatCampaignIDHash` UInt64, + `OpenstatAdIDHash` UInt64, + `OpenstatSourceIDHash` UInt64, + `UTMSourceHash` UInt64, + `UTMMediumHash` UInt64, + `UTMCampaignHash` UInt64, + `UTMContentHash` UInt64, + `UTMTermHash` UInt64, + `FromHash` UInt64, + `WebVisorEnabled` UInt8, + `WebVisorActivity` UInt32, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `Market` Nested( + Type UInt8, + GoalID UInt32, + OrderID String, + OrderPrice Int64, + PP UInt32, + DirectPlaceID UInt32, + DirectOrderID UInt32, + DirectBannerID UInt32, + GoodID String, + GoodName String, + GoodQuantity Int32, + GoodPrice Int64), + `IslandID` FixedString(16) +) +ENGINE = CollapsingMergeTree(Sign) +PARTITION BY toYYYYMM(StartDate) +ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) +SAMPLE BY intHash32(UserID) +``` + +You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want. + +As we can see, `hits_v1` uses the [basic MergeTree engine](../engines/table-engines/mergetree-family/mergetree.md), while the `visits_v1` uses the [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md) variant. + +### Import Data {#import-data} + +Data import to ClickHouse is done via [INSERT INTO](../sql-reference/statements/insert-into.md) query like in many other SQL databases. 
However, data is usually provided in one of the [supported serialization formats](../interfaces/formats.md) instead of `VALUES` clause (which is also supported). + +The files we downloaded earlier are in tab-separated format, so here’s how to import them via console client: + +``` bash +clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv +clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv +``` + +ClickHouse has a lot of [settings to tune](../operations/settings/index.md) and one way to specify them in console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what do they mean and what the defaults are is to query the `system.settings` table: + +``` sql +SELECT name, value, changed, description +FROM system.settings +WHERE name LIKE '%max_insert_b%' +FORMAT TSV + +max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." +``` + +Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: + +``` bash +clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" +clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" +``` + +These queries start an I/O and CPU intensive operation, so if the table consistently receives new data, it’s better to leave it alone and let merges run in the background. + +Now we can check if the table import was successful: + +``` bash +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1" +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1" +``` + +## Example Queries {#example-queries} + +``` sql +SELECT + StartURL AS URL, + AVG(Duration) AS AvgDuration +FROM tutorial.visits_v1 +WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30' +GROUP BY URL +ORDER BY AvgDuration DESC +LIMIT 10 +``` + +``` sql +SELECT + sum(Sign) AS visits, + sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits, + (100. * goal_visits) / visits AS goal_percent +FROM tutorial.visits_v1 +WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') +``` + +## Cluster Deployment {#cluster-deployment} + +ClickHouse cluster is a homogenous cluster. Steps to set up: + +1. Install ClickHouse server on all machines of the cluster +2. Set up cluster configs in configuration files +3. Create local tables on each instance +4. Create a [Distributed table](../engines/table-engines/special/distributed.md) + +[Distributed table](../engines/table-engines/special/distributed.md) is actually a kind of “view” to local tables of ClickHouse cluster. SELECT query from a distributed table executes using resources of all cluster’s shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters. 
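+
+Once a cluster is declared in the server configuration (see the example config below), one way to confirm that ClickHouse picked it up is to query the `system.clusters` system table; a minimal check along these lines (using the cluster name from the examples in this section) might be:
+
+``` sql
+SELECT cluster, shard_num, replica_num, host_name, port
+FROM system.clusters
+WHERE cluster = 'perftest_3shards_1replicas';
+```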
+
+Example config for a cluster with three shards, one replica each:
+
+``` xml
+<remote_servers>
+    <perftest_3shards_1replicas>
+        <shard>
+            <replica>
+                <host>example-perftest01j</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <replica>
+                <host>example-perftest02j</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <replica>
+                <host>example-perftest03j</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </perftest_3shards_1replicas>
+</remote_servers>
+```
+
+For further demonstration, let’s create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but a different table name:
+
+``` sql
+CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
+```
+
+Creating a distributed table that provides a view into the local tables of the cluster:
+
+``` sql
+CREATE TABLE tutorial.hits_all AS tutorial.hits_local
+ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
+```
+
+A common practice is to create similar Distributed tables on all machines of the cluster. This allows running distributed queries on any machine of the cluster. There is also an alternative option: create a temporary distributed table for a given SELECT query using the [remote](../sql-reference/table-functions/remote.md) table function.
+
+Let’s run [INSERT SELECT](../sql-reference/statements/insert-into.md) into the Distributed table to spread the table to multiple servers.
+
+``` sql
+INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
+```
+
+!!! warning "Notice"
+    This approach is not suitable for the sharding of large tables. There is a separate tool, [clickhouse-copier](../operations/utilities/clickhouse-copier.md), that can re-shard arbitrarily large tables.
+
+As you would expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one.
+
+In this case, we have used a cluster with 3 shards, each containing a single replica.
+
+To provide resilience in a production environment, we recommend that each shard contain 2-3 replicas spread between multiple availability zones or datacenters (or at least racks). Note that ClickHouse supports an unlimited number of replicas.
+
+Example config for a cluster of one shard containing three replicas:
+
+``` xml
+<remote_servers>
+    ...
+    <perftest_1shards_3replicas>
+        <shard>
+            <replica>
+                <host>example-perftest01j</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>example-perftest02j</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>example-perftest03j</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </perftest_1shards_3replicas>
+</remote_servers>
+```
+
+To enable native replication, [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs the restore procedure after a failure automatically. It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes, including ClickHouse, are running).
+
+!!! note "Note"
+    ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended: in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application.
+
+ZooKeeper locations are specified in the configuration file:
+
+``` xml
+<zookeeper>
+    <node>
+        <host>zoo01</host>
+        <port>2181</port>
+    </node>
+    <node>
+        <host>zoo02</host>
+        <port>2181</port>
+    </node>
+    <node>
+        <host>zoo03</host>
+        <port>2181</port>
+    </node>
+</zookeeper>
+```
+
+Also, we need to set macros that identify each shard and replica; they are used on table creation:
+
+``` xml
+<macros>
+    <shard>01</shard>
+    <replica>01</replica>
+</macros>
+```
+
+If there are no replicas at the moment of replicated table creation, the first replica is instantiated. If there are already live replicas, the new replica clones data from the existing ones. You can create all replicated tables first and then insert data into them. Another option is to create some replicas and add the others after or during data insertion.
+
+``` sql
+CREATE TABLE tutorial.hits_replica (...)
+ENGINE = ReplicatedMergeTree( + '/clickhouse_perftest/tables/{shard}/hits', + '{replica}' +) +... +``` + +Here we use [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) table engine. In parameters we specify ZooKeeper path containing shard and replica identifiers. + +``` sql +INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; +``` + +Replication operates in multi-master mode. Data can be loaded into any replica, and the system then syncs it with other instances automatically. Replication is asynchronous so at a given moment, not all replicas may contain recently inserted data. At least one replica should be up to allow data ingestion. Others will sync up data and repair consistency once they will become active again. Note that this approach allows for the low possibility of a loss of recently inserted data. + +[Original article](https://clickhouse.com/docs/en/getting_started/tutorial/) diff --git a/docs/en/guides/apply-catboost-model.md b/docs/en/guides/apply-catboost-model.md new file mode 100644 index 00000000000..859703a31df --- /dev/null +++ b/docs/en/guides/apply-catboost-model.md @@ -0,0 +1,242 @@ +--- +toc_priority: 41 +toc_title: Applying CatBoost Models +--- + +# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} + +[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at Yandex for machine learning. + +With this instruction, you will learn to apply pre-trained models in ClickHouse by running model inference from SQL. + +To apply a CatBoost model in ClickHouse: + +1. [Create a Table](#create-table). +2. [Insert the Data to the Table](#insert-data-to-table). +3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (Optional step). +4. [Run the Model Inference from SQL](#run-model-inference). + +For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training). + +You can reload CatBoost models if the configuration was updated without restarting the server using [RELOAD MODEL](../sql-reference/statements/system.md#query_language-system-reload-model) and [RELOAD MODELS](../sql-reference/statements/system.md#query_language-system-reload-models) system queries. + +## Prerequisites {#prerequisites} + +If you do not have the [Docker](https://docs.docker.com/install/) yet, install it. + +!!! note "Note" + [Docker](https://www.docker.com) is a software platform that allows you to create containers that isolate a CatBoost and ClickHouse installation from the rest of the system. + +Before applying a CatBoost model: + +**1.** Pull the [Docker image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry: + +``` bash +$ docker pull yandex/tutorial-catboost-clickhouse +``` + +This Docker image contains everything you need to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files. + +**2.** Make sure the Docker image has been successfully pulled: + +``` bash +$ docker image ls +REPOSITORY TAG IMAGE ID CREATED SIZE +yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB +``` + +**3.** Start a Docker container based on this image: + +``` bash +$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse +``` + +## 1. 
Create a Table {#create-table} + +To create a ClickHouse table for the training sample: + +**1.** Start ClickHouse console client in the interactive mode: + +``` bash +$ clickhouse client +``` + +!!! note "Note" + The ClickHouse server is already running inside the Docker container. + +**2.** Create the table using the command: + +``` sql +:) CREATE TABLE amazon_train +( + date Date MATERIALIZED today(), + ACTION UInt8, + RESOURCE UInt32, + MGR_ID UInt32, + ROLE_ROLLUP_1 UInt32, + ROLE_ROLLUP_2 UInt32, + ROLE_DEPTNAME UInt32, + ROLE_TITLE UInt32, + ROLE_FAMILY_DESC UInt32, + ROLE_FAMILY UInt32, + ROLE_CODE UInt32 +) +ENGINE = MergeTree ORDER BY date +``` + +**3.** Exit from ClickHouse console client: + +``` sql +:) exit +``` + +## 2. Insert the Data to the Table {#insert-data-to-table} + +To insert the data: + +**1.** Run the following command: + +``` bash +$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv +``` + +**2.** Start ClickHouse console client in the interactive mode: + +``` bash +$ clickhouse client +``` + +**3.** Make sure the data has been uploaded: + +``` sql +:) SELECT count() FROM amazon_train + +SELECT count() +FROM amazon_train + ++-count()-+ +| 65538 | ++-------+ +``` + +## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse} + +!!! note "Note" + **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse. + +To integrate CatBoost into ClickHouse: + +**1.** Build the evaluation library. + +The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.` library. For more information about how to build the library, see [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). + +**2.** Create a new directory anywhere and with any name, for example, `data` and put the created library in it. The Docker image already contains the library `data/libcatboostmodel.so`. + +**3.** Create a new directory for config model anywhere and with any name, for example, `models`. + +**4.** Create a model configuration file with any name, for example, `models/amazon_model.xml`. + +**5.** Describe the model configuration: + +``` xml + + + + catboost + + amazon + + /home/catboost/tutorial/catboost_model.bin + + 0 + + +``` + +**6.** Add the path to CatBoost and the model configuration to the ClickHouse configuration: + +``` xml + +/home/catboost/data/libcatboostmodel.so +/home/catboost/models/*_model.xml +``` + +!!! note "Note" + You can change path to the CatBoost model configuration later without restarting server. + +## 4. Run the Model Inference from SQL {#run-model-inference} + +For test model run the ClickHouse client `$ clickhouse client`. + +Let’s make sure that the model is working: + +``` sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) > 0 AS prediction, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Note" + Function [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) returns tuple with per-class raw predictions for multiclass models. + +Let’s predict the probability: + +``` sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. 
/ (1 + exp(-prediction)) AS probability, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Note" + More info about [exp()](../sql-reference/functions/math-functions.md) function. + +Let’s calculate LogLoss on the sample: + +``` sql +:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss +FROM +( + SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. / (1. + exp(-prediction)) AS prob, + ACTION AS tg + FROM amazon_train +) +``` + +!!! note "Note" + More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions. + +[Original article](https://clickhouse.com/docs/en/guides/apply_catboost_model/) diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md new file mode 100644 index 00000000000..eb4ca9af367 --- /dev/null +++ b/docs/en/guides/index.md @@ -0,0 +1,14 @@ +--- +toc_folder_title: Guides +toc_priority: 38 +toc_title: Overview +--- + +# ClickHouse Guides {#clickhouse-guides} + +List of detailed step-by-step instructions that help to solve various tasks using ClickHouse: + +- [Tutorial on simple cluster set-up](../getting-started/tutorial.md) +- [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md) + +[Original article](https://clickhouse.com/docs/en/guides/) diff --git a/docs/en/index.md b/docs/en/index.md new file mode 100644 index 00000000000..532be035bbc --- /dev/null +++ b/docs/en/index.md @@ -0,0 +1,95 @@ +--- +toc_priority: 0 +toc_title: Overview +--- + +# What Is ClickHouse? {#what-is-clickhouse} + +ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP). + +In a “normal” row-oriented DBMS, data is stored in this order: + +| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | +|-----|-------------|------------|--------------------|-----------|---------------------| +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | … | … | … | … | … | + +In other words, all the values related to a row are physically stored next to each other. + +Examples of a row-oriented DBMS are MySQL, Postgres, and MS SQL Server. + +In a column-oriented DBMS, data is stored like this: + +| Row: | #0 | #1 | #2 | #N | +|-------------|---------------------|---------------------|---------------------|-----| +| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | +| JavaEnable: | 1 | 0 | 1 | … | +| Title: | Investor Relations | Contact us | Mission | … | +| GoodEvent: | 1 | 1 | 1 | … | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | + +These examples only show the order that data is arranged in. The values from different columns are stored separately, and data from the same column is stored together. + +Examples of a column-oriented DBMS: Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, and kdb+. + +Different orders for storing data are better suited to different scenarios. 
The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on. + +The higher the load on the system, the more important it is to customize the system set up to match the requirements of the usage scenario, and the more fine grained this customization becomes. There is no system that is equally well-suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, under a high load, the system will handle all the scenarios equally poorly, or will work well for just one or few of possible scenarios. + +## Key Properties of OLAP Scenario {#key-properties-of-olap-scenario} + +- The vast majority of requests are for read access. +- Data is updated in fairly large batches (\> 1000 rows), not by single rows; or it is not updated at all. +- Data is added to the DB but is not modified. +- For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. +- Tables are “wide,” meaning they contain a large number of columns. +- Queries are relatively rare (usually hundreds of queries per server or less per second). +- For simple queries, latencies around 50 ms are allowed. +- Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). +- Requires high throughput when processing a single query (up to billions of rows per second per server). +- Transactions are not necessary. +- Low requirements for data consistency. +- There is one large table per query. All tables are small, except for one. +- A query result is significantly smaller than the source data. In other words, data is filtered or aggregated, so the result fits in a single server’s RAM. + +It is easy to see that the OLAP scenario is very different from other popular scenarios (such as OLTP or Key-Value access). So it does not make sense to try to use OLTP or a Key-Value DB for processing analytical queries if you want to get decent performance. For example, if you try to use MongoDB or Redis for analytics, you will get very poor performance compared to OLAP databases. + +## Why Column-Oriented Databases Work Better in the OLAP Scenario {#why-column-oriented-databases-work-better-in-the-olap-scenario} + +Column-oriented databases are better suited to OLAP scenarios: they are at least 100 times faster in processing most queries. The reasons are explained in detail below, but the fact is easier to demonstrate visually: + +**Row-oriented DBMS** + +![Row-oriented](images/row-oriented.gif#) + +**Column-oriented DBMS** + +![Column-oriented](images/column-oriented.gif#) + +See the difference? + +### Input/output {#inputoutput} + +1. For an analytical query, only a small number of table columns need to be read. In a column-oriented database, you can read just the data you need. For example, if you need 5 columns out of 100, you can expect a 20-fold reduction in I/O. +2. Since data is read in packets, it is easier to compress. Data in columns is also easier to compress. This further reduces the I/O volume. +3. Due to the reduced I/O, more data fits in the system cache. 
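+
+As a concrete sketch, the query discussed in the next paragraph (“count the number of records for each advertising platform”) needs to read only one small column; in SQL it might look like this (the `hits` table and `AdvEngineID` column are borrowed from the tutorial dataset purely for illustration):
+
+``` sql
+SELECT AdvEngineID, count()
+FROM hits
+GROUP BY AdvEngineID
+```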
+ +For example, the query “count the number of records for each advertising platform” requires reading one “advertising platform ID” column, which takes up 1 byte uncompressed. If most of the traffic was not from advertising platforms, you can expect at least 10-fold compression of this column. When using a quick compression algorithm, data decompression is possible at a speed of at least several gigabytes of uncompressed data per second. In other words, this query can be processed at a speed of approximately several billion rows per second on a single server. This speed is actually achieved in practice. + +### CPU {#cpu} + +Since executing a query requires processing a large number of rows, it helps to dispatch all operations for entire vectors instead of for separate rows, or to implement the query engine so that there is almost no dispatching cost. If you do not do this, with any half-decent disk subsystem, the query interpreter inevitably stalls the CPU. It makes sense to both store data in columns and process it, when possible, by columns. + +There are two ways to do this: + +1. A vector engine. All operations are written for vectors, instead of for separate values. This means you do not need to call operations very often, and dispatching costs are negligible. Operation code contains an optimized internal cycle. + +2. Code generation. The code generated for the query has all the indirect calls in it. + +This is not done in “normal” databases, because it does not make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) + +Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. + +{## [Original article](https://clickhouse.com/docs/en/) ##} diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 9ef1cea280a..eaf7a96ce42 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -1,13 +1,13 @@ --- -sidebar_position: 17 -sidebar_label: Command-Line Client +toc_priority: 17 +toc_title: Command-Line Client --- # Command-line Client {#command-line-client} ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration). -[Install](../../quick-start.mdx) it from the `clickhouse-client` package and run it with the command `clickhouse-client`. +[Install](../getting-started/index.md) it from the `clickhouse-client` package and run it with the command `clickhouse-client`. 
``` bash $ clickhouse-client diff --git a/docs/en/interfaces/cpp.md b/docs/en/interfaces/cpp.md index a7b4188799e..dcd1228ea0f 100644 --- a/docs/en/interfaces/cpp.md +++ b/docs/en/interfaces/cpp.md @@ -1,6 +1,6 @@ --- -sidebar_position: 24 -sidebar_label: C++ Client Library +toc_priority: 24 +toc_title: C++ Client Library --- # C++ Client Library {#c-client-library} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 801b7c1a14f..a7066fca087 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1,6 +1,6 @@ --- -sidebar_position: 21 -sidebar_label: Input and Output Formats +toc_priority: 21 +toc_title: Input and Output Formats --- # Formats for Input and Output Data {#formats} @@ -51,6 +51,7 @@ The supported formats are: | [PrettySpace](#prettyspace) | ✗ | ✔ | | [Protobuf](#protobuf) | ✔ | ✔ | | [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [ProtobufList](#protobuflist) | ✔ | ✔ | | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | @@ -64,7 +65,7 @@ The supported formats are: | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | | [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✗ | +| [LineAsString](#lineasstring) | ✔ | ✔ | | [Regexp](#data-format-regexp) | ✔ | ✗ | | [RawBLOB](#rawblob) | ✔ | ✔ | | [MsgPack](#msgpack) | ✔ | ✔ | @@ -401,7 +402,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, strings are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv @@ -409,7 +410,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR \*By default, the delimiter is `,`. 
See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information. -When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. +When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Strings can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing strings without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled, empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too. @@ -764,9 +765,8 @@ CREATE TABLE IF NOT EXISTS example_table - If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). - If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. -:::warning -When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. -::: +!!! note "Warning" + When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. ### Selecting Data {#selecting-data} @@ -788,9 +788,8 @@ The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 sequences. Values are escaped in the same way as for `JSON`. -:::info -Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. -::: +!!! note "Note" + Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. ### Usage of Nested Structures {#jsoneachrow-nested} @@ -1232,7 +1231,38 @@ See also [how to read/write length-delimited protobuf messages in popular langua ## ProtobufSingle {#protobufsingle} -Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. +Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without length delimiter. +As a result, only a single table row can be written/read. + +## ProtobufList {#protobuflist} + +Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with fixed name "Envelope". 
+ +Usage example: + +``` sql +SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType' +``` + +``` bash +cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'" +``` + +where the file `schemafile.proto` looks like this: + +``` capnp +syntax = "proto3"; + +message Envelope { + message MessageType { + string name = 1; + string surname = 2; + uint32 birthDate = 3; + repeated string phoneNumbers = 4; + }; + MessageType row = 1; +}; +``` ## Avro {#data-format-avro} @@ -1342,9 +1372,8 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -:::warning -Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. -::: +!!! note "Warning" + Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ## Parquet {#data-format-parquet} @@ -1367,7 +1396,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1424,7 +1454,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1486,7 +1517,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | 
[String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index 6ada38c6220..b30715082ec 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -1,6 +1,6 @@ --- -sidebar_position: 19 -sidebar_label: gRPC Interface +toc_priority: 19 +toc_title: gRPC Interface --- # gRPC Interface {#grpc-interface} diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index a97cf6671b2..d72fb4d6f17 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -1,6 +1,6 @@ --- -sidebar_position: 19 -sidebar_label: HTTP Interface +toc_priority: 19 +toc_title: HTTP Interface --- # HTTP Interface {#http-interface} @@ -178,9 +178,8 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. -:::info -Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. -::: +!!! note "Note" + Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. **Examples** @@ -440,9 +439,8 @@ Next are the configuration methods for different `type`. The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. -:::warning -To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. -::: +!!! note "Warning" + To keep the default `handlers` such as` query`, `play`,` ping`, use the `` rule. Example: @@ -471,9 +469,8 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost: max_final_threads 2 ``` -:::warning -In one `predefined_query_handler` only supports one `query` of an insert type. -::: +!!! note "caution" + In one `predefined_query_handler` only supports one `query` of an insert type. 
### dynamic_query_handler {#dynamic_query_handler} diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 16e97ed7c62..e747b93a1a6 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -1,8 +1,7 @@ --- -sidebar_label: Interfaces -sidebar_position: 34 -keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, client, jdbc, odbc, driver] -description: ClickHouse provides three network interfaces +toc_folder_title: Interfaces +toc_priority: 14 +toc_title: Introduction --- # Interfaces {#interfaces} diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md index 4bea0600a2a..cf97568a8de 100644 --- a/docs/en/interfaces/jdbc.md +++ b/docs/en/interfaces/jdbc.md @@ -1,12 +1,11 @@ --- -sidebar_position: 22 -sidebar_label: JDBC Driver +toc_priority: 22 +toc_title: JDBC Driver --- # JDBC Driver {#jdbc-driver} -Use the [official JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc) (and Java client) to access ClickHouse from your Java applications. - +- **[Official driver](https://github.com/ClickHouse/clickhouse-jdbc)** - Third-party drivers: - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index df8ef38d671..9932e6b6cb3 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 20 -sidebar_label: MySQL Interface +toc_priority: 20 +toc_title: MySQL Interface --- # MySQL Interface {#mysql-interface} diff --git a/docs/en/interfaces/odbc.md b/docs/en/interfaces/odbc.md index 4c807654c28..fa58ed8b43e 100644 --- a/docs/en/interfaces/odbc.md +++ b/docs/en/interfaces/odbc.md @@ -1,12 +1,10 @@ --- -sidebar_position: 23 -sidebar_label: ODBC Driver +toc_priority: 23 +toc_title: ODBC Driver --- # ODBC Driver {#odbc-driver} -Use the [official ODBC driver](https://github.com/ClickHouse/clickhouse-odbc) for accessing ClickHouse as a data source. - - +- [Official driver](https://github.com/ClickHouse/clickhouse-odbc) [Original article](https://clickhouse.com/docs/en/interfaces/odbc/) diff --git a/docs/en/interfaces/tcp.md b/docs/en/interfaces/tcp.md index 5f2f400799f..b23f8110320 100644 --- a/docs/en/interfaces/tcp.md +++ b/docs/en/interfaces/tcp.md @@ -1,6 +1,6 @@ --- -sidebar_position: 18 -sidebar_label: Native Interface (TCP) +toc_priority: 18 +toc_title: Native Interface (TCP) --- # Native Interface (TCP) {#native-interface-tcp} diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 885e9f430f2..8d1ff12cf0a 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -1,13 +1,12 @@ --- -sidebar_position: 26 -sidebar_label: Client Libraries +toc_priority: 26 +toc_title: Client Libraries --- # Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers} -:::warning -ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. -::: +!!! warning "Disclaimer" + ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. 
- Python - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 92d00f2812c..c0e270b7207 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -1,6 +1,6 @@ --- -sidebar_position: 28 -sidebar_label: Visual Interfaces +toc_priority: 28 +toc_title: Visual Interfaces --- # Visual Interfaces from Third-party Developers {#visual-interfaces-from-third-party-developers} diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index c9be2b6ada9..caf100681b4 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: Third-Party -sidebar_position: 24 +toc_priority: 24 --- # Third-Party Interfaces {#third-party-interfaces} @@ -12,6 +12,5 @@ This is a collection of links to third-party tools that provide some sort of int - [GUI](../../interfaces/third-party/gui.md) - [Proxies](../../interfaces/third-party/proxy.md) -:::note -Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. -::: \ No newline at end of file +!!! note "Note" + Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index ae055d63a9d..3aac78f0878 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -1,13 +1,12 @@ --- -sidebar_position: 27 -sidebar_label: Integrations +toc_priority: 27 +toc_title: Integrations --- # Integration Libraries from Third-party Developers {#integration-libraries-from-third-party-developers} -:::warning Disclaimer -ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. -::: +!!! warning "Disclaimer" + ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. ## Infrastructure Products {#infrastructure-products} diff --git a/docs/en/interfaces/third-party/proxy.md b/docs/en/interfaces/third-party/proxy.md index 45077cb6a89..31a2d5afae9 100644 --- a/docs/en/interfaces/third-party/proxy.md +++ b/docs/en/interfaces/third-party/proxy.md @@ -1,6 +1,6 @@ --- -sidebar_position: 29 -sidebar_label: Proxies +toc_priority: 29 +toc_title: Proxies --- # Proxy Servers from Third-party Developers {#proxy-servers-from-third-party-developers} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md new file mode 100644 index 00000000000..ad199ce452e --- /dev/null +++ b/docs/en/introduction/adopters.md @@ -0,0 +1,200 @@ +--- +toc_priority: 8 +toc_title: Adopters +--- + +# ClickHouse Adopters {#clickhouse-adopters} + +!!! warning "Disclaimer" + The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. 
We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. + +| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | +|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------| +| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Adapty | Subscription Analytics | Main product | — | — | [Tweet, November 2021](https://twitter.com/iwitaly/status/1462698148061659139) | +| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | +| AdScribe | Ads | TV Analytics | — | — | [A quote from CTO](https://altinity.com/24x7-support/) | +| Ahrefs | SEO | Analytics | — | — | [Job listing](https://ahrefs.com/jobs/data-scientist-search) | +| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | +| Alibaba Cloud | Cloud | E-MapReduce | — | — | [Official Website](https://help.aliyun.com/document_detail/212195.html) | +| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.com/meetup22/aloha.pdf) | +| Altinity | Cloud, SaaS | Main product | — | — | [Official Website](https://altinity.com/) | +| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | +| ApiRoad | API marketplace | Analytics | — | — | [Blog post, November 2018, March 2020](https://pixeljets.com/blog/clickhouse-vs-elasticsearch/) | +| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | +| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | +| Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) | +| Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | +| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | +| BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | +| Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 
2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | +| CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | +| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | +| Checkly | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/tim_nolet/status/1445810665743081474?s=20) | +| ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | +| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | +| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | +| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | +| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| Cryptology | Digital Assets Trading Platform | — | — | — | [Job advertisement, March 2021](https://career.habr.com/companies/cryptology/vacancies) | +| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | +| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deepl | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | 
+| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | +| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | +| eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| EventBunker.io | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) | +| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | +| Firebolt | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) | +| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | +| FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | +| Futurra Group | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) | +| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | +| Gigapipe | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) | +| Gigasheet | Analytics | Main product | — | — | Direct Reference, February 2022| +| Glaber | Monitoring | Main product | — | — | [Website](https://glaber.io/) | +| GraphCDN | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) | +| Grouparoo | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) | +| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| Hydrolix | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) | +| Hystax | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | +| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infobaleen | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) | +| Infovista | Networks | Analytics | — | — | [Slides in English, October 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| Instabug | APM Platform | Main product | — | — | [A quote from Co-Founder](https://altinity.com/) | +| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | +| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | +| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | +| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| Jitsu | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) | +| JuiceFS | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) | +| kakaocorp | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) | +| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | +| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | +| KGK Global | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) | +| LANCOM Systems | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| Lever | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) | +| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| Lookforsale | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) | +| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| MAXILECT | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) | +| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | +| Mello | Marketing | Analytics | 1 server | — | [Article, October 
2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | +| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| Microsoft | Web Analytics | Clarity (Main Product) | — | — | [A question on GitHub](https://github.com/ClickHouse/ClickHouse/issues/21556) | +| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | +| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | +| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| Muse Group | Music Software | Performance Monitoring | — | — | [Blog post in Russian, January 2021](https://habr.com/en/post/647079/) | +| Netskope | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) | +| NIC Labs | Network Monitoring | RaTA-DNS | — | — | [Blog post, March 2021](https://niclabs.cl/ratadns/2021/03/Clickhouse) | +| NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) | +| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | +| Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) +| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | +| Nuna Inc. 
| Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | +| Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | +| Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | +| OneAPM | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| Opensee | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) | +| Open Targets | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) | +| OZON | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) | +| Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | +| PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) | +| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | +| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| PRANA | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) | +| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | +| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| R-Vision | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) | +| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | +| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Replica | Urban Planning | Analytics | — | — | [Job 
advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) | +| Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | +| Rollbar | Software Development | Main Product | — | — | [Official Website](https://www.rollbar.com) | +| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | +| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | +| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| Sber | Banking, Fintech, Retail, Cloud, Media | — | 128 servers | >1 PB | [Job advertisement, March 2021](https://career.habr.com/vacancies/1000073536) | +| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | +| sembot.io | Shopping Ads | — | — | — | A comment on LinkedIn, 2020 | +| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | +| SGK | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | +| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| Sipfront | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/andreasgranig/status/1446404332337913895?s=20) | +| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| Spark New Zealand | Telecommunications | Security Operations | — | — | [Blog Post, Feb 2020](https://blog.n0p.me/2020/02/2020-02-05-dnsmonster/) | +| Splitbee | Analytics | Main Product | — | — | [Blog Post, Mai 2021](https://splitbee.io/blog/new-pricing) | +| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | +| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | +| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | +| Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 
2022](https://github.com/ClickHouse/ClickHouse/pull/33573) | +| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | +| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | +| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | +| Tesla | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | +| Timeflow | Software | Analytics | — | — | [Blog](https://timeflow.systems/why-we-moved-from-druid-to-clickhouse/ ) | +| Tinybird | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) | +| Traffic Stars | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) | +| UseTech | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) | +| UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) | +| Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | +| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | +| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | +| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | +| Wildberries | E-commerce | | — | — | [Official website](https://it.wildberries.ru/) | +| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | +| Xenoss | Marketing, Advertising | — | — | — | [Instagram, March 2021](https://www.instagram.com/p/CNATV7qBgB1/) | +| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | +| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | +| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) 
| +| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | +| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) | +| | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) | +| Yotascale | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) | +| Your Analytics | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) | +| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | +| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | +| Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) | +| ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | +| ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | +| АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | +| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) | + +[Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/introduction/distinctive-features.md b/docs/en/introduction/distinctive-features.md new file mode 100644 index 00000000000..951a8a9d3e5 --- /dev/null +++ b/docs/en/introduction/distinctive-features.md @@ -0,0 +1,96 @@ +--- +toc_priority: 4 +toc_title: Distinctive Features +--- + +# Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse} + +## True Column-Oriented Database Management System {#true-column-oriented-dbms} + +In a real column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. For example, a billion UInt8-type values should consume around 1 GB uncompressed, or this strongly affects the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data. + +It is worth noting because there are systems that can store values of different columns separately, but that can’t effectively process analytical queries due to their optimization for other scenarios. Examples are HBase, BigTable, Cassandra, and HyperTable. You would get throughput around a hundred thousand rows per second in these systems, but not hundreds of millions of rows per second. 
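+
+A rough, hands-on way to check the "about 1 GB per billion UInt8 values" estimate above (a minimal sketch: the table name and row count are invented for this example, and exact figures vary slightly with version and settings):
+
+```sql
+-- Create a single-column UInt8 table and fill it with 100 million rows.
+CREATE TABLE t_uint8_check (v UInt8) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_uint8_check SELECT 1 FROM numbers(100000000);
+
+-- The uncompressed size should come out close to 100 MB (1 byte per value).
+SELECT sum(data_uncompressed_bytes)
+FROM system.parts
+WHERE table = 't_uint8_check' AND active;
+```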
+ +It’s also worth noting that ClickHouse is a database management system, not a single database. ClickHouse allows creating tables and databases in runtime, loading data, and running queries without reconfiguring and restarting the server. + +## Data Compression {#data-compression} + +Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance. + +In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create/table.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones. + +## Disk Storage of Data {#disk-storage-of-data} + +Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. + +ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. + +## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores} + +Large queries are parallelized naturally, taking all the necessary resources available on the current server. + +## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers} + +Almost none of the columnar DBMSs mentioned above have support for distributed query processing. + +In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user. + +## SQL Support {#sql-support} + +ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md). + +Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, [window functions](../sql-reference/window-functions/index.md) and scalar subqueries. + +Correlated (dependent) subqueries are not supported at the time of writing but might become available in the future. + +## Vector Computation Engine {#vector-engine} + +Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency. + +## Real-time Data Updates {#real-time-data-updates} + +ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested. + +## Primary Index {#primary-index} + +Having a data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. 
+ +## Secondary Indexes {#secondary-indexes} + +Unlike other database management systems, secondary indexes in ClickHouse does not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts wouldn’t match the query filtering conditions and do not read them at all, thus they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). + +## Suitable for Online Queries {#suitable-for-online-queries} + +Most OLAP database management systems do not aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even more which forces to prepare reports offline (in advance or by responding with “come back later”). + +In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. + +## Support for Approximated Calculations {#support-for-approximated-calculations} + +ClickHouse provides various ways to trade accuracy for performance: + +1. Aggregate functions for approximated calculation of the number of distinct values, medians, and quantiles. +2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. +3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. + +## Adaptive Join Algorithm {#adaptive-join-algorithm} + +ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if there’s more than one large table. + +## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} + +ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases. + +For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md). + +## Role-Based Access Control {#role-based-access-control} + +ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems. + +## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} + +1. No full-fledged transactions. +2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu). +3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys. 
+ +[Original article](https://clickhouse.com/docs/en/introduction/distinctive-features/) diff --git a/docs/en/introduction/history.md b/docs/en/introduction/history.md new file mode 100644 index 00000000000..d192eff80ea --- /dev/null +++ b/docs/en/introduction/history.md @@ -0,0 +1,54 @@ +--- +toc_priority: 7 +toc_title: History +--- + +# ClickHouse History {#clickhouse-history} + +ClickHouse has been developed initially to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be the core component of this system. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development. + +Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. Doing so often requires building complex aggregates, such as the number of unique users. New data for building a report arrives in real-time. + +As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events must be stored to build custom reports. A single query may require scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds. + +## Usage in Yandex.Metrica and Other Yandex Services {#usage-in-yandex-metrica-and-other-yandex-services} + +ClickHouse serves multiple purposes in Yandex.Metrica. +Its main task is to build reports in online mode using non-aggregated data. It uses a cluster of 374 servers, which store over 20.3 trillion rows in the database. The volume of compressed data is about 2 PB, without accounting for duplicates and replicas. The volume of uncompressed data (in TSV format) would be approximately 17 PB. + +ClickHouse also plays a key role in the following processes: + +- Storing data for Session Replay from Yandex.Metrica. +- Processing intermediate data. +- Building global reports with Analytics. +- Running queries for debugging the Yandex.Metrica engine. +- Analyzing logs from the API and the user interface. + +Nowadays, there are multiple dozen ClickHouse installations in other Yandex services and departments: search verticals, e-commerce, advertisement, business analytics, mobile development, personal services, and others. + +## Aggregated and Non-aggregated Data {#aggregated-and-non-aggregated-data} + +There is a widespread opinion that to calculate statistics effectively, you must aggregate data since this reduces the volume of data. + +But data aggregation comes with a lot of limitations: + +- You must have a pre-defined list of required reports. +- The user can’t make custom reports. +- When aggregating over a large number of distinct keys, the data volume is barely reduced, so aggregation is useless. +- For a large number of reports, there are too many aggregation variations (combinatorial explosion). +- When aggregating keys with high cardinality (such as URLs), the volume of data is not reduced by much (less than twofold). +- For this reason, the volume of data with aggregation might grow instead of shrink. +- Users do not view all the reports we generate for them. A large portion of those calculations is useless. +- The logical integrity of data may be violated for various aggregations. 
+ +If we do not aggregate anything and work with non-aggregated data, this might reduce the volume of calculations. + +However, with aggregation, a significant part of the work is taken offline and completed relatively calmly. In contrast, online calculations require calculating as fast as possible, since the user is waiting for the result. + +Yandex.Metrica has a specialized system for aggregating data called Metrage, which was used for the majority of reports. +Starting in 2009, Yandex.Metrica also used a specialized OLAP database for non-aggregated data called OLAPServer, which was previously used for the report builder. +OLAPServer worked well for non-aggregated data, but it had many restrictions that did not allow it to be used for all reports as desired. These included the lack of support for data types (only numbers), and the inability to incrementally update data in real-time (it could only be done by rewriting data daily). OLAPServer is not a DBMS, but a specialized DB. + +The initial goal for ClickHouse was to remove the limitations of OLAPServer and solve the problem of working with non-aggregated data for all reports, but over the years, it has grown into a general-purpose database management system suitable for a wide range of analytical tasks. + +[Original article](https://clickhouse.com/docs/en/introduction/history/) diff --git a/docs/en/introduction/index.md b/docs/en/introduction/index.md new file mode 100644 index 00000000000..ba80f9c2640 --- /dev/null +++ b/docs/en/introduction/index.md @@ -0,0 +1,6 @@ +--- +toc_folder_title: Introduction +toc_priority: 1 +--- + + diff --git a/docs/en/introduction/performance.md b/docs/en/introduction/performance.md new file mode 100644 index 00000000000..684b4ee4179 --- /dev/null +++ b/docs/en/introduction/performance.md @@ -0,0 +1,30 @@ +--- +toc_priority: 6 +toc_title: Performance +--- + +# Performance {#performance} + +ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). + +Numerous independent benchmarks came to similar conclusions. They are not difficult to find using an internet search, or you can see [our small collection of related links](https://clickhouse.com/#independent-benchmarks). + +## Throughput for a Single Large Query {#throughput-for-a-single-large-query} + +Throughput can be measured in rows per second or megabytes per second. If the data is placed in the page cache, a query that is not too complex is processed on modern hardware at a speed of approximately 2-10 GB/s of uncompressed data on a single server (for the most straightforward cases, the speed may reach 30 GB/s). If data is not placed in the page cache, the speed depends on the disk subsystem and the data compression rate. For example, if the disk subsystem allows reading data at 400 MB/s, and the data compression rate is 3, the speed is expected to be around 1.2 GB/s. To get the speed in rows per second, divide the speed in bytes per second by the total size of the columns used in the query. For example, if 10 bytes of columns are extracted, the speed is expected to be around 100-200 million rows per second. + +The processing speed increases almost linearly for distributed processing, but only if the number of rows resulting from aggregation or sorting is not too large. 
+ +## Latency When Processing Short Queries {#latency-when-processing-short-queries} + +If a query uses a primary key and does not select too many columns and rows to process (hundreds of thousands), you can expect less than 50 milliseconds of latency (single digits of milliseconds in the best case) if data is placed in the page cache. Otherwise, latency is mostly dominated by the number of seeks. If you use rotating disk drives, for a system that is not overloaded, the latency can be estimated with this formula: `seek time (10 ms) * count of columns queried * count of data parts`. + +## Throughput When Processing a Large Quantity of Short Queries {#throughput-when-processing-a-large-quantity-of-short-queries} + +Under the same conditions, ClickHouse can handle several hundred queries per second on a single server (up to several thousand in the best case). Since this scenario is not typical for analytical DBMSs, we recommend expecting a maximum of 100 queries per second. + +## Performance When Inserting Data {#performance-when-inserting-data} + +We recommend inserting data in packets of at least 1000 rows, or no more than a single request per second. When inserting to a MergeTree table from a tab-separated dump, the insertion speed can be from 50 to 200 MB/s. If the inserted rows are around 1 KB in size, the speed will be from 50,000 to 200,000 rows per second. If the rows are small, the performance can be higher in rows per second (on Banner System data -`>` 500,000 rows per second; on Graphite data -`>` 1,000,000 rows per second). To improve performance, you can make multiple INSERT queries in parallel, which scales linearly. + +{## [Original article](https://clickhouse.com/docs/en/introduction/performance/) ##} diff --git a/docs/en/operations/_category_.yml b/docs/en/operations/_category_.yml deleted file mode 100644 index 9d6dd1247db..00000000000 --- a/docs/en/operations/_category_.yml +++ /dev/null @@ -1,4 +0,0 @@ -position: 70 -label: 'Operations' -collapsible: true -collapsed: true diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 7d75c47df2b..52f7fb87ffd 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -1,6 +1,6 @@ --- -sidebar_position: 48 -sidebar_label: Access Control and Account Management +toc_priority: 48 +toc_title: Access Control and Account Management --- # Access Control and Account Management {#access-control} @@ -24,9 +24,8 @@ You can configure access entities using: We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow. -:::warning -You can’t manage the same access entity by both configuration methods simultaneously. -::: +!!! note "Warning" + You can’t manage the same access entity by both configuration methods simultaneously. To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement. @@ -102,9 +101,8 @@ Privileges can be granted to a role by the [GRANT](../sql-reference/statements/g Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. -:::warning -Row policies makes sense only for users with readonly access. 
If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. -::: +!!! note "Warning" + Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. Management queries: diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index c39658aa4b0..7f0ed48928a 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -1,6 +1,6 @@ --- -sidebar_position: 49 -sidebar_label: Data Backup +toc_priority: 49 +toc_title: Data Backup --- # Data Backup {#data-backup} @@ -11,9 +11,8 @@ In order to effectively mitigate possible human errors, you should carefully pre Each company has different resources available and business requirements, so there’s no universal solution for ClickHouse backups and restores that will fit every situation. What works for one gigabyte of data likely won’t work for tens of petabytes. There are a variety of possible approaches with their own pros and cons, which will be discussed below. It is a good idea to use several approaches instead of just one in order to compensate for their various shortcomings. -:::note -Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly. -::: +!!! note "Note" + Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly. ## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else} diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index f2427810184..9aa6419d89c 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -1,11 +1,11 @@ --- -sidebar_position: 65 -sidebar_label: Caches +toc_priority: 65 +toc_title: Caches --- # Cache Types {#cache-types} -When performing queries, ClichHouse uses different caches. +When performing queries, ClickHouse uses different caches. Main cache types: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 81547736441..a8ca2079070 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -1,15 +1,14 @@ --- -sidebar_position: 66 -sidebar_label: ClickHouse Keeper +toc_priority: 66 +toc_title: ClickHouse Keeper --- # [pre-production] ClickHouse Keeper {#clickHouse-keeper} ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. -:::warning -This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. -::: +!!! warning "Warning" + This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. 
## Implementation details {#implementation-details} @@ -19,9 +18,8 @@ By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (lineari ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. Digest authentication scheme uses pair `username:password`. Password is encoded in Base64. -:::note -External integrations are not supported. -::: +!!! info "Note" + External integrations are not supported. ## Configuration {#configuration} @@ -57,7 +55,7 @@ Internal coordination settings are located in `..` section and contain servers description. @@ -123,7 +121,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro". +The 4lw commands has a allow list configuration `four_letter_word_allow_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -203,7 +201,7 @@ Server stats reset. ``` server_id=1 tcp_port=2181 -four_letter_word_white_list=* +four_letter_word_allow_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 582e90544e0..cbc139dd958 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: Configuration Files +toc_priority: 50 +toc_title: Configuration Files --- # Configuration Files {#configuration_files} diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index af2ba713ec1..850b6594b71 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -1,9 +1,10 @@ --- -sidebar_position: 48 -sidebar_label: External User Authenticators and Directories +toc_folder_title: External User Authenticators and Directories +toc_priority: 48 +toc_title: Introduction --- -# External User Authenticators and Directories +# External User Authenticators and Directories {#external-authenticators} ClickHouse supports authenticating and managing users using external services. diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index 3711bac79c3..da84c1f6a89 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -51,13 +51,12 @@ With filtering by realm: ``` -:::warning -You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. -::: +!!! 
warning "Note" + You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. + +!!! warning "Note" + `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. -:::warning -`principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. -::: ## Kerberos as an external authenticator for existing users {#kerberos-as-an-external-authenticator-for-existing-users} @@ -95,13 +94,11 @@ Example (goes into `users.xml`): ``` -:::warning -Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. -::: +!!! warning "Warning" + Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. -:::info Reminder -Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously. -::: +!!! info "Reminder" + Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously. ### Enabling Kerberos using SQL {#enabling-kerberos-using-sql} diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md index 824e851e997..b78633f2d6b 100644 --- a/docs/en/operations/index.md +++ b/docs/en/operations/index.md @@ -1,6 +1,7 @@ --- -sidebar_position: 41 -sidebar_label: Operations +toc_folder_title: Operations +toc_priority: 41 +toc_title: Introduction --- # Operations {#operations} @@ -22,4 +23,4 @@ ClickHouse operations manual consists of the following major sections: - [Settings](../operations/settings/index.md) - [Utilities](../operations/utilities/index.md) -[Original article](https://clickhouse.com/docs/en/operations/) +{## [Original article](https://clickhouse.com/docs/en/operations/) ##} diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 437122e106d..ffcdae16c4d 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: Monitoring +toc_priority: 45 +toc_title: Monitoring --- # Monitoring {#monitoring} diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 52520ba76b7..ab972c72345 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -1,6 +1,6 @@ --- -sidebar_position: 69 -sidebar_label: "Named connections" +toc_priority: 69 +toc_title: "Named connections" --- # Storing details for connecting to external sources in configuration files {#named-collections} @@ -227,4 +227,4 @@ SELECT dictGet('dict', 'b', 2); ┌─dictGet('dict', 'b', 2)─┐ │ two │ └─────────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 740537d88bc..ec27ecfd6b2 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -1,15 +1,14 @@ --- -sidebar_position: 62 -sidebar_label: OpenTelemetry Support +toc_priority: 62 +toc_title: OpenTelemetry Support --- # 
[experimental] OpenTelemetry Support [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::warning -This is an experimental feature that will change in backwards-incompatible ways in future releases. -::: +!!! warning "Warning" + This is an experimental feature that will change in backwards-incompatible ways in future releases. ## Supplying Trace Context to ClickHouse diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md index ef9c6a4b664..142d3b2f976 100644 --- a/docs/en/operations/optimizing-performance/index.md +++ b/docs/en/operations/optimizing-performance/index.md @@ -1,6 +1,7 @@ --- -sidebar_label: Optimizing Performance -sidebar_position: 52 +toc_folder_title: Optimizing Performance +toc_hidden: true +toc_priority: 52 --- # Optimizing Performance {#optimizing-performance} diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 39e83545506..72cfa59b8b2 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -1,6 +1,6 @@ --- -sidebar_position: 54 -sidebar_label: Query Profiling +toc_priority: 54 +toc_title: Query Profiling --- # Sampling Query Profiler {#sampling-query-profiler} @@ -21,7 +21,7 @@ The default sampling frequency is one sample per second and both CPU and real ti To analyze the `trace_log` system table: -- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../install.md#install-from-deb-packages). +- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). - Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index 0ba3a9908a5..e410b1b2dfd 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -1,6 +1,6 @@ --- -sidebar_position: 54 -sidebar_label: Testing Hardware +toc_priority: 54 +toc_title: Testing Hardware --- # How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse} @@ -59,7 +59,7 @@ wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/cl chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql ``` -3. Download the [web analytics dataset](../example-datasets/metrica.md) (“hits” table containing 100 million rows). +3. Download the [web analytics dataset](../getting-started/example-datasets/metrica.md) (“hits” table containing 100 million rows). ```bash wget https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz tar xvf hits_100m_obfuscated_v1.tar.xz -C . 
diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index 77b0697d483..6c6fbbf9cfb 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -1,6 +1,6 @@ --- -sidebar_position: 51 -sidebar_label: Quotas +toc_priority: 51 +toc_title: Quotas --- # Quotas {#quotas} @@ -101,7 +101,7 @@ Quotas can use the “quota key” feature to report on resources for multiple k diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md index f76c3a3f731..65bf2ab83cb 100644 --- a/docs/en/sql-reference/functions/geo/index.md +++ b/docs/en/sql-reference/functions/geo/index.md @@ -1,8 +1,8 @@ --- -sidebar_label: Geo -sidebar_position: 62 +toc_title: hidden +toc_priority: 62 +toc_folder_title: Geo --- -# Geo Functions [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/) diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index c3d95d2f0a9..f8736bcc61a 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -1,5 +1,5 @@ --- -sidebar_label: S2 Geometry +toc_title: S2 Geometry --- # Functions for Working with S2 Index {#s2index} diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e4b1fdd3bbb..c892b814957 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: Hash +toc_priority: 50 +toc_title: Hash --- # Hash Functions {#hash-functions} diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index ab8ba93daba..c8936e74954 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 60 -sidebar_label: IN Operator +toc_priority: 60 +toc_title: IN Operator --- # Functions for Implementing the IN Operator {#functions-for-implementing-the-in-operator} diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 261cf908e07..7cceec889bd 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -1,9 +1,10 @@ --- -sidebar_position: 32 -sidebar_label: Functions +toc_folder_title: Functions +toc_priority: 32 +toc_title: Introduction --- -# Functions +# Functions {#functions} There are at least\* two types of functions - regular functions (they are just called “functions”) and aggregate functions. These are completely different concepts. Regular functions work as if they are applied to each row separately (for each row, the result of the function does not depend on the other rows). Aggregate functions accumulate a set of values from various rows (i.e. they depend on the entire set of rows). 
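As a quick, non-authoritative illustration of that difference (using only the built-in `numbers` table function and the `sum` aggregate, so nothing here assumes any particular schema):

``` sql
-- Regular function: evaluated independently for each row
SELECT number, number + 1 AS next_number FROM numbers(3);

-- Aggregate function: folds the whole set of rows into a single value
SELECT sum(number) FROM numbers(3);
```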
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 694d07f18dc..1be68c6bdd4 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -1,15 +1,14 @@ --- -sidebar_position: 65 -sidebar_label: Introspection +toc_priority: 65 +toc_title: Introspection --- # Introspection Functions {#introspection-functions} You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. -:::warning -These functions are slow and may impose security considerations. -::: +!!! warning "Warning" + These functions are slow and may impose security considerations. For proper operation of introspection functions: diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index c293c1ff317..cf3f92580aa 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 55 -sidebar_label: IP Addresses +toc_priority: 55 +toc_title: IP Addresses --- # Functions for Working with IPv4 and IPv6 Addresses {#functions-for-working-with-ip-addresses} @@ -13,10 +13,18 @@ Alias: `INET_NTOA`. ## IPv4StringToNum(s) {#ipv4stringtonums} -The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0. +The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it throws exception. Alias: `INET_ATON`. +## IPv4StringToNumOrDefault(s) {#ipv4stringtonums} + +Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns 0. + +## IPv4StringToNumOrNull(s) {#ipv4stringtonums} + +Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns null. + ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} Similar to IPv4NumToString, but using xxx instead of the last octet. @@ -123,7 +131,7 @@ LIMIT 10 ## IPv6StringToNum {#ipv6stringtonums} -The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it returns a string of null bytes. +The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it throws exception. If the input string contains a valid IPv4 address, returns its IPv6 equivalent. HEX can be uppercase or lowercase. @@ -168,6 +176,14 @@ Result: - [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4). +## IPv6StringToNumOrDefault(s) {#ipv6stringtonums} + +Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns 0. + +## IPv6StringToNumOrNull(s) {#ipv6stringtonums} + +Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns null. + ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -261,6 +277,14 @@ SELECT └───────────────────────────────────┴──────────────────────────┘ ``` +## toIPv4OrDefault(string) {#toipv4ordefaultstring} + +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. + +## toIPv4OrNull(string) {#toipv4ornullstring} + +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. 
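A small sketch of how these fallback variants behave; the literal inputs are arbitrary, and the exact textual rendering of the zero value may depend on the server version:

``` sql
SELECT
    toIPv4OrDefault('192.168.0.1') AS valid_address,
    toIPv4OrDefault('hello')       AS invalid_as_default,  -- falls back to the zero IPv4 value (0.0.0.0)
    toIPv4OrNull('hello')          AS invalid_as_null;     -- falls back to NULL
```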
+ ## toIPv6 {#toipv6string} Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. @@ -317,6 +341,14 @@ Result: └─────────────────────┘ ``` +## IPv6StringToNumOrDefault(s) {#toipv6ordefaultstring} + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0. + +## IPv6StringToNumOrNull(s) {#toipv6ornullstring} + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. + ## isIPv4String {#isipv4string} Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`. diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index be69b7b4f2b..8270864de74 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1,13 +1,11 @@ --- -sidebar_position: 56 -sidebar_label: JSON +toc_priority: 56 +toc_title: JSON --- # Functions for Working with JSON {#functions-for-working-with-json} -ClickHouse has special functions for working with this JSON. All the JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. - -The following assumptions are made: +ClickHouse has special functions for working with this JSON. The `visitParam` functions make strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: 1. The field name (function argument) must be a constant. 2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` @@ -359,9 +357,8 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); ``` -:::note -Before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json) -::: +!!! note "Note" + before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json) ## JSON_QUERY(json, path) {#json-query} @@ -386,9 +383,8 @@ Result: [2] String ``` -:::note -Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json) -::: +!!! note "Note" + before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json) ## JSON_VALUE(json, path) {#json-value} @@ -414,9 +410,8 @@ Result: String ``` -:::note -Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, json) -::: +!!! note "Note" + before version 21.11 the order of arguments was wrong, i.e. 
JSON_VALUE(path, json) ## toJSONString {#tojsonstring} diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 0055e253951..dcdb01e2059 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 37 -sidebar_label: Logical +toc_priority: 37 +toc_title: Logical --- # Logical Functions {#logical-functions} diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index 5b3e8b87e34..b823340058e 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 64 -sidebar_label: Machine Learning +toc_priority: 64 +toc_title: Machine Learning --- # Machine Learning Functions {#machine-learning-functions} diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 645587b4f5c..a5fc07cf687 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 44 -sidebar_label: Mathematical +toc_priority: 44 +toc_title: Mathematical --- # Mathematical Functions {#mathematical-functions} diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 5a00252f56c..8a1a44cf079 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -1,13 +1,12 @@ --- -sidebar_position: 67 -sidebar_label: NLP +toc_priority: 67 +toc_title: NLP --- # [experimental] Natural Language Processing functions {#nlp-functions} -:::warning -This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. -::: +!!! warning "Warning" + This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ## stem {#stem} diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 14cd7337d76..bce3f9144b1 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 67 -sidebar_label: Other +toc_priority: 67 +toc_title: Other --- # Other Functions {#other-functions} @@ -729,9 +729,8 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -:::warning -It can reach the neighbor rows only inside the currently processed data block. -::: +!!! warning "Warning" + It can reach the neighbor rows only inside the currently processed data block. The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. @@ -839,9 +838,8 @@ Result: Calculates the difference between successive row values ​​in the data block. 
Returns 0 for the first row and the difference from the previous row for each subsequent row. -:::warning -It can reach the previous row only inside the currently processed data block. -::: +!!! warning "Warning" + It can reach the previous row only inside the currently processed data block. The result of the function depends on the affected data blocks and the order of data in the block. @@ -923,9 +921,9 @@ Each event has a start time and an end time. The start time is included in the e The function calculates the total number of active (concurrent) events for each event start time. -:::warning -Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. -::: +!!! warning "Warning" + Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. + Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. **Syntax** @@ -1216,7 +1214,7 @@ SELECT * FROM table WHERE indexHint() **Example** -Here is the example of test data from the table [ontime](../../example-datasets/ontime.md). +Here is the example of test data from the table [ontime](../../getting-started/example-datasets/ontime.md). Input table: @@ -1611,9 +1609,8 @@ Result: Accumulates states of an aggregate function for each row of a data block. -:::warning -The state is reset for each new data block. -::: +!!! warning "Warning" + The state is reset for each new data block. **Syntax** @@ -2071,9 +2068,8 @@ Number of digits. Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). -:::note -For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). -::: + !!! note "Note" + For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 5e20a93da1f..aab9483de45 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -1,15 +1,14 @@ --- -sidebar_position: 51 -sidebar_label: Pseudo-Random Numbers +toc_priority: 51 +toc_title: Pseudo-Random Numbers --- # Functions for Generating Pseudo-Random Numbers {#functions-for-generating-pseudo-random-numbers} All the functions accept zero arguments or one argument. If an argument is passed, it can be any type, and its value is not used for anything. The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. -:::note -Non-cryptographic generators of pseudo-random numbers are used. -::: +!!! 
note "Note" + Non-cryptographic generators of pseudo-random numbers are used. ## rand, rand32 {#rand} diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index a469318e623..c9044c62ca4 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: Rounding +toc_priority: 45 +toc_title: Rounding --- # Rounding Functions {#rounding-functions} @@ -189,7 +189,7 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro ## roundDuration(num) {#rounddurationnum} -Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000. +Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000. This function was specifically implemented for a web analytics use case for reporting on session lengths. ## roundAge(num) {#roundagenum} diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 7e94c225f6b..7a4e04bbf6c 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 47 -sidebar_label: Splitting and Merging Strings and Arrays +toc_priority: 47 +toc_title: Splitting and Merging Strings and Arrays --- # Functions for Splitting and Merging Strings and Arrays {#functions-for-splitting-and-merging-strings-and-arrays} diff --git a/docs/en/sql-reference/functions/statistics.md b/docs/en/sql-reference/functions/statistics.md new file mode 100644 index 00000000000..3f337b05cbc --- /dev/null +++ b/docs/en/sql-reference/functions/statistics.md @@ -0,0 +1,48 @@ +--- +toc_priority: 69 +toc_title: Statistics +--- + +# Functions for Working with Statistics {#functions-for-working-with-statistics} + +# proportionsZTest {#proportionsztest} + +Applies proportion z-test to samples from two populations (X and Y). The alternative is 'two-sided'. + +**Syntax** + +``` sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, significance_level, usevar) +``` + +**Arguments** + +- `successes_x` — The number of successes for X in trials. +- `successes_y` — The number of successes for X in trials. +- `trials_x` — The number of trials for X. +- `trials_y` — The number of trials for Y. +- `significance_level` +- `usevar` - It can be `'pooled'` or `'unpooled'`. + - `'pooled'` - The variance of the two populations are assumed to be equal. + - `'unpooled'` - The assumption of equal variances is dropped. + +**Returned value** + +- A tuple with the (z-statistic, p-value, confidence-interval-lower, confidence-interval-upper). + +Type: [Tuple](../../sql-reference/data-types/tuple.md). 
+ +**Example** + +Query: + +``` sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +``` text +(-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) +``` + diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d63e466a836..a30cacde519 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1,13 +1,12 @@ --- -sidebar_position: 40 -sidebar_label: Strings +toc_priority: 40 +toc_title: Strings --- # Functions for Working with Strings {#functions-for-working-with-strings} -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. -::: +!!! note "Note" + Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. ## empty {#empty} diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 1df8bfd0c44..144b4fbc1da 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -1,13 +1,12 @@ --- -sidebar_position: 42 -sidebar_label: For Replacing in Strings +toc_priority: 42 +toc_title: For Replacing in Strings --- # Functions for Searching and Replacing in Strings {#functions-for-searching-and-replacing-in-strings} -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +!!! note "Note" + Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. ## replaceOne(haystack, pattern, replacement) {#replaceonehaystack-pattern-replacement} diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 985d9f1e63a..a0c0116a058 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,15 +1,14 @@ --- -sidebar_position: 41 -sidebar_label: For Searching in Strings +toc_priority: 41 +toc_title: For Searching in Strings --- # Functions for Searching in Strings {#functions-for-searching-strings} The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. -:::note -Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +!!! note "Note" + Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. ## position(haystack, needle), locate(haystack, needle) {#position} @@ -31,9 +30,8 @@ position(needle IN haystack) Alias: `locate(haystack, needle[, start_pos])`. 
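For orientation, a small sketch of the 1-based semantics (the string literals are arbitrary):

``` sql
SELECT position('Hello, world!', 'world');  -- 8: counting starts from 1
SELECT position('Hello, world!', 'xyz');    -- 0: substring not found
```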
-:::note -Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. -::: +!!! note "Note" + Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. **Arguments** @@ -344,9 +342,8 @@ Returns 1, if at least one string needlei matches the string `haystac For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. -:::note -In all `multiSearch*` functions the number of needles should be less than 28 because of implementation specification. -::: +!!! note "Note" + In all `multiSearch*` functions the number of needles should be less than 28 because of implementation specification. ## match(haystack, pattern) {#matchhaystack-pattern} @@ -361,9 +358,8 @@ For patterns to search for substrings in a string, it is better to use LIKE or The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) library. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster. -:::note -The length of any of the `haystack` string must be less than 232 bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. -::: +!!! note "Note" + The length of any of the `haystack` string must be less than 232 bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. ## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} @@ -385,13 +381,11 @@ The same as `multiFuzzyMatchAny`, but returns any index that matches the haystac The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance. -:::note -`multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction. -::: +!!! note "Note" + `multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction. -:::note -To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`. -::: +!!! note "Note" + To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`. ## extract(haystack, pattern) {#extracthaystack-pattern} @@ -405,9 +399,8 @@ Extracts all the fragments of a string using a regular expression. If ‘haystac Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc. -:::note -`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). -::: +!!! note "Note" + `extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). **Syntax** @@ -577,9 +570,8 @@ Same as `ngramDistance` but calculates the non-symmetric difference between `nee For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. -:::note -For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. 
We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters. -::: +!!! note "Note" + For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters. ## countSubstrings {#countSubstrings} diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index b45866cf931..2ea44a6e585 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 68 -sidebar_label: Time Window +toc_priority: 68 +toc_title: Time Window --- # Time Window Functions {#time-window-functions} diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index cfce02f4d31..96bceb8958c 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 66 -sidebar_label: Tuples +toc_priority: 66 +toc_title: Tuples --- # Functions for Working with Tuples {#tuple-functions} diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a0d62ff5ecb..8ead8c58c7a 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 46 -sidebar_label: Working with maps +toc_priority: 46 +toc_title: Working with maps --- # Functions for maps {#functions-for-working-with-tuple-maps} diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index de6ca769589..18cc3d98561 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: Type Conversion +toc_priority: 38 +toc_title: Type Conversion --- # Type Conversion Functions {#type-conversion-functions} @@ -689,9 +689,8 @@ x::t - Converted value. -:::note -If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. -::: +!!! note "Note" + If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. **Examples** @@ -1433,9 +1432,8 @@ Result: Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. -:::note -The output value is a timestamp in UTC, not in the timezone of `DateTime64`. -::: +!!! info "Note" + The output value is a timestamp in UTC, not in the timezone of `DateTime64`. 
**Syntax** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index c91029c4fce..5a305aa5033 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 54 -sidebar_label: URLs +toc_priority: 54 +toc_title: URLs --- # Functions for Working with URLs {#functions-for-working-with-urls} diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d23b505a93f..3616b587bf7 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 53 -sidebar_label: UUID +toc_priority: 53 +toc_title: UUID --- # Functions for Working with UUID {#functions-for-working-with-uuid} diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 85215957443..1e6c9cbd0b4 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,6 +1,6 @@ --- -sidebar_position: 59 -sidebar_label: Embedded Dictionaries +toc_priority: 59 +toc_title: Embedded Dictionaries --- # Functions for Working with Embedded Dictionaries diff --git a/docs/en/sql-reference/index.md b/docs/en/sql-reference/index.md index 1123c8533a9..e8fe092e622 100644 --- a/docs/en/sql-reference/index.md +++ b/docs/en/sql-reference/index.md @@ -1,6 +1,8 @@ --- -sidebar_position: 28 -sidebar_label: SQL Reference +toc_folder_title: SQL Reference +toc_hidden: true +toc_priority: 28 +toc_title: hidden --- # SQL Reference {#sql-reference} diff --git a/docs/en/sql-reference/operators/exists.md b/docs/en/sql-reference/operators/exists.md index 25413790801..ee0c7317637 100644 --- a/docs/en/sql-reference/operators/exists.md +++ b/docs/en/sql-reference/operators/exists.md @@ -4,9 +4,8 @@ The `EXISTS` operator checks how many records are in the result of a subquery. I `EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. -:::warning -References to main query tables and columns are not supported in a subquery. -::: +!!! warning "Warning" + References to main query tables and columns are not supported in a subquery. **Syntax** diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 5dda097e799..d8468370f3e 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -119,9 +119,8 @@ FROM t_null There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. -:::note -Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. -::: +!!! attention "Attention" + Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. When using the regular IN, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. 
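A sketch of the two forms; `distributed_table` and `local_table` are placeholder names for a Distributed table and the local table it points to:

``` sql
-- Regular IN: every remote server runs the subquery over its own local data
SELECT uniq(UserID) FROM distributed_table
WHERE UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34);

-- GLOBAL IN: the subquery runs once on the initiator and its result set is sent to the remote servers
SELECT uniq(UserID) FROM distributed_table
WHERE UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34);
```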
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 4761f46ec05..a64dcd70c6c 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: Operators +toc_priority: 38 +toc_title: Operators --- # Operators {#operators} @@ -210,9 +210,8 @@ Types of intervals: You can also use a string literal when setting the `INTERVAL` value. For example, `INTERVAL 1 HOUR` is identical to the `INTERVAL '1 hour'` or `INTERVAL '1' hour`. -:::warning -Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. -::: +!!! warning "Warning" + Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. Examples: @@ -248,9 +247,9 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV You can work with dates without using `INTERVAL`, just by adding or subtracting seconds, minutes, and hours. For example, an interval of one day can be set by adding `60*60*24`. -:::note -The `INTERVAL` syntax or `addDays` function are always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings. For example, daylight saving time. -::: +!!! note "Note" + The `INTERVAL` syntax or `addDays` function are always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings. For example, daylight saving time. + Examples: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 3d22146a56b..6bb63ea06a6 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -1,6 +1,6 @@ --- -sidebar_position: 37 -sidebar_label: COLUMN +toc_priority: 37 +toc_title: COLUMN --- # Column Manipulations {#column-manipulations} @@ -75,9 +75,8 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -:::warning -You can’t delete a column if it is referenced by [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, it returns an error. -::: +!!! warning "Warning" + You can’t delete a column if it is referenced by [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, it returns an error. 
Example: diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index af57adcf31c..67a17fc8974 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -1,6 +1,6 @@ --- -sidebar_position: 51 -sidebar_label: COMMENT +toc_priority: 51 +toc_title: COMMENT --- # ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index c9517981ae7..8f4ce57b905 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -1,6 +1,6 @@ --- -sidebar_position: 43 -sidebar_label: CONSTRAINT +toc_priority: 43 +toc_title: CONSTRAINT --- # Manipulating Constraints {#manipulations-with-constraints} @@ -16,8 +16,7 @@ See more on [constraints](../../../sql-reference/statements/create/table.md#cons Queries will add or remove metadata about constraints from table so they are processed immediately. -:::warning -Constraint check **will not be executed** on existing data if it was added. -::: +!!! warning "Warning" + Constraint check **will not be executed** on existing data if it was added. All changes on replicated tables are broadcasted to ZooKeeper and will be applied on other replicas as well. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 21ae091f9e7..6c638c0a3ac 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -1,6 +1,6 @@ --- -sidebar_position: 39 -sidebar_label: DELETE +toc_priority: 39 +toc_title: DELETE --- # ALTER TABLE … DELETE Statement {#alter-mutations} @@ -11,9 +11,8 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). -:::note -The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. -::: +!!! note "Note" + The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value. 
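A minimal sketch; the table, column and cluster names are hypothetical:

``` sql
-- Removes all rows for which the filter expression is non-zero
ALTER TABLE visits DELETE WHERE duration < 5;

-- The same mutation, scheduled on every replica of a cluster
ALTER TABLE visits ON CLUSTER my_cluster DELETE WHERE duration < 5;
```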
diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 536da948218..0d5909518ed 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -1,9 +1,9 @@ --- -sidebar_position: 35 -sidebar_label: ALTER +toc_priority: 35 +toc_title: ALTER --- -# ALTER +## ALTER {#query_language_queries_alter} Most `ALTER TABLE` queries modify table settings or data: @@ -16,9 +16,8 @@ Most `ALTER TABLE` queries modify table settings or data: - [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) - [TTL](../../../sql-reference/statements/alter/ttl.md) -:::note -Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md). -::: +!!! note "Note" + Most `ALTER TABLE` queries are supported only for [\*MergeTree](../../../engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](../../../engines/table-engines/special/merge.md) and [Distributed](../../../engines/table-engines/special/distributed.md). These `ALTER` statements manipulate views: @@ -55,8 +54,7 @@ For all `ALTER` queries, you can use the [replication_alter_partitions_sync](../ You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -:::note -For all `ALTER` queries, if `replication_alter_partitions_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. -::: +!!! info "Note" + For all `ALTER` queries, if `replication_alter_partitions_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](../../../operations/settings/settings.md#mutations_sync) setting. diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/index/index.md index 92f55792a70..4e2943d37f3 100644 --- a/docs/en/sql-reference/statements/alter/index/index.md +++ b/docs/en/sql-reference/statements/alter/index/index.md @@ -1,7 +1,7 @@ --- toc_hidden_folder: true -sidebar_position: 42 -sidebar_label: INDEX +toc_priority: 42 +toc_title: INDEX --- # Manipulating Data Skipping Indices {#manipulations-with-data-skipping-indices} @@ -18,6 +18,5 @@ The first two commands are lightweight in a sense that they only change metadata Also, they are replicated, syncing indices metadata via ZooKeeper. -:::note -Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). -::: \ No newline at end of file +!!! note "Note" + Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). 
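A sketch of the index manipulation commands against a hypothetical MergeTree table and column:

``` sql
ALTER TABLE hits ADD INDEX browser_idx UserAgent TYPE bloom_filter GRANULARITY 4;

-- Builds the index for parts written before the index existed
ALTER TABLE hits MATERIALIZE INDEX browser_idx;

ALTER TABLE hits DROP INDEX browser_idx;
```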
diff --git a/docs/en/sql-reference/statements/alter/order-by.md b/docs/en/sql-reference/statements/alter/order-by.md index 84d29ae8e11..16f9ace206d 100644 --- a/docs/en/sql-reference/statements/alter/order-by.md +++ b/docs/en/sql-reference/statements/alter/order-by.md @@ -1,6 +1,6 @@ --- -sidebar_position: 41 -sidebar_label: ORDER BY +toc_priority: 41 +toc_title: ORDER BY --- # Manipulating Key Expressions {#manipulations-with-key-expressions} @@ -13,6 +13,5 @@ The command changes the [sorting key](../../../engines/table-engines/mergetree-f The command is lightweight in a sense that it only changes metadata. To keep the property that data part rows are ordered by the sorting key expression you cannot add expressions containing existing columns to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query, without default column value). -:::note -It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). -::: \ No newline at end of file +!!! note "Note" + It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 453d1bd7bf6..12737624ecb 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: PARTITION +toc_priority: 38 +toc_title: PARTITION --- # Manipulating Partitions and Parts {#alter_manipulations-with-partitions} @@ -160,9 +160,8 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once. -:::note -The entire backup process is performed without stopping the server. -::: +!!! note "Note" + The entire backup process is performed without stopping the server. Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). @@ -172,9 +171,8 @@ At the time of execution, for a data snapshot, the query creates hardlinks to a - `N` is the incremental number of the backup. - if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. -:::note -If you use [a set of disks for data storage in a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. -::: +!!! note "Note" + If you use [a set of disks for data storage in a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. The same structure of directories is created inside the backup as inside `/var/lib/clickhouse/`. 
The query performs `chmod` for all files, forbidding writing into them. diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 5ccf33d2d2f..c7ebc83c496 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -1,6 +1,6 @@ --- -sidebar_position: 49 -sidebar_label: PROJECTION +toc_priority: 49 +toc_title: PROJECTION --- # Manipulating Projections {#manipulations-with-projections} @@ -20,6 +20,5 @@ The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only Also, they are replicated, syncing projections metadata via ZooKeeper. -:::note -Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). -::: \ No newline at end of file +!!! note "Note" + Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 2398a57502c..05130a569ab 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -1,6 +1,6 @@ --- -sidebar_position: 46 -sidebar_label: QUOTA +toc_priority: 46 +toc_title: QUOTA --- # ALTER QUOTA {#alter-quota-statement} diff --git a/docs/en/sql-reference/statements/alter/role.md b/docs/en/sql-reference/statements/alter/role.md index d3cb28a1705..ea6d3c61820 100644 --- a/docs/en/sql-reference/statements/alter/role.md +++ b/docs/en/sql-reference/statements/alter/role.md @@ -1,6 +1,6 @@ --- -sidebar_position: 46 -sidebar_label: ROLE +toc_priority: 46 +toc_title: ROLE --- ## ALTER ROLE {#alter-role-statement} diff --git a/docs/en/sql-reference/statements/alter/row-policy.md b/docs/en/sql-reference/statements/alter/row-policy.md index 47207d29287..bbf9f317737 100644 --- a/docs/en/sql-reference/statements/alter/row-policy.md +++ b/docs/en/sql-reference/statements/alter/row-policy.md @@ -1,6 +1,6 @@ --- -sidebar_position: 47 -sidebar_label: ROW POLICY +toc_priority: 47 +toc_title: ROW POLICY --- # ALTER ROW POLICY {#alter-row-policy-statement} diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index 08e4fe1066b..21b20be8b78 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -1,6 +1,6 @@ --- -sidebar_position: 41 -sidebar_label: SAMPLE BY +toc_priority: 41 +toc_title: SAMPLE BY --- # Manipulating Sampling-Key Expressions {#manipulations-with-sampling-key-expressions} @@ -15,6 +15,5 @@ The command changes the [sampling key](../../../engines/table-engines/mergetree- The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. -:::note -It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). -::: \ No newline at end of file +!!! 
note "Note" + It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). diff --git a/docs/en/sql-reference/statements/alter/setting.md b/docs/en/sql-reference/statements/alter/setting.md index bb361e2ee6f..90747bc1919 100644 --- a/docs/en/sql-reference/statements/alter/setting.md +++ b/docs/en/sql-reference/statements/alter/setting.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: SETTING +toc_priority: 38 +toc_title: SETTING --- # Table Settings Manipulations {#table_settings_manipulations} @@ -14,9 +14,9 @@ If a setting with the specified name does not exist, then the query raises an ex ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ... ``` -:::note -These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only. -::: +!!! note "Note" + These queries can be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables only. + ## MODIFY SETTING {#alter_modify_setting} diff --git a/docs/en/sql-reference/statements/alter/settings-profile.md b/docs/en/sql-reference/statements/alter/settings-profile.md index b1728f21c08..57d12142c48 100644 --- a/docs/en/sql-reference/statements/alter/settings-profile.md +++ b/docs/en/sql-reference/statements/alter/settings-profile.md @@ -1,6 +1,6 @@ --- -sidebar_position: 48 -sidebar_label: SETTINGS PROFILE +toc_priority: 48 +toc_title: SETTINGS PROFILE --- ## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} diff --git a/docs/en/sql-reference/statements/alter/ttl.md b/docs/en/sql-reference/statements/alter/ttl.md index f2cf8724197..9cd63d3b8fe 100644 --- a/docs/en/sql-reference/statements/alter/ttl.md +++ b/docs/en/sql-reference/statements/alter/ttl.md @@ -1,6 +1,6 @@ --- -sidebar_position: 44 -sidebar_label: TTL +toc_priority: 44 +toc_title: TTL --- # Manipulations with Table TTL {#manipulations-with-table-ttl} diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index aeff7cfa1b2..13ea1b2a8db 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: UPDATE +toc_priority: 40 +toc_title: UPDATE --- # ALTER TABLE … UPDATE Statements {#alter-table-update-statements} @@ -11,9 +11,8 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). -:::note -The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. -::: +!!! note "Note" + The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. The `filter_expr` must be of type `UInt8`. This query updates values of specified columns to the values of corresponding expressions in rows for which the `filter_expr` takes a non-zero value. Values are casted to the column type using the `CAST` operator. 
Updating columns that are used in the calculation of the primary or the partition key is not supported. diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index f9b90349dab..4873982e2a1 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: USER +toc_priority: 45 +toc_title: USER --- # ALTER USER {#alter-user-statement} diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 71e89aaefe8..0fb1c4be0ff 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: VIEW +toc_priority: 50 +toc_title: VIEW --- # ALTER TABLE … MODIFY QUERY Statement {#alter-modify-query} diff --git a/docs/en/sql-reference/statements/attach.md b/docs/en/sql-reference/statements/attach.md index bc7b2be333f..2949ac6db38 100644 --- a/docs/en/sql-reference/statements/attach.md +++ b/docs/en/sql-reference/statements/attach.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: ATTACH +toc_priority: 40 +toc_title: ATTACH --- # ATTACH Statement {#attach} diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 1164a8b8be6..c9ad40860f7 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,6 +1,6 @@ --- -sidebar_position: 41 -sidebar_label: CHECK +toc_priority: 41 +toc_title: CHECK --- # CHECK TABLE Statement {#check-table} diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 18ed94bef79..787bbc02346 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -1,6 +1,6 @@ --- -sidebar_position: 35 -sidebar_label: DATABASE +toc_priority: 35 +toc_title: DATABASE --- # CREATE DATABASE {#query-language-create-database} diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index 246625cc901..86ab8f977b0 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: DICTIONARY +toc_priority: 38 +toc_title: DICTIONARY --- # CREATE DICTIONARY {#create-dictionary-query} diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index a87d3d70e54..ddfcdfef521 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: FUNCTION +toc_priority: 38 +toc_title: FUNCTION --- # CREATE FUNCTION {#create-function} diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 666a2c66d2f..3df62869e2b 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -1,9 +1,10 @@ --- -sidebar_position: 34 -sidebar_label: CREATE +toc_folder_title: CREATE +toc_priority: 34 +toc_title: Overview --- -# CREATE Queries +# CREATE Queries {#create-queries} Create queries make a new entity of one of the following kinds: diff --git 
a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 931da165a73..767846ead52 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -1,6 +1,6 @@ --- -sidebar_position: 42 -sidebar_label: QUOTA +toc_priority: 42 +toc_title: QUOTA --- # CREATE QUOTA {#create-quota-statement} diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 5f7db960f27..e0e58f7a0f6 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: ROLE +toc_priority: 40 +toc_title: ROLE --- # CREATE ROLE {#create-role-statement} diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 58b7b1e2cb9..3f88d794619 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -1,15 +1,14 @@ --- -sidebar_position: 41 -sidebar_label: ROW POLICY +toc_priority: 41 +toc_title: ROW POLICY --- # CREATE ROW POLICY {#create-row-policy-statement} Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. -:::warning -Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. -::: +!!! note "Warning" + Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. Syntax: @@ -31,17 +30,16 @@ In the section `TO` you can provide a list of users and roles this policy should Keyword `ALL` means all the ClickHouse users including current user. Keyword `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` -:::note -If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy +!!! note "Note" + If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy -`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` + `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` -forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all. + forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all. 
-If that's not desirable it can't be fixed by adding one more row policy, like the following: + If that's not desirable it can't be fixed by adding one more row policy, like the following: -`CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` -::: + `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` ## AS Clause {#create-row-policy-as} diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 0cc633d9770..07bb54c9da3 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -1,6 +1,6 @@ --- -sidebar_position: 43 -sidebar_label: SETTINGS PROFILE +toc_priority: 43 +toc_title: SETTINGS PROFILE --- # CREATE SETTINGS PROFILE {#create-settings-profile-statement} diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index c477e41ba02..ee663c92695 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -1,6 +1,6 @@ --- -sidebar_position: 36 -sidebar_label: TABLE +toc_priority: 36 +toc_title: TABLE --- # CREATE TABLE {#create-table-query} @@ -159,9 +159,8 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::warning -You can't combine both ways in one query. -::: +!!! warning "Warning" + You can't combine both ways in one query. ## Constraints {#constraints} @@ -215,9 +214,8 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -:::warning -You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. -::: +!!! warning "Warning" + You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. Compression is supported for the following table engines: @@ -239,7 +237,7 @@ Codecs: High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage. -### Specialized Codecs {#specialized-codecs} +### Specialized Codecs {#create-query-specialized-codecs} These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. @@ -273,13 +271,11 @@ Encryption codecs: These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). -:::warning -Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. -::: +!!! 
attention "Attention" + Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. -:::warning -If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. -::: +!!! attention "Attention" + If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. **Example** @@ -291,9 +287,8 @@ CREATE TABLE mytable ENGINE = MergeTree ORDER BY x; ``` -:::note -If compression needs to be applied, it must be explicitly specified. Otherwise, only encryption will be applied to data. -::: +!!!note "Note" + If compression needs to be applied, it must be explicitly specified. Otherwise, only encryption will be applied to data. **Example** @@ -335,9 +330,8 @@ It’s possible to use tables with [ENGINE = Memory](../../../engines/table-engi 'REPLACE' query allows you to update the table atomically. -:::note -This query is supported only for [Atomic](../../../engines/database-engines/atomic.md) database engine. -::: +!!!note "Note" + This query is supported only for [Atomic](../../../engines/database-engines/atomic.md) database engine. If you need to delete some data from a table, you can create a new table and fill it with a `SELECT` statement that does not retrieve unwanted data, then drop the old table and rename the new one: @@ -411,9 +405,8 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. -:::note -The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). -::: +!!!note "Note" + The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). **Syntax** diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 0aad0961a8b..5dfcf891439 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -1,6 +1,6 @@ --- -sidebar_position: 39 -sidebar_label: USER +toc_priority: 39 +toc_title: USER --- # CREATE USER {#create-user-statement} @@ -52,9 +52,9 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::warning -ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. -::: +!!! info "Warning" + ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. 
+ ## GRANTEES Clause {#grantees} diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index e31d1b4473f..f7d3a6d697a 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -1,6 +1,6 @@ --- -sidebar_position: 37 -sidebar_label: VIEW +toc_priority: 37 +toc_title: VIEW --- # CREATE VIEW {#create-view} @@ -49,11 +49,10 @@ When creating a materialized view with `TO [db].[table]`, you must not use `POPU A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view. -:::note -Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. +!!! important "Important" + Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. -Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. -::: + Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. @@ -69,9 +68,10 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop ## Live View [Experimental] {#live-view} -:::note -This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. -::: +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. + ```sql CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... 
@@ -83,15 +83,14 @@ Live views are triggered by insert into the innermost table specified in the que Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. -:::info -- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. -- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. -- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. -- Does not work with replicated or distributed tables where inserts are performed on different nodes. -- Can't be triggered by multiple tables. +!!! info "Limitations" + - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. + - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. + - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. + - Does not work with replicated or distributed tables where inserts are performed on different nodes. + - Can't be triggered by multiple tables. -See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. -::: + See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. ### Monitoring Live View Changes {#live-view-monitoring} @@ -247,9 +246,9 @@ Most common uses of live view tables include: ## Window View [Experimental] {#window-view} -:::info -This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. -::: +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. ``` sql CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... 
GROUP BY time_window_function diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index 7fbe5bd2790..823a31ed313 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -1,6 +1,6 @@ --- -sidebar_position: 42 -sidebar_label: DESCRIBE +toc_priority: 42 +toc_title: DESCRIBE --- # DESCRIBE TABLE {#misc-describe-table} diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index bf20f7b3461..b77bcbc00fb 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -1,6 +1,6 @@ --- -sidebar_position: 43 -sidebar_label: DETACH +toc_priority: 43 +toc_title: DETACH --- # DETACH Statement {#detach} diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 0d3e1f7860d..552a7b5f1a9 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -1,6 +1,6 @@ --- -sidebar_position: 44 -sidebar_label: DROP +toc_priority: 44 +toc_title: DROP --- # DROP Statements {#drop} diff --git a/docs/en/sql-reference/statements/exchange.md b/docs/en/sql-reference/statements/exchange.md index abe3d40950e..91b0c48ddcf 100644 --- a/docs/en/sql-reference/statements/exchange.md +++ b/docs/en/sql-reference/statements/exchange.md @@ -1,6 +1,6 @@ --- -sidebar_position: 49 -sidebar_label: EXCHANGE +toc_priority: 49 +toc_title: EXCHANGE --- # EXCHANGE Statement {#exchange} @@ -8,9 +8,8 @@ sidebar_label: EXCHANGE Exchanges the names of two tables or dictionaries atomically. This task can also be accomplished with a [RENAME](./rename.md) query using a temporary name, but the operation is not atomic in that case. -:::note -The `EXCHANGE` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. -::: +!!! note "Note" + The `EXCHANGE` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. **Syntax** diff --git a/docs/en/sql-reference/statements/exists.md b/docs/en/sql-reference/statements/exists.md index 7c6cc812665..b7c4a487791 100644 --- a/docs/en/sql-reference/statements/exists.md +++ b/docs/en/sql-reference/statements/exists.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: EXISTS +toc_priority: 45 +toc_title: EXISTS --- # EXISTS Statement {#exists-statement} diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 80f8961a3e9..9c74c069f02 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -1,6 +1,6 @@ --- -sidebar_position: 39 -sidebar_label: EXPLAIN +toc_priority: 39 +toc_title: EXPLAIN --- # EXPLAIN Statement {#explain} @@ -138,9 +138,8 @@ Union ReadFromStorage (SystemNumbers) ``` -:::note -Step and query cost estimation is not supported. -::: +!!! note "Note" + Step and query cost estimation is not supported. When `json = 1`, the query plan is represented in JSON format. Every node is a dictionary that always has the keys `Node Type` and `Plans`. `Node Type` is a string with a step name. `Plans` is an array with child step descriptions. Other optional keys may be added depending on node type and settings. 
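A quick sketch of the JSON form (the query below is arbitrary and the output is abbreviated to the keys mentioned above):

```sql
EXPLAIN json = 1, description = 0 SELECT number FROM numbers(3) FORMAT TSVRaw;
-- Returns something like:
-- [ { "Plan": { "Node Type": "Expression", "Plans": [ { "Node Type": "ReadFromStorage", ... } ] } } ]
```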
@@ -447,8 +446,8 @@ Result: └─────────────────────────────────────────────────────────┘ ``` -:::note -The validation is not complete, so a successfull query does not guarantee that the override would not cause issues. -::: +!!! note "Note" + The validation is not complete, so a successfull query does not guarantee that the override would + not cause issues. [Оriginal article](https://clickhouse.com/docs/en/sql-reference/statements/explain/) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 1ee330061b5..1b2b63ba0e7 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: GRANT +toc_priority: 38 +toc_title: GRANT --- # GRANT Statement {#grant} diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index ab51cbb330c..a317e4a47de 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,9 +1,10 @@ --- -sidebar_position: 31 -sidebar_label: Statements +toc_folder_title: Statements +toc_hidden: true +toc_priority: 31 --- -# ClickHouse SQL Statements +# ClickHouse SQL Statements {#clickhouse-sql-statements} Statements represent various kinds of action you can perform using SQL queries. Each kind of statement has it’s own syntax and usage details that are described separately: diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 17d6ce1809b..f8eefad7051 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -1,9 +1,9 @@ --- -sidebar_position: 33 -sidebar_label: INSERT INTO +toc_priority: 33 +toc_title: INSERT INTO --- -# INSERT INTO Statement +## INSERT INTO Statement {#insert} Inserts data into a table. diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 9fe207f24b2..eab6f602c4a 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -1,6 +1,6 @@ --- -sidebar_position: 46 -sidebar_label: KILL +toc_priority: 46 +toc_title: KILL --- # KILL Statements {#kill-statements} diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md index 2751c5296c2..c553ef37f8d 100644 --- a/docs/en/sql-reference/statements/misc.md +++ b/docs/en/sql-reference/statements/misc.md @@ -1,6 +1,6 @@ --- toc_hidden: true -sidebar_position: 70 +toc_priority: 41 --- # Miscellaneous Statements {#miscellaneous-queries} diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 773284a1b30..30899cc2940 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -1,15 +1,14 @@ --- -sidebar_position: 47 -sidebar_label: OPTIMIZE +toc_priority: 47 +toc_title: OPTIMIZE --- # OPTIMIZE Statement {#misc_operations-optimize} This query tries to initialize an unscheduled merge of data parts for tables. -:::warning -`OPTIMIZE` can’t fix the `Too many parts` error. -::: +!!! warning "Warning" + `OPTIMIZE` can’t fix the `Too many parts` error. 
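As a quick illustration (the table name is hypothetical), the basic form that asks for an unscheduled merge looks as follows; the full syntax is given below.

```sql
OPTIMIZE TABLE visits_v1 FINAL;
```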
**Syntax** @@ -28,19 +27,16 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -:::note -If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. -::: +!!! info "Note" + If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ## BY expression {#by-expression} If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key). -:::note -Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion. - -Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an `ALIAS` column. -::: +!!! note "Note" + Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion. + Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an `ALIAS` column. **Syntax** diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index b3bea3e3c37..c2192f1a6e1 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -1,6 +1,6 @@ --- -sidebar_position: 48 -sidebar_label: RENAME +toc_priority: 48 +toc_title: RENAME --- # RENAME Statement {#misc_operations-rename} @@ -8,9 +8,8 @@ sidebar_label: RENAME Renames databases, tables, or dictionaries. Several entities can be renamed in a single query. Note that the `RENAME` query with several entities is non-atomic operation. To swap entities names atomically, use the [EXCHANGE](./exchange.md) statement. -:::note -The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. -::: +!!! note "Note" + The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. 
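For instance (hypothetical names), several renames can be combined in one statement, keeping in mind that the combination as a whole is not atomic:

```sql
RENAME TABLE db1.events TO db1.events_old, db1.events_staging TO db1.events;
```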
**Syntax** diff --git a/docs/en/sql-reference/statements/revoke.md b/docs/en/sql-reference/statements/revoke.md index 4ffa8a21027..75005260c4a 100644 --- a/docs/en/sql-reference/statements/revoke.md +++ b/docs/en/sql-reference/statements/revoke.md @@ -1,6 +1,6 @@ --- -sidebar_position: 39 -sidebar_label: REVOKE +toc_priority: 39 +toc_title: REVOKE --- # REVOKE Statement {#revoke} diff --git a/docs/en/sql-reference/statements/select/all.md b/docs/en/sql-reference/statements/select/all.md index 6b35678fd92..ba66f63b447 100644 --- a/docs/en/sql-reference/statements/select/all.md +++ b/docs/en/sql-reference/statements/select/all.md @@ -1,5 +1,5 @@ --- -sidebar_label: ALL +toc_title: ALL --- # ALL Clause {#select-all} diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index f7fc08ae9ba..f138bcc45c7 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -1,5 +1,5 @@ --- -sidebar_label: ARRAY JOIN +toc_title: ARRAY JOIN --- # ARRAY JOIN Clause {#select-array-join-clause} diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md index 898de4730ae..390afa46248 100644 --- a/docs/en/sql-reference/statements/select/distinct.md +++ b/docs/en/sql-reference/statements/select/distinct.md @@ -1,5 +1,5 @@ --- -sidebar_label: DISTINCT +toc_title: DISTINCT --- # DISTINCT Clause {#select-distinct} diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md index dcaefd67ca9..e6d9b365a91 100644 --- a/docs/en/sql-reference/statements/select/except.md +++ b/docs/en/sql-reference/statements/select/except.md @@ -1,5 +1,5 @@ --- -sidebar_label: EXCEPT +toc_title: EXCEPT --- # EXCEPT Clause {#except-clause} diff --git a/docs/en/sql-reference/statements/select/format.md b/docs/en/sql-reference/statements/select/format.md index a7936509ad5..c3104bd12fe 100644 --- a/docs/en/sql-reference/statements/select/format.md +++ b/docs/en/sql-reference/statements/select/format.md @@ -1,5 +1,5 @@ --- -sidebar_label: FORMAT +toc_title: FORMAT --- # FORMAT Clause {#format-clause} diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 9d5147db13c..df30a0fb0d2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -1,5 +1,5 @@ --- -sidebar_label: FROM +toc_title: FROM --- # FROM Clause {#select-from} diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index b08647271f1..969a39ce51f 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -1,5 +1,5 @@ --- -sidebar_label: GROUP BY +toc_title: GROUP BY --- # GROUP BY Clause {#select-group-by-clause} @@ -12,9 +12,8 @@ sidebar_label: GROUP BY When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). -:::note -There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. -::: +!!! 
note "Note" + There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. ## NULL Processing {#null-processing} @@ -56,9 +55,8 @@ The subtotals are calculated in the reverse order: at first subtotals are calcul In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line. -:::note -Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. -::: +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. **Example** @@ -116,9 +114,8 @@ As `GROUP BY` section has three key expressions, the result contains four tables In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. -:::note -Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. -::: +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. **Example** @@ -209,9 +206,8 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma - In `Pretty*` formats, the row is output as a separate table after the main result. - In the other formats it is not available. -:::note -totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. -::: +!!! note "Note" + totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. `WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting. diff --git a/docs/en/sql-reference/statements/select/having.md b/docs/en/sql-reference/statements/select/having.md index 9aee0cf4d63..93d56097b11 100644 --- a/docs/en/sql-reference/statements/select/having.md +++ b/docs/en/sql-reference/statements/select/having.md @@ -1,5 +1,5 @@ --- -sidebar_label: HAVING +toc_title: HAVING --- # HAVING Clause {#having-clause} diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 50dd8fecf3a..33644133153 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -1,9 +1,11 @@ --- -sidebar_position: 32 -sidebar_label: SELECT +title: SELECT Query +toc_folder_title: SELECT +toc_priority: 32 +toc_title: Overview --- -# SELECT Query +# SELECT Query {#select-queries-syntax} `SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. 
diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index ef9868daebb..2243a35e4d8 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -1,5 +1,5 @@ --- -sidebar_label: INTERSECT +toc_title: INTERSECT --- # INTERSECT Clause {#intersect-clause} diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index b37285cb0cc..b949b9c83c0 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -1,8 +1,8 @@ --- -sidebar_label: INTO OUTFILE +toc_title: INTO OUTFILE --- -# INTO OUTFILE Clause +# INTO OUTFILE Clause {#into-outfile-clause} `INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 0cf58d0b90f..3d302be561a 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -1,5 +1,5 @@ --- -sidebar_label: JOIN +toc_title: JOIN --- # JOIN Clause {#select-join} @@ -36,9 +36,8 @@ Additional join types available in ClickHouse: - `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. -:::note -When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). -::: +!!! note "Note" + When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). ## Settings {#join-settings} @@ -64,9 +63,8 @@ Rows are joined if the whole complex condition is met. If the conditions are not The `OR` operator inside the `ON` clause works using the hash join algorithm — for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with an increase in the number of expressions `OR` of the `ON` clause. -:::note -If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far. -::: +!!! note "Note" + If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far. **Example** @@ -199,9 +197,8 @@ For example, consider the following tables: `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined. 
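A sketch of such a query, reusing the column names from the example above:

```sql
SELECT *
FROM table_1 ASOF JOIN table_2
    ON table_1.user_id = table_2.user_id
   AND table_1.ev_time >= table_2.ev_time;
```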
-:::note -`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. -::: +!!! note "Note" + `ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ## Distributed JOIN {#global-join} diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 913b7b40338..68b459a46e8 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -1,5 +1,5 @@ --- -sidebar_label: LIMIT BY +toc_title: LIMIT BY --- # LIMIT BY Clause {#limit-by-clause} @@ -13,9 +13,8 @@ ClickHouse supports the following syntax variants: During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. -:::note -`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. -::: +!!! note "Note" + `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md index 6b1c90041fe..6ed38b2dd64 100644 --- a/docs/en/sql-reference/statements/select/limit.md +++ b/docs/en/sql-reference/statements/select/limit.md @@ -1,5 +1,5 @@ --- -sidebar_label: LIMIT +toc_title: LIMIT --- # LIMIT Clause {#limit-clause} @@ -12,9 +12,8 @@ sidebar_label: LIMIT If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause that explicitly sorts results, the choice of rows for the result may be arbitrary and non-deterministic. -:::note -The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. -::: +!!! note "Note" + The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. 
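For example (a sketch; whichever bound is smaller takes effect):

```sql
-- Asks for 10 rows, but the `limit` setting caps the result at 5
SELECT number FROM numbers(100) LIMIT 10 SETTINGS limit = 5;
```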
## LIMIT … WITH TIES Modifier {#limit-with-ties} diff --git a/docs/en/sql-reference/statements/select/offset.md b/docs/en/sql-reference/statements/select/offset.md index e120845dbc6..20ebd972a24 100644 --- a/docs/en/sql-reference/statements/select/offset.md +++ b/docs/en/sql-reference/statements/select/offset.md @@ -1,5 +1,5 @@ --- -sidebar_label: OFFSET +toc_title: OFFSET --- # OFFSET FETCH Clause {#offset-fetch} @@ -30,13 +30,11 @@ SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows. -:::note -According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present. -::: +!!! note "Note" + According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present. -:::note -The real offset can also depend on the [offset](../../../operations/settings/settings.md#offset) setting. -::: +!!! note "Note" + The real offset can also depend on the [offset](../../../operations/settings/settings.md#offset) setting. ## Examples {#examples} diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 46e483dddf4..b24f0213e4e 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -1,5 +1,5 @@ --- -sidebar_label: ORDER BY +toc_title: ORDER BY --- # ORDER BY Clause {#select-order-by} diff --git a/docs/en/sql-reference/statements/select/prewhere.md b/docs/en/sql-reference/statements/select/prewhere.md index c3aa2e14384..646bb83e692 100644 --- a/docs/en/sql-reference/statements/select/prewhere.md +++ b/docs/en/sql-reference/statements/select/prewhere.md @@ -1,5 +1,5 @@ --- -sidebar_label: PREWHERE +toc_title: PREWHERE --- # PREWHERE Clause {#prewhere-clause} @@ -18,9 +18,8 @@ If the [optimize_move_to_prewhere](../../../operations/settings/settings.md#opti If query has [FINAL](from.md#select-from-final) modifier, the `PREWHERE` optimization is not always correct. It is enabled only if both settings [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) and [optimize_move_to_prewhere_if_final](../../../operations/settings/settings.md#optimize_move_to_prewhere_if_final) are turned on. -:::note -The `PREWHERE` section is executed before `FINAL`, so the results of `FROM ... FINAL` queries may be skewed when using `PREWHERE` with fields not in the `ORDER BY` section of a table. -::: +!!! note "Attention" + The `PREWHERE` section is executed before `FINAL`, so the results of `FROM ... FINAL` queries may be skewed when using `PREWHERE` with fields not in the `ORDER BY` section of a table. ## Limitations {#limitations} diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index 3673a49a9e9..2405cb0a03c 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -1,5 +1,5 @@ --- -sidebar_label: SAMPLE +toc_title: SAMPLE --- # SAMPLE Clause {#select-sample-clause} @@ -14,9 +14,8 @@ Approximated query processing can be useful in the following cases: - When your raw data is not accurate, so approximation does not noticeably degrade the quality. 
- Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users). -:::note -You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). -::: +!!! note "Note" + You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). The features of data sampling are listed below: @@ -26,12 +25,11 @@ The features of data sampling are listed below: For the `SAMPLE` clause the following syntax is supported: -| SAMPLE Clause Syntax | Description | -|----------------------|------------------------------| -| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) | -| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) | -| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | - +| SAMPLE Clause Syntax | Description | +|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SAMPLE k` | Here `k` is the number from 0 to 1.
The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) |
+| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) |
+| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1.
The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | ## SAMPLE K {#select-sample-k} diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md index 8a1c7a770c9..6dfe554edf0 100644 --- a/docs/en/sql-reference/statements/select/union.md +++ b/docs/en/sql-reference/statements/select/union.md @@ -1,5 +1,5 @@ --- -sidebar_label: UNION +toc_title: UNION --- # UNION Clause {#union-clause} diff --git a/docs/en/sql-reference/statements/select/where.md b/docs/en/sql-reference/statements/select/where.md index c68f9d39d09..348b869e2db 100644 --- a/docs/en/sql-reference/statements/select/where.md +++ b/docs/en/sql-reference/statements/select/where.md @@ -1,5 +1,5 @@ --- -sidebar_label: WHERE +toc_title: WHERE --- # WHERE Clause {#select-where} @@ -10,9 +10,8 @@ If there is a `WHERE` clause, it must contain an expression with the `UInt8` typ `WHERE` expression is evaluated on the ability to use indexes and partition pruning, if the underlying table engine supports that. -:::note -There is a filtering optimization called [PREWHERE](../../../sql-reference/statements/select/prewhere.md). -::: +!!! note "Note" + There is a filtering optimization called [PREWHERE](../../../sql-reference/statements/select/prewhere.md). If you need to test a value for [NULL](../../../sql-reference/syntax.md#null-literal), use [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators or [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions. Otherwise an expression with `NULL` never passes. diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index 39fcb752980..d6c8da261cb 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -1,5 +1,5 @@ --- -sidebar_label: WITH +toc_title: WITH --- # WITH Clause {#with-clause} diff --git a/docs/en/sql-reference/statements/set-role.md b/docs/en/sql-reference/statements/set-role.md index cac7ca28b92..cf14a9c6d75 100644 --- a/docs/en/sql-reference/statements/set-role.md +++ b/docs/en/sql-reference/statements/set-role.md @@ -1,6 +1,6 @@ --- -sidebar_position: 51 -sidebar_label: SET ROLE +toc_priority: 51 +toc_title: SET ROLE --- # SET ROLE Statement {#set-role-statement} diff --git a/docs/en/sql-reference/statements/set.md b/docs/en/sql-reference/statements/set.md index d2a1d30c797..e5de5c41284 100644 --- a/docs/en/sql-reference/statements/set.md +++ b/docs/en/sql-reference/statements/set.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: SET +toc_priority: 50 +toc_title: SET --- # SET Statement {#query-set} diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 75c5c121946..96cbee0b04d 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -1,6 +1,6 @@ --- -sidebar_position: 37 -sidebar_label: SHOW +toc_priority: 37 +toc_title: SHOW --- # SHOW Statements {#show-queries} @@ -361,9 +361,8 @@ SHOW ACCESS Returns a list of clusters. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. -:::note -`SHOW CLUSTER name` query displays the contents of system.clusters table for this cluster. -::: +!!! 
info "Note" + `SHOW CLUSTER name` query displays the contents of system.clusters table for this cluster. ### Syntax {#show-cluster-syntax} diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 1d638ab3965..b71853f29dd 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -1,6 +1,6 @@ --- -sidebar_position: 36 -sidebar_label: SYSTEM +toc_priority: 36 +toc_title: SYSTEM --- # SYSTEM Statements {#query-language-system} @@ -67,7 +67,7 @@ SELECT name, status FROM system.dictionaries; ## RELOAD MODELS {#query_language-system-reload-models} -Reloads all [CatBoost](../../../guides/developer/apply-catboost-model.md) models if the configuration was updated without restarting the server. +Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse) models if the configuration was updated without restarting the server. **Syntax** @@ -191,9 +191,8 @@ Provides possibility to stop background merges for tables in the MergeTree famil SYSTEM STOP MERGES [ON VOLUME | [db.]merge_tree_family_table_name] ``` -:::note -`DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. -::: +!!! note "Note" + `DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. ### START MERGES {#query_language-system-start-merges} @@ -327,9 +326,8 @@ One may execute query after: Replica attaches locally found parts and sends info about them to Zookeeper. Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network). -:::warning -Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. -::: +!!! warning "Warning" + Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. **Syntax** diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 393ba82b3cd..b5354196fa4 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -1,6 +1,6 @@ --- -sidebar_position: 52 -sidebar_label: TRUNCATE +toc_priority: 52 +toc_title: TRUNCATE --- # TRUNCATE Statement {#truncate-statement} @@ -17,6 +17,5 @@ You can use the [replication_alter_partitions_sync](../../operations/settings/se You can specify how long (in seconds) to wait for inactive replicas to execute `TRUNCATE` queries with the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. -:::note -If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. -::: \ No newline at end of file +!!! info "Note" + If the `replication_alter_partitions_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. 
diff --git a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md index 869bf44fdeb..841c23d333d 100644 --- a/docs/en/sql-reference/statements/use.md +++ b/docs/en/sql-reference/statements/use.md @@ -1,6 +1,6 @@ --- -sidebar_position: 53 -sidebar_label: USE +toc_priority: 53 +toc_title: USE --- # USE Statement {#use} diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md index 688cf21e23c..be793d30f3d 100644 --- a/docs/en/sql-reference/statements/watch.md +++ b/docs/en/sql-reference/statements/watch.md @@ -1,13 +1,14 @@ --- -sidebar_position: 53 -sidebar_label: WATCH +toc_priority: 53 +toc_title: WATCH --- # WATCH Statement (Experimental) {#watch} -:::warning -This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. -::: +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. + ``` sql WATCH [db.]live_view @@ -104,6 +105,5 @@ WATCH lv EVENTS LIMIT 1; The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause). -:::note -The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. -::: \ No newline at end of file +!!! info "Note" + The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 10664549329..19efef3dc6a 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -1,6 +1,6 @@ --- -sidebar_position: 31 -sidebar_label: Syntax +toc_priority: 31 +toc_title: Syntax --- # Syntax {#syntax} diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 5954ed1b439..a02c2a10fb7 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -1,6 +1,6 @@ --- -sidebar_position: 50 -sidebar_label: cluster +toc_priority: 50 +toc_title: cluster --- # cluster, clusterAllReplicas {#cluster-clusterallreplicas} @@ -9,9 +9,8 @@ Allows to access all shards in an existing cluster which configured in `remote_s `clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection. -:::note -All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. -::: +!!! 
note "Note" + All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. **Syntax** diff --git a/docs/en/sql-reference/table-functions/dictionary.md b/docs/en/sql-reference/table-functions/dictionary.md index f04a4b6eb24..ad30cb30adf 100644 --- a/docs/en/sql-reference/table-functions/dictionary.md +++ b/docs/en/sql-reference/table-functions/dictionary.md @@ -1,6 +1,6 @@ --- -sidebar_position: 54 -sidebar_label: dictionary function +toc_priority: 54 +toc_title: dictionary function --- # dictionary {#dictionary-function} diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 4b72b0d84f5..f7c2a9e6d5b 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -1,6 +1,6 @@ --- -sidebar_position: 37 -sidebar_label: file +toc_priority: 37 +toc_title: file --- # file {#file} @@ -106,9 +106,8 @@ Query the number of rows in all files of these two directories: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. **Example** diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index bb9ad3f7551..ae22e1a1b88 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -1,6 +1,6 @@ --- -sidebar_position: 47 -sidebar_label: generateRandom +toc_priority: 47 +toc_title: generateRandom --- # generateRandom {#generaterandom} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 7f7dc53d27e..a7c3baca299 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: hdfs +toc_priority: 45 +toc_title: hdfs --- # hdfs {#hdfs} @@ -78,9 +78,8 @@ SELECT count(*) FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. **Example** diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index b46b8e64a1a..6183fe83c38 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -1,6 +1,6 @@ --- -sidebar_position: 55 -sidebar_label: hdfsCluster +toc_priority: 55 +toc_title: hdfsCluster --- # hdfsCluster Table Function {#hdfsCluster-table-function} @@ -49,9 +49,8 @@ SELECT count(*) FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! 
warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. **See Also** diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index a51312324f0..24d67e31fa8 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -1,9 +1,10 @@ --- -sidebar_label: Table Functions -sidebar_position: 34 +toc_folder_title: Table Functions +toc_priority: 34 +toc_title: Introduction --- -# Table Functions +# Table Functions {#table-functions} Table functions are methods for constructing tables. @@ -19,9 +20,8 @@ You can use table functions in: - [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. -:::warning -You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. -::: +!!! warning "Warning" + You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. | Function | Description | |------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/en/sql-reference/table-functions/input.md b/docs/en/sql-reference/table-functions/input.md index 916abb890ff..17707b798d6 100644 --- a/docs/en/sql-reference/table-functions/input.md +++ b/docs/en/sql-reference/table-functions/input.md @@ -1,6 +1,6 @@ --- -sidebar_position: 46 -sidebar_label: input +toc_priority: 46 +toc_title: input --- # input {#input} diff --git a/docs/en/sql-reference/table-functions/jdbc.md b/docs/en/sql-reference/table-functions/jdbc.md index 57128f7d146..9fe1333fc94 100644 --- a/docs/en/sql-reference/table-functions/jdbc.md +++ b/docs/en/sql-reference/table-functions/jdbc.md @@ -1,6 +1,6 @@ --- -sidebar_position: 43 -sidebar_label: jdbc +toc_priority: 43 +toc_title: jdbc --- # jdbc {#table-function-jdbc} diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md index 301f0a69caf..c89f0f4cc5a 100644 --- a/docs/en/sql-reference/table-functions/merge.md +++ b/docs/en/sql-reference/table-functions/merge.md @@ -1,6 +1,6 @@ --- -sidebar_position: 38 -sidebar_label: merge +toc_priority: 38 +toc_title: merge --- # merge {#merge} diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index c6983d8fba1..b45ab86f60f 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 42 -sidebar_label: mysql +toc_priority: 42 +toc_title: mysql --- # mysql {#mysql} @@ -55,9 +55,8 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', A table object with the same columns as the original MySQL table. -:::note -In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. -::: +!!! info "Note" + In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. 
**Examples** diff --git a/docs/en/sql-reference/table-functions/null.md b/docs/en/sql-reference/table-functions/null.md index 48df12bfece..4a8d221d620 100644 --- a/docs/en/sql-reference/table-functions/null.md +++ b/docs/en/sql-reference/table-functions/null.md @@ -1,6 +1,6 @@ --- -sidebar_position: 53 -sidebar_label: null function +toc_priority: 53 +toc_title: null function --- # null {#null-function} diff --git a/docs/en/sql-reference/table-functions/numbers.md b/docs/en/sql-reference/table-functions/numbers.md index c15c47cf725..f9735056b05 100644 --- a/docs/en/sql-reference/table-functions/numbers.md +++ b/docs/en/sql-reference/table-functions/numbers.md @@ -1,6 +1,6 @@ --- -sidebar_position: 39 -sidebar_label: numbers +toc_priority: 39 +toc_title: numbers --- # numbers {#numbers} diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md index d2614337cdd..a8481fbfd68 100644 --- a/docs/en/sql-reference/table-functions/odbc.md +++ b/docs/en/sql-reference/table-functions/odbc.md @@ -1,6 +1,6 @@ --- -sidebar_position: 44 -sidebar_label: odbc +toc_priority: 44 +toc_title: odbc --- # odbc {#table-functions-odbc} diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 6a30b1f3f0c..b2bdc2495e5 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -1,6 +1,6 @@ --- -sidebar_position: 42 -sidebar_label: postgresql +toc_priority: 42 +toc_title: postgresql --- # postgresql {#postgresql} @@ -26,9 +26,8 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) A table object with the same columns as the original PostgreSQL table. -:::note -In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. -::: +!!! info "Note" + In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. ## Implementation Details {#implementation-details} @@ -42,9 +41,8 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL Array types converts into ClickHouse arrays. -:::note -Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows. -::: +!!! info "Note" + Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows. Supports multiple replicas that must be listed by `|`. 
For example: diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 0eae00564ba..9effbb03553 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -1,6 +1,6 @@ --- -sidebar_position: 40 -sidebar_label: remote +toc_priority: 40 +toc_title: remote --- # remote, remoteSecure {#remote-remotesecure} diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 61dda209ee6..7dffd252dc9 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -1,6 +1,6 @@ --- -sidebar_position: 45 -sidebar_label: s3 +toc_priority: 45 +toc_title: s3 --- # s3 Table Function {#s3-table-function} @@ -95,9 +95,8 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ └─────────┘ ``` -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index dbd3538c692..65565aa92cb 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -1,6 +1,6 @@ --- -sidebar_position: 55 -sidebar_label: s3Cluster +toc_priority: 55 +toc_title: s3Cluster --- # s3Cluster Table Function {#s3Cluster-table-function} @@ -39,9 +39,8 @@ Count the total amount of rows in all files in the cluster `cluster_simple`: SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); ``` -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -::: +!!! warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
**See Also** diff --git a/docs/en/sql-reference/table-functions/sqlite.md b/docs/en/sql-reference/table-functions/sqlite.md index 6058843ae61..be7bd92d7e7 100644 --- a/docs/en/sql-reference/table-functions/sqlite.md +++ b/docs/en/sql-reference/table-functions/sqlite.md @@ -1,6 +1,6 @@ --- -sidebar_position: 55 -sidebar_label: sqlite +toc_priority: 55 +toc_title: sqlite --- ## sqlite {#sqlite} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 3f2f9c6a710..bfad7a67e0d 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -1,6 +1,6 @@ --- -sidebar_position: 41 -sidebar_label: url +toc_priority: 41 +toc_title: url --- # url {#url} diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md index 727cc04e5a2..f78120c370e 100644 --- a/docs/en/sql-reference/table-functions/view.md +++ b/docs/en/sql-reference/table-functions/view.md @@ -1,6 +1,6 @@ --- -sidebar_position: 51 -sidebar_label: view +toc_priority: 51 +toc_title: view --- ## view {#view} diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index e9a15995a16..0a55eafc7ab 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,6 +1,6 @@ --- -sidebar_position: 62 -sidebar_label: Window Functions +toc_priority: 62 +toc_title: Window Functions --- # Window Functions diff --git a/docs/en/whats-new/changelog/2017.md b/docs/en/whats-new/changelog/2017.md index 6a9f599daa0..af82c69386a 100644 --- a/docs/en/whats-new/changelog/2017.md +++ b/docs/en/whats-new/changelog/2017.md @@ -1,6 +1,6 @@ --- -sidebar_label: 2017 -sidebar_position: 26 +toc_priority: 79 +toc_title: '2017' --- ### ClickHouse Release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21} diff --git a/docs/en/whats-new/changelog/2018.md b/docs/en/whats-new/changelog/2018.md index d4edca54e52..db09bcd8a03 100644 --- a/docs/en/whats-new/changelog/2018.md +++ b/docs/en/whats-new/changelog/2018.md @@ -1,6 +1,6 @@ --- -sidebar_label: 2018 -sidebar_position: 25 +toc_priority: 78 +toc_title: '2018' --- ## ClickHouse Release 18.16 {#clickhouse-release-18-16} diff --git a/docs/en/whats-new/changelog/2019.md b/docs/en/whats-new/changelog/2019.md index c41041705d9..aa06f5cb1e3 100644 --- a/docs/en/whats-new/changelog/2019.md +++ b/docs/en/whats-new/changelog/2019.md @@ -1,6 +1,6 @@ --- -sidebar_label: 2019 -sidebar_position: 22 +toc_priority: 77 +toc_title: '2019' --- ## ClickHouse Release 19.17 {#clickhouse-release-v19-17} diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index 7ec37c51eb1..e0afe256777 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -1,6 +1,6 @@ --- -sidebar_label: 2020 -sidebar_position: 21 +toc_priority: 76 +toc_title: '2020' --- ### ClickHouse release 20.12 diff --git a/docs/en/whats-new/changelog/2021.md b/docs/en/whats-new/changelog/2021.md index e4c430342ce..2e81d981990 100644 --- a/docs/en/whats-new/changelog/2021.md +++ b/docs/en/whats-new/changelog/2021.md @@ -1,8 +1,6 @@ --- -sidebar_label: 2021 -sidebar_position: 20 -keywords: [clickhouse, changelog] -description: Changelog +toc_priority: 75 +toc_title: '2021' --- ### ClickHouse release v21.12, 2021-12-15 diff --git a/docs/en/whats-new/changelog/index.md b/docs/en/whats-new/changelog/index.md index 22f6a30452d..517ea16f3e7 100644 --- 
a/docs/en/whats-new/changelog/index.md +++ b/docs/en/whats-new/changelog/index.md @@ -1,498 +1,7 @@ --- -sidebar_label: Changelog -sidebar_position: 1 -keywords: [clickhouse, changelog] -description: Changelog +toc_folder_title: Changelog +toc_priority: 74 +toc_title: '2022' --- -# ClickHouse Changelog - -### Table of Contents -**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
-**[ClickHouse release v22.2, 2022-02-17](#222)**
-**[ClickHouse release v22.1, 2022-01-18](#221)**
-**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**
- - -## ClickHouse release v22.3-lts, 2022-03-17 - -#### Backward Incompatible Change - -* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). -* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). -* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). - -#### New Feature - -* Support for caching data locally for remote filesystems. It can be enabled for `s3` disks. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, we enabled the test suite on s3 filesystem and no more known issues exist, so it is started to be production ready. -* Add new table function `hive`. It can be used as follows `hive('', '', '', '', '')` for example `SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', 'id Nullable(String), score Nullable(Int32), day Nullable(String)', 'day')`. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). -* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). -* Support schema inference for inserting into table functions `file`/`hdfs`/`s3`/`url`. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). -* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. 
This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). -* Display CPU and memory metrics in clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). -* Implement `startsWith` and `endsWith` function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). -* Add three functions for Map data type: 1. `mapReplace(map1, map2)` - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. `mapFilter` 3. `mapMap`. mapFilter and mapMap are higher order functions, accepting two arguments, the first argument is a lambda function with k, v pair as arguments, the second argument is a column of type Map. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). -* Allow getting default user and password for clickhouse-client from the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). - -#### Experimental Feature - -* New data type `Object()`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. -* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it only allowed to only create `Replicated` tables or tables with stateless engines in `Replicated` databases. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Note that `Replicated` database is still an experimental feature. - -#### Performance Improvement - -* Improve performance of insertion into `MergeTree` tables by optimizing sorting. Up to 2x improvement is observed on realistic benchmarks. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). -* Columns pruning when reading Parquet, ORC and Arrow files from URL and S3. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Columns pruning when reading Parquet, ORC and Arrow files from Hive. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). -* A bunch of performance optimizations from a performance superhero. Improve performance of processing queries with large `IN` section. Improve performance of `direct` dictionary if its source is `ClickHouse`. Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)). 
-* Improve performance of `any` aggregate function by using more batching. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). -* Multiple improvements for performance of `clickhouse-keeper`: less locking [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)), lower memory usage by streaming reading and writing of snapshot instead of full copy. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)), optimizing compaction of log store in the RAFT implementation. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)), versioning of the internal data structure [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). - -#### Improvement - -* Allow asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). -* Implicit type casting of the key argument for functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants`. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). -* `EXPLAIN AST` query can output AST in form of a graph in Graphviz format: `EXPLAIN AST graph = 1 SELECT * FROM system.parts`. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). -* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Change restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). -* Play UI now correctly detects the preferred light/dark theme from the OS. [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). -* Added `date_time_input_format = 'best_effort_us'`. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). -* A new settings called `allow_plaintext_password` and `allow_no_password` are added in server configuration which turn on/off authentication types that can be potentially insecure in some environments. They are allowed by default. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). 
-* Support for `DateTime64` data type in `Arrow` format, closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). -* Reload `remote_url_allow_hosts` (filtering of outgoing connections) on config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Support `--testmode` parameter for `clickhouse-local`. This parameter enables interpretation of test hints that we use in functional tests. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add `distributed_depth` to query log. It is like a more detailed variant of `is_initial_query` [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). -* Respect `remote_url_allow_hosts` for `MySQL` and `PostgreSQL` table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). -* Added `disk_name` field to `system.part_log`. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). -* Do not retry non-rertiable errors when querying remote URLs. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support distributed INSERT SELECT queries (the setting `parallel_distributed_insert_select`) table function `view()`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)). -* More precise memory tracking during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). -* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). -* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Use connection pool for Hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). -* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). -* Allow `LowCardinality` strings for `ngrambf_v1`/`tokenbf_v1` indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). 
-* Allow opening empty sqlite db if the file doesn't exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Implement memory statistics for FreeBSD - this is required for `max_server_memory_usage` to work correctly. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). -* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is an `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). -* Show hints when user mistyped the name of a data skipping index. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). -* Support `remote()`/`cluster()` table functions for `parallel_distributed_insert_select`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). -* Do not reset logging that configured via `--log-file`/`--errorlog-file` command line options in case of empty configuration in the config file. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). -* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow specifying argument names for executable UDFs. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). -* `MaterializedMySQL` (experimental feature) now supports `materialized_mysql_tables_list` (a comma-separated list of MySQL database tables, which will be replicated by the MaterializedMySQL database engine. Default value: empty list — means all the tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). -* Improve OpenTelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). -* Make the znode `ctime` and `mtime` consistent between servers in ClickHouse Keeper. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). - -#### Build/Testing/Packaging Improvement - -* Package repository is migrated to JFrog Artifactory (**Mikhail f. Shiryaev**). -* Randomize some settings in functional tests, so more possible combinations of settings will be tested. This is yet another fuzzing method to ensure better test coverage. 
This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)). -* Drop PVS-Studio from our CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add an ability to build stripped binaries with CMake. In previous versions it was performed by dh-tools. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). -* Smaller "fat-free" `clickhouse-keeper` build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)). -* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove `clickhouse-test` debian package as unneeded complication. CI use tests from repository and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)). - -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* A fix for HDFS integration: When the inner buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multi times (>=3) for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)). -* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)). -* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). While using lz4 compression with a small max_read_buffer_size setting value. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). While using lzma compression with small `max_read_buffer_size` setting value. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). While using `brotli` compression with a small `max_read_buffer_size` setting value. 
The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible segfault in `JSONEachRow` schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)). -* Avoid std::terminate in case of exception in reading from remote VFS. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)). -* Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)). -* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)). -* Fix a corner case of `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)). -* Schema inference didn't work properly on case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix MaterializedPostgreSQL (experimental feature) `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix MaterializedPostgreSQL (experimental feature) adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)). -* Fixed slightly incorrect translation of YAML configs to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)). -* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)). -* The `update_lag` external dictionary configuration option was unusable showing the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)). -* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix missing alias after function is optimized to a subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)). -* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary` (relevant to operation on remote VFS). Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix unexpected result when use -State type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)). -* Fix possible segfault in FileLog (experimental feature). Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible rare error `Cannot push block to port which already has data`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)). -* Integration with Hive: Fix unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)). -* Avoid busy polling in ClickHouse Keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)). -* Fix DateTime64 conversion from PostgreSQL. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible "Part directory doesn't exist" during `INSERT` into MergeTree table backed by VFS over s3. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)). -* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix bugs for multiple columns group by in `WindowView` (experimental feature). [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)). -* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). -* Fix bug for H3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)). -* Fix `No such file or directory` with enabled `fsync_part_directory` and vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so not it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)). -* Stop to select part for mutate when the other replica has already updated the transaction log for `ReplatedMergeTree` engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). -* Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). - - -### ClickHouse release v22.2, 2022-02-17 - -#### Upgrade Notes - -* Applying data skipping indexes for queries with FINAL may produce incorrect result. In this release we disabled data skipping indexes by default for queries with FINAL (a new setting `use_skip_indexes_if_final` is introduced and disabled by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)). - -#### New Feature - -* Projections are production ready. Set `allow_experimental_projection_optimization` by default and deprecate this setting. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* An option to create a new files on insert for `File`/`S3`/`HDFS` engines. Allow to overwrite a file in `HDFS`. Throw an exception in attempt to overwrite a file in `S3` by default. Throw an exception in attempt to append data to file in formats that have a suffix (and thus don't support appends, like `Parquet`, `ORC`). Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a setting that allows a user to provide own deduplication semantic in `MergeTree`/`ReplicatedMergeTree` If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)). -* Add support of `DEFAULT` keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331). 
[#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/1over)). -* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([yakov-olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)). -* Allow set default table engine and to create tables without specifying ENGINE. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)). -* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)). -* Detect format in `clickhouse-local` by file name even in the case when it is passed to stdin. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)). -* Add schema inference for `values` table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)). -* Dynamic reload of server TLS certificates on config reload. Closes [#15764](https://github.com/ClickHouse/ClickHouse/issues/15764). [#15765](https://github.com/ClickHouse/ClickHouse/pull/15765) ([johnskopis](https://github.com/johnskopis)). [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). -* Fault-tolerant connections in clickhouse-client: `clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4`. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)). [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)). -* Add `DEGREES` and `RADIANS` functions for MySQL compatibility. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)). Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)). -* Add function `bitSlice` to extract bit subsequences from String/FixedString. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)). -* Implemented `meanZTest` aggregate function. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)). -* Add confidence intervals to T-tests aggregate functions. 
[#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)). -* Add function `addressToLineWithInlines`. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)). -* Added `#!` and `# ` as a recognised start of a single line comment. Closes [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)). - -#### Experimental Feature - -* Functions for text classification: language and charset detection. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. In case when hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommited query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)). -* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)). -* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL` (experimental feature). Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)). -* Fix various issues when projection is enabled by default. Each issue is described in separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678) . This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)). - -#### Performance Improvement - -* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). -* Improve performance of partitioned insert into table functions `URL`, `S3`, `File`, `HDFS`. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)). -* Multiple performance improvements of clickhouse-keeper. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)). -* `FlatDictionary` improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944). 
[#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)). -* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)). -* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Optimize `quantilesExact{Low,High}` to use `nth_element` instead of `sort`. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Danila Kutenin](https://github.com/danlark1)). -* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Minor improvement for analysis of scalar subqueries. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)). -* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831 . It's beneficial to also apply to tuple columns. [#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)). -* Rework and reintroduce the scalar subqueries cache to Materialized Views execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)). -* Slightly improve performance of `ORDER BY` by adding x86-64 AVX-512 support for `memcmpSmall` functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)). -* Improve `range_hashed` dictionary performance if for key there are a lot of intervals. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)). -* For inserts and merges into S3, write files in parallel whenever possible (TODO: check if it's merged). [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Improve `clickhouse-keeper` performance and fix several memory leaks in NuRaft library. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)). - -#### Improvement - -* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)). -* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)). -* Improvements for `range_hashed` dictionaries. 
Improve performance of load time if there are multiple attributes. Allow to create a dictionary without attributes. Added option to specify strategy when intervals `start` and `end` have `Nullable` type `convert_null_range_bound_to_open` by default is `true`. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify range lookup type `min`, `max` by default is `min` . Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)). -* `flat`, `hashed`, `hashed_array` dictionaries now support creating with empty attributes, with support of reading the keys and using `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)). -* Added support for `DateTime64` data type in dictionaries. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow to write `s3(url, access_key_id, secret_access_key)` (autodetect of data format and table structure, but with explicit credentials). [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)). -* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)). -* Send ProfileEvents statistics in case of INSERT SELECT query (to display query metrics in `clickhouse-client` for this type of queries). [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)). -* Recognize `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)). -* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow to export arbitrary large files to `s3`. Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` uploaded to S3 from a single query `s3_min_upload_part_size` multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)). 
-* Allow to skip not found (404) URLs for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Default input and output formats for `clickhouse-local` that can be overriden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)). -* Add options for `clickhouse-format`. Which close [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528) - `max_query_size` - `max_parser_depth`. [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)). -* Better handling of pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308). [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)). -* `REGEXP_MATCHES` and `REGEXP_REPLACE` function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)). -* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)). -* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([tavplubix](https://github.com/tavplubix)). -* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add ability to compose PostgreSQL-style cast operator `::` with expressions using `[]` and `.` operators (array and tuple indexing). [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)). -* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow to parse dictionary's `PRIMARY KEY` as `PRIMARY KEY (id, value)`; previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135). [#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)). -* An optional argument for `splitByChar` to limit the number of resulting elements. close [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081). 
[#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)).
-* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123). [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)).
-* Add `UUID` support in `MsgPack` input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)).
-* Tracing context (for OpenTelemetry) is now propagated from GRPC client metadata (this change is relevant for GRPC client-server protocol). [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)).
-* Supports all types of `SYSTEM` queries with `ON CLUSTER` clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)).
-* Improve memory accounting for queries that are using less than `max_untracked_memory`. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by different number of bytes. Example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)).
-* Detect format and schema from stdin in `clickhouse-local`. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)).
-* Correctly handle the case of misconfiguration when multiple disks are using the same path on the filesystem. [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)).
-* Try every resolved IP address while getting S3 proxy. S3 proxies are rarely used, mostly in Yandex Cloud. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Support `EXPLAIN AST CREATE FUNCTION` query: `EXPLAIN AST CREATE FUNCTION mycast AS (n) -> cast(n as String)` will return `EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String')`. [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)).
-* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add some improvements and fixes for `Bool` data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)).
-* Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)).
-* Improvement for `fromUnixTimestamp64` family functions. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)).
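The `Map(Key, Value)` to `Array(Tuple(Key, Value))` cast listed above can be illustrated with a small sketch (the exact result type is assumed to follow the map's key and value types):

```sql
-- Expected to return [('a',1),('b',2)]; the map literal has type Map(String, UInt8).
SELECT CAST(map('a', 1, 'b', 2) AS Array(Tuple(String, UInt8))) AS pairs;
```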
-* Reimplement `_shard_num` from constants (see [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (seee [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)). -* Enable binary arithmetic (plus, minus, multiply, division, least, greatest) between Decimal and Float. [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)). -* Respect cgroups limits in max_threads autodetection. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). -* Add new clickhouse-keeper setting `min_session_timeout_ms`. Now clickhouse-keeper will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings. [#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)). -* Added `UUID` data type support for functions `hex` and `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). -* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)). -* Fixes `parallel_view_processing = 0` not working when inserting into a table using `VALUES`. - Fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)). -* Fix parsing tables structure from ZooKeeper: now metadata from ZooKeeper compared with local metadata in canonical form. It helps when canonical function names can change between ClickHouse versions. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)). -* Properly escape some characters for interaction with LDAP. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). - -#### Build/Testing/Packaging Improvement - -* Remove unbundled build support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)). -* Ensure that tests don't depend on the result of non-stable sorting of equal elements. Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)). -* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Remove `clickhouse-test` debian package because it's obsolete. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)). -* Multiple improvements for build system to remove the possibility of occasionally using packages from the OS and to enforce hermetic builds. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)). 
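A short sketch of the Decimal/Float binary arithmetic enabled above (the result types noted in the comment are assumptions, not guaranteed by this changelog):

```sql
-- Mixing Decimal and Float arguments no longer throws; the results are
-- assumed to come back as Float64.
SELECT toDecimal32('1.50', 2) * 0.1        AS product,
       least(toDecimal64('10.5', 2), 10.0) AS smaller;
```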
- -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. It can reproduce if you have mostly empty arrays, but not always. And reading is performed in backward direction with ORDER BY ... DESC. This error is extremely unlikely to happen. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). -* Fix wrong result of `round`/`roundBankers` if integer values of small types are rounded. Closes [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)). -* Sometimes query cancellation did not work immediately when we were reading multiple files from s3 or HDFS. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)). -* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1` and `distributed_aggregation_memory_efficient = 0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)). -* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)). -* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)). -* Fix possible error 'file_size: Operation not supported' in files' schema autodetection. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible error `Cannot convert column Function to mask` in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix potential crash when doing schema inference from url source. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). -* For UDFs access permissions were checked for database level instead of global level as it should be. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)). 
-* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([tavplubix](https://github.com/tavplubix)).
-* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix current_user/current_address client information fields for inter-server communication (before this patch current_user/current_address will be preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)).
-* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)).
-* Fix metric `Query`, which shows the number of executing queries. In the last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix schema inference for table function `s3`. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)).
-* Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3 (virtual filesystem over s3 is an experimental feature that is not ready for production). [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)).
-* Fix inserts to distributed tables in case of a change of native protocol. The last change was in the version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix possible data race in `File` table engine that was introduced in [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960). Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). [#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([tavplubix](https://github.com/tavplubix)).
-* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix bug which led to inability for server to start when both replicated access storage and keeper (embedded in clickhouse-server) are used.
Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). -* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix parsing IPv6 from query parameter (prepared statements) and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). -* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). -* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix potential race condition when doing remote disk read (virtual filesystem over s3 is an experimental feature that is not ready for production). [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)). -* Fix crash if SQL UDF is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([tavplubix](https://github.com/tavplubix)). -* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). [#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)). -* Fix index analysis with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)). -* Fix schema inference for `JSONEachRow` and `JSONCompactEachRow`. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix usage of external dictionaries with `redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). 
[#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)). -* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)). -* Throw exception when directory listing request has failed in storage HDFS. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)). -* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010). This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275). [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)). -* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix bug in "zero copy replication" (a feature that is under development and should not be used in production) which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)). -* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)). -* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)). -* Export into `lz4` files has been fixed. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix potential crash if `group_by_overflow_mode` was set to `any` (approximate GROUP BY) and aggregation was performed by single column of type `LowCardinality`. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)). -* Fix inserting to temporary tables via gRPC client-server protocol. Fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. 
[#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix issue [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix issue [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). -* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). - - -### ClickHouse release v22.1, 2022-01-18 - -#### Upgrade Notes - -* The functions `left` and `right` were previously implemented in parser and now full-featured. Distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Resource usage by scalar subqueries is fully accounted since this version. With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)). - -#### New Feature - -* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)). -* Detect format by file extension in `file`/`hdfs`/`s3`/`url` table functions and `HDFS`/`S3`/`URL` table engines and also for `SELECT INTO OUTFILE` and `INSERT FROM INFILE` [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([OnePiece](https://github.com/zhongyuankai)). -* A tool for collecting diagnostics data if you need support. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). -* Automatic cluster discovery via Zoo/Keeper. It allows to add replicas to the cluster without changing configuration on every server. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([vdimir](https://github.com/vdimir)). -* Implement hive table engine to access apache hive from clickhouse. This implements: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). 
[#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([taiyang-li](https://github.com/taiyang-li)). -* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by [Vanyok-All-is-OK](https://github.com/Vanyok-All-is-OK) and [antikvist](https://github.com/antikvist). -* Added table function `hdfsCluster` which allows processing files from HDFS in parallel from many nodes in a specified cluster, similarly to `s3Cluster`. [#32400](https://github.com/ClickHouse/ClickHouse/pull/32400) ([Zhichang Yu](https://github.com/yuzhichang)). -* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). -* Allow `COMMENT` in `CREATE VIEW` (for all VIEW kinds). [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). -* Dynamically reinitialize listening ports and protocols when configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). -* Added `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add new functions for `H3` coordinate system: `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `MONTHNAME` function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)). -* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). -* Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). -* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). - - -#### Performance Improvement - -* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). -* More efficient handling of globs for URL storage. Now you can easily query million URLs in parallel with retries. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). 
[#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). -* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Non significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip less amount of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). -* Improve `clickhouse-keeper` writing performance by optimization the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). -* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). -* Improve query performance of system tables. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([OnePiece](https://github.com/zhongyuankai)). -* Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). -* Fix `sparse_hashed` dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). - - -#### Experimental Feature - -* Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). -* Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. 
[#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). -* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). -* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Change ZooKeeper path for zero-copy marks for shared data. Note that "zero-copy replication" is non-production feature (in early stages of development) that you shouldn't use anyway. But in case if you have used it, let you keep in mind this change. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). -* Events clause support for WINDOW VIEW watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). -* Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior consistent with ZooKeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). -* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). - - -#### Improvement - -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. -* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). -* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). -* Support hints for mistyped setting names for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237). [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). -* Allow to use virtual columns in Materialized Views. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([OnePiece](https://github.com/zhongyuankai)). -* Add config to disable IPv6 in clickhouse-keeper if needed. This close [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). -* Add more info to `system.build_options` about current git revision. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([taiyang-li](https://github.com/taiyang-li)). 
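The saturation behaviour described above ("date time conversion functions that generate time before `1970-01-01 00:00:00` will be saturated to zero") can be sketched as follows; the expected result is an assumption based on that description:

```sql
-- 1970-01-01 is a Thursday, so the natural start of its week falls in 1969;
-- with saturation the result is assumed to be 1970-01-01 rather than an overflowed value.
SELECT toStartOfWeek(toDate('1970-01-01')) AS saturated;
```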
-* `clickhouse-local`: track memory under `--max_memory_usage_in_client` option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). -* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). -* Support `` in cluster configuration, as an alternative form of `1`. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). -* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support Unix timestamp with milliseconds in `parseDateTimeBestEffort` function. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). -* Allow to cancel query while reading data from external table in the formats: `Arrow` / `Parquet` / `ORC` - it failed to be cancelled it case of big files and setting input_format_allow_seeks as false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). -* If table engine supports `SETTINGS` clause, allow to pass the settings as key-value or via config. Add this support for MySQL. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Correctly prevent Nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). -* Add retry for `PostgreSQL` connections in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095#issuecomment-1000577517). [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Send profile info inside `clickhouse-local`. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Short circuit evaluation: support for function `throwIf`. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). -* (This only happens in unofficial builds). Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). 
[#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). -* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). -* Flush all In-Memory data parts when WAL is not enabled while shutdown server or detaching table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). -* Allow to control connection timeouts for MySQL (previously was supported only for dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously default connect_timeout was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support `authSource` option for storage `MongoDB`. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support `Date32` type in `genarateRandom` table function. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). -* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` for control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)). -* Improve handling nested structures with missing columns while reading data in `Protobuf` format. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow empty credentials for `MongoDB` engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable some optimizations for window functions that may lead to exceptions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allows to connect to MongoDB 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483),. [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasFL)). -* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds. 
Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)). -* Bitmap aggregate functions will give correct result for out of range argument instead of wraparound. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)). -* Fix parsing incorrect queries with `FROM INFILE` statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't allow to write into `S3` if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)). -* `--echo` option was not used by `clickhouse-client` in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)). -* Use `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix surprisingly bad code in SQL ordinary function `file`. Now it supports symlinks. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). -* Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). - - -#### Build/Testing/Packaging Improvement - -* Add packages, functional tests and Docker builds for AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) -* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add `actionlint` for GitHub Actions workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)). -* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
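A minimal sketch of the ordinary `file` function mentioned above, assuming a hypothetical `report_latest.txt` that is a symlink located under the server's `user_files_path`:

```sql
-- Reads the whole file into a String; following the symlink is the behaviour
-- this entry adds.
SELECT file('report_latest.txt') AS contents;
```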
-* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)). -* Update `libc++` and `libc++abi` to the latest. [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)). -* Added integration test for external .NET client ([ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)). [#23230](https://github.com/ClickHouse/ClickHouse/pull/23230) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)). -* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)). -* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)). - - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to adversary. Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed Apache Avro Union type index out of boundary issue in Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference in `LowCardinality` data when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). -* ClickHouse Keeper handler will correctly remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). -* Potential off-by-one miscalculation of quotas: quota limit was not reached, but the limit was exceeded. This fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). -* Fixed CASTing from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)) [#27914](https://github.com/ClickHouse/ClickHouse/pull/27914) ([Vasily Nemkov](https://github.com/Enmk)). -* Fixed an exception like `Unknown aggregate function nothing` during an execution on a remote server. This fixes [#16689](https://github.com/ClickHouse/ClickHouse/issues/16689). [#26074](https://github.com/ClickHouse/ClickHouse/pull/26074) ([hexiaoting](https://github.com/hexiaoting)). -* Fix wrong database for JOIN without explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). 
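The String to IPv4/IPv6 casts covered by the fix above can be exercised like this (a malformed address is assumed to produce the clearer error message the entry mentions):

```sql
SELECT CAST('192.168.0.1' AS IPv4)                 AS v4,
       CAST(CAST('2001:db8::1' AS IPv6) AS String) AS v6_text;
```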
-* Fix segfault in Apache `Avro` format that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix segfault in Apache `Arrow` format if schema contains `Dictionary` type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)). -* Out of band `offset` and `limit` settings may be applied incorrectly for views. Close [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289) [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)). -* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix dictionary expressions for `range_hashed` range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible use-after-free for INSERT into Materialized View with concurrent DROP ([Azat Khuzhin](https://github.com/azat)). -* Do not try to read pass EOF (to workaround for a bug in the Linux kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)). -* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify the comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)). -* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in the query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)). -* Fix sending `WHERE 1 = 0` expressions for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix DDL validation for MaterializedPostgreSQL. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952). [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix MaterializedPostreSQL detach/attach (removing / adding to replication) tables with non-default schema. 
Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)). -* The metric `StorageBufferBytes` sometimes was miscalculated. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)). -* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix `s3` table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded the the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). -* Fix wrong tuple output in `CSV` format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix HDFS URL check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)). -* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix `ORC` format stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([kreuzerkrieg](https://github.com/kreuzerkrieg)). -* `topKWeightedState` failed for some input types. [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([vdimir](https://github.com/vdimir)). -* Fix exception `Single chunk is expected from view inner query (LOGICAL_ERROR)` in materialized view. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
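The `replaceRegexpAll` regression above concerned patterns that can match an empty substring. A small sketch based on the linked issues; the expected result is an assumption:

```sql
-- Strips leading and trailing commas; expected to return '1'.
SELECT replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') AS cleaned;
```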
-* Fix optimization with lazy seek for async reads from remote filesystems. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([tavplubix](https://github.com/tavplubix)). -* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query reference the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)). -* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([tavplubix](https://github.com/tavplubix)). -* Fix crash when used `fuzzBits` function, close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). -* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is pared as `GROUP BY tuple(...)`) over `Kafka`/`RabbitMQ`. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)). -* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have monotonous function in prefix of sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)). -* Fix LOGICAL_ERROR exception when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). -* Inserting into S3 with multipart upload to Google Cloud Storage may trigger abort. [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([vdimir](https://github.com/vdimir)). -* Fix possible exception at `RabbitMQ` storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). -* Fix async inserts with formats `CustomSeparated`, `Template`, `Regexp`, `MsgPack` and `JSONAsString`. Previousely the async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix `groupBitmapAnd` function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)).
-* Fix crash in JOIN found by fuzzer, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([vdimir](https://github.com/vdimir)).
-* Proper handling of the case with Apache Arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)).
-* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named `ALL` or `DISTINCT`. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix broken SELECT queries when there are more than 2 row policies on the same column, starting from the second query in the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)).
-* Fix fractional unix timestamp conversion to `DateTime64`: the fractional part was reversed for negative unix timestamps (before 1970-01-01). [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)).
-* Some entries of the replication queue might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or a similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([tavplubix](https://github.com/tavplubix)).
-* Fix parsing of `APPLY lambda` column transformer which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix `base64Encode` adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix hang with the command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
- - -## [Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md) \ No newline at end of file +{% include "content/changelog.md" %} diff --git a/docs/en/whats-new/index.md b/docs/en/whats-new/index.md index ac2b41a6637..8033fdf71d9 100644 --- a/docs/en/whats-new/index.md +++ b/docs/en/whats-new/index.md @@ -1,10 +1,8 @@ --- -sidebar_label: What's New -sidebar_position: 500 -keywords: [clickhouse, what's, new, roadmap, changelog] -description: What's New in ClickHouse +toc_folder_title: What's New +toc_priority: 82 --- -# What’s New in ClickHouse {#whats-new-in-clickhouse} +# What’s New in ClickHouse? {#whats-new-in-clickhouse} There’s a short high-level [roadmap](../whats-new/roadmap.md) and a detailed [changelog](../whats-new/changelog/index.md) for releases that have already been published. diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md index be7298ccd79..54f8f9d68a3 100644 --- a/docs/en/whats-new/roadmap.md +++ b/docs/en/whats-new/roadmap.md @@ -7,3 +7,4 @@ toc_title: Roadmap The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513). +{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##} diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md index 0a5c926f227..685f1c6d21d 100644 --- a/docs/en/whats-new/security-changelog.md +++ b/docs/en/whats-new/security-changelog.md @@ -1,11 +1,50 @@ --- -sidebar_label: Security Changelog -sidebar_position: 100 -keywords: [clickhouse, security, changelog] -description: Security Changelog +toc_priority: 76 +toc_title: Security Changelog --- +## Fixed in ClickHouse 21.10.2.15, 2021-10-18 {#fixed-in-clickhouse-release-21-10-2-215-2021-10-18} -# Security Changelog +### CVE-2021-43304 {#cve-2021-43304} + +Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. + +Credits: JFrog Security Research Team + +### CVE-2021-43305 {#cve-2021-43305} + +Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. This issue is very similar to CVE-2021-43304, but the vulnerable copy operation is in a different wildCopy call. + +Credits: JFrog Security Research Team + +### CVE-2021-42387 {#cve-2021-42387} + +Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the upper bounds of the source of the copy operation. + +Credits: JFrog Security Research Team + +### CVE-2021-42388 {#cve-2021-42388} + +Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the lower bounds of the source of the copy operation. 
+ +Credits: JFrog Security Research Team + +### CVE-2021-42389 {#cve-2021-42389} + +Divide-by-zero in Clickhouse's Delta compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team + +### CVE-2021-42390 {#cve-2021-42390} + +Divide-by-zero in Clickhouse's DeltaDouble compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team + +### CVE-2021-42391 {#cve-2021-42391} + +Divide-by-zero in Clickhouse's Gorilla compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team ## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12} @@ -84,3 +123,5 @@ Credits: Andrey Krasichkov and Evgeny Sidorov of Yandex Information Security Tea Incorrect configuration in deb package could lead to the unauthorized use of the database. Credits: the UK’s National Cyber Security Centre (NCSC) + +{## [Original article](https://clickhouse.com/docs/en/security_changelog/) ##}
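The three divide-by-zero CVEs above (CVE-2021-42389, CVE-2021-42390, CVE-2021-42391) describe the same pattern: the first byte of attacker-controlled compressed data encodes an element width and is later used as a divisor or modulus without validation. The sketch below is illustrative only, using a hypothetical `checkCodecHeader` helper rather than the actual ClickHouse codec code; it shows the kind of header validation that rules out this class of bug.

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>

/// Illustrative sketch, not the real codec implementation: validate the
/// width byte taken from compressed data before it is used in any
/// division or modulo, so a malicious buffer cannot trigger a divide-by-zero.
void checkCodecHeader(const char * compressed, size_t compressed_size)
{
    if (compressed_size < 2)
        throw std::runtime_error("compressed data is too small to contain a codec header");

    auto bytes_size = static_cast<uint8_t>(compressed[0]); /// declared width of one element

    /// Only 1, 2, 4 and 8 are meaningful element widths; rejecting everything
    /// else also rejects 0, which would otherwise reach the modulo operation.
    if (bytes_size != 1 && bytes_size != 2 && bytes_size != 4 && bytes_size != 8)
        throw std::runtime_error("invalid element width byte in compressed data");
}
```

Restricting the byte to the known widths is stricter than checking for zero alone, which is the usual defensive choice for decoders that must not trust their input.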