Merge branch 'master' into min-file-segment-size

This commit is contained in:
Kseniia Sumarokova 2023-07-04 12:21:41 +02:00 committed by GitHub
commit 24bb6ed566
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
70 changed files with 804 additions and 836 deletions

View File

@ -16,8 +16,9 @@ curl https://clickhouse.com/ | sh
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlighting and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming Events

View File

@ -4,22 +4,21 @@
#include <cstring>
#include "types.h"
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
namespace CityHash_v1_0_2 { struct uint128; }
namespace wide
{
template <size_t Bits, typename Signed>
class integer;
}
namespace impl
{
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
constexpr char hexDigitUppercase(unsigned char c)
{
return hex_digit_to_char_uppercase_table[c];
}
constexpr char hexDigitLowercase(unsigned char c)
{
return hex_digit_to_char_lowercase_table[c];
}
/// Maps 0..255 to 00..FF or 00..ff correspondingly
/// Maps 0..255 to 00..FF or 00..ff correspondingly.
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
"000102030405060708090A0B0C0D0E0F"
"101112131415161718191A1B1C1D1E1F"
@ -56,16 +55,7 @@ constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
}
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}
/// Maps 0..255 to 00000000..11111111 correspondingly.
constexpr inline std::string_view bin_byte_to_char_table = //
"0000000000000001000000100000001100000100000001010000011000000111"
"0000100000001001000010100000101100001100000011010000111000001111"
@ -100,62 +90,7 @@ constexpr inline std::string_view bin_byte_to_char_table = //
"1111000011110001111100101111001111110100111101011111011011110111"
"1111100011111001111110101111101111111100111111011111111011111111";
inline void writeBinByte(UInt8 byte, void * out)
{
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
}
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
{
union
{
TUInt value;
UInt8 uint8[sizeof(TUInt)];
};
value = uint_;
for (size_t i = 0; i < sizeof(TUInt); ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
template <typename TUInt>
inline void writeHexUIntUppercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
}
template <typename TUInt>
inline void writeHexUIntLowercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
}
template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntUppercase(uint_, res.data());
return res;
}
template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntLowercase(uint_, res.data());
return res;
}
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
constexpr inline std::string_view hex_char_to_digit_table
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
@ -175,41 +110,182 @@ constexpr inline std::string_view hex_char_to_digit_table
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
256};
constexpr UInt8 unhex(char c)
/// Converts a hex digit '0'..'f' or '0'..'F' to its value 0..15.
constexpr UInt8 unhexDigit(char c)
{
return hex_char_to_digit_table[static_cast<UInt8>(c)];
}
constexpr UInt8 unhex2(const char * data)
/// Converts an unsigned integer in the native endian to hexadecimal representation and back. Used as a base class for HexConversion<T>.
template <typename TUInt, typename = void>
struct HexConversionUInt
{
return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
static const constexpr size_t num_hex_digits = sizeof(TUInt) * 2;
static void hex(TUInt uint_, char * out, std::string_view table)
{
union
{
TUInt value;
UInt8 uint8[sizeof(TUInt)];
};
value = uint_;
for (size_t i = 0; i < sizeof(TUInt); ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
constexpr UInt16 unhex4(const char * data)
static TUInt unhex(const char * data)
{
return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
+ static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
TUInt res;
if constexpr (sizeof(TUInt) == 1)
{
res = static_cast<UInt8>(unhexDigit(data[0])) * 0x10 + static_cast<UInt8>(unhexDigit(data[1]));
}
template <typename TUInt>
constexpr TUInt unhexUInt(const char * data)
else if constexpr (sizeof(TUInt) == 2)
{
TUInt res = 0;
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
res = static_cast<UInt16>(unhexDigit(data[0])) * 0x1000 + static_cast<UInt16>(unhexDigit(data[1])) * 0x100
+ static_cast<UInt16>(unhexDigit(data[2])) * 0x10 + static_cast<UInt16>(unhexDigit(data[3]));
}
else if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
{
res = 0;
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
{
res <<= 4;
res += unhex(*data);
res += unhexDigit(*data);
}
}
else
{
res = 0;
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
{
res <<= 64;
res += unhexUInt<UInt64>(data);
res += HexConversionUInt<UInt64>::unhex(data);
}
}
return res;
}
};
/// Helper template class to convert a value of any supported type to hexadecimal representation and back.
/// The primary template is only declared; a type is supported when one of the specializations below matches it.
template <typename T, typename SFINAE = void>
struct HexConversion;
/// Built-in integral types (selected via std::is_integral_v) reuse the generic byte-wise conversion.
template <typename TUInt>
struct HexConversion<TUInt, std::enable_if_t<std::is_integral_v<TUInt>>> : public HexConversionUInt<TUInt> {};
/// wide::integer of any width and signedness also reuses the generic byte-wise conversion.
template <size_t Bits, typename Signed>
struct HexConversion<wide::integer<Bits, Signed>> : public HexConversionUInt<wide::integer<Bits, Signed>> {};
template <typename CityHashUInt128> /// Partial specialization here allows not to include <city.h> in this header.
struct HexConversion<CityHashUInt128, std::enable_if_t<std::is_same_v<CityHashUInt128, typename CityHash_v1_0_2::uint128>>>
{
static const constexpr size_t num_hex_digits = 32;
static void hex(const CityHashUInt128 & uint_, char * out, std::string_view table)
{
HexConversion<UInt64>::hex(uint_.high64, out, table);
HexConversion<UInt64>::hex(uint_.low64, out + 16, table);
}
static CityHashUInt128 unhex(const char * data)
{
CityHashUInt128 res;
res.high64 = HexConversion<UInt64>::unhex(data);
res.low64 = HexConversion<UInt64>::unhex(data + 16);
return res;
}
};
}
/// Produces a hexadecimal representation of an integer value with leading zeros (for checksums).
/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128.
/// It can be used with signed types as well, however they are written as corresponding unsigned numbers
/// using two's complement (i.e. for example "-1" is written as "0xFF", not as "-0x01").
template <typename T>
void writeHexUIntUppercase(const T & value, char * out)
{
impl::HexConversion<T>::hex(value, out, impl::hex_byte_to_char_uppercase_table);
}
template <typename T>
void writeHexUIntLowercase(const T & value, char * out)
{
impl::HexConversion<T>::hex(value, out, impl::hex_byte_to_char_lowercase_table);
}
/// Returns the uppercase hexadecimal representation of `value` (with leading zeros) as a new string
/// of exactly impl::HexConversion<T>::num_hex_digits characters.
template <typename T>
std::string getHexUIntUppercase(const T & value)
{
    constexpr size_t length = impl::HexConversion<T>::num_hex_digits;
    std::string hex_string(length, '\0');
    writeHexUIntUppercase(value, hex_string.data());
    return hex_string;
}
/// Returns the lowercase hexadecimal representation of `value` (with leading zeros) as a new string
/// of exactly impl::HexConversion<T>::num_hex_digits characters.
template <typename T>
std::string getHexUIntLowercase(const T & value)
{
    constexpr size_t length = impl::HexConversion<T>::num_hex_digits;
    std::string hex_string(length, '\0');
    writeHexUIntLowercase(value, hex_string.data());
    return hex_string;
}
/// Maps a value 0..15 to the corresponding uppercase hex digit '0'..'9', 'A'..'F'.
/// The argument indexes a 16-character table, so it must not exceed 15.
constexpr char hexDigitUppercase(unsigned char c)
{
    constexpr std::string_view digits = impl::hex_digit_to_char_uppercase_table;
    return digits[c];
}
/// Maps a value 0..15 to the corresponding lowercase hex digit '0'..'9', 'a'..'f'.
/// The argument indexes a 16-character table, so it must not exceed 15.
constexpr char hexDigitLowercase(unsigned char c)
{
    constexpr std::string_view digits = impl::hex_digit_to_char_lowercase_table;
    return digits[c];
}
/// Writes the two uppercase hex digits of `byte` to `out` (exactly 2 bytes are copied).
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
    /// Each byte value owns a 2-character slot in the lookup table.
    const size_t offset = static_cast<size_t>(byte) * 2;
    memcpy(out, impl::hex_byte_to_char_uppercase_table.data() + offset, 2);
}
/// Writes the two lowercase hex digits of `byte` to `out` (exactly 2 bytes are copied).
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
    /// Each byte value owns a 2-character slot in the lookup table.
    const size_t offset = static_cast<size_t>(byte) * 2;
    memcpy(out, impl::hex_byte_to_char_lowercase_table.data() + offset, 2);
}
/// Converts a hex representation with leading zeros back to an integer value.
/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128.
template <typename T>
constexpr T unhexUInt(const char * data)
{
return impl::HexConversion<T>::unhex(data);
}
/// Converts a single hexadecimal digit ('0'..'9', 'a'..'f' or 'A'..'F') to its numeric value 0..15.
/// Any other character is looked up in the same 256-entry table and yields an implementation-specific value.
constexpr UInt8 unhex(char c)
{
    return impl::hex_char_to_digit_table[static_cast<UInt8>(c)];
}
/// Converts two hexadecimal digits to UInt8.
/// Reads data[0] and data[1]; data[0] is the high nibble.
/// NOTE(review): impl::HexConversionUInt::unhex is not declared constexpr, so this function presumably
/// cannot be evaluated at compile time despite its constexpr specifier — confirm.
constexpr UInt8 unhex2(const char * data)
{
return unhexUInt<UInt8>(data);
}
/// Converts four hexadecimal digits to UInt16.
/// Reads data[0]..data[3]; data[0] is the most significant digit.
/// NOTE(review): impl::HexConversionUInt::unhex is not declared constexpr, so this function presumably
/// cannot be evaluated at compile time despite its constexpr specifier — confirm.
constexpr UInt16 unhex4(const char * data)
{
return unhexUInt<UInt16>(data);
}
/// Writes `byte` as eight '0'/'1' characters to `out` (exactly 8 bytes are copied).
inline void writeBinByte(UInt8 byte, void * out)
{
    /// Each byte value owns an 8-character slot in the lookup table.
    const size_t offset = static_cast<size_t>(byte) * 8;
    memcpy(out, impl::bin_byte_to_char_table.data() + offset, 8);
}

View File

@ -87,7 +87,6 @@ set (SRCS
src/LoggingRegistry.cpp
src/LogStream.cpp
src/MD5Engine.cpp
src/MemoryPool.cpp
src/MemoryStream.cpp
src/Message.cpp
src/Mutex.cpp

View File

@ -1,116 +0,0 @@
//
// MemoryPool.h
//
// Library: Foundation
// Package: Core
// Module: MemoryPool
//
// Definition of the MemoryPool class.
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#ifndef Foundation_MemoryPool_INCLUDED
#define Foundation_MemoryPool_INCLUDED
#include <cstddef>
#include <vector>
#include "Poco/Foundation.h"
#include "Poco/Mutex.h"
namespace Poco
{
class Foundation_API MemoryPool
/// A simple pool for fixed-size memory blocks.
///
/// The main purpose of this class is to speed-up
/// memory allocations, as well as to reduce memory
/// fragmentation in situations where the same blocks
/// are allocated all over again, such as in server
/// applications.
///
/// All allocated blocks are retained for future use.
/// A limit on the number of blocks can be specified.
/// Blocks can be preallocated.
{
public:
MemoryPool(std::size_t blockSize, int preAlloc = 0, int maxAlloc = 0);
/// Creates a MemoryPool for blocks with the given blockSize.
/// The number of blocks given in preAlloc are preallocated.
/// maxAlloc limits the total number of blocks the pool may
/// allocate; a value of 0 means there is no limit.
~MemoryPool();
/// Destroys the MemoryPool and frees all blocks that are
/// currently held in the pool.
void * get();
/// Returns a memory block. If there are no more blocks
/// in the pool, a new block will be allocated.
///
/// If maxAlloc blocks are already allocated, an
/// OutOfMemoryException is thrown.
void release(void * ptr);
/// Releases a memory block and returns it to the pool.
std::size_t blockSize() const;
/// Returns the block size.
int allocated() const;
/// Returns the number of allocated blocks.
int available() const;
/// Returns the number of available blocks in the pool.
private:
// Declared private: default construction, copy construction and copy assignment are not part of the public interface.
MemoryPool();
MemoryPool(const MemoryPool &);
MemoryPool & operator=(const MemoryPool &);
void clear();
/// Frees every block currently stored in _blocks.
enum
{
BLOCK_RESERVE = 128 // lower bound used by the constructor when reserving capacity in _blocks
};
typedef std::vector<char *> BlockVec;
std::size_t _blockSize; // size of each block, in chars
int _maxAlloc; // upper limit for _allocated; 0 means unlimited
int _allocated; // number of blocks allocated so far (starts at preAlloc)
BlockVec _blocks; // blocks currently available for reuse
FastMutex _mutex; // locked by get() and release()
};
//
// inlines
//
// Returns the size of a single block as passed to the constructor.
inline std::size_t MemoryPool::blockSize() const
{
return _blockSize;
}
// Returns the current value of the allocation counter.
// The counter is read without taking _mutex.
inline int MemoryPool::allocated() const
{
return _allocated;
}
// Returns how many blocks are currently stored in the pool and ready for reuse.
// The container is read without taking _mutex.
inline int MemoryPool::available() const
{
	return static_cast<int>(_blocks.size());
}
} // namespace Poco
#endif // Foundation_MemoryPool_INCLUDED

View File

@ -1,105 +0,0 @@
//
// MemoryPool.cpp
//
// Library: Foundation
// Package: Core
// Module: MemoryPool
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#include "Poco/MemoryPool.h"
#include "Poco/Exception.h"
namespace Poco {
// Creates the pool and preallocates preAlloc blocks of blockSize chars each.
// maxAlloc == 0 means "no limit"; otherwise it must be at least preAlloc.
MemoryPool::MemoryPool(std::size_t blockSize, int preAlloc, int maxAlloc):
	_blockSize(blockSize),
	_maxAlloc(maxAlloc),
	_allocated(preAlloc)
{
	poco_assert (maxAlloc == 0 || maxAlloc >= preAlloc);
	poco_assert (preAlloc >= 0 && maxAlloc >= 0);

	// Reserve room for at least BLOCK_RESERVE pointers (or preAlloc if that is larger),
	// but never for more than maxAlloc when a limit is set.
	int reserveCount = (preAlloc > BLOCK_RESERVE) ? preAlloc : BLOCK_RESERVE;
	if (maxAlloc > 0 && reserveCount > maxAlloc)
		reserveCount = maxAlloc;
	_blocks.reserve(reserveCount);

	try
	{
		int remaining = preAlloc;
		while (remaining > 0)
		{
			_blocks.push_back(new char[_blockSize]);
			--remaining;
		}
	}
	catch (...)
	{
		// Free whatever was preallocated so far before propagating the failure.
		clear();
		throw;
	}
}
// Frees every block still held by the pool (see clear()).
MemoryPool::~MemoryPool()
{
clear();
}
// Deletes all blocks stored in the pool and empties the container.
// Does not take _mutex itself.
void MemoryPool::clear()
{
	for (std::size_t index = 0; index < _blocks.size(); ++index)
	{
		delete [] _blocks[index];
	}
	_blocks.clear();
}
// Hands out one block of _blockSize chars.
//
// A block stored in the pool is reused if there is one. Otherwise a new block
// is allocated, unless a limit is set (_maxAlloc != 0) and it has already been
// reached, in which case OutOfMemoryException is thrown.
// The whole operation runs under _mutex.
void* MemoryPool::get()
{
	FastMutex::ScopedLock lock(_mutex);

	if (!_blocks.empty())
	{
		char* ptr = _blocks.back();
		_blocks.pop_back();
		return ptr;
	}

	if (_maxAlloc != 0 && _allocated >= _maxAlloc)
		throw OutOfMemoryException("MemoryPool exhausted");

	// Allocate first and count afterwards: if the allocation throws, the
	// counter must not be bumped, otherwise every failed allocation would
	// permanently consume one slot of the maxAlloc budget.
	char* ptr = new char[_blockSize];
	++_allocated;
	return ptr;
}
void MemoryPool::release(void* ptr)
{
FastMutex::ScopedLock lock(_mutex);
try
{
_blocks.push_back(reinterpret_cast<char*>(ptr));
}
catch (...)
{
delete [] reinterpret_cast<char*>(ptr);
}
}
} // namespace Poco

View File

@ -19,7 +19,6 @@
#include "Poco/BufferedStreamBuf.h"
#include "Poco/Net/HTTPBufferAllocator.h"
#include "Poco/Net/Net.h"
@ -27,9 +26,9 @@ namespace Poco
{
namespace Net
{
constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024;
typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>, HTTPBufferAllocator> HTTPBasicStreamBuf;
typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>> HTTPBasicStreamBuf;
}

View File

@ -1,53 +0,0 @@
//
// HTTPBufferAllocator.h
//
// Library: Net
// Package: HTTP
// Module: HTTPBufferAllocator
//
// Definition of the HTTPBufferAllocator class.
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#ifndef Net_HTTPBufferAllocator_INCLUDED
#define Net_HTTPBufferAllocator_INCLUDED
#include <ios>
#include "Poco/MemoryPool.h"
#include "Poco/Net/Net.h"
namespace Poco
{
namespace Net
{
class Net_API HTTPBufferAllocator
/// A BufferAllocator for HTTP streams.
///
/// Both functions are static and operate on a single
/// shared MemoryPool.
{
public:
static char * allocate(std::streamsize size);
/// Returns a buffer taken from the shared pool.
/// size is expected to be equal to BUFFER_SIZE.
static void deallocate(char * ptr, std::streamsize size);
/// Returns a buffer to the shared pool.
/// size is expected to be equal to BUFFER_SIZE.
enum
{
BUFFER_SIZE = 128 * 1024 /// size of every pooled buffer
};
private:
static Poco::MemoryPool _pool; /// pool shared by all callers
};
}
} // namespace Poco::Net
#endif // Net_HTTPBufferAllocator_INCLUDED

View File

@ -21,7 +21,6 @@
#include <cstddef>
#include <istream>
#include <ostream>
#include "Poco/MemoryPool.h"
#include "Poco/Net/HTTPBasicStreamBuf.h"
#include "Poco/Net/Net.h"
@ -80,12 +79,6 @@ namespace Net
public:
HTTPChunkedInputStream(HTTPSession & session);
~HTTPChunkedInputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};
@ -95,12 +88,6 @@ namespace Net
public:
HTTPChunkedOutputStream(HTTPSession & session);
~HTTPChunkedOutputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};

View File

@ -78,12 +78,6 @@ namespace Net
public:
HTTPFixedLengthInputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length);
~HTTPFixedLengthInputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};
@ -93,12 +87,6 @@ namespace Net
public:
HTTPFixedLengthOutputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length);
~HTTPFixedLengthOutputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};

View File

@ -21,7 +21,6 @@
#include <cstddef>
#include <istream>
#include <ostream>
#include "Poco/MemoryPool.h"
#include "Poco/Net/HTTPBasicStreamBuf.h"
#include "Poco/Net/Net.h"
@ -74,12 +73,6 @@ namespace Net
public:
HTTPHeaderInputStream(HTTPSession & session);
~HTTPHeaderInputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};
@ -89,12 +82,6 @@ namespace Net
public:
HTTPHeaderOutputStream(HTTPSession & session);
~HTTPHeaderOutputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};

View File

@ -192,7 +192,7 @@ namespace Net
HTTPSession & operator=(const HTTPSession &);
StreamSocket _socket;
char * _pBuffer;
std::unique_ptr<char[]> _pBuffer;
char * _pCurrent;
char * _pEnd;
bool _keepAlive;

View File

@ -21,7 +21,6 @@
#include <cstddef>
#include <istream>
#include <ostream>
#include "Poco/MemoryPool.h"
#include "Poco/Net/HTTPBasicStreamBuf.h"
#include "Poco/Net/Net.h"
@ -75,12 +74,6 @@ namespace Net
public:
HTTPInputStream(HTTPSession & session);
~HTTPInputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};
@ -90,12 +83,6 @@ namespace Net
public:
HTTPOutputStream(HTTPSession & session);
~HTTPOutputStream();
void * operator new(std::size_t size);
void operator delete(void * ptr);
private:
static Poco::MemoryPool _pool;
};

View File

@ -1,44 +0,0 @@
//
// HTTPBufferAllocator.cpp
//
// Library: Net
// Package: HTTP
// Module: HTTPBufferAllocator
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#include "Poco/Net/HTTPBufferAllocator.h"
using Poco::MemoryPool;
namespace Poco {
namespace Net {
MemoryPool HTTPBufferAllocator::_pool(HTTPBufferAllocator::BUFFER_SIZE, 16);
// Takes one buffer from the shared pool.
// The requested size is only checked against BUFFER_SIZE by a debug assertion;
// the pool itself was created with BUFFER_SIZE as its block size.
char* HTTPBufferAllocator::allocate(std::streamsize size)
{
	poco_assert_dbg (size == BUFFER_SIZE);

	void* block = _pool.get();
	return static_cast<char*>(block);
}
// Gives a buffer back to the shared pool.
// size is only checked against BUFFER_SIZE by a debug assertion and is otherwise unused.
void HTTPBufferAllocator::deallocate(char* ptr, std::streamsize size)
{
poco_assert_dbg (size == BUFFER_SIZE);
_pool.release(ptr);
}
} } // namespace Poco::Net

View File

@ -34,7 +34,7 @@ namespace Net {
HTTPChunkedStreamBuf::HTTPChunkedStreamBuf(HTTPSession& session, openmode mode):
HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
_session(session),
_mode(mode),
_chunk(0)
@ -181,10 +181,6 @@ HTTPChunkedStreamBuf* HTTPChunkedIOS::rdbuf()
// HTTPChunkedInputStream
//
Poco::MemoryPool HTTPChunkedInputStream::_pool(sizeof(HTTPChunkedInputStream));
HTTPChunkedInputStream::HTTPChunkedInputStream(HTTPSession& session):
HTTPChunkedIOS(session, std::ios::in),
std::istream(&_buf)
@ -196,34 +192,10 @@ HTTPChunkedInputStream::~HTTPChunkedInputStream()
{
}
void* HTTPChunkedInputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPChunkedInputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
//
// HTTPChunkedOutputStream
//
Poco::MemoryPool HTTPChunkedOutputStream::_pool(sizeof(HTTPChunkedOutputStream));
HTTPChunkedOutputStream::HTTPChunkedOutputStream(HTTPSession& session):
HTTPChunkedIOS(session, std::ios::out),
std::ostream(&_buf)
@ -235,24 +207,4 @@ HTTPChunkedOutputStream::~HTTPChunkedOutputStream()
{
}
void* HTTPChunkedOutputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPChunkedOutputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
} } // namespace Poco::Net

View File

@ -30,7 +30,7 @@ namespace Net {
HTTPFixedLengthStreamBuf::HTTPFixedLengthStreamBuf(HTTPSession& session, ContentLength length, openmode mode):
HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
_session(session),
_length(length),
_count(0)
@ -109,9 +109,6 @@ HTTPFixedLengthStreamBuf* HTTPFixedLengthIOS::rdbuf()
//
Poco::MemoryPool HTTPFixedLengthInputStream::_pool(sizeof(HTTPFixedLengthInputStream));
HTTPFixedLengthInputStream::HTTPFixedLengthInputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length):
HTTPFixedLengthIOS(session, length, std::ios::in),
std::istream(&_buf)
@ -124,33 +121,10 @@ HTTPFixedLengthInputStream::~HTTPFixedLengthInputStream()
}
void* HTTPFixedLengthInputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPFixedLengthInputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
//
// HTTPFixedLengthOutputStream
//
Poco::MemoryPool HTTPFixedLengthOutputStream::_pool(sizeof(HTTPFixedLengthOutputStream));
HTTPFixedLengthOutputStream::HTTPFixedLengthOutputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length):
HTTPFixedLengthIOS(session, length, std::ios::out),
std::ostream(&_buf)
@ -163,23 +137,4 @@ HTTPFixedLengthOutputStream::~HTTPFixedLengthOutputStream()
}
void* HTTPFixedLengthOutputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPFixedLengthOutputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
} } // namespace Poco::Net

View File

@ -26,7 +26,7 @@ namespace Net {
HTTPHeaderStreamBuf::HTTPHeaderStreamBuf(HTTPSession& session, openmode mode):
HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
_session(session),
_end(false)
{
@ -101,10 +101,6 @@ HTTPHeaderStreamBuf* HTTPHeaderIOS::rdbuf()
// HTTPHeaderInputStream
//
Poco::MemoryPool HTTPHeaderInputStream::_pool(sizeof(HTTPHeaderInputStream));
HTTPHeaderInputStream::HTTPHeaderInputStream(HTTPSession& session):
HTTPHeaderIOS(session, std::ios::in),
std::istream(&_buf)
@ -116,34 +112,10 @@ HTTPHeaderInputStream::~HTTPHeaderInputStream()
{
}
void* HTTPHeaderInputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPHeaderInputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
//
// HTTPHeaderOutputStream
//
Poco::MemoryPool HTTPHeaderOutputStream::_pool(sizeof(HTTPHeaderOutputStream));
HTTPHeaderOutputStream::HTTPHeaderOutputStream(HTTPSession& session):
HTTPHeaderIOS(session, std::ios::out),
std::ostream(&_buf)
@ -155,24 +127,4 @@ HTTPHeaderOutputStream::~HTTPHeaderOutputStream()
{
}
void* HTTPHeaderOutputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPHeaderOutputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
} } // namespace Poco::Net

View File

@ -13,8 +13,8 @@
#include "Poco/Net/HTTPSession.h"
#include "Poco/Net/HTTPBufferAllocator.h"
#include "Poco/Net/NetException.h"
#include "Poco/Net/HTTPBasicStreamBuf.h"
#include <cstring>
@ -68,14 +68,6 @@ HTTPSession::HTTPSession(const StreamSocket& socket, bool keepAlive):
HTTPSession::~HTTPSession()
{
try
{
if (_pBuffer) HTTPBufferAllocator::deallocate(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE);
}
catch (...)
{
poco_unexpected();
}
try
{
close();
@ -177,10 +169,10 @@ void HTTPSession::refill()
{
if (!_pBuffer)
{
_pBuffer = HTTPBufferAllocator::allocate(HTTPBufferAllocator::BUFFER_SIZE);
_pBuffer = std::make_unique<char[]>(HTTP_DEFAULT_BUFFER_SIZE);
}
_pCurrent = _pEnd = _pBuffer;
int n = receive(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE);
_pCurrent = _pEnd = _pBuffer.get();
int n = receive(_pBuffer.get(), HTTP_DEFAULT_BUFFER_SIZE);
_pEnd += n;
}
@ -199,7 +191,7 @@ void HTTPSession::connect(const SocketAddress& address)
_socket.setNoDelay(true);
// There may be leftover data from a previous (failed) request in the buffer,
// so we clear it.
_pCurrent = _pEnd = _pBuffer;
_pCurrent = _pEnd = _pBuffer.get();
}

View File

@ -26,7 +26,7 @@ namespace Net {
HTTPStreamBuf::HTTPStreamBuf(HTTPSession& session, openmode mode):
HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
_session(session),
_mode(mode)
{
@ -96,10 +96,6 @@ HTTPStreamBuf* HTTPIOS::rdbuf()
// HTTPInputStream
//
Poco::MemoryPool HTTPInputStream::_pool(sizeof(HTTPInputStream));
HTTPInputStream::HTTPInputStream(HTTPSession& session):
HTTPIOS(session, std::ios::in),
std::istream(&_buf)
@ -112,33 +108,11 @@ HTTPInputStream::~HTTPInputStream()
}
void* HTTPInputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPInputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
//
// HTTPOutputStream
//
Poco::MemoryPool HTTPOutputStream::_pool(sizeof(HTTPOutputStream));
HTTPOutputStream::HTTPOutputStream(HTTPSession& session):
HTTPIOS(session, std::ios::out),
std::ostream(&_buf)
@ -150,24 +124,4 @@ HTTPOutputStream::~HTTPOutputStream()
{
}
void* HTTPOutputStream::operator new(std::size_t size)
{
return _pool.get();
}
void HTTPOutputStream::operator delete(void* ptr)
{
try
{
_pool.release(ptr);
}
catch (...)
{
poco_unexpected();
}
}
} } // namespace Poco::Net

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d
Subproject commit 377220ef351ae24994a5fcd2b5fa3930d00c4db0

View File

@ -6,7 +6,7 @@ Usage:
Build deb package with `clang-14` in `debug` mode:
```
$ mkdir deb/test_output
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build
$ ls -l deb/test_output
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb

View File

@ -112,12 +112,12 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool:
return build_type == "" and package_type == "deb" and sanitizer == ""
def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool:
return not debug_build and package_type == "deb" and sanitizer == ""
def parse_env_variables(
build_type: str,
debug_build: bool,
compiler: str,
sanitizer: str,
package_type: str,
@ -233,7 +233,7 @@ def parse_env_variables(
build_target = (
f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge"
)
if is_release_build(build_type, package_type, sanitizer):
if is_release_build(debug_build, package_type, sanitizer):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@ -253,8 +253,8 @@ def parse_env_variables(
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
if build_type:
result.append(f"BUILD_TYPE={build_type.capitalize()}")
if debug_build:
result.append("BUILD_TYPE=DEBUG")
else:
result.append("BUILD_TYPE=None")
@ -359,7 +359,7 @@ def parse_args() -> argparse.Namespace:
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", type=dir_name, required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--debug-build", action="store_true")
parser.add_argument(
"--compiler",
@ -464,7 +464,7 @@ def main():
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
args.build_type,
args.debug_build,
args.compiler,
args.sanitizer,
args.package_type,

View File

@ -0,0 +1,29 @@
---
toc_priority:
toc_title:
---
# data_type_name {#data_type-name}
Description.
**Parameters** (Optional)
- `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
- `y` — Description. [Type name](relative/path/to/type/dscr.md#type).
**Examples**
```sql
```
## Additional Info {#additional-info} (Optional)
An additional section can have any name, for example, **Usage**.
**See Also** (Optional)
- [link](#)
[Original article](https://clickhouse.com/docs/en/data-types/<data-type-name>/) <!--hide-->

View File

@ -0,0 +1,63 @@
# EngineName {#enginename}
- What the Database/Table engine does.
- Relations with other engines if they exist.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE ...
```
or
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE ...
```
**Engine Parameters**
**Query Clauses** (for Table engines only)
## Virtual columns {#virtual-columns} (for Table engines only)
List of virtual columns with descriptions, if they exist.
## Data Types Support {#data_types-support} (for Database engines only)
| EngineName | ClickHouse |
|-----------------------|------------------------------------|
| NativeDataTypeName | [ClickHouseDataTypeName](link#) |
## Specifics and recommendations {#specifics-and-recommendations}
Algorithms
Specifics of read and write processes
Examples of tasks
Recommendations for usage
Specifics of data storage
## Usage Example {#usage-example}
The example must show usage and use cases. The following text contains the recommended parts of this section.
Input table:
``` text
```
Query:
``` sql
```
Result:
``` text
```
Follow up with any text to clarify the example.
**See Also**
- [link](#)

View File

@ -0,0 +1,51 @@
## functionName {#functionname-in-lower-case}
Short description.
**Syntax** (without SELECT)
``` sql
<function syntax>
```
Alias: `<alias name>`. (Optional)
More text (Optional).
**Arguments** (Optional)
- `x` — Description. Optional (only for optional arguments). Possible values: <values list>. Default value: <value>. [Type name](relative/path/to/type/dscr.md#type).
- `y` — Description. Optional (only for optional arguments). Possible values: <values list>. Default value: <value>. [Type name](relative/path/to/type/dscr.md#type).
**Parameters** (Optional, only for parametric aggregate functions)
- `z` — Description. Optional (only for optional parameters). Possible values: <values list>. Default value: <value>. [Type name](relative/path/to/type/dscr.md#type).
**Returned value(s)**
- Returned values list.
Type: [Type name](relative/path/to/type/dscr.md#type).
**Example**
The example must show usage and/or use cases. The following text contains the recommended parts of an example.
Input table (Optional):
``` text
```
Query:
``` sql
```
Result:
``` text
```
**See Also** (Optional)
- [link](#)

View File

@ -0,0 +1,33 @@
## server_setting_name {#server_setting_name}
Description.
Describe what is configured in this section of settings.
Possible value: ...
Default value: ...
**Settings** (Optional)
If the section contains several settings, list them here. Specify possible values and default values:
- setting_1 — Description.
- setting_2 — Description.
**Example**
```xml
<server_setting_name>
<setting_1> ... </setting_1>
<setting_2> ... </setting_2>
</server_setting_name>
```
**Additional Info** (Optional)
An additional section can have any name, for example, **Usage**.
**See Also** (Optional)
- [link](#)

View File

@ -0,0 +1,27 @@
## setting_name {#setting_name}
Description.
For the switch setting, use the typical phrase: “Enables or disables something …”.
Possible values:
*For a switch setting:*
- 0 — Disabled.
- 1 — Enabled.
*For another setting (typical phrases):*
- Positive integer.
- 0 — Disabled or unlimited or something else.
Default value: `value`.
**Additional Info** (Optional)
An additional section can have any name, for example, **Usage**.
**See Also** (Optional)
- [link](#)

View File

@ -0,0 +1,24 @@
# Statement name (for example, SHOW USER) {#statement-name-in-lower-case}
Brief description of what the statement does.
**Syntax**
```sql
Syntax of the statement.
```
## Other necessary sections of the description (Optional) {#anchor}
Examples of descriptions with a complicated structure:
- https://clickhouse.com/docs/en/sql-reference/statements/grant/
- https://clickhouse.com/docs/en/sql-reference/statements/revoke/
- https://clickhouse.com/docs/en/sql-reference/statements/select/join/
**See Also** (Optional)
Links to related topics as a list.
- [link](#)

View File

@ -0,0 +1,25 @@
# system.table_name {#system-tables_table-name}
Description.
Columns:
- `column_name` ([data_type_name](path/to/data_type.md)) — Description.
**Example**
Query:
``` sql
SELECT * FROM system.table_name
```
Result:
``` text
Some output. It shouldn't be too long.
```
**See Also**
- [Article name](path/to/article_name.md) — Some words about referenced information.

View File

@ -13,6 +13,20 @@ Supported platforms:
- AArch64
- Power9 (experimental)
## Building in docker
We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage:
```bash
# define a directory for the output artifacts
output_dir="build_results"
# the simplest build
./docker/packager/packager --package-type=binary --output-dir "$output_dir"
# build debian packages
./docker/packager/packager --package-type=deb --output-dir "$output_dir"
# by default, debian packages use thin LTO, so we can override it to speed up the build
CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "$output_dir"
```
## Building on Ubuntu
The following tutorial is based on Ubuntu Linux.

View File

@ -378,6 +378,10 @@ request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI ch
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
build.
### macOS-only: Install with Homebrew
To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse).
## Launch {#launch}
To start the server as a daemon, run:

View File

@ -37,7 +37,7 @@ SipHash getHashOfLoadedBinary()
std::string getHashOfLoadedBinaryHex()
{
SipHash hash = getHashOfLoadedBinary();
std::array<UInt64, 2> checksum;
UInt128 checksum;
hash.get128(checksum);
return getHexUIntUppercase(checksum);
}

View File

@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
/// TODO mess up of endianness in error message.
message << "Checksum doesn't match: corrupted data."
" Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64)
" Reference: " + getHexUIntLowercase(expected_checksum)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum)
+ ". Size of compressed block: " + toString(size);
const char * message_hardware_failure = "This is most likely due to hardware failure. "

View File

@ -14,6 +14,9 @@ namespace QueryPlanOptimizations
void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Second pass is used to apply read-in-order and attach a predicate to PK.
void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them.
/// After that it adds CreateSetsStep for the subqueries that have not been used in the filters.
void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Optimization (first pass) is a function applied to QueryPlan::Node.
/// It can read and update subtree of specified node.

View File

@ -28,8 +28,6 @@ void optimizePrimaryKeyCondition(const Stack & stack)
else
break;
}
source_step_with_filter->onAddFilterFinish();
}
}

View File

@ -167,7 +167,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
optimizePrewhere(stack, nodes);
optimizePrimaryKeyCondition(stack);
enableMemoryBoundMerging(*stack.back().node, nodes);
addPlansForSets(*stack.back().node, nodes);
stack.pop_back();
}
@ -178,5 +177,35 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
"No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1");
}
/// Third optimization pass over the query plan.
/// Walks the tree depth-first with an explicit stack, handling a node only after
/// all of its children have been handled. For each node: if its step is a
/// SourceStepWithFilter, applyFilters() is called on it; then addPlansForSets()
/// is invoked for the node.
void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
{
    Stack stack;
    stack.push_back({.node = &root});

    while (!stack.empty())
    {
        /// NOTE: this reference must not be used once the stack has been modified.
        auto & current = stack.back();

        const bool all_children_visited = current.next_child >= current.node->children.size();
        if (!all_children_visited)
        {
            /// Descend into the next unvisited child before handling this node.
            Frame child_frame{.node = current.node->children[current.next_child]};
            ++current.next_child;
            stack.push_back(child_frame);
            continue;
        }

        auto * step = current.node->step.get();
        if (auto * filterable_source = dynamic_cast<SourceStepWithFilter *>(step))
            filterable_source->applyFilters();

        addPlansForSets(*current.node, nodes);
        stack.pop_back();
    }
}
}
}

View File

@ -482,6 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes);
updateDataStreams(*root);
}

View File

@ -1299,14 +1299,11 @@ static void buildIndexes(
indexes->skip_indexes = std::move(skip_indexes);
}
void ReadFromMergeTree::onAddFilterFinish()
{
if (!filter_nodes.nodes.empty())
void ReadFromMergeTree::applyFilters()
{
auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info);
buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading);
}
}
MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
MergeTreeData::DataPartsVector parts,

View File

@ -226,7 +226,7 @@ public:
size_t getNumStreams() const { return requested_num_streams; }
bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; }
void onAddFilterFinish() override;
void applyFilters() override;
private:
static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl(

View File

@ -37,7 +37,8 @@ public:
filter_dags.push_back(std::move(filter_dag));
}
virtual void onAddFilterFinish() {}
/// Apply filters that can optimize reading from storage.
virtual void applyFilters() {}
protected:
std::vector<ActionsDAGPtr> filter_dags;

View File

@ -39,9 +39,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi
if (expected_checksum != calculated_checksum)
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
"Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.",
getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64),
getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64));
"Checksum of extra info doesn't match: corrupted data. Reference: {}. Actual: {}.",
getHexUIntLowercase(expected_checksum), getHexUIntLowercase(calculated_checksum));
}
/// Read the parts of the header.

View File

@ -80,6 +80,11 @@ DataPartStorageIteratorPtr DataPartStorageOnDiskFull::iterate() const
volume->getDisk()->iterateDirectory(fs::path(root_path) / part_dir));
}
/// Returns the last-modification timestamp that the volume's disk reports for
/// `file_name`, resolved as root_path / part_dir / file_name.
Poco::Timestamp DataPartStorageOnDiskFull::getFileLastModified(const String & file_name) const
{
    const auto file_path = fs::path(root_path) / part_dir / file_name;
    return volume->getDisk()->getLastModified(file_path);
}
size_t DataPartStorageOnDiskFull::getFileSize(const String & file_name) const
{
return volume->getDisk()->getFileSize(fs::path(root_path) / part_dir / file_name);

View File

@ -20,6 +20,7 @@ public:
bool isDirectory(const std::string & name) const override;
DataPartStorageIteratorPtr iterate() const override;
Poco::Timestamp getFileLastModified(const String & file_name) const override;
size_t getFileSize(const std::string & file_name) const override;
UInt32 getRefCount(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name) const override;

View File

@ -122,6 +122,7 @@ public:
virtual DataPartStorageIteratorPtr iterate() const = 0;
/// Get metadata for a file inside path dir.
virtual Poco::Timestamp getFileLastModified(const std::string & file_name) const = 0;
virtual size_t getFileSize(const std::string & file_name) const = 0;
virtual UInt32 getRefCount(const std::string & file_name) const = 0;

View File

@ -116,6 +116,8 @@ public:
/// Otherwise return information about column size on disk.
ColumnSize getColumnSize(const String & column_name) const;
virtual std::optional<time_t> getColumnModificationTime(const String & column_name) const = 0;
/// NOTE: Returns zeros if secondary indexes are not found in checksums.
/// Otherwise return information about secondary index size on disk.
IndexSize getSecondaryIndexSize(const String & secondary_index_name) const;

View File

@ -307,19 +307,7 @@ static void updateHash(SipHash & hash, const std::string & data)
/// Hash is the same as MinimalisticDataPartChecksums::hash_of_all_files
String MergeTreeDataPartChecksums::getTotalChecksumHex() const
{
SipHash hash_of_all_files;
for (const auto & [name, checksum] : files)
{
updateHash(hash_of_all_files, name);
hash_of_all_files.update(checksum.file_hash);
}
UInt64 lo;
UInt64 hi;
hash_of_all_files.get128(lo, hi);
return getHexUIntUppercase(hi) + getHexUIntUppercase(lo);
return getHexUIntUppercase(getTotalChecksumUInt128());
}
MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTotalChecksumUInt128() const

View File

@ -144,6 +144,11 @@ bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) co
return (bin_checksum != checksums.files.end() && mrk_checksum != checksums.files.end());
}
/// The column name is ignored: the result is always the modification time
/// (as epoch seconds) of the file named by DATA_FILE_NAME_WITH_EXTENSION.
std::optional<time_t> MergeTreeDataPartCompact::getColumnModificationTime(const String & /* column_name */) const
{
    const auto modified_at = getDataPartStorage().getFileLastModified(DATA_FILE_NAME_WITH_EXTENSION);
    return modified_at.epochTime();
}
void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) const
{
checkConsistencyBase();

View File

@ -55,6 +55,8 @@ public:
bool hasColumnFiles(const NameAndTypePair & column) const override;
std::optional<time_t> getColumnModificationTime(const String & column_name) const override;
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; }
~MergeTreeDataPartCompact() override;

View File

@ -43,6 +43,7 @@ public:
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override;
std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;

View File

@ -260,6 +260,18 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const
return res;
}
/// Returns the modification time (as epoch seconds) of the file named
/// `column_name + DATA_FILE_EXTENSION` in this part's storage.
/// Yields an empty optional if the lookup throws fs::filesystem_error.
std::optional<time_t> MergeTreeDataPartWide::getColumnModificationTime(const String & column_name) const
{
    try
    {
        const auto data_file_name = column_name + DATA_FILE_EXTENSION;
        const auto modified_at = getDataPartStorage().getFileLastModified(data_file_name);
        return modified_at.epochTime();
    }
    catch (const fs::filesystem_error &)
    {
        return std::nullopt;
    }
}
String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const
{
String filename;

View File

@ -54,6 +54,8 @@ public:
bool hasColumnFiles(const NameAndTypePair & column) const override;
std::optional<time_t> getColumnModificationTime(const String & column_name) const override;
protected:
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,

View File

@ -110,6 +110,9 @@ static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & colu
if (node.isConstant())
return;
if (node.isSubqueryOrSet())
return;
if (!node.isFunction())
{
auto column_name = node.getColumnName();

View File

@ -250,8 +250,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in part {} for {}. Expected: {}. Found {}.",
part->name, file_path,
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
getHexUIntUppercase(disk_checksum),
getHexUIntUppercase(cache_checksums[i]));
disk_checksums.push_back(disk_checksum);
continue;
@ -287,8 +287,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.",
part->name, proj_name,
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
getHexUIntUppercase(disk_checksum),
getHexUIntUppercase(cache_checksums[i]));
disk_checksums.push_back(disk_checksum);
}
return results;

View File

@ -181,6 +181,21 @@ bool RPNBuilderTreeNode::isConstant() const
}
}
bool RPNBuilderTreeNode::isSubqueryOrSet() const
{
if (ast_node)
{
return
typeid_cast<const ASTSubquery *>(ast_node) ||
typeid_cast<const ASTTableIdentifier *>(ast_node);
}
else
{
const auto * node_without_alias = getNodeWithoutAlias(dag_node);
return node_without_alias->result_type->getTypeId() == TypeIndex::Set;
}
}
ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const
{
if (!isConstant())

View File

@ -98,6 +98,8 @@ public:
/// Is node constant
bool isConstant() const;
bool isSubqueryOrSet() const;
/** Get constant as constant column.
* Node must be constant before calling this method, otherwise a logical exception is thrown.
*/

View File

@ -2900,8 +2900,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
}
const auto [lo, hi] = desired_checksums.hash_of_all_files;
log_entry.part_checksum = getHexUIntUppercase(hi) + getHexUIntUppercase(lo);
log_entry.part_checksum = getHexUIntUppercase(desired_checksums.hash_of_all_files);
}
else
{

View File

@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
}

View File

@ -8,6 +8,7 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeUUID.h>
#include <Storages/VirtualColumnUtils.h>
@ -62,6 +63,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
{"column_data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()},
{"column_modification_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"serialization_kind", std::make_shared<DataTypeString>()},
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
@ -235,6 +238,13 @@ void StorageSystemPartsColumns::processNextStorage(
columns[res_index++]->insert(column_size.data_uncompressed);
if (columns_mask[src_index++])
columns[res_index++]->insert(column_size.marks);
if (columns_mask[src_index++])
{
if (auto column_modification_time = part->getColumnModificationTime(column.name))
columns[res_index++]->insert(UInt64(column_modification_time.value()));
else
columns[res_index++]->insertDefault();
}
auto serialization = part->getSerialization(column.name);
if (columns_mask[src_index++])

View File

@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
columns[res_index++]->insert(getHexUIntLowercase(checksum));
}
}

View File

@ -7,6 +7,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeNullable.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>
#include <Parsers/queryToString.h>
@ -66,7 +67,8 @@ StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const S
{"column_bytes_on_disk", std::make_shared<DataTypeUInt64>()},
{"column_data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()}
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()},
{"column_modification_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
}
)
{
@ -247,6 +249,13 @@ void StorageSystemProjectionPartsColumns::processNextStorage(
columns[res_index++]->insert(column_size.data_uncompressed);
if (columns_mask[src_index++])
columns[res_index++]->insert(column_size.marks);
if (columns_mask[src_index++])
{
if (auto column_modification_time = part->getColumnModificationTime(column.name))
columns[res_index++]->insert(UInt64(column_modification_time.value()));
else
columns[res_index++]->insertDefault();
}
if (has_state_column)
columns[res_index++]->insert(part->stateString());

View File

@ -184,7 +184,7 @@ public:
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override;
void onAddFilterFinish() override;
void applyFilters() override;
private:
void fillData(MutableColumns & res_columns);
@ -421,7 +421,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont
}
void ReadFromSystemZooKeeper::onAddFilterFinish()
void ReadFromSystemZooKeeper::applyFilters()
{
paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper);
}

View File

@ -45,7 +45,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool:
return False
if build_config["sanitizer"] != "":
return True
if build_config["build_type"] != "":
if build_config["debug_build"]:
return True
return False
@ -66,8 +66,8 @@ def get_packager_cmd(
f"--package-type={package_type} --compiler={comp}"
)
if build_config["build_type"]:
cmd += f" --build-type={build_config['build_type']}"
if build_config["debug_build"]:
cmd += " --debug-build"
if build_config["sanitizer"]:
cmd += f" --sanitizer={build_config['sanitizer']}"
if build_config["tidy"] == "enable":

View File

@ -70,7 +70,7 @@ def get_failed_report(
message = f"{job_name} failed"
build_result = BuildResult(
compiler="unknown",
build_type="unknown",
debug_build=False,
sanitizer="unknown",
status=message,
elapsed_seconds=0,
@ -85,7 +85,7 @@ def process_report(
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config["compiler"],
build_type=build_config["build_type"],
debug_build=build_config["debug_build"],
sanitizer=build_config["sanitizer"],
status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report["elapsed_seconds"],

View File

@ -10,7 +10,7 @@ CI_CONFIG = {
"build_config": {
"package_release": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "deb",
"static_binary_name": "amd64",
@ -21,7 +21,7 @@ CI_CONFIG = {
},
"coverity": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "coverity",
"tidy": "disable",
@ -31,7 +31,7 @@ CI_CONFIG = {
},
"package_aarch64": {
"compiler": "clang-16-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "deb",
"static_binary_name": "aarch64",
@ -42,7 +42,7 @@ CI_CONFIG = {
},
"package_asan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "address",
"package_type": "deb",
"tidy": "disable",
@ -51,7 +51,7 @@ CI_CONFIG = {
},
"package_ubsan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "undefined",
"package_type": "deb",
"tidy": "disable",
@ -60,7 +60,7 @@ CI_CONFIG = {
},
"package_tsan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "thread",
"package_type": "deb",
"tidy": "disable",
@ -69,7 +69,7 @@ CI_CONFIG = {
},
"package_msan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "memory",
"package_type": "deb",
"tidy": "disable",
@ -78,7 +78,7 @@ CI_CONFIG = {
},
"package_debug": {
"compiler": "clang-16",
"build_type": "debug",
"debug_build": True,
"sanitizer": "",
"package_type": "deb",
"tidy": "disable",
@ -87,7 +87,7 @@ CI_CONFIG = {
},
"binary_release": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"tidy": "disable",
@ -96,7 +96,7 @@ CI_CONFIG = {
},
"binary_tidy": {
"compiler": "clang-16",
"build_type": "debug",
"debug_build": True,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "debug-amd64",
@ -106,7 +106,7 @@ CI_CONFIG = {
},
"binary_darwin": {
"compiler": "clang-16-darwin",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "macos",
@ -116,7 +116,7 @@ CI_CONFIG = {
},
"binary_aarch64": {
"compiler": "clang-16-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"tidy": "disable",
@ -125,7 +125,7 @@ CI_CONFIG = {
},
"binary_aarch64_v80compat": {
"compiler": "clang-16-aarch64-v80compat",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "aarch64v80compat",
@ -135,7 +135,7 @@ CI_CONFIG = {
},
"binary_freebsd": {
"compiler": "clang-16-freebsd",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "freebsd",
@ -145,7 +145,7 @@ CI_CONFIG = {
},
"binary_darwin_aarch64": {
"compiler": "clang-16-darwin-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "macos-aarch64",
@ -155,7 +155,7 @@ CI_CONFIG = {
},
"binary_ppc64le": {
"compiler": "clang-16-ppc64le",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "powerpc64le",
@ -165,7 +165,7 @@ CI_CONFIG = {
},
"binary_amd64_compat": {
"compiler": "clang-16-amd64-compat",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "amd64compat",

View File

@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes
@dataclass
class BuildResult:
compiler: str
build_type: str
debug_build: bool
sanitizer: str
status: str
elapsed_seconds: int
@ -484,8 +484,8 @@ def create_build_html_report(
):
row = "<tr>"
row += f"<td>{build_result.compiler}</td>"
if build_result.build_type:
row += f"<td>{build_result.build_type}</td>"
if build_result.debug_build:
row += "<td>debug</td>"
else:
row += "<td>relwithdebuginfo</td>"
if build_result.sanitizer:

View File

@ -565,6 +565,7 @@ CREATE TABLE system.parts_columns
`column_data_compressed_bytes` UInt64,
`column_data_uncompressed_bytes` UInt64,
`column_marks_bytes` UInt64,
`column_modification_time` Nullable(DateTime),
`serialization_kind` String,
`subcolumns.names` Array(String),
`subcolumns.types` Array(String),
@ -750,6 +751,7 @@ CREATE TABLE system.projection_parts_columns
`column_data_compressed_bytes` UInt64,
`column_data_uncompressed_bytes` UInt64,
`column_marks_bytes` UInt64,
`column_modification_time` Nullable(DateTime),
`bytes` UInt64,
`marks_size` UInt64,
`part_name` String

View File

@ -0,0 +1,6 @@
Wide key 1 1
Wide key 1 1
Wide value 1 0
Compact key 1 1
Compact key 1 1
Compact value 1 1

View File

@ -0,0 +1,30 @@
-- Tags: no-s3-storage
-- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details)
set mutations_sync=1;
{# the modification times of the part folder and of the column files are not always equal; this is how many seconds of difference are allowed #}
{% set mtime_diff_in_seconds = 5 %}
{% for id, settings, file_per_column in [
("wide", "min_bytes_for_wide_part=0, min_rows_for_wide_part=0", true),
("compact", "min_bytes_for_wide_part=1000, min_rows_for_wide_part=100", false)
]
%}
drop table if exists data_{{ id }};
create table data_{{ id }} (key Int) engine=MergeTree() order by tuple() settings {{ settings }};
insert into data_{{ id }} values (1);
select sleep(3) format Null;
select part_type, column, now()-modification_time < 10, modification_time - column_modification_time < {{ mtime_diff_in_seconds }} from system.parts_columns where database = currentDatabase() and table = 'data_{{ id }}';
alter table data_{{ id }} add column value Int default 0;
alter table data_{{ id }} materialize column value;
select part_type, column, now()-modification_time < 10,
{% if file_per_column %}
modification_time - column_modification_time >= 3
{% else %}
modification_time - column_modification_time < {{ mtime_diff_in_seconds }}
{% endif %}
from system.parts_columns where active and database = currentDatabase() and table = 'data_{{ id }}' order by column;
{% endfor %}

View File

@ -0,0 +1,8 @@
PREWHERE a IN
PREWHERE a IN
PREWHERE a IN
PREWHERE a IN
PREWHERE b NOT IN
PREWHERE b NOT IN
PREWHERE b NOT IN
PREWHERE b NOT IN

View File

@ -0,0 +1,56 @@
-- Test: with optimize_move_to_prewhere enabled, EXPLAIN SYNTAX is inspected for
-- queries filtering with IN / NOT IN against a subquery, a literal tuple,
-- a Set-engine table and a Memory-engine table.

-- Main MergeTree table: 10000 rows, with a wide string column `s` next to the
-- two integer columns used in the filters.
DROP TABLE IF EXISTS t_02809;
CREATE TABLE t_02809(a Int64, b Int64, s String)
ENGINE=MergeTree order by tuple()
AS SELECT number, number%10, toString(arrayMap(i-> cityHash64(i*number), range(50))) FROM numbers(10000);
-- Right-hand side for `IN <table>`: a table with the Set engine.
CREATE TABLE t_02809_set(c Int64)
ENGINE=Set()
AS SELECT * FROM numbers(10);
-- Right-hand side for `IN <table>`: a table with the Memory engine.
CREATE TABLE t_02809_aux(c Int64)
ENGINE=Memory()
AS SELECT * FROM numbers(10);
SET optimize_move_to_prewhere=1;
-- Queries with 'IN'
-- Each query keeps only the EXPLAIN SYNTAX lines that mention WHERE and
-- truncates them to 13 characters, the length of 'PREWHERE a IN'.
SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one)
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN (1,2,3)
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN t_02809_set
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN t_02809_aux
) WHERE explain LIKE '%WHERE%';
-- Queries with 'NOT IN'
-- Same checks, truncated to 17 characters, the length of 'PREWHERE b NOT IN'.
SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one)
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN (1,2,3)
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN t_02809_set
) WHERE explain LIKE '%WHERE%';
SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux
) WHERE explain LIKE '%WHERE%';
-- Cleanup.
DROP TABLE t_02809;
DROP TABLE t_02809_set;
DROP TABLE t_02809_aux;

View File

@ -45,7 +45,7 @@ int main(int, char **)
{
auto flipped = flipBit(str, pos);
auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size());
std::cout << getHexUIntLowercase(checksum.high64) << getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
std::cout << getHexUIntLowercase(checksum) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
}
return 0;