mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Add blockSerializedSize() function (size on disk without compression)
Sometimes it is useful to know how much space data will take on disk; blockSerializedSize() tells you this (although without taking compression into account). This is particularly valuable for aggregate functions that track some state (e.g. uniqCombined).
This commit is contained in:
parent
f979ce31ca
commit
e89ceae61a
66
dbms/src/Functions/blockSerializedSize.cpp
Normal file
66
dbms/src/Functions/blockSerializedSize.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/NullWriteBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Returns size on disk for *block* (without taking into account compression).
|
||||
class FunctionBlockSerializedSize : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "blockSerializedSize";
|
||||
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionBlockSerializedSize>();
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
||||
{
|
||||
UInt64 size = 0;
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
size += blockSerializedSizeOne(block.getByPosition(arguments[i]));
|
||||
|
||||
block.getByPosition(result).column = DataTypeUInt64().createColumnConst(
|
||||
input_rows_count, size)->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
UInt64 blockSerializedSizeOne(const ColumnWithTypeAndName & elem) const
|
||||
{
|
||||
ColumnPtr full_column = elem.column->convertToFullColumnIfConst();
|
||||
|
||||
IDataType::SerializeBinaryBulkSettings settings;
|
||||
NullWriteBuffer out;
|
||||
|
||||
settings.getter = [&out](IDataType::SubstreamPath) -> WriteBuffer * { return &out; };
|
||||
|
||||
IDataType::SerializeBinaryBulkStatePtr state;
|
||||
elem.type->serializeBinaryBulkWithMultipleStreams(*full_column,
|
||||
0 /** offset */, 0 /** limit */,
|
||||
settings, state);
|
||||
|
||||
return out.count();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Adds blockSerializedSize() to the given function factory.
void registerFunctionBlockSerializedSize(FunctionFactory & factory)
{
    factory.registerFunction<FunctionBlockSerializedSize>();
}
|
||||
|
||||
}
|
@ -14,6 +14,7 @@ void registerFunctionFQDN(FunctionFactory &);
|
||||
void registerFunctionVisibleWidth(FunctionFactory &);
|
||||
void registerFunctionToTypeName(FunctionFactory &);
|
||||
void registerFunctionGetSizeOfEnumType(FunctionFactory &);
|
||||
void registerFunctionBlockSerializedSize(FunctionFactory &);
|
||||
void registerFunctionToColumnTypeName(FunctionFactory &);
|
||||
void registerFunctionDumpColumnStructure(FunctionFactory &);
|
||||
void registerFunctionDefaultValueOfArgumentType(FunctionFactory &);
|
||||
@ -72,6 +73,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionVisibleWidth(factory);
|
||||
registerFunctionToTypeName(factory);
|
||||
registerFunctionGetSizeOfEnumType(factory);
|
||||
registerFunctionBlockSerializedSize(factory);
|
||||
registerFunctionToColumnTypeName(factory);
|
||||
registerFunctionDumpColumnStructure(factory);
|
||||
registerFunctionDefaultValueOfArgumentType(factory);
|
||||
|
@ -0,0 +1,24 @@
|
||||
UInt8 1
|
||||
Nullable(UInt8) 2
|
||||
UInt32 4
|
||||
UInt64 8
|
||||
Nullable(UInt64) 9
|
||||
|
||||
String 4
|
||||
FixedString(32) 32
|
||||
|
||||
Enum8 1
|
||||
|
||||
Array 12
|
||||
|
||||
uniqCombinedState(100) 402
|
||||
uniqCombinedState(10000) 81993
|
||||
uniqCombinedState(100000) 81993
|
||||
uniqCombinedState(1000000) 81993
|
||||
uniqCombinedState(10000000) 81993
|
||||
uniqCombined64State(10000000) 98505
|
||||
|
||||
String,UInt8 5
|
||||
|
||||
Block(UInt32) 16
|
||||
Block(UInt32) 16
|
29
dbms/tests/queries/0_stateless/01073_blockSerializedSize.sql
Normal file
29
dbms/tests/queries/0_stateless/01073_blockSerializedSize.sql
Normal file
@ -0,0 +1,29 @@
|
||||
select 'UInt8', blockSerializedSize(0);
|
||||
select 'Nullable(UInt8)', blockSerializedSize(toNullable(0));
|
||||
select 'UInt32', blockSerializedSize(0xdeadbeaf);
|
||||
select 'UInt64', blockSerializedSize(0xdeadbeafdead);
|
||||
select 'Nullable(UInt64)', blockSerializedSize(toNullable(0xdeadbeafdead));
|
||||
|
||||
select '';
|
||||
select 'String', blockSerializedSize('foo');
|
||||
select 'FixedString(32)', blockSerializedSize(cast('foo', 'FixedString(32)'));
|
||||
|
||||
select '';
|
||||
select 'Enum8', blockSerializedSize(cast('a' as Enum8('a' = 1, 'b' = 2)));
|
||||
|
||||
select '';
|
||||
select 'Array', blockSerializedSize(['foo']);
|
||||
|
||||
select '';
|
||||
select 'uniqCombinedState(100)', blockSerializedSize(uniqCombinedState(number)) from (select number from system.numbers limit 100);
|
||||
select 'uniqCombinedState(10000)', blockSerializedSize(uniqCombinedState(number)) from (select number from system.numbers limit 10000);
|
||||
select 'uniqCombinedState(100000)', blockSerializedSize(uniqCombinedState(number)) from (select number from system.numbers limit 100000);
|
||||
select 'uniqCombinedState(1000000)', blockSerializedSize(uniqCombinedState(number)) from (select number from system.numbers limit 1000000);
|
||||
select 'uniqCombinedState(10000000)', blockSerializedSize(uniqCombinedState(number)) from (select number from system.numbers limit 10000000);
|
||||
select 'uniqCombined64State(10000000)', blockSerializedSize(uniqCombined64State(number)) from (select number from system.numbers limit 10000000);
|
||||
|
||||
select '';
|
||||
select 'String,UInt8', blockSerializedSize('foo', 1);
|
||||
|
||||
select '';
|
||||
select 'Block(UInt32)', blockSerializedSize(number) from numbers(2);
|
@ -603,6 +603,34 @@ SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x
|
||||
└───┘
|
||||
```
|
||||
|
||||
## blockSerializedSize
|
||||
|
||||
Returns the size on disk of a block of values (without taking compression into account).
|
||||
|
||||
|
||||
```sql
|
||||
blockSerializedSize(value[, value[, ...]])
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `value` — Any value.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The number of bytes that will be written to disk for a block of values (without compression).
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT blockSerializedSize(maxState(1)) as x
|
||||
```
|
||||
```text
|
||||
┌─x─┐
|
||||
│ 2 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## toColumnTypeName
|
||||
|
||||
Returns the name of the class that represents the data type of the column in RAM.
|
||||
|
Loading…
Reference in New Issue
Block a user