mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Fix writing ORC statistics for unsigned types
This commit is contained in:
parent
764cdb971c
commit
fd93097130
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad
|
||||
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f
|
@ -269,7 +269,12 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy
|
||||
case orc::SHORT:
|
||||
case orc::INT:
|
||||
case orc::LONG: {
|
||||
/// May throw exception
|
||||
/// May throw exception.
|
||||
///
|
||||
/// In particular, it'll throw if we request the column as unsigned, like this:
|
||||
/// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10
|
||||
/// We have to reject this, otherwise it would miss values > 127 (because
|
||||
/// they're treated as negative by ORC).
|
||||
auto val = field.get<Int64>();
|
||||
return orc::Literal(val);
|
||||
}
|
||||
|
@ -315,18 +315,20 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
if (null_bytemap)
|
||||
orc_column.hasNulls = true;
|
||||
|
||||
/// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to
|
||||
/// make the ORC library calculate min and max correctly.
|
||||
switch (type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Enum8: [[fallthrough]];
|
||||
case TypeIndex::Int8:
|
||||
{
|
||||
/// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first.
|
||||
writeNumbers<Int8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast<int64_t>(value); });
|
||||
writeNumbers<Int8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const Int8 & value){ return Int64(Int8(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::UInt8:
|
||||
{
|
||||
writeNumbers<UInt8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; });
|
||||
writeNumbers<UInt8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt8 & value){ return Int64(Int8(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Enum16: [[fallthrough]];
|
||||
@ -338,7 +340,7 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
case TypeIndex::Date: [[fallthrough]];
|
||||
case TypeIndex::UInt16:
|
||||
{
|
||||
writeNumbers<UInt16, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; });
|
||||
writeNumbers<UInt16, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt16 & value){ return Int64(Int16(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Date32: [[fallthrough]];
|
||||
@ -349,12 +351,12 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
}
|
||||
case TypeIndex::UInt32:
|
||||
{
|
||||
writeNumbers<UInt32, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; });
|
||||
writeNumbers<UInt32, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt32 & value){ return Int64(Int32(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::IPv4:
|
||||
{
|
||||
writeNumbers<IPv4, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const IPv4 & value){ return value.toUnderType(); });
|
||||
writeNumbers<IPv4, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const IPv4 & value){ return Int64(Int32(value.toUnderType())); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Int64:
|
||||
|
@ -1,4 +1,4 @@
|
||||
-- Tags: no-fasttest, no-parallel, no-cpu-aarch64
|
||||
-- Tags: no-fasttest, no-parallel
|
||||
|
||||
set output_format_orc_string_as_string = 1;
|
||||
set output_format_orc_row_index_stride = 100;
|
||||
|
41
tests/queries/0_stateless/03164_orc_signedness.reference
Normal file
41
tests/queries/0_stateless/03164_orc_signedness.reference
Normal file
@ -0,0 +1,41 @@
|
||||
-- { echoOn }
|
||||
select x from file('i8.orc') where indexHint(x = -128);
|
||||
-128
|
||||
select x from file('i8.orc') where indexHint(x = 128);
|
||||
select x from file('u8.orc') where indexHint(x = -128);
|
||||
-128
|
||||
select x from file('u8.orc') where indexHint(x = 128);
|
||||
select x from file('i16.orc') where indexHint(x = -32768);
|
||||
-32768
|
||||
select x from file('i16.orc') where indexHint(x = 32768);
|
||||
select x from file('u16.orc') where indexHint(x = -32768);
|
||||
-32768
|
||||
select x from file('u16.orc') where indexHint(x = 32768);
|
||||
select x from file('i32.orc') where indexHint(x = -2147483648);
|
||||
-2147483648
|
||||
select x from file('i32.orc') where indexHint(x = 2147483648);
|
||||
select x from file('u32.orc') where indexHint(x = -2147483648);
|
||||
-2147483648
|
||||
select x from file('u32.orc') where indexHint(x = 2147483648);
|
||||
select x from file('i64.orc') where indexHint(x = -9223372036854775808);
|
||||
-9223372036854775808
|
||||
select x from file('i64.orc') where indexHint(x = 9223372036854775808);
|
||||
-9223372036854775808
|
||||
select x from file('u64.orc') where indexHint(x = -9223372036854775808);
|
||||
-9223372036854775808
|
||||
select x from file('u64.orc') where indexHint(x = 9223372036854775808);
|
||||
-9223372036854775808
|
||||
select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10);
|
||||
128
|
||||
select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
18446744073709551488
|
||||
select x from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10);
|
||||
32768
|
||||
select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
18446744073709518848
|
||||
select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10);
|
||||
2147483648
|
||||
select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
18446744071562067968
|
||||
select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
9223372036854775808
|
40
tests/queries/0_stateless/03164_orc_signedness.sql
Normal file
40
tests/queries/0_stateless/03164_orc_signedness.sql
Normal file
@ -0,0 +1,40 @@
|
||||
set input_format_orc_filter_push_down = 1;
|
||||
set engine_file_truncate_on_insert = 1;
|
||||
|
||||
insert into function file('i8.orc') select materialize(-128)::Int8 as x;
|
||||
insert into function file('u8.orc') select materialize(128)::UInt8 as x;
|
||||
insert into function file('i16.orc') select materialize(-32768)::Int16 as x;
|
||||
insert into function file('u16.orc') select materialize(32768)::UInt16 as x;
|
||||
insert into function file('i32.orc') select materialize(-2147483648)::Int32 as x;
|
||||
insert into function file('u32.orc') select materialize(2147483648)::UInt32 as x;
|
||||
insert into function file('i64.orc') select materialize(-9223372036854775808)::Int64 as x;
|
||||
insert into function file('u64.orc') select materialize(9223372036854775808)::UInt64 as x;
|
||||
|
||||
-- { echoOn }
|
||||
select x from file('i8.orc') where indexHint(x = -128);
|
||||
select x from file('i8.orc') where indexHint(x = 128);
|
||||
select x from file('u8.orc') where indexHint(x = -128);
|
||||
select x from file('u8.orc') where indexHint(x = 128);
|
||||
|
||||
select x from file('i16.orc') where indexHint(x = -32768);
|
||||
select x from file('i16.orc') where indexHint(x = 32768);
|
||||
select x from file('u16.orc') where indexHint(x = -32768);
|
||||
select x from file('u16.orc') where indexHint(x = 32768);
|
||||
|
||||
select x from file('i32.orc') where indexHint(x = -2147483648);
|
||||
select x from file('i32.orc') where indexHint(x = 2147483648);
|
||||
select x from file('u32.orc') where indexHint(x = -2147483648);
|
||||
select x from file('u32.orc') where indexHint(x = 2147483648);
|
||||
|
||||
select x from file('i64.orc') where indexHint(x = -9223372036854775808);
|
||||
select x from file('i64.orc') where indexHint(x = 9223372036854775808);
|
||||
select x from file('u64.orc') where indexHint(x = -9223372036854775808);
|
||||
select x from file('u64.orc') where indexHint(x = 9223372036854775808);
|
||||
|
||||
select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10);
|
||||
select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
select x from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10);
|
||||
select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10);
|
||||
select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
||||
select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10);
|
Loading…
Reference in New Issue
Block a user