Merge pull request #71500 from bigo-sg/fix_uninitialized_orc_data

Fix date32 out of range caused by uninitialized orc `DataBuffer`
This commit is contained in:
Nikita Taranov 2024-11-12 11:26:58 +00:00 committed by GitHub
commit 021301430b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 42 additions and 8 deletions

View File

@ -1534,15 +1534,23 @@ static ColumnWithTypeAndName readColumnWithDateData(
for (size_t i = 0; i < orc_int_column->numElements; ++i)
{
Int32 days_num = static_cast<Int32>(orc_int_column->data[i]);
if (check_date_range && (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH))
throw Exception(
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
"Input value {} of a column \"{}\" exceeds the range of type Date32",
days_num,
column_name);
if (!orc_int_column->hasNulls || orc_int_column->notNull[i])
{
Int32 days_num = static_cast<Int32>(orc_int_column->data[i]);
if (check_date_range && (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH))
throw Exception(
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
"Input value {} of a column \"{}\" exceeds the range of type Date32",
days_num,
column_name);
column_data.push_back(days_num);
column_data.push_back(days_num);
}
else
{
/// ORC library doesn't guarantee that orc_int_column->data[i] is initialized to zero when orc_int_column->notNull[i] is false since https://github.com/ClickHouse/ClickHouse/pull/69473
column_data.push_back(0);
}
}
return {std::move(internal_column), internal_type, column_name};

View File

@ -0,0 +1,12 @@
number Nullable(Int64)
date_field Nullable(Date32)
\N
1970-01-02
\N
1970-01-04
\N
1970-01-06
\N
1970-01-08
\N
1970-01-10

View File

@ -0,0 +1,14 @@
-- Tags: no-fasttest, no-parallel
SET session_timezone = 'UTC';
SET engine_file_truncate_on_insert = 1;
insert into function file('03259.orc', 'ORC')
select
number,
if (number % 2 = 0, null, toDate32(number)) as date_field
from numbers(10);
desc file('03259.orc', 'ORC');
select date_field from file('03259.orc', 'ORC') order by number;