Merge pull request #71500 from bigo-sg/fix_uninitialized_orc_data

Fix Date32 out-of-range error caused by uninitialized ORC `DataBuffer`
This commit is contained in:
Nikita Taranov 2024-11-12 11:26:58 +00:00 committed by GitHub
commit 021301430b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 42 additions and 8 deletions

View File

@ -1533,6 +1533,8 @@ static ColumnWithTypeAndName readColumnWithDateData(
column_data.reserve(orc_int_column->numElements);
for (size_t i = 0; i < orc_int_column->numElements; ++i)
{
if (!orc_int_column->hasNulls || orc_int_column->notNull[i])
{
Int32 days_num = static_cast<Int32>(orc_int_column->data[i]);
if (check_date_range && (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH))
@ -1544,6 +1546,12 @@ static ColumnWithTypeAndName readColumnWithDateData(
column_data.push_back(days_num);
}
else
{
/// Since https://github.com/ClickHouse/ClickHouse/pull/69473 the ORC library doesn't guarantee that orc_int_column->data[i]
/// is initialized to zero when orc_int_column->notNull[i] is false, so insert an explicit default instead of reading it.
column_data.push_back(0);
}
}
return {std::move(internal_column), internal_type, column_name};
}

View File

@ -0,0 +1,12 @@
number Nullable(Int64)
date_field Nullable(Date32)
\N
1970-01-02
\N
1970-01-04
\N
1970-01-06
\N
1970-01-08
\N
1970-01-10

View File

@ -0,0 +1,14 @@
-- Tags: no-fasttest, no-parallel
-- Round-trips a Date32 column that is NULL in every even row through an ORC file.

SET session_timezone = 'UTC';
SET engine_file_truncate_on_insert = 1;

-- Write ten rows: even `number`s carry NULL, odd ones carry toDate32(number).
INSERT INTO FUNCTION file('03259.orc', 'ORC')
SELECT
    number,
    if(number % 2 = 0, NULL, toDate32(number)) AS date_field
FROM numbers(10);

-- Show the column names and types reported for the ORC file, then read the dates back ordered by `number`.
DESCRIBE file('03259.orc', 'ORC');
SELECT date_field FROM file('03259.orc', 'ORC') ORDER BY number;