mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #66025 from KevinyhZou/support_orc_reader_timezone
Support reading ORC files using the writer's time zone
This commit is contained in:
commit
aaf5412c71
@ -1062,6 +1062,7 @@ class IColumn;
|
||||
M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
|
||||
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
|
||||
M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \
|
||||
M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \
|
||||
M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \
|
||||
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
|
||||
M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \
|
||||
|
@ -61,6 +61,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
|
||||
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
|
||||
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
|
||||
{"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."},
|
||||
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
|
||||
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
|
||||
{"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},
|
||||
|
@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
|
||||
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
|
||||
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
|
||||
format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone;
|
||||
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
|
||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;
|
||||
|
@ -403,6 +403,7 @@ struct FormatSettings
|
||||
bool use_fast_decoder = true;
|
||||
bool filter_push_down = true;
|
||||
UInt64 output_row_index_stride = 10'000;
|
||||
bool read_use_writer_time_zone = false;
|
||||
} orc{};
|
||||
|
||||
/// For capnProto format we should determine how to
|
||||
|
@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader()
|
||||
|
||||
orc::RowReaderOptions row_reader_options;
|
||||
row_reader_options.includeTypes(include_indices);
|
||||
if (format_settings.orc.read_use_writer_time_zone)
|
||||
{
|
||||
String writer_time_zone = current_stripe_info->getWriterTimezone();
|
||||
row_reader_options.setTimezoneName(writer_time_zone);
|
||||
}
|
||||
row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength());
|
||||
if (format_settings.orc.filter_push_down && sarg)
|
||||
{
|
||||
|
@ -0,0 +1 @@
|
||||
1 2024-06-30 20:00:00.000
|
12
tests/queries/0_stateless/03198_orc_read_time_zone.sh
Executable file
12
tests/queries/0_stateless/03198_orc_read_time_zone.sh
Executable file
@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
# Stateless test for the input_format_orc_read_use_writer_time_zone setting:
# it loads an ORC data file into a temporary table with the setting enabled
# and prints the stored rows so they can be compared with the reference output.
#
# CURDIR is the absolute directory of this script; it is used below to locate
# both shell_config.sh and the ORC data file.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
# Sourced from the parent directory; presumably this is what defines
# $CLICKHOUSE_CLIENT used by every command below -- confirm in shell_config.sh.
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Remove any table left over from an earlier run so the create below succeeds.
$CLICKHOUSE_CLIENT -q "drop table if exists test"
|
||||
# Target table: an id plus a DateTime64 column that receives the ORC timestamp.
$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id"
|
||||
# Import the ORC file with input_format_orc_read_use_writer_time_zone enabled.
# NOTE(review): the data file was presumably written with a non-GMT writer time zone -- confirm.
$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC"
|
||||
# Read the rows back with an explicit session_timezone; presumably this keeps the
# printed DateTime64 text independent of the server's default time zone -- confirm.
$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'"
|
||||
# Clean up the table created by this test.
$CLICKHOUSE_CLIENT -q "drop table test"
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user