Fixed bug: unexpected result when using IN clause for filtering partitions

lgbo-ustc 2022-02-28 16:47:50 +08:00 committed by liangjiabiao
parent db69ab9d17
commit 5885cfd869
4 changed files with 17 additions and 4 deletions
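For context, the unexpected results showed up on queries that filter a Hive-backed table with an IN predicate over its partition columns. Below is a minimal reproduction sketch written in the style of the integration test added in this commit; the h0_0_0 instance and the default.demo_parquet_parts table are the ones that test defines, everything else is illustrative.

# Hypothetical reproduction, in the style of the integration tests in this commit.
# Assumes a started cluster with the 'h0_0_0' instance and the
# default.demo_parquet_parts Hive-engine table created further down.
def reproduce_in_filter_issue(started_cluster):
    node = started_cluster.instances['h0_0_0']
    # Before the fix, filtering partition columns with IN could produce an
    # unexpected result; semantically, the two queries below must always agree.
    with_in = node.query(
        "SELECT count(*) FROM default.demo_parquet_parts "
        "WHERE day = '2021-11-05' AND hour IN ('00')")
    with_eq = node.query(
        "SELECT count(*) FROM default.demo_parquet_parts "
        "WHERE day = '2021-11-05' AND hour = '00'")
    assert with_in == with_eq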

View File

@@ -2,5 +2,8 @@
hive -e "create database test"
hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'"
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;"

View File

@@ -1,7 +1,7 @@
version: '2.3'
services:
hdfs1:
image: lgboustc/hive_test:v1.0
image: lgboustc/hive_test:v2.0
hostname: hivetest
restart: always
entrypoint: bash /start.sh
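The image bump to lgboustc/hive_test:v2.0 presumably ships the prepared parquet_demo data used by the new test. The tests below reach the metastore at thrift://hivetest:9083; here is a hedged, standard-library sketch (not part of this commit, helper name made up) for checking that the endpoint is reachable before issuing queries:

import socket
import time

# Hypothetical readiness probe: poll until a TCP connection to the Hive
# metastore (host/port taken from the ENGINE definitions below) succeeds.
def wait_for_metastore(host="hivetest", port=9083, timeout=120):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=5):
                return True
        except OSError:
            time.sleep(2)
    return False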

View File

@@ -36,7 +36,7 @@ public:
        ContextPtr /* query_context */,
        const StorageMetadataPtr & /* metadata_snapshot */) const override
    {
        return false;
        return true;
    }
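Judging by the query-context and metadata-snapshot parameters, this appears to be the Hive storage's mayBenefitFromIndexForIn override; flipping it to return true tells the analyzer that IN predicates are worth preparing for this storage, which is what lets them take part in partition pruning. A hedged consistency check in the style of the tests below (instance and table names are the ones the tests define; not part of this commit):

# Hedged illustration: a multi-value IN over the partition key should behave
# like the union of the corresponding equality filters.
def check_multi_value_in(started_cluster):
    node = started_cluster.instances['h0_0_0']
    both = node.query(
        "SELECT count(*) FROM default.demo_parquet_parts "
        "WHERE day = '2021-11-05' AND hour IN ('00', '01')")
    h00 = node.query(
        "SELECT count(*) FROM default.demo_parquet_parts "
        "WHERE day = '2021-11-05' AND hour = '00'")
    h01 = node.query(
        "SELECT count(*) FROM default.demo_parquet_parts "
        "WHERE day = '2021-11-05' AND hour = '01'")
    assert int(both) == int(h00) + int(h01)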

View File

@@ -30,7 +30,8 @@ def test_create_parquet_table(started_cluster):
    node.query("set input_format_parquet_allow_missing_columns = true")
    result = node.query("""
DROP TABLE IF EXISTS default.demo_parquet;
CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day)
CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day);
CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour);
    """)
    logging.info("create result {}".format(result))
    time.sleep(120)
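The fixed time.sleep(120) above gives the Hive side time to become queryable. A hedged alternative sketch that polls the newly created table instead of sleeping blindly (not part of this commit; node and table names reused from the test above):

import time

# Hypothetical helper: issue a cheap query until it succeeds or the timeout hits.
def wait_until_queryable(node, table="default.demo_parquet_parts", timeout=120):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            node.query(f"SELECT count(*) FROM {table}")
            return True
        except Exception:
            time.sleep(5)
    return False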
@@ -70,6 +71,15 @@ def test_parquet_groupby(started_cluster):
2021-11-16 2
"""
    assert result == expected_result

def test_parquet_in_filter(started_cluster):
    logging.info('Start testing IN filter ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
SELECT day, count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00')
    """)
    expected_result = """2021-11-05 2
"""
    assert result == expected_result

def test_orc_groupby(started_cluster):
    logging.info('Start testing groupby ...')
    node = started_cluster.instances['h0_0_0']