mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Fixed bug: unexpected result when using an IN clause for filtering partitions
This commit is contained in:
parent
db69ab9d17
commit
5885cfd869
@ -2,5 +2,8 @@
|
||||
hive -e "create database test"
|
||||
|
||||
hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
|
||||
# Parquet table partitioned by (day, hour). Every partition column needs an explicit
# type in Hive DDL; hour is a string because the inserts below supply '00' / '01'.
hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'"
|
||||
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
|
||||
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
|
||||
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
|
||||
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;"
|
||||
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;"
|
||||
|
@ -1,7 +1,7 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
hdfs1:
|
||||
image: lgboustc/hive_test:v1.0
|
||||
image: lgboustc/hive_test:v2.0
|
||||
hostname: hivetest
|
||||
restart: always
|
||||
entrypoint: bash /start.sh
|
||||
|
@ -36,7 +36,7 @@ public:
|
||||
ContextPtr /* query_context */,
|
||||
const StorageMetadataPtr & /* metadata_snapshot */) const override
|
||||
{
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,7 +30,8 @@ def test_create_parquet_table(started_cluster):
|
||||
node.query("set input_format_parquet_allow_missing_columns = true")
|
||||
result = node.query("""
|
||||
DROP TABLE IF EXISTS default.demo_parquet;
|
||||
CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day)
|
||||
CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day);
|
||||
CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour);
|
||||
""")
|
||||
logging.info("create result {}".format(result))
|
||||
time.sleep(120)
|
||||
@ -70,6 +71,15 @@ def test_parquet_groupby(started_cluster):
|
||||
2021-11-16 2
|
||||
"""
|
||||
assert result == expected_result
|
||||
|
||||
def test_parquet_in_filter(started_cluster):
|
||||
logging.info('Start testing groupby ...')
|
||||
node = started_cluster.instances['h0_0_0']
|
||||
result = node.query("""
|
||||
SELECT day, count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00')
|
||||
""")
|
||||
expected_result = """2021-11-05 2"""
|
||||
assert result == expected_result
|
||||
def test_orc_groupby(started_cluster):
|
||||
logging.info('Start testing groupby ...')
|
||||
node = started_cluster.instances['h0_0_0']
|
||||
|
Loading…
Reference in New Issue
Block a user