Merge pull request #13449 from ClickHouse/mixed_granularity_parts_by_default

Enable mixed granularity parts by default
This commit is contained in:
alexey-milovidov 2020-08-09 01:36:59 +03:00 committed by GitHub
commit aefa9b297b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 23 additions and 33 deletions

View File

@ -85,7 +85,7 @@ struct Settings;
M(Int64, merge_with_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with TTL can be repeated.", 0) \
M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \
M(Bool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)", 0) \
M(Bool, enable_mixed_granularity_parts, 0, "Enable parts with adaptive and non adaptive granularity", 0) \
M(Bool, enable_mixed_granularity_parts, 1, "Enable parts with adaptive and non adaptive granularity", 0) \
M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \
M(MaxThreads, max_part_removal_threads, 0, "The number of threads for concurrent removal of inactive data parts. One is usually enough, but in 'Google Compute Environment SSD Persistent Disks' file removal (unlink) operation is extraordinarily slow and you probably have to increase this number (recommended is up to 16).", 0) \
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \

View File

@ -301,11 +301,16 @@ def test_mixed_granularity_single_node(start_dynamic_cluster, node):
def test_version_update_two_nodes(start_dynamic_cluster):
node11.query("INSERT INTO table_with_default_granularity VALUES (toDate('2018-10-01'), 1, 333), (toDate('2018-10-02'), 2, 444)")
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity")
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity", timeout=10)
assert node12.query("SELECT COUNT() FROM table_with_default_granularity") == '2\n'
node12.restart_with_latest_version()
def callback(n):
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml", "<yandex><merge_tree><enable_mixed_granularity_parts>0</enable_mixed_granularity_parts></merge_tree></yandex>")
n.replace_config("/etc/clickhouse-server/config.d/merge_tree_settings.xml", "<yandex><merge_tree><enable_mixed_granularity_parts>0</enable_mixed_granularity_parts></merge_tree></yandex>")
node12.restart_with_latest_version(callback_onstop=callback)
node12.query("INSERT INTO table_with_default_granularity VALUES (toDate('2018-10-01'), 3, 333), (toDate('2018-10-02'), 4, 444)")
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity")
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity", timeout=10)
assert node11.query("SELECT COUNT() FROM table_with_default_granularity") == '4\n'
node12.query(
@ -329,14 +334,14 @@ def test_version_update_two_nodes(start_dynamic_cluster):
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity_new", timeout=5)
node12.query("INSERT INTO table_with_default_granularity_new VALUES (toDate('2018-10-01'), 3, 333), (toDate('2018-10-02'), 4, 444)")
node11.restart_with_latest_version() # just to be sure
node11.restart_with_latest_version(callback_onstop=callback) # just to be sure
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity_new", timeout=5)
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity_new", timeout=5)
node11.query("SELECT COUNT() FROM table_with_default_granularity_new") == "4\n"
node12.query("SELECT COUNT() FROM table_with_default_granularity_new") == "4\n"
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity")
node11.query("SYSTEM SYNC REPLICA table_with_default_granularity", timeout=5)
node11.query("INSERT INTO table_with_default_granularity VALUES (toDate('2018-10-01'), 5, 333), (toDate('2018-10-02'), 6, 444)")
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity")
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity", timeout=5)
assert node12.query("SELECT COUNT() FROM table_with_default_granularity") == '6\n'

View File

@ -32,12 +32,12 @@ def test_attach_detach(start_cluster):
node2.query("""
CREATE TABLE test (key UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/test', '2')
ORDER BY tuple()""")
ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 0""")
node2.query("INSERT INTO test VALUES (3), (4)")
node1.query("SYSTEM SYNC REPLICA test")
node2.query("SYSTEM SYNC REPLICA test")
node1.query("SYSTEM SYNC REPLICA test", timeout=10)
node2.query("SYSTEM SYNC REPLICA test", timeout=10)
assert node1.query("SELECT COUNT() FROM test") == "4\n"
assert node2.query("SELECT COUNT() FROM test") == "4\n"

View File

@ -27,7 +27,7 @@ def test_creating_table_different_setting(start_cluster):
node1.query("CREATE TABLE t1 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t1', '1') ORDER BY tuple(c1) SETTINGS index_granularity_bytes = 0")
node1.query("INSERT INTO t1 VALUES('x', 'y')")
node2.query("CREATE TABLE t1 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t1', '2') ORDER BY tuple(c1)")
node2.query("CREATE TABLE t1 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t1', '2') ORDER BY tuple(c1) SETTINGS enable_mixed_granularity_parts = 0")
node1.query("INSERT INTO t1 VALUES('a', 'b')")
node2.query("SYSTEM SYNC REPLICA t1", timeout=5)
@ -64,7 +64,7 @@ def test_old_node_with_new_node(start_cluster):
node3.query("CREATE TABLE t2 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t2', '3') ORDER BY tuple(c1)")
node3.query("INSERT INTO t2 VALUES('x', 'y')")
node2.query("CREATE TABLE t2 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t2', '2') ORDER BY tuple(c1)")
node2.query("CREATE TABLE t2 (c1 String, c2 String) ENGINE=ReplicatedMergeTree('/clickhouse/t2', '2') ORDER BY tuple(c1) SETTINGS enable_mixed_granularity_parts = 0")
node3.query("INSERT INTO t2 VALUES('a', 'b')")
node2.query("SYSTEM SYNC REPLICA t2", timeout=5)

View File

@ -29,7 +29,7 @@ def insert_random_data(table, node, size):
str(get_random_array()))) +
')' for i in range(size)
]
node.query("INSERT INTO {} VALUES {}".format(table, ','.join(data)))
def create_tables(name, nodes, node_settings, shard):
@ -40,7 +40,7 @@ def create_tables(name, nodes, node_settings, shard):
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/{name}', '{repl}')
PARTITION BY toYYYYMM(date)
ORDER BY id
SETTINGS index_granularity = 64, index_granularity_bytes = {index_granularity_bytes},
SETTINGS index_granularity = 64, index_granularity_bytes = {index_granularity_bytes},
min_rows_for_wide_part = {min_rows_for_wide_part}, min_rows_for_compact_part = {min_rows_for_compact_part},
in_memory_parts_enable_wal = 1
'''.format(name=name, shard=shard, repl=i, **settings))
@ -84,7 +84,7 @@ def start_cluster():
create_tables('polymorphic_table', [node1, node2], [settings_default, settings_default], "shard1")
create_tables('compact_parts_only', [node1, node2], [settings_compact_only, settings_compact_only], "shard1")
create_tables('non_adaptive_table', [node1, node2], [settings_not_adaptive, settings_default], "shard1")
create_tables('non_adaptive_table', [node1, node2], [settings_not_adaptive, settings_not_adaptive], "shard1")
create_tables('polymorphic_table_compact', [node3, node4], [settings_compact, settings_wide], "shard2")
create_tables('polymorphic_table_wide', [node3, node4], [settings_wide, settings_compact], "shard2")
create_tables_old_format('polymorphic_table', [node5, node6], "shard3")
@ -184,7 +184,6 @@ def test_compact_parts_only(start_cluster):
assert TSV(node2.query("SELECT part_type, count() FROM system.parts " \
"WHERE table = 'compact_parts_only' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected)
# Check that follower replicas create parts of the same type, which leader has chosen at merge.
@pytest.mark.parametrize(
('table', 'part_type'),
@ -268,7 +267,7 @@ def test_polymorphic_parts_diff_versions(start_cluster_diff_versions):
@pytest.mark.skip(reason="compatability is temporary broken")
def test_polymorphic_parts_diff_versions_2(start_cluster_diff_versions):
# Replication doesn't work on old version if part is created in compact format, because
# Replication doesn't work on old version if part is created in compact format, because
# this version doesn't know anything about it. It's considered to be ok.
node_old = node7
@ -465,8 +464,8 @@ def test_in_memory_alters(start_cluster):
def test_polymorphic_parts_index(start_cluster):
node1.query('''
CREATE TABLE index_compact(a UInt32, s String)
ENGINE = MergeTree ORDER BY a
CREATE TABLE index_compact(a UInt32, s String)
ENGINE = MergeTree ORDER BY a
SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100''')
node1.query("INSERT INTO index_compact SELECT number, toString(number) FROM numbers(100)")

View File

@ -16,20 +16,6 @@ OPTIMIZE TABLE mixed_granularity_table FINAL;
SELECT COUNT() FROM mixed_granularity_table;
-- check strange cases when we try to replace parts from another tables but with different granularity settings
DROP TABLE IF EXISTS non_mixed_granularity_adaptive_table;
CREATE TABLE non_mixed_granularity_adaptive_table AS test.hits;
INSERT INTO non_mixed_granularity_adaptive_table SELECT * FROM test.hits LIMIT 10;
ALTER TABLE non_mixed_granularity_adaptive_table REPLACE PARTITION 201403 FROM test.hits; -- { serverError 36 }
DROP TABLE IF EXISTS non_mixed_granularity_adaptive_table;
DROP TABLE IF EXISTS non_mixed_granularity_non_adaptive_table;
CREATE TABLE non_mixed_granularity_non_adaptive_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, index_granularity_bytes=0; -- same with hits, but enabled mixed granularity and fixed_granularity
INSERT INTO non_mixed_granularity_non_adaptive_table SELECT * FROM test.hits LIMIT 10;