mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
209 lines
6.5 KiB
Python
209 lines
6.5 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# -*- coding: UTF-8 -*-
|
||
|
"""
|
||
|
print_backup_info: Extract information about a backup from ".backup" file
|
||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
|
Usage: print_backup_info <path-to-backup-xml>
|
||
|
"""
|
||
|
import sys
|
||
|
import os
|
||
|
import xml.etree.ElementTree as ET
|
||
|
|
||
|
|
||
|
def main():
    """Print a summary of the backup described by a ".backup" XML file.

    The path to the XML file is taken from the single command-line argument.
    The function prints the backup's version, timestamp and base backup,
    the number and total size of files (split into data stored in this backup
    and data taken from the base backup), the list of databases, and a
    per-table statistics table.

    Exits with status 1 after printing the usage text when the number of
    arguments is wrong, or after printing an error message when the given
    path is not an existing file.
    """
    if len(sys.argv) != 2:
        print(__doc__)
        sys.exit(1)
    backup_xml = sys.argv[1]

    if not os.path.isfile(backup_xml):
        print("error: {} does not exist".format(backup_xml))
        sys.exit(1)

    # The whole document is parsed into memory at once.
    root = ET.parse(backup_xml).getroot()

    # find() returns None for an absent element, so test identity with None
    # explicitly rather than relying on equality or truthiness of the node.
    version_node = root.find("version")
    version = int(version_node.text) if version_node is not None else None

    timestamp_node = root.find("timestamp")
    timestamp = timestamp_node.text if timestamp_node is not None else None

    base_backup_node = root.find("base_backup")
    base_backup = base_backup_node.text if base_backup_node is not None else None

    number_of_files = 0
    size_of_files = 0
    number_of_files_from_base_backup = 0
    size_of_files_from_base_backup = 0
    databases = set()
    # table name -> [num_files, size_of_files,
    #                num_files_from_base_backup, size_of_files_from_base_backup]
    tables = {}

    # A backup file without a <contents> element is treated as containing no
    # entries instead of failing while iterating over None.
    contents = root.find("contents")
    entries = contents if contents is not None else ()

    for entry in entries:
        name = entry.find("name").text
        size = int(entry.find("size").text)

        use_base_node = entry.find("use_base")
        use_base = use_base_node is not None and use_base_node.text == "true"

        if use_base:
            # Without an explicit <base_size> the whole entry is counted as
            # coming from the base backup.
            base_size_node = entry.find("base_size")
            base_size = int(base_size_node.text) if base_size_node is not None else size
        else:
            base_size = 0

        # Only entries whose <data_file> is absent or equal to their own name
        # contribute to the size and file counters.
        data_file_node = entry.find("data_file")
        data_file = data_file_node.text if data_file_node is not None else name
        has_data_file = name == data_file

        if has_data_file:
            if size > base_size:
                number_of_files += 1
                size_of_files += size - base_size
            if base_size > 0:
                number_of_files_from_base_backup += 1
                size_of_files_from_base_backup += base_size

        table_name = extract_table_name_from_path(name)
        if table_name:
            # Register the table even if this entry adds nothing to its
            # counters (for example its ".sql" metadata file).
            table_info = tables.setdefault(table_name, [0, 0, 0, 0])
            if not name.endswith(".sql") and has_data_file:
                if size > base_size:
                    table_info[0] += 1
                    table_info[1] += size - base_size
                if base_size > 0:
                    table_info[2] += 1
                    table_info[3] += base_size

        database_name = extract_database_name_from_path(name)
        if database_name:
            databases.add(database_name)

    # The ".backup" file itself is counted as part of the backup size.
    size_of_backup = size_of_files + os.path.getsize(backup_xml)

    print(f"version={version}")
    print(f"timestamp={timestamp}")
    print(f"base_backup={base_backup}")
    print(f"size_of_backup={size_of_backup}")
    print(f"number_of_files={number_of_files}")
    print(f"size_of_files={size_of_files}")
    print(f"number_of_files_from_base_backup={number_of_files_from_base_backup}")
    print(f"size_of_files_from_base_backup={size_of_files_from_base_backup}")
    print(f"number_of_databases={len(databases)}")
    print(f"number_of_tables={len(tables)}")

    print()

    print(f"{len(databases)} database(s):")
    for database_name in sorted(databases):
        print(database_name)

    print()

    print(f"{len(tables)} table(s):")
    table_info_format = "{:>70} | {:>20} | {:>20} | {:>26} | {:>30}"
    table_info_separator_line = (
        "{:->70}-+-{:->20}-+-{:->20}-+-{:->26}-+-{:->30}".format("", "", "", "", "")
    )
    table_info_title_line = table_info_format.format(
        "table name",
        "num_files",
        "size_of_files",
        "num_files_from_base_backup",
        "size_of_files_from_base_backup",
    )
    print(table_info_title_line)
    print(table_info_separator_line)
    for table_name in sorted(tables):
        table_info = tables[table_name]
        print(
            table_info_format.format(
                table_name, table_info[0], table_info[1], table_info[2], table_info[3]
            )
        )
|
||
|
|
||
|
|
||
|
def extract_table_name_from_path(path):
    """Return "<database>.<table>" for a path inside a backup, or None.

    For example, 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'
    yields 'default.tbl'.

    After the optional "shards/.../replicas/.../" prefix is removed, two
    layouts are recognised:
      * "metadata/<database>/<table>.sql..." - the table name is the text
        before the first ".sql";
      * "data/<database>/<table>/..." - the table name is the next path
        component.
    Any other path gives None.
    """
    relative = strip_shards_replicas_from_path(path)
    if not relative:
        return None

    # The two layouts differ only in what terminates the table name.
    if relative.startswith("metadata/"):
        table_terminator = ".sql"
    elif relative.startswith("data/"):
        table_terminator = "/"
    else:
        return None

    # Drop the leading "metadata" / "data" component.
    _, _, remainder = relative.partition("/")

    database, slash, remainder = remainder.partition("/")
    if not slash:
        return None

    table, terminator, _ = remainder.partition(table_terminator)
    if not terminator:
        return None

    return database + "." + table
|
||
|
|
||
|
|
||
|
def extract_database_name_from_path(path):
    """Return the database name for a path inside a backup, or None.

    For example, 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'
    yields 'default'.

    After the optional "shards/.../replicas/.../" prefix is removed, two
    layouts are recognised:
      * "metadata/<database>.sql" with no further "/" - the database name is
        the text before the first ".sql";
      * "data/<database>/..." - the database name is the first component
        after "data/".
    Any other path gives None.
    """
    relative = strip_shards_replicas_from_path(path)
    if not relative:
        return None

    metadata_prefix = "metadata/"
    if relative.startswith(metadata_prefix):
        remainder = relative[len(metadata_prefix):]
        database, marker, _ = remainder.partition(".sql")
        # A nested path here is not a database-level metadata file.
        if not marker or "/" in remainder:
            return None
        return database

    data_prefix = "data/"
    if relative.startswith(data_prefix):
        database, slash, _ = relative[len(data_prefix):].partition("/")
        return database if slash else None

    return None
|
||
|
|
||
|
|
||
|
def strip_shards_replicas_from_path(path):
    """Remove a leading "shards/<number>/replicas/<number>/" from a path.

    Each of the two parts is optional and is removed only when the path (or
    what remains of it) starts with the corresponding keyword. Returns None
    when a part is present but is not followed by two "/" separators;
    otherwise returns the remaining path.
    """
    for keyword in ("shards", "replicas"):
        if not path.startswith(keyword):
            continue
        # "<keyword>/<number>/<rest>" -> keep only "<rest>".
        pieces = path.split("/", 2)
        if len(pieces) < 3:
            return None
        path = pieces[2]
    return path
|
||
|
|
||
|
|
||
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|