From 338a6e20f60bb21c99ee2c4f261d96bc55ec4b97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Sep 2020 09:12:16 +0300 Subject: [PATCH] Added a script to import git repository to ClickHouse --- utils/git-to-clickhouse/git-to-clickhouse.cpp | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/utils/git-to-clickhouse/git-to-clickhouse.cpp b/utils/git-to-clickhouse/git-to-clickhouse.cpp index 42920328ad7..314bba0d5b4 100644 --- a/utils/git-to-clickhouse/git-to-clickhouse.cpp +++ b/utils/git-to-clickhouse/git-to-clickhouse.cpp @@ -16,6 +16,101 @@ #include +/** How to use: first create the database and tables with the SQL below, then load commits.tsv, file_changes.tsv and line_changes.tsv with the clickhouse-client commands listed after the table definitions. + +DROP DATABASE IF EXISTS git; +CREATE DATABASE git; + +CREATE TABLE git.commits +( + hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + message String, + files_added UInt32, + files_deleted UInt32, + files_renamed UInt32, + files_modified UInt32, + lines_added UInt32, + lines_deleted UInt32, + hunks_added UInt32, + hunks_removed UInt32, + hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +CREATE TABLE git.file_changes +( + change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), + new_file_path LowCardinality(String), + old_file_path LowCardinality(String), + lines_added UInt16, + lines_deleted UInt16, + hunks_added UInt16, + hunks_removed UInt16, + hunks_changed UInt16, + + commit_hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + commit_message String, + commit_files_added UInt32, + commit_files_deleted UInt32, + commit_files_renamed UInt32, + commit_files_modified UInt32, + commit_lines_added UInt32, + commit_lines_deleted UInt32, + commit_hunks_added UInt32, + commit_hunks_removed UInt32, + commit_hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +CREATE TABLE git.line_changes +( + sign Int8, + line_number_old UInt16, + line_number_new UInt16, + hunk_num UInt16, + hunk_start_line_number_old 
UInt16, + hunk_start_line_number_new UInt16, + hunk_context LowCardinality(String), + line LowCardinality(String), + indent UInt8, + line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3), + + file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6), + new_file_path LowCardinality(String), + old_file_path LowCardinality(String), + file_lines_added UInt16, + file_lines_deleted UInt16, + file_hunks_added UInt16, + file_hunks_removed UInt16, + file_hunks_changed UInt16, + + commit_hash String, + author_name LowCardinality(String), + author_email LowCardinality(String), + time DateTime, + commit_message String, + commit_files_added UInt32, + commit_files_deleted UInt32, + commit_files_renamed UInt32, + commit_files_modified UInt32, + commit_lines_added UInt32, + commit_lines_deleted UInt32, + commit_hunks_added UInt32, + commit_hunks_removed UInt32, + commit_hunks_changed UInt32 +) ENGINE = MergeTree ORDER BY time; + +clickhouse-client --query "INSERT INTO git.commits FORMAT TSV" < commits.tsv +clickhouse-client --query "INSERT INTO git.file_changes FORMAT TSV" < file_changes.tsv +clickhouse-client --query "INSERT INTO git.line_changes FORMAT TSV" < line_changes.tsv + + */ + + namespace DB { @@ -495,6 +590,7 @@ void processCommit(std::string hash, Result & result) if (file_change_and_line_changes) { ++commit.lines_deleted; + ++file_change_and_line_changes->file_change.lines_deleted; line_change.sign = -1; readStringUntilNextLine(line_change.line, in); @@ -530,6 +626,7 @@ void processCommit(std::string hash, Result & result) if (file_change_and_line_changes) { ++commit.lines_added; + ++file_change_and_line_changes->file_change.lines_added; line_change.sign = 1; readStringUntilNextLine(line_change.line, in);