Merge pull request #18234 from ClickHouse/return-git-import

Return clickhouse-git-import
This commit is contained in:
alexey-milovidov 2020-12-19 13:00:19 +03:00 committed by GitHub
commit 753aa36baf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 151 additions and 12 deletions

View File

@ -36,6 +36,10 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
${ENABLE_CLICKHOUSE_ALL})
# Toggle for the clickhouse-git-import tool ("A tool to analyze Git repositories").
# Defaults to the value of ENABLE_CLICKHOUSE_ALL, like the other per-tool options here.
# See also: https://presentations.clickhouse.tech/matemarketing_2020/
option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY)
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)" OFF)
else ()
@ -52,8 +56,7 @@ else()
endif()
if (NOT ENABLE_CLICKHOUSE_CLIENT)
message(WARNING "ClickHouse client mode is not going to be built. You won't be able to connect to the server and run
tests")
message(WARNING "ClickHouse client mode is not going to be built. You won't be able to connect to the server and run tests")
else()
message(STATUS "Client mode: ON")
endif()
@ -112,6 +115,12 @@ else()
message(STATUS "ClickHouse install: OFF")
endif()
# Report in the configure log whether the git-import option is switched on.
if (NOT ENABLE_CLICKHOUSE_GIT_IMPORT)
    message(STATUS "ClickHouse git-import: OFF")
else()
    message(STATUS "ClickHouse git-import: ON")
endif()
# When neither MAKE_STATIC_LIBRARIES nor SPLIT_SHARED_LIBRARIES is set,
# turn on CLICKHOUSE_ONE_SHARED.
if (NOT MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES)
    set(CLICKHOUSE_ONE_SHARED ON)
endif()

View File

@ -205,13 +205,13 @@ struct Commit
void writeTextWithoutNewline(WriteBuffer & out) const
{
writeText(hash, out);
writeEscapedString(hash, out);
writeChar('\t', out);
writeText(author, out);
writeEscapedString(author, out);
writeChar('\t', out);
writeText(time, out);
writeChar('\t', out);
writeText(message, out);
writeEscapedString(message, out);
writeChar('\t', out);
writeText(files_added, out);
writeChar('\t', out);
@ -273,11 +273,11 @@ struct FileChange
{
writeText(change_type, out);
writeChar('\t', out);
writeText(path, out);
writeEscapedString(path, out);
writeChar('\t', out);
writeText(old_path, out);
writeEscapedString(old_path, out);
writeChar('\t', out);
writeText(file_extension, out);
writeEscapedString(file_extension, out);
writeChar('\t', out);
writeText(lines_added, out);
writeChar('\t', out);
@ -399,17 +399,17 @@ struct LineChange
writeChar('\t', out);
writeText(hunk_lines_deleted, out);
writeChar('\t', out);
writeText(hunk_context, out);
writeEscapedString(hunk_context, out);
writeChar('\t', out);
writeText(line, out);
writeEscapedString(line, out);
writeChar('\t', out);
writeText(indent, out);
writeChar('\t', out);
writeText(line_type, out);
writeChar('\t', out);
writeText(prev_commit_hash, out);
writeEscapedString(prev_commit_hash, out);
writeChar('\t', out);
writeText(prev_author, out);
writeEscapedString(prev_author, out);
writeChar('\t', out);
writeText(prev_time, out);
}

View File

@ -0,0 +1,4 @@
913
888
2931
160553

View File

@ -0,0 +1,125 @@
#!/usr/bin/env bash
# Functional test for clickhouse-git-import.
#
# Clones a fixed revision of clickhouse-odbc, runs the import tool inside the
# clone, loads the three TSV files read below (commits.tsv, file_changes.tsv,
# line_changes.tsv) into freshly created tables and prints the row counts.
# Everything written to stdout is compared against the reference file, so all
# auxiliary commands must stay silent on stdout.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

# Clone some not too large repository and create a database from it.
REPO_DIR=clickhouse-odbc
REPO_COMMIT=5d84ec591c53cbb272593f024230a052690fdf69

# Protection for network errors: retry clone + checkout as a unit.
# The checkout runs in a subshell so that a failed attempt never leaves this
# shell inside a half-prepared clone (which would make the next attempt's
# `rm -rf` / `git clone` operate in the wrong directory).
repo_ready=0
for _ in {1..10}; do
    rm -rf "./${REPO_DIR}"
    if git clone --quiet https://github.com/ClickHouse/clickhouse-odbc.git "${REPO_DIR}" \
        && ( cd "${REPO_DIR}" && git checkout --quiet "${REPO_COMMIT}" ); then
        repo_ready=1
        break
    fi
    sleep 1
done

# Fail loudly instead of running the import in an unrelated directory.
if [ "${repo_ready}" -ne 1 ]; then
    echo "Failed to clone clickhouse-odbc and check out ${REPO_COMMIT} after 10 attempts" >&2
    exit 1
fi

# Enter the clone exactly once; the TSV files below are read relative to it.
pushd "${REPO_DIR}" > /dev/null || exit 1

${CLICKHOUSE_GIT_IMPORT} 2>&1 | wc -l

${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
DROP TABLE IF EXISTS commits;
DROP TABLE IF EXISTS file_changes;
DROP TABLE IF EXISTS line_changes;
CREATE TABLE commits
(
hash String,
author LowCardinality(String),
time DateTime,
message String,
files_added UInt32,
files_deleted UInt32,
files_renamed UInt32,
files_modified UInt32,
lines_added UInt32,
lines_deleted UInt32,
hunks_added UInt32,
hunks_removed UInt32,
hunks_changed UInt32
) ENGINE = MergeTree ORDER BY time;
CREATE TABLE file_changes
(
change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6),
path LowCardinality(String),
old_path LowCardinality(String),
file_extension LowCardinality(String),
lines_added UInt32,
lines_deleted UInt32,
hunks_added UInt32,
hunks_removed UInt32,
hunks_changed UInt32,
commit_hash String,
author LowCardinality(String),
time DateTime,
commit_message String,
commit_files_added UInt32,
commit_files_deleted UInt32,
commit_files_renamed UInt32,
commit_files_modified UInt32,
commit_lines_added UInt32,
commit_lines_deleted UInt32,
commit_hunks_added UInt32,
commit_hunks_removed UInt32,
commit_hunks_changed UInt32
) ENGINE = MergeTree ORDER BY time;
CREATE TABLE line_changes
(
sign Int8,
line_number_old UInt32,
line_number_new UInt32,
hunk_num UInt32,
hunk_start_line_number_old UInt32,
hunk_start_line_number_new UInt32,
hunk_lines_added UInt32,
hunk_lines_deleted UInt32,
hunk_context LowCardinality(String),
line LowCardinality(String),
indent UInt8,
line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3),
prev_commit_hash String,
prev_author LowCardinality(String),
prev_time DateTime,
file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6),
path LowCardinality(String),
old_path LowCardinality(String),
file_extension LowCardinality(String),
file_lines_added UInt32,
file_lines_deleted UInt32,
file_hunks_added UInt32,
file_hunks_removed UInt32,
file_hunks_changed UInt32,
commit_hash String,
author LowCardinality(String),
time DateTime,
commit_message String,
commit_files_added UInt32,
commit_files_deleted UInt32,
commit_files_renamed UInt32,
commit_files_modified UInt32,
commit_lines_added UInt32,
commit_lines_deleted UInt32,
commit_hunks_added UInt32,
commit_hunks_removed UInt32,
commit_hunks_changed UInt32
) ENGINE = MergeTree ORDER BY time;
"

${CLICKHOUSE_CLIENT} --query "INSERT INTO commits FORMAT TSV" < commits.tsv
${CLICKHOUSE_CLIENT} --query "INSERT INTO file_changes FORMAT TSV" < file_changes.tsv
${CLICKHOUSE_CLIENT} --query "INSERT INTO line_changes FORMAT TSV" < line_changes.tsv

${CLICKHOUSE_CLIENT} --query "SELECT count() FROM commits"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file_changes"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM line_changes"

${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
DROP TABLE commits;
DROP TABLE file_changes;
DROP TABLE line_changes;
"

# Restore the original working directory (popd prints the directory stack, so
# silence it to keep stdout equal to the reference) and remove the clone
# together with the TSV files that were read from inside it.
popd > /dev/null || exit 1
rm -rf "./${REPO_DIR}"

View File

@ -23,6 +23,7 @@ export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICK
export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"}
export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfuscator"}
export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark ${CLICKHOUSE_BENCHMARK_OPT0:-}"}
# Command used by tests to run clickhouse-git-import. Uses ':=' like the sibling
# variables so that an empty (not just unset) environment value also falls back
# to the default derived from CLICKHOUSE_BINARY.
export CLICKHOUSE_GIT_IMPORT=${CLICKHOUSE_GIT_IMPORT:="${CLICKHOUSE_BINARY}-git-import"}
export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"}
export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"}