Merge branch 'master' into fix-test-object-type

This commit is contained in:
Alexey Milovidov 2023-07-04 00:48:50 +02:00
commit ea790630b4
25 changed files with 144 additions and 40 deletions

View File

@ -16,8 +16,9 @@ curl https://clickhouse.com/ | sh
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlighting and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming Events

View File

@ -6,7 +6,7 @@ Usage:
Build deb package with `clang-14` in `debug` mode:
```
$ mkdir deb/test_output
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build
$ ls -l deb/test_output
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb

View File

@ -112,12 +112,12 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool:
return build_type == "" and package_type == "deb" and sanitizer == ""
# Tell whether the requested configuration is a release build: not a debug
# build, packaged as "deb", and with no sanitizer enabled.
# parse_env_variables() uses the result to decide whether to add
# -DSPLIT_DEBUG_SYMBOLS=ON and WITH_PERFORMANCE=1.
def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool:
return not debug_build and package_type == "deb" and sanitizer == ""
def parse_env_variables(
build_type: str,
debug_build: bool,
compiler: str,
sanitizer: str,
package_type: str,
@ -233,7 +233,7 @@ def parse_env_variables(
build_target = (
f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge"
)
if is_release_build(build_type, package_type, sanitizer):
if is_release_build(debug_build, package_type, sanitizer):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@ -253,8 +253,8 @@ def parse_env_variables(
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
if build_type:
result.append(f"BUILD_TYPE={build_type.capitalize()}")
if debug_build:
result.append("BUILD_TYPE=DEBUG")
else:
result.append("BUILD_TYPE=None")
@ -359,7 +359,7 @@ def parse_args() -> argparse.Namespace:
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", type=dir_name, required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--debug-build", action="store_true")
parser.add_argument(
"--compiler",
@ -464,7 +464,7 @@ def main():
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
args.build_type,
args.debug_build,
args.compiler,
args.sanitizer,
args.package_type,

View File

@ -13,6 +13,20 @@ Supported platforms:
- AArch64
- Power9 (experimental)
## Building in docker
We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage:
```bash
# define a directory for the output artifacts
output_dir="build_results"
# the simplest build
./docker/packager/packager --package-type=binary --output-dir "$output_dir"
# build debian packages
./docker/packager/packager --package-type=deb --output-dir "$output_dir"
# by default, debian packages use thin LTO, so we can override it to speed up the build
CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "$output_dir"
```
## Building on Ubuntu
The following tutorial is based on Ubuntu Linux.

View File

@ -378,6 +378,10 @@ request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI ch
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
build.
### macOS-only: Install with Homebrew
To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse).
## Launch {#launch}
To start the server as a daemon, run:

View File

@ -319,8 +319,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
{
auto column_with_default = col.column->cloneEmpty();
col.type->insertDefaultInto(*column_with_default);
column_with_default->finalize();
auto column = ColumnConst::create(std::move(column_with_default), 0);
const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name});
node = &dag->materializeNode(*node);

View File

@ -80,6 +80,11 @@ DataPartStorageIteratorPtr DataPartStorageOnDiskFull::iterate() const
volume->getDisk()->iterateDirectory(fs::path(root_path) / part_dir));
}
/// Returns the last modification time of `file_name`, resolved relative to this
/// part's directory (root_path / part_dir), as reported by the volume's disk.
Poco::Timestamp DataPartStorageOnDiskFull::getFileLastModified(const String & file_name) const
{
    const auto disk = volume->getDisk();
    const auto file_path = fs::path(root_path) / part_dir / file_name;
    return disk->getLastModified(file_path);
}
size_t DataPartStorageOnDiskFull::getFileSize(const String & file_name) const
{
return volume->getDisk()->getFileSize(fs::path(root_path) / part_dir / file_name);

View File

@ -20,6 +20,7 @@ public:
bool isDirectory(const std::string & name) const override;
DataPartStorageIteratorPtr iterate() const override;
Poco::Timestamp getFileLastModified(const String & file_name) const override;
size_t getFileSize(const std::string & file_name) const override;
UInt32 getRefCount(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name) const override;

View File

@ -122,6 +122,7 @@ public:
virtual DataPartStorageIteratorPtr iterate() const = 0;
/// Get metadata for a file inside path dir.
virtual Poco::Timestamp getFileLastModified(const std::string & file_name) const = 0;
virtual size_t getFileSize(const std::string & file_name) const = 0;
virtual UInt32 getRefCount(const std::string & file_name) const = 0;

View File

@ -116,6 +116,8 @@ public:
/// Otherwise return information about column size on disk.
ColumnSize getColumnSize(const String & column_name) const;
/// Returns the last modification time of the on-disk data of the given column,
/// or an empty optional when the implementation cannot provide one
/// (e.g. there is no file to query, or the file lookup fails).
virtual std::optional<time_t> getColumnModificationTime(const String & column_name) const = 0;
/// NOTE: Returns zeros if secondary indexes are not found in checksums.
/// Otherwise return information about secondary index size on disk.
IndexSize getSecondaryIndexSize(const String & secondary_index_name) const;

View File

@ -144,6 +144,11 @@ bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) co
return (bin_checksum != checksums.files.end() && mrk_checksum != checksums.files.end());
}
/// The column name is ignored: the time is always taken from the file named
/// DATA_FILE_NAME_WITH_EXTENSION, so every column of the part reports the same value.
std::optional<time_t> MergeTreeDataPartCompact::getColumnModificationTime(const String & /* column_name */) const
{
    const auto data_file_modified = getDataPartStorage().getFileLastModified(DATA_FILE_NAME_WITH_EXTENSION);
    return data_file_modified.epochTime();
}
void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) const
{
checkConsistencyBase();

View File

@ -55,6 +55,8 @@ public:
bool hasColumnFiles(const NameAndTypePair & column) const override;
std::optional<time_t> getColumnModificationTime(const String & column_name) const override;
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; }
~MergeTreeDataPartCompact() override;

View File

@ -43,6 +43,7 @@ public:
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override;
std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;

View File

@ -260,6 +260,18 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const
return res;
}
std::optional<time_t> MergeTreeDataPartWide::getColumnModificationTime(const String & column_name) const
{
try
{
return getDataPartStorage().getFileLastModified(column_name + DATA_FILE_EXTENSION).epochTime();
}
catch (const fs::filesystem_error &)
{
return {};
}
}
String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const
{
String filename;

View File

@ -54,6 +54,8 @@ public:
bool hasColumnFiles(const NameAndTypePair & column) const override;
std::optional<time_t> getColumnModificationTime(const String & column_name) const override;
protected:
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,

View File

@ -8,6 +8,7 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeUUID.h>
#include <Storages/VirtualColumnUtils.h>
@ -62,6 +63,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
{"column_data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()},
{"column_modification_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"serialization_kind", std::make_shared<DataTypeString>()},
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
@ -235,6 +238,13 @@ void StorageSystemPartsColumns::processNextStorage(
columns[res_index++]->insert(column_size.data_uncompressed);
if (columns_mask[src_index++])
columns[res_index++]->insert(column_size.marks);
if (columns_mask[src_index++])
{
if (auto column_modification_time = part->getColumnModificationTime(column.name))
columns[res_index++]->insert(UInt64(column_modification_time.value()));
else
columns[res_index++]->insertDefault();
}
auto serialization = part->getSerialization(column.name);
if (columns_mask[src_index++])

View File

@ -7,6 +7,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeNullable.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>
#include <Parsers/queryToString.h>
@ -66,7 +67,8 @@ StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const S
{"column_bytes_on_disk", std::make_shared<DataTypeUInt64>()},
{"column_data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()}
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()},
{"column_modification_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
}
)
{
@ -247,6 +249,13 @@ void StorageSystemProjectionPartsColumns::processNextStorage(
columns[res_index++]->insert(column_size.data_uncompressed);
if (columns_mask[src_index++])
columns[res_index++]->insert(column_size.marks);
if (columns_mask[src_index++])
{
if (auto column_modification_time = part->getColumnModificationTime(column.name))
columns[res_index++]->insert(UInt64(column_modification_time.value()));
else
columns[res_index++]->insertDefault();
}
if (has_state_column)
columns[res_index++]->insert(part->stateString());

View File

@ -45,7 +45,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool:
return False
if build_config["sanitizer"] != "":
return True
if build_config["build_type"] != "":
if build_config["debug_build"]:
return True
return False
@ -66,8 +66,8 @@ def get_packager_cmd(
f"--package-type={package_type} --compiler={comp}"
)
if build_config["build_type"]:
cmd += f" --build-type={build_config['build_type']}"
if build_config["debug_build"]:
cmd += " --debug-build"
if build_config["sanitizer"]:
cmd += f" --sanitizer={build_config['sanitizer']}"
if build_config["tidy"] == "enable":

View File

@ -70,7 +70,7 @@ def get_failed_report(
message = f"{job_name} failed"
build_result = BuildResult(
compiler="unknown",
build_type="unknown",
debug_build=False,
sanitizer="unknown",
status=message,
elapsed_seconds=0,
@ -85,7 +85,7 @@ def process_report(
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config["compiler"],
build_type=build_config["build_type"],
debug_build=build_config["debug_build"],
sanitizer=build_config["sanitizer"],
status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report["elapsed_seconds"],

View File

@ -10,7 +10,7 @@ CI_CONFIG = {
"build_config": {
"package_release": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "deb",
"static_binary_name": "amd64",
@ -21,7 +21,7 @@ CI_CONFIG = {
},
"coverity": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "coverity",
"tidy": "disable",
@ -31,7 +31,7 @@ CI_CONFIG = {
},
"package_aarch64": {
"compiler": "clang-16-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "deb",
"static_binary_name": "aarch64",
@ -42,7 +42,7 @@ CI_CONFIG = {
},
"package_asan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "address",
"package_type": "deb",
"tidy": "disable",
@ -51,7 +51,7 @@ CI_CONFIG = {
},
"package_ubsan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "undefined",
"package_type": "deb",
"tidy": "disable",
@ -60,7 +60,7 @@ CI_CONFIG = {
},
"package_tsan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "thread",
"package_type": "deb",
"tidy": "disable",
@ -69,7 +69,7 @@ CI_CONFIG = {
},
"package_msan": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "memory",
"package_type": "deb",
"tidy": "disable",
@ -78,7 +78,7 @@ CI_CONFIG = {
},
"package_debug": {
"compiler": "clang-16",
"build_type": "debug",
"debug_build": True,
"sanitizer": "",
"package_type": "deb",
"tidy": "disable",
@ -87,7 +87,7 @@ CI_CONFIG = {
},
"binary_release": {
"compiler": "clang-16",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"tidy": "disable",
@ -96,7 +96,7 @@ CI_CONFIG = {
},
"binary_tidy": {
"compiler": "clang-16",
"build_type": "debug",
"debug_build": True,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "debug-amd64",
@ -106,7 +106,7 @@ CI_CONFIG = {
},
"binary_darwin": {
"compiler": "clang-16-darwin",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "macos",
@ -116,7 +116,7 @@ CI_CONFIG = {
},
"binary_aarch64": {
"compiler": "clang-16-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"tidy": "disable",
@ -125,7 +125,7 @@ CI_CONFIG = {
},
"binary_aarch64_v80compat": {
"compiler": "clang-16-aarch64-v80compat",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "aarch64v80compat",
@ -135,7 +135,7 @@ CI_CONFIG = {
},
"binary_freebsd": {
"compiler": "clang-16-freebsd",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "freebsd",
@ -145,7 +145,7 @@ CI_CONFIG = {
},
"binary_darwin_aarch64": {
"compiler": "clang-16-darwin-aarch64",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "macos-aarch64",
@ -155,7 +155,7 @@ CI_CONFIG = {
},
"binary_ppc64le": {
"compiler": "clang-16-ppc64le",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "powerpc64le",
@ -165,7 +165,7 @@ CI_CONFIG = {
},
"binary_amd64_compat": {
"compiler": "clang-16-amd64-compat",
"build_type": "",
"debug_build": False,
"sanitizer": "",
"package_type": "binary",
"static_binary_name": "amd64compat",

View File

@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes
@dataclass
class BuildResult:
compiler: str
build_type: str
debug_build: bool
sanitizer: str
status: str
elapsed_seconds: int
@ -484,8 +484,8 @@ def create_build_html_report(
):
row = "<tr>"
row += f"<td>{build_result.compiler}</td>"
if build_result.build_type:
row += f"<td>{build_result.build_type}</td>"
if build_result.debug_build:
row += "<td>debug</td>"
else:
row += "<td>relwithdebuginfo</td>"
if build_result.sanitizer:

View File

@ -565,6 +565,7 @@ CREATE TABLE system.parts_columns
`column_data_compressed_bytes` UInt64,
`column_data_uncompressed_bytes` UInt64,
`column_marks_bytes` UInt64,
`column_modification_time` Nullable(DateTime),
`serialization_kind` String,
`subcolumns.names` Array(String),
`subcolumns.types` Array(String),
@ -750,6 +751,7 @@ CREATE TABLE system.projection_parts_columns
`column_data_compressed_bytes` UInt64,
`column_data_uncompressed_bytes` UInt64,
`column_marks_bytes` UInt64,
`column_modification_time` Nullable(DateTime),
`bytes` UInt64,
`marks_size` UInt64,
`part_name` String

View File

@ -0,0 +1,6 @@
Wide key 1 1
Wide key 1 1
Wide value 1 0
Compact key 1 1
Compact key 1 1
Compact value 1 1

View File

@ -0,0 +1,30 @@
-- Tags: no-s3-storage
-- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details)
set mutations_sync=1;
{# modification times of the part folder and column files are not always equal; this is how many seconds of difference are allowed #}
{% set mtime_diff_in_seconds = 5 %}
{% for id, settings, file_per_column in [
("wide", "min_bytes_for_wide_part=0, min_rows_for_wide_part=0", true),
("compact", "min_bytes_for_wide_part=1000, min_rows_for_wide_part=100", false)
]
%}
drop table if exists data_{{ id }};
create table data_{{ id }} (key Int) engine=MergeTree() order by tuple() settings {{ settings }};
insert into data_{{ id }} values (1);
select sleep(3) format Null;
select part_type, column, now()-modification_time < 10, modification_time - column_modification_time < {{ mtime_diff_in_seconds }} from system.parts_columns where database = currentDatabase() and table = 'data_{{ id }}';
alter table data_{{ id }} add column value Int default 0;
alter table data_{{ id }} materialize column value;
select part_type, column, now()-modification_time < 10,
{% if file_per_column %}
modification_time - column_modification_time >= 3
{% else %}
modification_time - column_modification_time < {{ mtime_diff_in_seconds }}
{% endif %}
from system.parts_columns where active and database = currentDatabase() and table = 'data_{{ id }}' order by column;
{% endfor %}