Merge pull request #71946 from ClickHouse/fix-s3-queue-log-processed-rows

Fix the rows_processed column in system.s3queue_log / system.azure_queue_log, which was broken in 24.6
This commit is contained in:
Kseniia Sumarokova 2024-11-19 13:50:43 +00:00 committed by GitHub
commit 58dd039aac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 25 additions and 6 deletions

View File

@ -639,7 +639,7 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio
/* overwrite_status */true);
}
appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */success);
appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), /* processed */success);
}
for (const auto & file_metadata : failed_during_read_files)
@ -651,7 +651,7 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio
/* reduce_retry_count */true,
/* overwrite_status */false);
appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */false);
appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), /* processed */false);
}
}
@ -666,7 +666,6 @@ void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path)
void ObjectStorageQueueSource::appendLogElement(
const std::string & filename,
ObjectStorageQueueMetadata::FileStatus & file_status_,
size_t processed_rows,
bool processed)
{
if (!system_queue_log)
@ -681,7 +680,7 @@ void ObjectStorageQueueSource::appendLogElement(
.table = storage_id.table_name,
.uuid = toString(storage_id.uuid),
.file_name = filename,
.rows_processed = processed_rows,
.rows_processed = file_status_.processed_rows,
.status = processed ? ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed,
.processing_start_time = file_status_.processing_start_time,
.processing_end_time = file_status_.processing_end_time,

View File

@ -167,7 +167,6 @@ private:
void appendLogElement(
const std::string & filename,
ObjectStorageQueueMetadata::FileStatus & file_status_,
size_t processed_rows,
bool processed);
};

View File

@ -1,4 +1,8 @@
<clickhouse>
<azure_queue_log>
<database>system</database>
<table>azure_queue_log</table>
</azure_queue_log>
<s3queue_log>
<database>system</database>
<table>s3queue_log</table>

View File

@ -319,7 +319,9 @@ def generate_random_string(length=6):
@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"])
def test_delete_after_processing(started_cluster, mode, engine_name):
node = started_cluster.instances["instance"]
table_name = f"delete_after_processing_{mode}_{engine_name}"
table_name = (
f"delete_after_processing_{mode}_{engine_name}_{generate_random_string()}"
)
dst_table_name = f"{table_name}_dst"
files_path = f"{table_name}_data"
files_num = 5
@ -362,6 +364,21 @@ def test_delete_after_processing(started_cluster, mode, engine_name):
).splitlines()
] == sorted(total_values, key=lambda x: (x[0], x[1], x[2]))
node.query("system flush logs")
if engine_name == "S3Queue":
system_table_name = "s3queue_log"
else:
system_table_name = "azure_queue_log"
assert (
int(
node.query(
f"SELECT sum(rows_processed) FROM system.{system_table_name} WHERE table = '{table_name}'"
)
)
== files_num * row_num
)
if engine_name == "S3Queue":
minio = started_cluster.minio_client
objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True))