Try to understand why some tests fail in CI but pass locally

kssenii 2023-04-05 16:28:27 +02:00
parent 44b9bc5a91
commit e632dc5cdd
2 changed files with 19 additions and 1 deletion


@@ -7,6 +7,7 @@ import logging
 import os
 import json
 import time
+import glob
 import pyspark
 import delta
@@ -30,7 +31,7 @@ from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_conte
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
-USER_FILES_PATH = os.path.join(SCRIPT_DIR, "./_instances/node1/database/user_files")
+USER_FILES_PATH = os.path.join(SCRIPT_DIR, "_instances/node1/database/user_files")
 @pytest.fixture(scope="module")
@@ -132,6 +133,13 @@ def create_initial_data_file(node, query, table_name, compression_method="none")
     return result_path
 
 
+def print_recursive(path):
+    for root, dirs, files in os.walk(path):
+        for basename in files:
+            filename = os.path.join(root, basename)
+            print(f"Found file {filename}")
+
+
 def test_single_log_file(started_cluster):
     instance = started_cluster.instances["node1"]
     minio_client = started_cluster.minio_client
@@ -142,6 +150,7 @@ def test_single_log_file(started_cluster):
     inserted_data = "SELECT number, toString(number + 1) FROM numbers(100)"
     parquet_data_path = create_initial_data_file(instance, inserted_data, TABLE_NAME)
+    print_recursive(SCRIPT_DIR)
     write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}")
     files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "")
     assert len(files) == 2  # 1 metadata files + 1 data file

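Note that the hunks above add import glob, while the new print_recursive helper only uses os.walk. Purely as an illustration (not part of this commit), a hypothetical glob-based version of the same debugging dump could look like this:

import glob
import os


def print_recursive_glob(path):
    # Hypothetical alternative to the commit's print_recursive helper:
    # recursively list files under `path` with glob instead of os.walk.
    # Unlike os.walk, glob skips dot-files unless the pattern matches them.
    for filename in glob.glob(os.path.join(path, "**", "*"), recursive=True):
        if os.path.isfile(filename):
            print(f"Found file {filename}")

In practice os.walk is the simpler choice here, since it also visits hidden files and directories without extra flags.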

@@ -8,6 +8,7 @@ import os
 import json
 import pytest
 import time
+import glob
 from pyspark.sql.types import (
     StructType,
@@ -153,6 +154,13 @@ def create_initial_data_file(node, query, table_name, compression_method="none")
     return result_path
 
 
+def print_recursive(path):
+    for root, dirs, files in os.walk(path):
+        for basename in files:
+            filename = os.path.join(root, basename)
+            print(f"Found file {filename}")
+
+
 @pytest.mark.parametrize("format_version", ["1", "2"])
 def test_single_iceberg_file(started_cluster, format_version):
     instance = started_cluster.instances["node1"]
@@ -163,6 +171,7 @@ def test_single_iceberg_file(started_cluster, format_version):
     inserted_data = "SELECT number, toString(number) FROM numbers(100)"
     parquet_data_path = create_initial_data_file(instance, inserted_data, TABLE_NAME)
+    print_recursive(SCRIPT_DIR)
     write_iceberg_from_file(
         spark, parquet_data_path, TABLE_NAME, format_version=format_version
     )