Avoid additional maven requests for spark tests

This commit is contained in:
Mikhail f. Shiryaev 2023-06-29 12:24:19 +02:00
parent f728f97350
commit 340262814a
No known key found for this signature in database
GPG Key ID: 4B02ED204C7D93F4
2 changed files with 11 additions and 5 deletions

View File

@@ -105,7 +105,11 @@ RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-had
# download spark and packages
# if you change packages, don't forget to update them in tests/integration/helpers/cluster.py
RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.3.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null
RUN packages="org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,\
io.delta:delta-core_2.12:2.3.0,\
org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" \
&& /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "$packages" > /dev/null \
&& find /root/.ivy2/ -name '*.jar' -exec ln -sf {} /spark-3.3.2-bin-hadoop3/jars/ \;
RUN set -x \
&& addgroup --system dockremap \

View File

@@ -624,10 +624,12 @@ class ClickHouseCluster:
# if you change packages, don't forget to update them in docker/test/integration/runner/dockerd-entrypoint.sh
(
pyspark.sql.SparkSession.builder.appName("spark_test")
.config(
"spark.jars.packages",
"org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0",
)
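# The jars are now linked into "$SPARK_HOME/jars", so the packages
# do not need to be downloaded over and over again.
# NOTE: the coordinates kept below pin delta-core 2.2.0, while the image
# build installs 2.3.0; align the versions before re-enabling this block.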
# .config(
# "spark.jars.packages",
# "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0",
# )
.master("local")
.getOrCreate()
.stop()