Skip to content

Commit c7c86a4

Browse files
Refactor: improve and clean up Dockerfiles (#2957)
* Refactor: improve and clean up Dockerfiles * Refactor: improve and clean up Dockerfiles * Refactor: improve and clean up Dockerfiles * Refactor: improve and clean up Dockerfiles * Refactor: improve and clean up Dockerfiles * Refactor: improve and clean up Dockerfiles
1 parent aa72157 commit c7c86a4

File tree

8 files changed

+79
-93
lines changed

8 files changed

+79
-93
lines changed

getting-started/spark/notebooks/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
FROM docker.io/apache/spark:3.5.6-java17
2121

22-
ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
23-
ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
22+
ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
23+
PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
2424

2525
USER root
2626

plugins/spark/v3.5/getting-started/notebooks/Dockerfile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
FROM docker.io/apache/spark:3.5.6-java17
2121

22-
ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
23-
ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
22+
ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
23+
PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
2424

2525
USER root
2626

@@ -36,8 +36,7 @@ WORKDIR /home/spark
3636
COPY --chown=spark client /home/spark/client
3737
COPY --chown=spark regtests/requirements.txt /tmp
3838
COPY --chown=spark regtests/notebook_requirements.txt /tmp
39-
COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs
40-
39+
COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs/*bundle.jar /opt/spark/jars/
4140

4241
RUN python3 -m venv /home/spark/venv && \
4342
. /home/spark/venv/bin/activate && \

plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@
265265
"from pyspark.sql import SparkSession\n",
266266
"\n",
267267
"spark = (SparkSession.builder\n",
268-
" .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically discover the Jar\n",
268+
" # This jar is now automatically discovered, thus no longer needed\n",
269+
" #.config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")\n",
269270
" .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n",
270271
" .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n",
271272
" .config('spark.sql.iceberg.vectorization.enabled', 'false')\n",

plugins/spark/v3.5/regtests/Dockerfile

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,29 @@
1818
#
1919

2020
FROM docker.io/apache/spark:3.5.6-java17
21-
ARG POLARIS_HOST=polaris
22-
ENV POLARIS_HOST=$POLARIS_HOST
23-
ENV SPARK_HOME=/opt/spark
24-
ENV CURRENT_SCALA_VERSION='2.12'
25-
ENV LANGUAGE='en_US:en'
21+
22+
ARG POLARIS_HOST=polaris \
23+
CURRENT_SCALA_VERSION=2.12
24+
25+
ENV POLARIS_HOST=${POLARIS_HOST} \
26+
CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION}
2627

2728
USER root
28-
RUN apt update
29-
RUN apt-get install -y diffutils wget curl
30-
RUN mkdir -p /home/spark && \
31-
chown -R spark /home/spark && \
32-
mkdir -p /tmp/polaris-regtests && \
33-
chown -R spark /tmp/polaris-regtests
34-
RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
3529

36-
USER spark
30+
RUN apt-get update && \
31+
apt-get install -y --no-install-recommends diffutils wget curl && \
32+
rm -rf /var/lib/apt/lists/* && \
33+
mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
34+
chown -R spark:spark /home/spark /tmp/polaris-regtests && \
35+
chmod -R 777 /opt/spark/conf
3736

3837
WORKDIR /home/spark/polaris
3938

40-
COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5
39+
COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5
40+
41+
# /home/spark/polaris/v3.5/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
42+
RUN chmod -R 777 /home/spark/polaris/v3.5/regtests
4143

42-
# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
43-
USER root
44-
RUN chmod -R go+rwx /home/spark/polaris
45-
RUN chmod -R 777 ./v3.5/regtests
4644
USER spark
4745

4846
ENTRYPOINT ["./v3.5/regtests/run.sh"]

regtests/Dockerfile

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,45 +18,40 @@
1818
#
1919

2020
FROM docker.io/apache/spark:3.5.6-java17-python3
21+
2122
ARG POLARIS_HOST=polaris
22-
ENV POLARIS_HOST=$POLARIS_HOST
23-
ENV SPARK_HOME=/opt/spark
24-
ENV LANGUAGE='en_US:en'
23+
24+
ENV POLARIS_HOST=${POLARIS_HOST} \
25+
PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip"
2526

2627
USER root
27-
RUN apt update
28-
RUN apt-get install -y diffutils wget curl python3.10-venv jq
29-
RUN mkdir -p /home/spark && \
30-
chown -R spark /home/spark && \
31-
mkdir -p /tmp/polaris-regtests && \
32-
chown -R spark /tmp/polaris-regtests
33-
RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
28+
29+
RUN apt-get update && \
30+
apt-get install -y --no-install-recommends diffutils wget curl python3.10-venv jq && \
31+
rm -rf /var/lib/apt/lists/* && \
32+
mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
33+
chown -R spark:spark /home/spark /tmp/polaris-regtests && \
34+
chmod -R 777 /opt/spark/conf
35+
36+
COPY --chown=spark:spark ./regtests/setup.sh ./regtests/pyspark-setup.sh ./regtests/requirements.txt /home/spark/polaris/regtests/
37+
COPY --chown=spark:spark ./client/python /home/spark/polaris/client/python
38+
COPY --chown=spark:spark ./polaris /home/spark/polaris/polaris
39+
COPY --chown=spark:spark ./spec /home/spark/polaris/spec
40+
COPY --chown=spark:spark ./regtests /home/spark/polaris/regtests
41+
42+
# /home/spark/polaris/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
43+
RUN chmod -R go+rwx /home/spark/polaris
3444

3545
USER spark
36-
ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
3746

38-
# Copy and run setup.sh separately so that test sources can change, but the setup script run is still cached
3947
WORKDIR /home/spark/polaris
40-
COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh
41-
COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh
42-
COPY --chown=spark ./client/python /home/spark/polaris/client/python
43-
COPY --chown=spark ./polaris /home/spark/polaris/polaris
44-
COPY --chown=spark ./spec /home/spark/polaris/spec
45-
COPY --chown=spark ./regtests/requirements.txt /tmp/
4648

4749
RUN python3 -m venv /home/spark/polaris/polaris-venv && \
4850
. /home/spark/polaris/polaris-venv/bin/activate && \
49-
pip install -r /tmp/requirements.txt && \
51+
pip install -r /home/spark/polaris/regtests/requirements.txt && \
5052
cd /home/spark/polaris/client/python && \
5153
poetry install && \
5254
deactivate && \
5355
/home/spark/polaris/regtests/setup.sh
5456

55-
COPY --chown=spark ./regtests /home/spark/polaris/regtests
56-
57-
# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
58-
USER root
59-
RUN chmod -R go+rwx /home/spark/polaris
60-
USER spark
61-
6257
ENTRYPOINT ["./regtests/run.sh"]

runtime/admin/src/main/docker/Dockerfile.jvm

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,25 @@
1818
#
1919
FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
2020

21-
LABEL org.opencontainers.image.source=https://github.com/apache/polaris
22-
LABEL org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool"
23-
LABEL org.opencontainers.image.licenses=Apache-2.0
21+
LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
22+
org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" \
23+
org.opencontainers.image.licenses=Apache-2.0
2424

25-
ENV LANGUAGE='en_US:en'
25+
ENV LANGUAGE='en_US:en' \
26+
USER=polaris \
27+
UID=10000 \
28+
HOME=/home/polaris
2629

2730
USER root
28-
RUN groupadd --gid 10001 polaris \
29-
&& useradd --uid 10000 --gid polaris polaris \
30-
&& chown -R polaris:polaris /opt/jboss/container \
31-
&& chown -R polaris:polaris /deployments
31+
32+
RUN groupadd --gid 10001 polaris && \
33+
useradd --uid 10000 --gid polaris -m polaris && \
34+
mkdir -p /deployments && \
35+
chown -R polaris:polaris /deployments /opt/jboss/container
3236

3337
USER polaris
38+
3439
WORKDIR /home/polaris
35-
ENV USER=polaris
36-
ENV UID=10000
37-
ENV HOME=/home/polaris
3840

3941
# We make four distinct layers so if there are application changes the library layers can be reused
4042
COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/

runtime/server/src/main/docker/Dockerfile.jvm

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,27 @@
1818
#
1919
FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
2020

21-
LABEL org.opencontainers.image.source=https://github.com/apache/polaris
22-
LABEL org.opencontainers.image.description="Apache Polaris (incubating)"
23-
LABEL org.opencontainers.image.licenses=Apache-2.0
21+
LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
22+
org.opencontainers.image.description="Apache Polaris (incubating)" \
23+
org.opencontainers.image.licenses=Apache-2.0
2424

25-
ENV LANGUAGE='en_US:en'
25+
ENV LANGUAGE='en_US:en' \
26+
USER=polaris \
27+
UID=10000 \
28+
HOME=/home/polaris \
29+
AB_JOLOKIA_OFF="" \
30+
JAVA_APP_JAR="/deployments/quarkus-run.jar"
2631

2732
USER root
28-
RUN groupadd --gid 10001 polaris \
29-
&& useradd --uid 10000 --gid polaris polaris \
30-
&& chown -R polaris:polaris /opt/jboss/container \
31-
&& chown -R polaris:polaris /deployments
33+
34+
RUN groupadd --gid 10001 polaris && \
35+
useradd --uid 10000 --gid polaris polaris && \
36+
chown -R polaris:polaris /opt/jboss/container && \
37+
chown -R polaris:polaris /deployments
3238

3339
USER polaris
40+
3441
WORKDIR /home/polaris
35-
ENV USER=polaris
36-
ENV UID=10000
37-
ENV HOME=/home/polaris
3842

3943
# We make four distinct layers so if there are application changes the library layers can be reused
4044
COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/
@@ -45,8 +49,4 @@ COPY --chown=polaris:polaris distribution/LICENSE /deployments/
4549
COPY --chown=polaris:polaris distribution/NOTICE /deployments/
4650
COPY --chown=polaris:polaris distribution/DISCLAIMER /deployments/
4751

48-
EXPOSE 8181
49-
EXPOSE 8182
50-
51-
ENV AB_JOLOKIA_OFF=""
52-
ENV JAVA_APP_JAR="/deployments/quarkus-run.jar"
52+
EXPOSE 8181 8182

site/docker/Dockerfile

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,14 @@ FROM ubuntu:24.04 AS hugo
2121

2222
ENV LANGUAGE='en_US:en'
2323

24-
RUN apt-get update
25-
RUN apt-get install --yes golang hugo asciidoctor npm curl
26-
RUN apt-get clean
27-
# http-server is used when building the static site to manually check it locally
28-
# (via `site/bin/create-static-site.sh --local` at http://localhost:8080/)
29-
RUN npm install --global http-server
30-
31-
# these dependencies are needed to build the static site
32-
#RUN npm install --global autoprefixer postcss postcss-cli http-server
33-
34-
RUN mkdir /polaris
35-
RUN mkdir /polaris/site
36-
RUN mkdir /polaris/site/resources
24+
RUN apt-get update && \
25+
apt-get install --yes --no-install-recommends golang hugo asciidoctor npm curl git && \
26+
rm -rf /var/lib/apt/lists/* && \
27+
npm install --global http-server && \
28+
mkdir -p /polaris/site/resources
3729

3830
COPY _run_in_docker.sh /hugo/run
3931

40-
EXPOSE 1313
41-
EXPOSE 8080
32+
EXPOSE 1313 8080
4233

4334
ENTRYPOINT ["/hugo/run"]

0 commit comments

Comments
 (0)