|
2 | 2 |
|
FROM stackable/image/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS hive-builder

# Apache Hive up to 4.0.x(!) officially requires Java 8 (there is no distinction between building and running).
# As of 2024-04-15 we for sure need Java 8 for building, but we used a Java 11 runtime for months now without any problems.
# As we got weird TLS errors (https://stackable-workspace.slack.com/archives/C031A5BEFS7/p1713185172557459) with a
# Java 8 runtime we bumped the Runtime to Java 11 again.

ARG PRODUCT
ARG HADOOP
ARG JMX_EXPORTER

# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.)
# This can be used to speed up builds when disk space is of no concern.
ARG DELETE_CACHES="true"

COPY --chown=stackable:stackable hive/stackable /stackable

USER stackable
WORKDIR /stackable
|
# Cache mounts are owned by root by default
# We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base
RUN --mount=type=cache,id=maven-hive,uid=1000,target=/stackable/.m2/repository <<EOF
# Heredoc RUN lines are NOT implicitly &&-chained: without set -e only the exit
# status of the LAST command decides the build result, so a failing curl, patch
# or mvn step would be silently masked. pipefail additionally catches a failing
# curl on the left side of the tar pipe below.
set -euo pipefail

curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC .
patches/apply_patches.sh ${PRODUCT}

cd /stackable/apache-hive-${PRODUCT}-src/
# Build only the standalone metastore module; tests are skipped (-DskipTests)
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable

ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore
cp /stackable/bin/start-metastore /stackable/hive-metastore/bin
rm -rf /stackable/apache-hive-${PRODUCT}-src

curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

# We're removing these to make the intermediate layer smaller
# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
# and we are sometimes running into errors because we're out of space.
# Therefore, we try to clean up all layers as much as possible.
if [ "${DELETE_CACHES}" = "true" ] ; then
  rm -rf /stackable/.m2/repository/*
  rm -rf /stackable/.npm/*
  rm -rf /stackable/.cache/*
fi
EOF
54 | 53 |
|
55 | | -# The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode |
56 | | -# the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" |
57 | | -# This is a TEMPORARY fix which means that we can keep the hardcoded path in HDFS operator FOR NOW as it will still point to a newer version of JMX Exporter, despite the "0.16.1" in the name. |
58 | | -# At the same time a new HDFS Operator will still work with older images which do not have the symlink to the versionless jar. |
59 | | -# After one of our next releases (23.11 or 24.x) we should update the operator to point at the non-versioned symlink (jmx_prometheus_javaagent.jar) |
60 | | -# And then we can also remove the symlink to 0.16.1 from this Dockerfile. |
61 | | -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \ |
62 | | - ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \ |
63 | | - ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar |
64 | | - |
65 | | -# Logging. |
66 | | -# jackson-module-jaxb-annotations: this is no longer bundled with the hadoop-yarn/mapreduce libraries (excluded from the hadoop build). |
67 | | -RUN rm /stackable/hive-metastore/lib/log4j-slf4j-impl* && \ |
68 | | - curl --fail -L https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar -o /stackable/hive-metastore/lib/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar && \ |
69 | | - curl --fail -L https://repo.stackable.tech/repository/packages/jackson-module-jaxb-annotations/jackson-module-jaxb-annotations-${JACKSON_JAXB_ANNOTATIONS}.jar -o /stackable/hive-metastore/lib/jackson-module-jaxb-annotations-${JACKSON_JAXB_ANNOTATIONS}.jar |
70 | | - |
71 | | -# === |
72 | | -# For earlier versions this script removes the .class file that contains the |
73 | | -# vulnerable code. |
74 | | -# TODO: This can be restricted to target only versions which do not honor the environment |
75 | | -# varible that has been set above but this has not currently been implemented |
76 | | -COPY shared/log4shell.sh /bin |
77 | | -RUN /bin/log4shell.sh /stackable/apache-hive-metastore-${PRODUCT}-bin/ |
78 | | - |
79 | | -# Ensure no vulnerable files are left over |
80 | | -# This will currently report vulnerable files being present, as it also alerts on |
81 | | -# SocketNode.class, which we do not remove with our scripts. |
82 | | -# Further investigation will be needed whether this should also be removed. |
83 | | -COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64 |
84 | | -COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64 |
85 | | -COPY shared/log4shell_scanner /bin/log4shell_scanner |
86 | | -# log4shell_scanner does not work on symlinks! |
87 | | -RUN /bin/log4shell_scanner s /stackable/apache-hive-metastore-${PRODUCT}-bin/ |
88 | | -# === |
89 | | - |
90 | | -# syntax=docker/dockerfile:1@sha256:ac85f380a63b13dfcefa89046420e1781752bab202122f8f50032edf31be0021 |
91 | | -FROM stackable/image/java-base |

FROM stackable/image/java-base AS final

ARG PRODUCT
ARG HADOOP
ARG RELEASE
ARG AWS_JAVA_SDK_BUNDLE
ARG AZURE_STORAGE
ARG AZURE_KEYVAULT_CORE

ARG NAME="Apache Hive metastore"
ARG DESCRIPTION="This image is deployed by the Stackable Operator for Apache Hive."

# Use the ARGs above so the name/description stay consistent across all label variants
LABEL name="${NAME}"
LABEL version="${PRODUCT}"
LABEL release="${RELEASE}"
LABEL summary="The Stackable image for Apache Hive metastore."
LABEL description="${DESCRIPTION}"

# https://github.com/opencontainers/image-spec/blob/036563a4a268d7c08b51a08f05a02a0fe74c7268/annotations.md#annotations
LABEL org.opencontainers.image.documentation="https://docs.stackable.tech/home/stable/hive/"
LABEL org.opencontainers.image.version="${PRODUCT}"
LABEL org.opencontainers.image.revision="${RELEASE}"
LABEL org.opencontainers.image.title="${NAME}"
LABEL org.opencontainers.image.description="${DESCRIPTION}"

# https://docs.openshift.com/container-platform/4.16/openshift_images/create-images.html#defining-image-metadata
# https://github.com/projectatomic/ContainerApplicationGenericLabels/blob/master/vendor/redhat/labels.md
LABEL io.openshift.tags="ubi9,stackable,hive,sdp"
LABEL io.k8s.description="${DESCRIPTION}"
LABEL io.k8s.display-name="${NAME}"

RUN <<EOF
# Abort on the first failing command — heredoc lines are not &&-chained, so
# without set -e a failed "microdnf update" would be masked by the final rm.
set -euo pipefail
microdnf update
microdnf clean all
# Record the installed package set for auditing purposes
rpm -qa --qf "%{NAME}-%{VERSION}-%{RELEASE}\n" | sort > /stackable/package_manifest.txt
rm -rf /var/cache/yum
EOF
109 | 93 |
|
USER stackable
WORKDIR /stackable

COPY --chown=stackable:stackable --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin

# It is useful to see which version of Hadoop is used at a glance
# Therefore the use of the full name here
# TODO: Do we really need all of Hadoop in here?
COPY --chown=stackable:stackable --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP}

# The S3 and Azure sections below use hardcoded version numbers on purpose instead of wildcards
# This way the build will fail should one of the files not be available anymore in a later Hadoop version!
# All symlinks and jar copies happen in ONE RUN so the image gets a single layer
# instead of six, and set -e makes any failing cp abort the build immediately.
RUN <<EOF
set -euo pipefail
ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore
ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop

# Add S3 support for Hive (support for s3a://)
cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive-metastore/lib/

# Add Azure ABFS support (support for abfs://)
cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive-metastore/lib/
cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive-metastore/lib/
EOF

COPY --chown=stackable:stackable --from=hive-builder /stackable/jmx /stackable/jmx
COPY hive/licenses /licenses

ENV HADOOP_HOME=/stackable/hadoop
|
0 commit comments