From b30c75fcff3ba4c10b842f0dc5caa2c5ea833aed Mon Sep 17 00:00:00 2001
From: Fan Ting Wei
Date: Mon, 9 Sep 2024 14:59:08 +0800
Subject: [PATCH] remove references for spark versions less than 3

---
 make-distribution.sh | 39 +++------------------------------------
 1 file changed, 3 insertions(+), 36 deletions(-)

diff --git a/make-distribution.sh b/make-distribution.sh
index 0a2222a..4e3591a 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -40,32 +40,12 @@ TERM=xterm-color ./dev/make-distribution.sh \
   ${HIVE_INSTALL_FLAG:+"-Phive"} \
   -DskipTests
 
-SPARK_MAJOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f1)"
-HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | cut -d '.' -f1)"
-HIVE_HADOOP3_HIVE_EXEC_URL=${HIVE_HADOOP3_HIVE_EXEC_URL:-https://github.com/guangie88/hive-exec-jar/releases/download/1.2.1.spark2-hadoop3/hive-exec-1.2.1.spark2.jar}
-
-# Replace Hive for Hadoop 3 since Hive 1.2.1 does not officially support Hadoop 3 when using Spark 2.y.z
-# Note docker-image-tool.sh takes the jars from assembly/target/scala-2.*/jars
-if [[ "${WITH_HIVE}" = "true" ]] && [[ "${SPARK_MAJOR_VERSION}" -eq 2 ]] && [[ "${HADOOP_MAJOR_VERSION}" -eq 3 ]]; then
-  HIVE_EXEC_JAR_NAME="hive-exec-1.2.1.spark2.jar"
-  TARGET_JAR_PATH="$(find assembly -type f -name "${HIVE_EXEC_JAR_NAME}")"
-  curl -LO "${HIVE_HADOOP3_HIVE_EXEC_URL}" && mv "${HIVE_EXEC_JAR_NAME}" "${TARGET_JAR_PATH}"
-  # Spark <= 2.4 uses ${TARGET_JAR_PATH} for Docker COPY, but Spark >= 3 uses dist/jars/
-  cp "${TARGET_JAR_PATH}" "dist/jars/"
-fi
-
 SPARK_MAJOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f1)"
 SPARK_MINOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f2)"
+HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | cut -d '.' -f1)"
 
-if [[ ${SPARK_MAJOR_VERSION} -eq 2 && ${SPARK_MINOR_VERSION} -eq 4 ]]; then # 2.4.z
-  # Same Dockerfiles as Spark v2.4.8, but allow override of base image to use Debian Buster
-  # and not using PYTHONENV and instead copies pyspark out like Spark 3.y.z
-  DOCKERFILE_BASE="../overrides/base/2.4.z/Dockerfile"
-  DOCKERFILE_PY="../overrides/python/2.4.z/Dockerfile"
-else
-  DOCKERFILE_BASE="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile"
-  DOCKERFILE_PY="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile"
-fi
+DOCKERFILE_BASE="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile"
+DOCKERFILE_PY="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile"
 
 if [[ ${SPARK_MAJOR_VERSION} -eq 3 && ${SPARK_MINOR_VERSION} -ge 4 ]]; then # >=3.4
   # From Spark v3.4.0 onwards, openjdk is not the prefered base image source as it i
@@ -76,21 +56,9 @@ else
   IMAGE_VARIANT="jre-slim-buster"
 fi
 
-# Temporarily remove R build due to keyserver issue
-# DOCKERFILE_R="./resource-managers/kubernetes/docker/src/main/dockerfiles/R/Dockerfile"
-
 SPARK_LABEL="${SPARK_VERSION}"
 TAG_NAME="${SELF_VERSION}_${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_VERSION}_java-${JAVA_VERSION}"
 
-# ./bin/docker-image-tool.sh \
-#   -b java_image_tag=${JAVA_VERSION}-jre-slim-buster \
-#   -r "${IMAGE_NAME}" \
-#   -t "${TAG_NAME}" \
-#   -f "${DOCKERFILE_BASE}" \
-#   -p "${DOCKERFILE_PY}" \
-#   -R "${DOCKERFILE_R}" \
-#   build
-
 ./bin/docker-image-tool.sh \
   -b java_image_tag=${JAVA_VERSION}-${IMAGE_VARIANT} \
   -r "${IMAGE_NAME}" \
@@ -101,6 +69,5 @@ TAG_NAME="${SELF_VERSION}_${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_
 
 docker tag "${IMAGE_NAME}/spark:${TAG_NAME}" "${IMAGE_NAME}:${TAG_NAME}"
 docker tag "${IMAGE_NAME}/spark-py:${TAG_NAME}" "${IMAGE_NAME}-py:${TAG_NAME}"
-# docker tag "${IMAGE_NAME}/spark-r:${TAG_NAME}" "${IMAGE_NAME}-r:${TAG_NAME}"
 
 popd >/dev/null