-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
72 lines (57 loc) · 3.47 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Base image: Java 8 runtime (JRE) on which Hive and Hadoop are installed.
# NOTE(review): the tag is not pinned by digest, so rebuilds may pick up a
# different base image — confirm whether that is intended.
FROM openjdk:8-jre

WORKDIR /opt

# Install locations and release versions. They are read by the RUN steps that
# follow and stay set in the environment of the running container.
ENV HADOOP_HOME=/opt/hadoop \
    HADOOP_VERSION=3.3.1 \
    HIVE_HOME=/opt/hive \
    HIVE_VERSION=3.1.2

# Create both install directories in a single layer; -p keeps the step from
# failing when a directory already exists.
RUN mkdir -p "${HIVE_HOME}" "${HADOOP_HOME}"
# Install the required OS packages, then unpack the Hive and Hadoop binary
# distributions into HIVE_HOME and HADOOP_HOME.
#
# - Every apt-get install carries -y so the non-interactive build can never
#   stop at a confirmation prompt.
# - The apt package lists are removed in the same layer so they do not remain
#   in the image.
# - The tarballs are fetched from archive.apache.org, which keeps every
#   release; dlcdn.apache.org only serves current releases, so pinned older
#   versions eventually return 404 there.
# - curl -f turns an HTTP error into a failed step instead of piping an error
#   page into tar.
# - --strip-components=1 extracts the contents of the top-level archive
#   directory straight into the target, leaving no empty directory in /opt.
# NOTE(review): the downloads are not checksum-verified — consider checking
# the published .sha512 files.
RUN apt-get clean && \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install -qqy curl && \
    apt-get install -y --only-upgrade libexpat1 libssl1.1 openssl && \
    apt-get install -y libk5crypto3 libkrb5-3 libsqlite3-0 && \
    rm -rf /var/lib/apt/lists/* && \
    curl -fsSL "https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz" | \
        tar -xzf - -C "${HIVE_HOME}" --strip-components=1 && \
    curl -fsSL "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" | \
        tar -xzf - -C "${HADOOP_HOME}" --strip-components=1
# Replace any PostgreSQL JDBC driver jar in Hive's lib directory with 42.4.2.
# Removal and download share one layer, and curl -f aborts the build on an
# HTTP error so an error page is never saved under the .jar name.
RUN rm -f "${HIVE_HOME}"/lib/postgresql*.jar && \
    curl -fsSL \
        -o "${HIVE_HOME}/lib/postgresql-42.4.2.jar" \
        https://jdbc.postgresql.org/download/postgresql-42.4.2.jar
# Make the Hadoop AWS jars visible to Hive by symlinking every *aws* file from
# Hadoop's tools/lib directory into Hive's lib directory.
RUN ln --symbolic \
    "${HADOOP_HOME}"/share/hadoop/tools/lib/*aws* \
    "${HIVE_HOME}/lib"
# Copy the build context's conf directory into Hive's conf directory, and the
# entrypoint script to ${HIVE_HOME}/entrypoint.sh (the path the ENTRYPOINT
# instruction at the end of this file runs).
COPY conf ${HIVE_HOME}/conf
COPY scripts/entrypoint.sh ${HIVE_HOME}/entrypoint.sh
# Remove the Log4j jars shipped with Hadoop and Hive and install the pinned
# Log4j 2 release (LOG4J_VERSION) into Hive's lib directory.
ARG LOG4J_VERSION=2.17.1
ARG LOG4J_LOCATION="https://repo1.maven.org/maven2/org/apache/logging/log4j"
# - The YARN catalog webapp .war is deleted too (presumably because it bundles
#   its own Log4j copy — confirm); its file name follows HADOOP_VERSION so the
#   removal keeps matching when the Hadoop version is bumped.
# - Each replacement jar is fetched with curl -f, so an HTTP error fails the
#   build instead of leaving an error page on the classpath under a .jar name.
RUN rm -f \
        "${HADOOP_HOME}"/share/hadoop/common/lib/slf4j-log4j12* \
        "${HADOOP_HOME}"/share/hadoop/common/lib/log4j* \
        "${HADOOP_HOME}"/share/hadoop/hdfs/lib/log4j* \
        "${HADOOP_HOME}/share/hadoop/yarn/hadoop-yarn-applications-catalog-webapp-${HADOOP_VERSION}.war" \
        "${HIVE_HOME}"/lib/log4j-* && \
    for artifact in log4j-1.2-api log4j-api log4j-core log4j-slf4j-impl; do \
        curl -fsSL \
            -o "${HIVE_HOME}/lib/${artifact}-${LOG4J_VERSION}.jar" \
            "${LOG4J_LOCATION}/${artifact}/${LOG4J_VERSION}/${artifact}-${LOG4J_VERSION}.jar" \
            || exit 1; \
    done
# Turn off JVM DNS caching for both successful and failed lookups.
# Property reference:
# https://docs.oracle.com/javase/7/docs/technotes/guides/net/properties.html
# Existing lines mentioning either property are deleted first, then both
# properties are appended with a TTL of 0. All of it happens in one layer, so
# java.security is rewritten into the image once rather than once per command.
RUN security_file="${JAVA_HOME}/lib/security/java.security" && \
    touch "${security_file}" && \
    sed -i \
        -e '/networkaddress.cache.ttl/d' \
        -e '/networkaddress.cache.negative.ttl/d' \
        "${security_file}" && \
    printf '%s\n' \
        'networkaddress.cache.ttl=0' \
        'networkaddress.cache.negative.ttl=0' \
        >> "${security_file}"
# Pre-create the Hive data directory and the .beeline directories (one under /
# and one under the build user's home). imagebuilder requires a directory to
# exist before a VOLUME instruction refers to it.
RUN mkdir -p \
    /var/lib/hive \
    /.beeline \
    "${HOME}/.beeline"
# Allow the container to run as a non-root user.
# - Creates the system group and user "hive" (GID/UID 1002) with HIVE_HOME as
#   the home directory.
# - Hands the install trees, data/beeline directories, /etc/passwd and the
#   resolved Java cacerts file to UID 1002 with group 0, and grants user and
#   group read/write access.
# - `set --` stores the path list once in the positional parameters so chown
#   and chmod operate on exactly the same set of paths.
# - chmod uses capital X: directories (and files that are already executable)
#   get the execute bit, while plain files such as /etc/passwd or the cacerts
#   store are not turned into executables.
# - The entrypoint script is explicitly made executable; its ownership is
#   already covered by the recursive chown of HIVE_HOME.
RUN groupadd -r hive --gid=1002 && \
    useradd -r -g hive --uid=1002 -d "${HIVE_HOME}" hive && \
    set -- \
        "${HIVE_HOME}" \
        "${HADOOP_HOME}" \
        /var/lib/hive \
        /.beeline \
        "${HOME}/.beeline" \
        /etc/passwd \
        "$(readlink -f "${JAVA_HOME}/lib/security/cacerts")" && \
    chown -R 1002:0 "$@" && \
    chmod -R u+rwX,g+rwX "$@" && \
    chmod +x "${HIVE_HOME}/entrypoint.sh"
# Prepend the Hive and Hadoop bin directories to PATH.
ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:$PATH

# Run as UID 1002 (non-root), starting in the Hive install directory.
USER 1002
WORKDIR $HIVE_HOME

# Documents the port the service listens on (presumably the Hive metastore
# Thrift port — confirm); EXPOSE by itself does not publish it.
EXPOSE 9083

# Start the entrypoint script through `exec` so it replaces the wrapper shell
# and receives stop signals directly. "$@" forwards any arguments given via
# CMD or `docker run`; the trailing "--" fills $0 so the first real argument
# arrives as $1. With a bare `sh -c <script>` those arguments would be dropped.
ENTRYPOINT ["sh", "-c", "exec /opt/hive/entrypoint.sh \"$@\"", "--"]