Skip to content

Commit

Permalink
Add passwordless SSH connections between nodes, as required by MPI
Browse files Browse the repository at this point in the history
  • Loading branch information
louie-tsai committed May 29, 2024
1 parent 3c79e8c commit cc37019
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
23 changes: 23 additions & 0 deletions pytorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,15 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin
gcc \
libgl1-mesa-glx \
libglib2.0-0 \
openssh-server \
net-tools \
virtualenv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ENV SIGOPT_PROJECT=.
# Port used for ssh between nodes (written into sshd_config and ssh_config
# further down in this stage). Override at build time with
# `--build-arg SSHD_PORT=<port>`.
ARG SSHD_PORT=2345
# Persist the chosen port into the runtime environment. Uses key=value form;
# the space-separated `ENV key value` form is deprecated.
ENV SSHD_PORT=${SSHD_PORT}

# Work from the filesystem root and copy the multinode requirements file
# there (it lands at /multinode-requirements.txt).
# NOTE(review): presumably consumed by a later install step not visible here -- confirm.
WORKDIR /
COPY multinode-requirements.txt .
Expand All @@ -100,6 +104,25 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src
wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \
wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE

# Enable passwordless ssh for mpirun.
#
# - /var/run/sshd is created with `-p` so the step does not fail if the
#   directory already exists.
# - sshd_config: allow root login, password authentication and empty
#   passwords, disable PAM, and listen on SSHD_PORT.
# - ssh_config: make every outgoing connection use SSHD_PORT and skip host-key
#   verification.
#
# NOTE(review): root login with empty passwords and disabled host-key checking
# is only acceptable on an isolated cluster network -- confirm against the
# intended deployment.
RUN mkdir -p /var/run/sshd && \
    sed -i \
        -e 's/^#PermitRootLogin prohibit-password$/PermitRootLogin yes/' \
        -e 's/^#PasswordAuthentication yes$/PasswordAuthentication yes/' \
        -e 's/^#PermitEmptyPasswords no$/PermitEmptyPasswords yes/' \
        -e 's/^UsePAM yes/UsePAM no/' \
        /etc/ssh/sshd_config && \
    printf 'Port %s\n' "${SSHD_PORT}" >> /etc/ssh/sshd_config && \
    printf 'Host *\n Port %s\n StrictHostKeyChecking no\n' "${SSHD_PORT}" >> /etc/ssh/ssh_config

# Document the ssh port for operators and tooling. EXPOSE does not publish the
# port by itself; it still has to be published or reachable at run time.
EXPOSE ${SSHD_PORT}

# Generate a passphrase-less RSA key pair for the build user and authorize its
# public key for logins to that same account.
# Permissions on ~/.ssh and authorized_keys are set explicitly rather than left
# to the build umask, because sshd's permission checks (StrictModes) reject
# key files that are group- or world-writable.
# NOTE(review): the private key is baked into an image layer, so every
# container started from this image (and anyone who can pull it) shares the
# same key -- confirm this is acceptable for the target environment.
RUN mkdir -p ~/.ssh && \
    chmod 700 ~/.ssh && \
    /usr/bin/ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys && \
    chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys



FROM ${PYTHON_BASE} AS ipex-xpu-base

RUN apt-get update && \
Expand Down
5 changes: 1 addition & 4 deletions pytorch/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,7 @@ services:
org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-multinode
target: multinode
command: >
sh -c "python -c 'import neural_compressor;import
oneccl_bindings_for_pytorch as oneccl; print(\"Neural Compressor
Version:\", neural_compressor.__version__, \"\\nOneCCL:\",
oneccl.__version__)'"
bash -c "/usr/sbin/sshd -D"
extends: ipex-base
image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.3.0}-oneccl-inc-${INC_VERSION:-2.5.1}
xpu:
Expand Down

0 comments on commit cc37019

Please sign in to comment.