From de49a2675a7eab59de00fcebe2394e1b5b99d284 Mon Sep 17 00:00:00 2001 From: Qinlong Wang Date: Thu, 20 Jul 2023 14:48:57 +0800 Subject: [PATCH] Use master nodes when the job has no chief nodes. (#513) * Use master nodes when the job has no chief nodes * Format codes --- dlrover/python/master/node/job_manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dlrover/python/master/node/job_manager.py b/dlrover/python/master/node/job_manager.py index fe35cfaa1..666966de2 100644 --- a/dlrover/python/master/node/job_manager.py +++ b/dlrover/python/master/node/job_manager.py @@ -205,8 +205,12 @@ def _init_training_node_manager(self): self._elastic_job.get_node_service_addr, self._elastic_job.get_node_name, ) + + chief_nodes = self._job_nodes.get(NodeType.CHIEF, {}) + if not chief_nodes: + chief_nodes = self._job_nodes.get(NodeType.MASTER, {}) self._chief_manager = ChiefManager( - self._job_nodes.get(NodeType.CHIEF, {}), + chief_nodes, self._job_resource, self._relaunch_on_worker_failure, self._elastic_job.get_node_service_addr,