diff --git a/bootstrap/bootstrap/bootstrap.py b/bootstrap/bootstrap/bootstrap.py index 535dbf28d9..afd83c6680 100755 --- a/bootstrap/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap/bootstrap.py @@ -19,6 +19,7 @@ class Bootstrapper: DOCKER_CONFIG_FILE_PATH = DOCKER_CONFIG_PATH.joinpath("bootstrap/startup.json") HOST_CONFIG_PATH = os.environ.get("BLUEOS_CONFIG_PATH", "/tmp/blueos/.config") CORE_CONTAINER_NAME = "blueos-core" + SETTINGS_NAME_CORE = "core" core_last_response_time = time.time() def __init__(self, client: docker.DockerClient, low_level_api: docker.APIClient = None) -> None: @@ -61,10 +62,10 @@ def read_config_file() -> Dict[str, Any]: try: with open(Bootstrapper.DOCKER_CONFIG_FILE_PATH, encoding="utf-8") as config_file: config = json.load(config_file) - assert "core" in config, "missing core entry in startup.json" + assert Bootstrapper.SETTINGS_NAME_CORE in config, "missing core entry in startup.json" necessary_keys = ["image", "tag", "binds", "privileged", "network"] for key in necessary_keys: - assert key in config["core"], f"missing key in json file: {key}" + assert key in config[Bootstrapper.SETTINGS_NAME_CORE], f"missing key in json file: {key}" except Exception as error: print(f"unable to read startup.json file ({error}), reverting to defaults...") @@ -73,7 +74,7 @@ def read_config_file() -> Dict[str, Any]: with open(Bootstrapper.DEFAULT_FILE_PATH, encoding="utf-8") as config_file: config = json.load(config_file) - config["core"]["binds"][str(Bootstrapper.HOST_CONFIG_PATH)] = { + config[Bootstrapper.SETTINGS_NAME_CORE]["binds"][str(Bootstrapper.HOST_CONFIG_PATH)] = { "bind": str(Bootstrapper.DOCKER_CONFIG_PATH), "mode": "rw", } @@ -175,6 +176,8 @@ def start(self, component_name: str) -> bool: return False print(f"Starting {image_name}") + # Remove the container if one with this name already exists + self.remove(component_name) try: self.client.containers.run( f"{image_name}:{image_version}", @@ -189,7 +192,7 @@ def start(self, component_name: str) -> bool: 
self.overwrite_config_file_with_defaults() return False - print("Core started") + print(f"{component_name} started") return True def is_running(self, component: str) -> bool: @@ -201,20 +204,21 @@ def is_running(self, component: str) -> bool: Returns: bool: True if the chosen container is running """ - if not any(container.name.endswith(component) for container in self.client.containers.list()): - return False + return any(container.name.endswith(component) for container in self.client.containers.list()) - if component == "core": - try: - response = requests.get("http://localhost/version-chooser/v1.0/version/current", timeout=10) - if "core" in response.json()["repository"]: - self.core_last_response_time = time.time() - return True - return False - except Exception as e: - print(f"Could not talk to version chooser for {time.time() - self.core_last_response_time}: {e}") - return False - return True + def is_version_chooser_online(self) -> bool: + """Check if the version chooser service is online. + + Returns: + bool: True if version chooser is online, False otherwise. 
+ """ + try: + response = requests.get("http://localhost/version-chooser/v1.0/version/current", timeout=10) + if Bootstrapper.SETTINGS_NAME_CORE in response.json()["repository"]: + return True + except Exception as e: + print(f"Could not talk to version chooser for {time.time() - self.core_last_response_time}: {e}") + return False def remove(self, container: str) -> None: """Deletes the chosen container if it exists (needed for updating the running image)""" @@ -228,21 +232,39 @@ def remove(self, container: str) -> None: def run(self) -> None: """Runs the bootstrapper""" + print("Starting main loop") while True: - time.sleep(1) + time.sleep(5) for image in self.read_config_file(): - if self.is_running(image): + # Start image if it's not running + if not self.is_running(image): + try: + if self.start(image): + print(f"{image} is not running, starting..") + except Exception as error: + warn(f"error: {type(error)}: {error}, retrying...") + + if image != Bootstrapper.SETTINGS_NAME_CORE: continue - # reset core to default if it's hasn't responded in 5 minutes - if time.time() - self.core_last_response_time > 300: - print("Core has not responded in 5 minutes, resetting to factory...") - self.overwrite_config_file_with_defaults() + + if self.is_version_chooser_online(): + self.core_last_response_time = time.time() + continue + + # Keep waiting while the version chooser has been unresponsive for less than the 5-minute timeout + if time.time() - self.core_last_response_time < 300: + continue + + # Version chooser failed, time to restart core + self.core_last_response_time = time.time() + print("Core has not responded in 5 minutes, resetting to factory...") + self.overwrite_config_file_with_defaults() try: - self.remove(image) if self.start(image): - print("Done") + print("Restarted core..") except Exception as error: warn(f"error: {type(error)}: {error}, retrying...") + # This is required for the tests, we need to "finish" somehow if "pytest" in sys.modules: return diff --git a/bootstrap/test_bootstrap.py 
b/bootstrap/test_bootstrap.py index a985c580df..4e430d0aab 100644 --- a/bootstrap/test_bootstrap.py +++ b/bootstrap/test_bootstrap.py @@ -183,12 +183,18 @@ def test_is_running(self) -> None: fake_core = FakeContainer(Bootstrapper.CORE_CONTAINER_NAME) fake_client.set_active_dockers([fake_core]) + assert bootstrapper.is_running("core") is True + + @pytest.mark.timeout(10) + def test_is_version_chooser_online(self) -> None: + fake_client = FakeClient() + bootstrapper = Bootstrapper(fake_client, FakeLowLevelAPI()) self.mock_response.json.return_value = {"repository": []} - assert bootstrapper.is_running("core") is False + assert bootstrapper.is_version_chooser_online() is False self.mock_response.json.return_value = {"repository": ["core"]} - assert bootstrapper.is_running("core") is True + assert bootstrapper.is_version_chooser_online() is True @pytest.mark.timeout(10) def test_remove_core(self) -> None: