From 548ec61ebfb919d25f313b4772095b0c30844a29 Mon Sep 17 00:00:00 2001
From: 0xdeadcode
Date: Sat, 26 Oct 2024 21:25:17 +0000
Subject: [PATCH] feat: add health-check to all nodes

---
Review notes (everything between this marker and the first "diff --git" line
is ignored by "git am"):

* handlers/main.yaml: the existing "Restart vegavisor" handler is left
  untouched. ansible.builtin.service requires "name", so this patch only
  appends the new health-check handler.
* tasks/health-check.yaml: the download task notifies the restart handler too,
  so that a replaced binary is picked up by the running service.
* tasks/main.yaml: the import is switched to "restart-network.yaml", but no
  file is renamed by this patch. TODO(review): confirm that file exists under
  roles/vega_core/tasks/ before merging; import_tasks is resolved statically.
* No "index" lines are given for handlers/main.yaml and the two new files;
  they are optional for "git apply".
* Context-line indentation follows the usual two-space Ansible layout; verify
  against the tree if a hunk does not apply cleanly.

 roles/vega_core/defaults/main.yaml            |  4 ++++
 roles/vega_core/handlers/main.yaml            |  8 ++++++++
 roles/vega_core/tasks/health-check.yaml       | 21 +++++++++++++++++++++
 roles/vega_core/tasks/main.yaml               |  6 +++++-
 .../system/vega-health-check.service.j2       | 24 ++++++++++++++++++++++++
 5 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 roles/vega_core/tasks/health-check.yaml
 create mode 100644 roles/vega_core/templates/lib/systemd/system/vega-health-check.service.j2

diff --git a/roles/vega_core/defaults/main.yaml b/roles/vega_core/defaults/main.yaml
index 2c9b168..4b7f877 100644
--- a/roles/vega_core/defaults/main.yaml
+++ b/roles/vega_core/defaults/main.yaml
@@ -57,3 +57,7 @@ vega_core_block_explorer_postgresql_database: "tendermint"
 vega_core_block_explorer_postgresql_password: "tendermint"
 
 vega_core_data_node_settings: {}
+
+vega_core_with_health_check: true
+vega_core_health_check_repository: "nebula-dex/vega-health-check"
+vega_core_health_check_version: "v0.1.0"
diff --git a/roles/vega_core/handlers/main.yaml b/roles/vega_core/handlers/main.yaml
--- a/roles/vega_core/handlers/main.yaml
+++ b/roles/vega_core/handlers/main.yaml
@@ -15,3 +15,11 @@
     name: blockexplorer
   listen: "Restart blockexplorer"
   when: not ansible_check_mode
+
+- name: Restart vega-health-check
+  ansible.builtin.service:
+    state: "{{- 'restarted' if vega_core_run_network else 'stopped' -}}"
+    daemon_reload: true
+    name: vega-health-check
+  listen: "Restart vega-health-check"
+  when: not ansible_check_mode
diff --git a/roles/vega_core/tasks/health-check.yaml b/roles/vega_core/tasks/health-check.yaml
new file mode 100644
--- /dev/null
+++ b/roles/vega_core/tasks/health-check.yaml
@@ -0,0 +1,21 @@
+---
+# Installs the vega-health-check binary and its systemd unit.
+- name: Download health-check binary
+  ansible.builtin.get_url:
+    # The destination name is version-independent, so always re-fetch;
+    # the file is only replaced when its contents differ.
+    force: true
+    url: "https://github.com/{{- vega_core_health_check_repository -}}/releases/download/{{- vega_core_health_check_version -}}/vega-health-check-linux-amd64"
+    dest: /usr/local/bin/vega-health-check
+    mode: '0755'
+  # Restart so a replaced binary is actually the one running.
+  notify: "Restart vega-health-check"
+
+- name: Install health-check systemd
+  ansible.builtin.template:
+    src: "lib/systemd/system/vega-health-check.service.j2"
+    dest: "/lib/systemd/system/vega-health-check.service"
+    owner: "root"
+    group: "root"
+    mode: "0644"
+  notify: "Restart vega-health-check"
diff --git a/roles/vega_core/tasks/main.yaml b/roles/vega_core/tasks/main.yaml
index 7ac7ee4..af679b5 100644
--- a/roles/vega_core/tasks/main.yaml
+++ b/roles/vega_core/tasks/main.yaml
@@ -26,4 +26,8 @@
   when: vega_core_with_unsafe_reset_all
 
 - name: Restart network
-  ansible.builtin.import_tasks: restart_network.yaml
+  ansible.builtin.import_tasks: restart-network.yaml
+
+- name: Health-check setup
+  ansible.builtin.import_tasks: health-check.yaml
+  when: vega_core_with_health_check
diff --git a/roles/vega_core/templates/lib/systemd/system/vega-health-check.service.j2 b/roles/vega_core/templates/lib/systemd/system/vega-health-check.service.j2
new file mode 100644
--- /dev/null
+++ b/roles/vega_core/templates/lib/systemd/system/vega-health-check.service.j2
@@ -0,0 +1,24 @@
+[Unit]
+Description=healthcheck
+Documentation=https://github.com/vegaprotocol/ansible
+After=network.target network-online.target
+Requires=network-online.target
+
+[Service]
+User=vega
+Group=vega
+# Mode follows the node flavour: block explorer first, then data node, otherwise plain core.
+{% if vega_core_with_block_explorer | default(false) %}
+ExecStart=/usr/local/bin/vega-health-check blockexplorer --blockexplorer-api-url "http://localhost:1515" --core-url "http://localhost:3003" --http-port 8080
+{% elif vega_core_with_data_node | default(false) %}
+ExecStart=/usr/local/bin/vega-health-check data-node --api-url "http://localhost:3008" --core-url "http://localhost:3003" --http-port 8080
+{% else %}
+ExecStart=/usr/local/bin/vega-health-check vega --core-url "http://localhost:3003" --http-port 8080
+{% endif %}
+TimeoutStopSec=10s
+ProtectSystem=full
+AmbientCapabilities=CAP_NET_BIND_SERVICE
+CPUQuota=10%
+
+[Install]
+WantedBy=multi-user.target