From 3abe0cc7eb71df4826de831589d5135680a73086 Mon Sep 17 00:00:00 2001 From: Ivan Shumkov Date: Tue, 13 Aug 2024 13:14:01 +0700 Subject: [PATCH] refactor: split elastic_beat into core_filebeat, metricbeat and dashmate (#658) * refactor: split elastic_beat into core_filebeat, metricbeat and dashmate * refactor: use FQN and remove temp role * refactor: reuse filebeat config * fix: hosts * fix: hosts 2 * fix: start kibana only on logs-1 * fix: select host nodes * fix: filebeat_inputs.yml not found * fix: reading logs from gzip * fix: tenderdash indexing gz * fix: indexing old data * refactor: remove unused exclude --- ansible/deploy.yml | 32 ++--- ansible/group_vars/all | 1 + ansible/roles/core_filebeat/defaults/main.yml | 3 + ansible/roles/core_filebeat/tasks/main.yml | 32 +++++ .../vars/common.yml | 11 +- .../vars/core.yml | 2 +- ansible/roles/dashmate/tasks/logs.yml | 53 ++++++++ ansible/roles/dashmate/tasks/main.yml | 35 +----- .../roles/dashmate/vars/filebeat_inputs.yml | 61 +++++++++ ansible/roles/elastic_beats/tasks/main.yml | 118 ------------------ ansible/roles/elastic_beats/vars/drive.yml | 27 ---- .../roles/elastic_beats/vars/tenderdash.yml | 24 ---- .../elastic_stack/tasks/configure_cluster.yml | 15 +-- ansible/roles/elastic_stack/tasks/main.yml | 18 ++- ansible/roles/metricbeat/tasks/main.yml | 67 ++++++++++ 15 files changed, 263 insertions(+), 236 deletions(-) create mode 100644 ansible/roles/core_filebeat/defaults/main.yml create mode 100644 ansible/roles/core_filebeat/tasks/main.yml rename ansible/roles/{elastic_beats => core_filebeat}/vars/common.yml (60%) rename ansible/roles/{elastic_beats => core_filebeat}/vars/core.yml (88%) create mode 100644 ansible/roles/dashmate/tasks/logs.yml create mode 100644 ansible/roles/dashmate/vars/filebeat_inputs.yml delete mode 100644 ansible/roles/elastic_beats/tasks/main.yml delete mode 100644 ansible/roles/elastic_beats/vars/drive.yml delete mode 100644 ansible/roles/elastic_beats/vars/tenderdash.yml create mode 100644 ansible/roles/metricbeat/tasks/main.yml diff --git a/ansible/deploy.yml b/ansible/deploy.yml index 9bba5a1c..7f5aa1e2 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -78,6 +78,7 @@ become: true roles: - elastic_stack + - metricbeat - name: Setup load tester hosts: load_test @@ -90,7 +91,8 @@ hosts: metrics become: true roles: - - role: metrics + - metrics + - metricbeat - name: Set up miners hosts: miners @@ -100,8 +102,8 @@ - role: dashd tags: - dashd - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat - name: Set up mixers hosts: mixer_nodes @@ -111,8 +113,8 @@ - role: dashd tags: - dashd - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat - name: Set up core and tenderdash on seed nodes hosts: seed_nodes @@ -133,8 +135,8 @@ tags: - dashd - role: tenderdash - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat - name: Set up core on masternodes hosts: masternodes @@ -158,8 +160,8 @@ tags: - dashd - mn_status_report - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat # Start network @@ -189,8 +191,8 @@ dashd_zmq: true dashd_listen: true - insight - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat tags: - web @@ -205,8 +207,8 @@ enable_wallet: true tags: - dashd - - role: elastic_beats - core_container_name: dashd + - core_filebeat + - metricbeat # Register masternodes and set sporks @@ -283,9 +285,7 @@ - role: dash_cli - role: dashmate - role: 
mn_status_report - - role: elastic_beats - core_container_name: core - abci_logs_path: "{{ dashmate_logs_dir }}" + - role: metricbeat - name: Set up protx diff script hosts: masternodes diff --git a/ansible/group_vars/all b/ansible/group_vars/all index 3f932252..774ce1f7 100644 --- a/ansible/group_vars/all +++ b/ansible/group_vars/all @@ -157,6 +157,7 @@ kibana_encryptionkey: # Set to 50% of instance memory # https://www.elastic.co/guide/en/elasticsearch/guide/current/heap-sizing.html elastic_heap_size: 8g +metricbeat_enabled: true elastic_compose_project_name: elastic elastic_path: '{{ dashd_home }}/{{ elastic_compose_project_name }}' diff --git a/ansible/roles/core_filebeat/defaults/main.yml b/ansible/roles/core_filebeat/defaults/main.yml new file mode 100644 index 00000000..7da5862f --- /dev/null +++ b/ansible/roles/core_filebeat/defaults/main.yml @@ -0,0 +1,3 @@ +--- + +core_container_name: "dashd" diff --git a/ansible/roles/core_filebeat/tasks/main.yml b/ansible/roles/core_filebeat/tasks/main.yml new file mode 100644 index 00000000..001cf43e --- /dev/null +++ b/ansible/roles/core_filebeat/tasks/main.yml @@ -0,0 +1,32 @@ +--- + +- name: Load common filebeat config + ansible.builtin.include_vars: + file: common.yml + +- name: Get core container host info + community.docker.docker_host_info: + containers: true + containers_filters: + name: '{{ core_container_name }}' + register: core_host_info + +- name: Set container ids for core if core is running + ansible.builtin.set_fact: + core_container_id: '{{ core_host_info.containers[0].Id }}' + when: core_host_info.containers | length > 0 + +- name: Load core input config if core is running + ansible.builtin.include_vars: + file: core.yml + when: core_container_id is defined + +- name: Set up filebeat log monitoring + ansible.builtin.include_role: + name: geerlingguy.filebeat + +- name: Make sure filebeat is restarted + ansible.builtin.service: + name: filebeat + state: restarted + enabled: true diff --git a/ansible/roles/elastic_beats/vars/common.yml b/ansible/roles/core_filebeat/vars/common.yml similarity index 60% rename from ansible/roles/elastic_beats/vars/common.yml rename to ansible/roles/core_filebeat/vars/common.yml index d2b0f728..4567afcd 100644 --- a/ansible/roles/elastic_beats/vars/common.yml +++ b/ansible/roles/core_filebeat/vars/common.yml @@ -4,8 +4,15 @@ filebeat_version: 8.x filebeat_package: "filebeat={{ elastic_version }}" filebeat_output_logstash_enabled: false filebeat_output_elasticsearch_enabled: true -filebeat_output_elasticsearch_hosts: - - "{{ hostvars['logs-1'].private_ip }}:9200" +filebeat_output_elasticsearch_hosts: >- + {{ + query('inventory_hostnames', 'logs_nodes') | + map('extract', hostvars, ['private_ip']) | + map('string') | + product([':9200']) | + map('join') | + list + }} filebeat_output_elasticsearch_auth: username: "{{ elastic_username }}" password: "{{ elastic_password }}" diff --git a/ansible/roles/elastic_beats/vars/core.yml b/ansible/roles/core_filebeat/vars/core.yml similarity index 88% rename from ansible/roles/elastic_beats/vars/core.yml rename to ansible/roles/core_filebeat/vars/core.yml index ed7e161b..12810d32 100644 --- a/ansible/roles/elastic_beats/vars/core.yml +++ b/ansible/roles/core_filebeat/vars/core.yml @@ -2,7 +2,7 @@ filebeat_inputs: - type: container - enabled: "{{ core_host_info.containers | length > 0 }}" + enabled: true index: "logs-core-{{ dash_network_name }}-%{[agent.version]}" paths: - '/var/lib/docker/containers/{{ core_container_id }}/*.log' diff --git 
a/ansible/roles/dashmate/tasks/logs.yml b/ansible/roles/dashmate/tasks/logs.yml new file mode 100644 index 00000000..72037105 --- /dev/null +++ b/ansible/roles/dashmate/tasks/logs.yml @@ -0,0 +1,53 @@ +--- + +- name: Create logs dir + ansible.builtin.file: + path: '{{ dashmate_logs_dir }}' + state: directory + owner: '{{ dashmate_user }}' + group: '{{ dashmate_group }}' + recurse: true + +- name: Configure log rotation + ansible.builtin.include_role: + name: arillso.logrotate + vars: + logrotate_applications: + - name: platform-logs + definitions: + - logs: + - '{{ dashmate_logs_dir }}/*.log' + options: + - rotate 7 + - daily + - maxsize 1G + - missingok + - notifempty + - copytruncate + - compress + - delaycompress + +- name: Ensure logrotate runs hourly under systemd timer + ansible.builtin.lineinfile: + path: /lib/systemd/system/logrotate.timer + regexp: '^OnCalendar=hourly' + insertafter: '^OnCalendar=daily' + line: OnCalendar=hourly + +- name: Load common filebeat config + ansible.builtin.include_vars: + file: "{{ role_path }}/../core_filebeat/vars/common.yml" + +- name: Load filebeat inputs + ansible.builtin.include_vars: + file: filebeat_inputs.yml + +- name: Set up filebeat log monitoring + ansible.builtin.include_role: + name: geerlingguy.filebeat + +- name: Make sure filebeat is restarted + ansible.builtin.service: + name: filebeat + state: restarted + enabled: true diff --git a/ansible/roles/dashmate/tasks/main.yml b/ansible/roles/dashmate/tasks/main.yml index 25e6ba6e..44b0aadd 100644 --- a/ansible/roles/dashmate/tasks/main.yml +++ b/ansible/roles/dashmate/tasks/main.yml @@ -42,39 +42,8 @@ dir: '{{ dashmate_home }}' users: '{{ system_users + [dashmate_user_dict] }}' -- name: Create logs dir - ansible.builtin.file: - path: '{{ dashmate_logs_dir }}' - state: directory - owner: '{{ dashmate_user }}' - group: '{{ dashmate_group }}' - recurse: true - -- name: Configure log rotation - ansible.builtin.include_role: - name: arillso.logrotate - vars: - logrotate_applications: - - name: platform-logs - definitions: - - logs: - - '{{ dashmate_logs_dir }}/*.log' - options: - - rotate 7 - - daily - - maxsize 1G - - missingok - - notifempty - - copytruncate - - compress - - delaycompress - -- name: Ensure logrotate runs hourly under systemd timer - ansible.builtin.lineinfile: - path: /lib/systemd/system/logrotate.timer - regexp: '^OnCalendar=hourly' - insertafter: '^OnCalendar=daily' - line: OnCalendar=hourly +- name: Configure logs + ansible.builtin.import_tasks: ./logs.yml - name: Create dashmate config dir ansible.builtin.file: diff --git a/ansible/roles/dashmate/vars/filebeat_inputs.yml b/ansible/roles/dashmate/vars/filebeat_inputs.yml new file mode 100644 index 00000000..552254e5 --- /dev/null +++ b/ansible/roles/dashmate/vars/filebeat_inputs.yml @@ -0,0 +1,61 @@ +--- + +filebeat_inputs: + - type: log + enabled: true + index: "logs-core-{{ dash_network_name }}-%{[agent.version]}" + paths: + - "{{ dashmate_logs_dir }}/core.log" + processors: + - add_fields: + target: event + fields: + dataset: "core-{{ dash_network_name }}" + - dissect: + tokenizer: "%{?timestamp} %{message}" + overwrite_keys: true + target_prefix: "" + - type: log + enabled: "{{ dashmate_platform_enable }}" + json.message_key: message + index: "logs-drive.abci-{{ dash_network_name }}-%{[agent.version]}" + paths: + - "{{ dashmate_logs_dir }}/drive-json.log" + processors: + - timestamp: + field: json.timestamp + layouts: + - UNIX_MS + - add_fields: + target: event + fields: + dataset: "drive.abci-{{ dash_network_name }}" 
+ - rename: + fields: + - from: "json.fields.message" + to: "message" + - from: "json.level" + to: "log.level" + ignore_missing: true + fail_on_error: true + - type: log + enabled: "{{ dashmate_platform_enable }}" + json.message_key: message + index: "logs-drive.tenderdash-{{ dash_network_name }}-%{[agent.version]}" + paths: + - "{{ dashmate_logs_dir }}/tenderdash.log" + processors: + - add_fields: + target: event + fields: + dataset: "drive.tenderdash-{{ dash_network_name }}" + - rename: + fields: + - from: "json.message" + to: "message" + ignore_missing: true + fail_on_error: true + - rename: + fields: + - from: "json.level" + to: "log.level" diff --git a/ansible/roles/elastic_beats/tasks/main.yml b/ansible/roles/elastic_beats/tasks/main.yml deleted file mode 100644 index 2646f6e0..00000000 --- a/ansible/roles/elastic_beats/tasks/main.yml +++ /dev/null @@ -1,118 +0,0 @@ ---- - -- name: Get core container host info - community.docker.docker_host_info: - containers: true - containers_filters: - name: '{{ core_container_name }}' - register: core_host_info - -- name: Get tenderdash container host info - community.docker.docker_host_info: - containers: true - containers_filters: - name: tender - register: tenderdash_host_info - -- name: Get drive container host info - community.docker.docker_host_info: - containers: true - containers_filters: - name: abci - register: drive_host_info - -- name: Set container ids for core and tenderdash - ansible.builtin.set_fact: - core_container_id: '{{ core_host_info.containers[0].Id if (core_host_info.containers | length > 0) else "null" }}' - tenderdash_container_id: '{{ tenderdash_host_info.containers[0].Id if (tenderdash_host_info.containers | length > 0) else "null" }}' - -- name: Load common filebeat config - ansible.builtin.include_vars: - file: common.yml - -- name: Load core input config - ansible.builtin.include_vars: - file: core.yml - -- name: Load tenderdash input config - ansible.builtin.include_vars: - file: tenderdash.yml - when: tenderdash_host_info.containers | length > 0 - -- name: Load drive input config - ansible.builtin.include_vars: - file: drive.yml - when: drive_host_info.containers | length > 0 - -- name: Merge drive and tenderdash input configs - ansible.builtin.set_fact: - platform_filebeat_inputs: "{{ [platform_filebeat_inputs, drive_filebeat_inputs] | community.general.lists_mergeby('index') }}" - when: platform_filebeat_inputs is defined and drive_filebeat_inputs is defined - -- name: Merge platform and core input configs - ansible.builtin.set_fact: - filebeat_inputs: "{{ [filebeat_inputs, platform_filebeat_inputs] | community.general.lists_mergeby('index') }}" - when: platform_filebeat_inputs is defined - -- name: Set up filebeat log monitoring - ansible.builtin.include_role: - name: geerlingguy.filebeat - -# TODO: Make sure we have retention policy for metrics -- name: Set up metricbeat - ansible.builtin.include_role: - name: elastic.beats - vars: - beats_version: "{{ elastic_version }}" - beat: metricbeat - beat_conf: - setup: - dashboards: - enabled: true - kibana: - host: "{{ hostvars['logs-1'].private_ip }}:5601" - username: "{{ elastic_username }}" - password: "{{ elastic_password }}" - metricbeat: - modules: - - module: system - metricsets: - - cpu # CPU usage - - load # CPU load averages - - memory # Memory usage - - network # Network IO - - process # Per process metrics - - process_summary # Process summary - - uptime # System Uptime - - socket_summary # Socket summary - - core # Per CPU core usage - - diskio # Disk 
IO - - fsstat # File system summary metrics - - socket # Sockets and connection info (linux only) - enabled: true - period: 10s - processes: ['.*'] - - # Configure the metric types that are included by these metricsets. - cpu.metrics: ["percentages", "normalized_percentages"] # The other available option is ticks. - core.metrics: ["percentages"] # The other available option is ticks. - - module: docker - metricsets: - - "container" - - "cpu" - - "diskio" - - "event" - - "healthcheck" - - "info" - - "memory" - - "network" - # - "network_summary" - hosts: ["unix:///var/run/docker.sock"] - period: 10s - enabled: true - output_conf: - elasticsearch: - hosts: - - "{{ hostvars['logs-1'].private_ip }}:9200" - username: "{{ elastic_username }}" - password: "{{ elastic_password }}" diff --git a/ansible/roles/elastic_beats/vars/drive.yml b/ansible/roles/elastic_beats/vars/drive.yml deleted file mode 100644 index 6cc5efc9..00000000 --- a/ansible/roles/elastic_beats/vars/drive.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -drive_filebeat_inputs: - - type: log - enabled: "{{ drive_host_info.containers | length > 0 }}" - json.message_key: message - exclude_files: ['\.gz$'] - index: "logs-drive.abci-{{ dash_network_name }}-%{[agent.version]}" - paths: - - "{{ abci_logs_path }}/drive-json*.log*" - processors: - - timestamp: - field: json.timestamp - layouts: - - UNIX_MS - - add_fields: - target: event - fields: - dataset: "drive.abci-{{ dash_network_name }}" - - rename: - fields: - - from: "json.fields.message" - to: "message" - - from: "json.level" - to: "log.level" - ignore_missing: true - fail_on_error: true diff --git a/ansible/roles/elastic_beats/vars/tenderdash.yml b/ansible/roles/elastic_beats/vars/tenderdash.yml deleted file mode 100644 index f958ec10..00000000 --- a/ansible/roles/elastic_beats/vars/tenderdash.yml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -platform_filebeat_inputs: - - type: container - enabled: "{{ tenderdash_host_info.containers | length > 0 }}" - json.message_key: message - index: "logs-drive.tenderdash-{{ dash_network_name }}-%{[agent.version]}" - paths: - - '/var/lib/docker/containers/{{ tenderdash_container_id }}/*.log' - processors: - - add_fields: - target: event - fields: - dataset: "drive.tenderdash-{{ dash_network_name }}" - - rename: - fields: - - from: "json.message" - to: "message" - ignore_missing: true - fail_on_error: true - - rename: - fields: - - from: "json.level" - to: "log.level" diff --git a/ansible/roles/elastic_stack/tasks/configure_cluster.yml b/ansible/roles/elastic_stack/tasks/configure_cluster.yml index 9cea11cb..899afd0f 100644 --- a/ansible/roles/elastic_stack/tasks/configure_cluster.yml +++ b/ansible/roles/elastic_stack/tasks/configure_cluster.yml @@ -20,7 +20,7 @@ body_format: json body: "{{ lookup('file', 'files/index-templates/logs-core.json') }}" -- name: Set up abci index template +- name: Set up drive abci index template ansible.builtin.uri: url: http://localhost:9200/_index_template/logs-drive.abci method: PUT @@ -52,16 +52,3 @@ body: index: number_of_replicas: 0 - -- name: Set up Kibana UI - ansible.builtin.uri: - url: http://localhost:5601/api/saved_objects/_bulk_create - method: POST - status_code: 200 - user: '{{ elastic_username }}' - password: '{{ elastic_password }}' - body_format: json - force_basic_auth: true - headers: - kbn-xsrf: true - body: "{{ lookup('file', 'files/infrastructure-ui-source.json') }}" diff --git a/ansible/roles/elastic_stack/tasks/main.yml b/ansible/roles/elastic_stack/tasks/main.yml index 07015c25..f6e8ec6c 100644 --- 
a/ansible/roles/elastic_stack/tasks/main.yml +++ b/ansible/roles/elastic_stack/tasks/main.yml @@ -103,6 +103,7 @@ recreate: always services: - kibana + when: inventory_hostname == "logs-1" - name: Wait for Kibana to be available ansible.builtin.uri: @@ -114,8 +115,23 @@ until: response.status == 200 and response.json.status.overall.level == "available" retries: 10 delay: 10 + when: inventory_hostname == "logs-1" -- name: Configure Elasticsearch and Kibana +- name: Set up Kibana UI + ansible.builtin.uri: + url: http://localhost:5601/api/saved_objects/_bulk_create + method: POST + status_code: 200 + user: '{{ elastic_username }}' + password: '{{ elastic_password }}' + body_format: json + force_basic_auth: true + headers: + kbn-xsrf: true + body: "{{ lookup('file', 'files/infrastructure-ui-source.json') }}" + when: inventory_hostname == "logs-1" + +- name: Configure Elasticsearch cluster ansible.builtin.import_tasks: configure_cluster.yml run_once: true delegate_to: '{{ play_hosts | first }}' diff --git a/ansible/roles/metricbeat/tasks/main.yml b/ansible/roles/metricbeat/tasks/main.yml new file mode 100644 index 00000000..20212bf5 --- /dev/null +++ b/ansible/roles/metricbeat/tasks/main.yml @@ -0,0 +1,67 @@ +--- + +- name: Set up metricbeat + ansible.builtin.include_role: + name: elastic.beats + vars: + beats_version: "{{ elastic_version }}" + beat: metricbeat + beat_conf: + setup: + dashboards: + enabled: true + kibana: + host: "{{ hostvars['logs-1'].private_ip }}:5601" + username: "{{ elastic_username }}" + password: "{{ elastic_password }}" + metricbeat: + modules: + - module: system + metricsets: + - cpu # CPU usage + - load # CPU load averages + - memory # Memory usage + - network # Network IO + - process # Per process metrics + - process_summary # Process summary + - uptime # System Uptime + - socket_summary # Socket summary + - core # Per CPU core usage + - diskio # Disk IO + - fsstat # File system summary metrics + - socket # Sockets and connection info (linux only) + enabled: true + period: 10s + processes: ['.*'] + + # Configure the metric types that are included by these metricsets. + cpu.metrics: ["percentages", "normalized_percentages"] # The other available option is ticks. + core.metrics: ["percentages"] # The other available option is ticks. + - module: docker + metricsets: + - "container" + - "cpu" + - "diskio" + - "event" + - "healthcheck" + - "info" + - "memory" + - "network" + # - "network_summary" + hosts: ["unix:///var/run/docker.sock"] + period: 10s + enabled: true + output_conf: + elasticsearch: + hosts: >- + {{ + query('inventory_hostnames', 'logs_nodes') | + map('extract', hostvars, ['private_ip']) | + map('string') | + product([':9200']) | + map('join') | + list + }} + username: "{{ elastic_username }}" + password: "{{ elastic_password }}" + when: metricbeat_enabled
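
A sketch of what the shared Elasticsearch hosts expression (used both in
core_filebeat/vars/common.yml and in the metricbeat output above) resolves to,
assuming a hypothetical inventory where the logs_nodes group contains
"logs-1" (private_ip 10.0.0.11) and "logs-2" (private_ip 10.0.0.12):

    # query('inventory_hostnames', 'logs_nodes')   -> ['logs-1', 'logs-2']
    # | map('extract', hostvars, ['private_ip'])   -> ['10.0.0.11', '10.0.0.12']
    # | map('string') | product([':9200'])
    #   | map('join') | list                       -> ['10.0.0.11:9200', '10.0.0.12:9200']
    filebeat_output_elasticsearch_hosts:
      - "10.0.0.11:9200"
      - "10.0.0.12:9200"

So every beat ships directly to all Elasticsearch nodes in logs_nodes instead of
only "logs-1", while Kibana and its dashboard/saved-object setup remain pinned
to "logs-1" by the added `when: inventory_hostname == "logs-1"` conditions.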