diff --git a/Karamelfile b/Karamelfile
index 3c8c2bd2..16ed550c 100755
--- a/Karamelfile
+++ b/Karamelfile
@@ -18,6 +18,9 @@ dependencies:
   - recipe: hopslog::_filebeat-jupyter
     global:
       - hopslog::default
+  - recipe: hopslog::_filebeat-git
+    global:
+      - hopslog::default
   - recipe: hopslog::_filebeat-services
     global:
       - hopslog::default
diff --git a/attributes/default.rb b/attributes/default.rb
index 0f849281..12a4de20 100644
--- a/attributes/default.rb
+++ b/attributes/default.rb
@@ -20,6 +20,7 @@
 default['logstash']['beats']['python_jobs_port'] = "5051"
 default['logstash']['beats']['jupyter_port'] = "5052"
 default['logstash']['beats']['services_port'] = "5053"
+default['logstash']['beats']['git_port'] = "5054"
 
 default['logstash']['http']['port'] = "9600"
 default['logstash']['systemd'] = "true"
diff --git a/metadata.rb b/metadata.rb
index 99888a99..8e4e1210 100755
--- a/metadata.rb
+++ b/metadata.rb
@@ -59,6 +59,10 @@
           :description => "Filebeat port for jupyter server logs",
           :type => "string"
 
+attribute "logstash/beats/git_port",
+          :description => "Filebeat port for logs of a container executing a git command",
+          :type => "string"
+
 attribute "kibana/url",
           :description => "Url to hopslog binaries",
           :type => "string"
diff --git a/recipes/_filebeat-git.rb b/recipes/_filebeat-git.rb
new file mode 100644
index 00000000..5fa10a8e
--- /dev/null
+++ b/recipes/_filebeat-git.rb
@@ -0,0 +1,113 @@
+# Sets up a dedicated filebeat instance (config, start/stop scripts, systemd unit)
+# for the git log files matched by log_glob below.
+
+file "#{node['filebeat']['base_dir']}/filebeat.xml" do
+  action :delete
+end
+
+# Default to glassfish; use the hopsworks user when the node defines one.
+git_owner = "glassfish"
+git_group = "glassfish"
+if node.attribute?("hopsworks") && node['hopsworks'].attribute?("user")
+  git_owner = node['hopsworks']['user']
+  git_group = node['hopsworks']['user']
+end
+
+# Add the git owner to the elastic group, unless install/external_users is "true"
+group node['elastic']['group'] do
+  action :modify
+  members [git_owner]
+  append true
+  not_if { node['install']['external_users'].casecmp("true") == 0 }
+end
+
+log_glob = "#{node['install']['dir']}/staging/private_dirs/*/git_logs/*.log"
+
+logstash_fqdn = consul_helper.get_service_fqdn("logstash")
+logstash_endpoint = "#{logstash_fqdn}:#{node['logstash']['beats']['git_port']}"
+
+template "#{node['filebeat']['base_dir']}/filebeat-git.yml" do
+  source "filebeat.yml.erb"
+  user git_owner
+  group git_group
+  mode 0655
+  variables({
+    :paths => [log_glob],
+    :multiline => false,
+    :fields => false,
+    :logstash_endpoint => logstash_endpoint,
+    :log_name => "git"
+  })
+end
+
+template "#{node['filebeat']['base_dir']}/bin/start-filebeat-git.sh" do
+  source "start-filebeat.sh.erb"
+  user git_owner
+  group git_group
+  mode 0750
+  variables({
+    :pid => "#{node['filebeat']['pid_dir']}/filebeat-git.pid",
+    :config_file => "filebeat-git.yml"
+  })
+end
+
+template "#{node['filebeat']['base_dir']}/bin/stop-filebeat-git.sh" do
+  source "stop-filebeat.sh.erb"
+  user git_owner
+  group git_group
+  mode 0750
+  variables({
+    :pid => "#{node['filebeat']['pid_dir']}/filebeat-git.pid",
+    :user => git_owner
+  })
+end
+
+service_name = "filebeat-git"
+
+service service_name do
+  provider Chef::Provider::Service::Systemd
+  supports :restart => true, :stop => true, :start => true, :status => true
+  action :nothing
+end
+
+case node['platform_family']
+when "rhel"
+  systemd_script = "/usr/lib/systemd/system/#{service_name}.service"
+when "debian"
+  systemd_script = "/lib/systemd/system/#{service_name}.service"
+end
+
+deps = ""
+if exists_local("hopslog", "default")
+  deps = "logstash.service"
+end
+
+template systemd_script do
+  source "filebeat.service.erb"
+  owner "root"
+  group "root"
+  mode 0754
+  variables({
+    :user => git_owner,
+    :pid => "#{node['filebeat']['pid_dir']}/filebeat-git.pid",
+    :exec_start => "#{node['filebeat']['base_dir']}/bin/start-filebeat-git.sh",
+    :exec_stop => "#{node['filebeat']['base_dir']}/bin/stop-filebeat-git.sh",
+    :deps => deps,
+  })
+  if node['services']['enabled'] == "true"
+    notifies :enable, resources(:service => service_name)
+  end
+  notifies :restart, resources(:service => service_name)
+end
+
+kagent_config service_name do
+  service "ELK"
+  log_file "#{node['filebeat']['base_dir']}/log/git"
+end
+
+
+if conda_helpers.is_upgrade
+  kagent_config service_name do
+    action :systemd_reload
+  end
+end
diff --git a/recipes/_logstash.rb b/recipes/_logstash.rb
index ea8973d1..46ca72e8 100644
--- a/recipes/_logstash.rb
+++ b/recipes/_logstash.rb
@@ -60,6 +60,17 @@
   })
 end
 
+template"#{node['logstash']['base_dir']}/config/git.conf" do
+  source "git.conf.erb"
+  owner node['hopslog']['user']
+  group node['hopslog']['group']
+  mode 0655
+  variables({
+    :elastic_addr => elastic_addrs,
+    :hops_ca => hops_ca
+  })
+end
+
 template"#{node['logstash']['base_dir']}/config/services.conf" do
   source "services.conf.erb"
   owner node['hopslog']['user']
diff --git a/templates/default/consul/logstash-consul.hcl.erb b/templates/default/consul/logstash-consul.hcl.erb
index c75228d1..008d6e3c 100644
--- a/templates/default/consul/logstash-consul.hcl.erb
+++ b/templates/default/consul/logstash-consul.hcl.erb
@@ -62,5 +62,17 @@ services = [
             id = "logstash-beats-jupyter-check"
             alias_service = "logstash/http"
         }
+    },
+    {
+        id = "logstash/git"
+        name = "logstash"
+        tags = [
+            "git"
+        ]
+        port = <%= node['logstash']['beats']['git_port'] %>
+        check = {
+            id = "logstash-beats-git-check"
+            alias_service = "logstash/http"
+        }
     }
 ]
diff --git a/templates/default/git.conf.erb b/templates/default/git.conf.erb
new file mode 100644
index 00000000..ba618c0a
--- /dev/null
+++ b/templates/default/git.conf.erb
@@ -0,0 +1,125 @@
+# Pipeline for git logs received over beats: event fields are derived from the log file path.
+input {
+  beats {
+    port => <%= node['logstash']['beats']['git_port'] %>
+  }
+}
+
+filter {
+  dissect {
+    mapping => {"message" => "%{log_message}"}
+  }
+
+  # Ignore failed parse entries. Dissect filter patterns can be tested with https://dissect-tester.jorgelbg.me/
+  if "_dissectfailure" in [tags] {
+    drop { }
+  }
+
+  # For backwards compatibility with filebeat v6.x, we need to pick the correct field
+  mutate {
+    add_field => [ "filepath", "" ]
+  }
+
+  if [log][file][path] {
+    mutate {
+      replace => [ "filepath", "%{[log][file][path]}"]
+    }
+  } else if [source] {
+    mutate {
+      replace => [ "filepath", "%{[source]}"]
+    }
+  }
+
+  if "command_output.log" in [filepath] {
+    mutate {
+      replace => [ "logtype", "commandexecution" ]
+    }
+  } else if "hopsfs_mount.log" in [filepath] {
+    mutate {
+      replace => [ "logtype", "hopsfsmount" ]
+    }
+  }
+
+  # gitinfo is the part of the file path between "logs/" and ".log"; it is split below as
+  # <project>__<application>--s--<executionid>--s--<repository>--s--<command>
+  grok {
+    match => { "filepath" => ".+logs/(%{GREEDYDATA:gitinfo})[.]log" }
+  }
+
+  mutate {
+    add_field => [ "project", "" ]
+  }
+
+  mutate {
+    add_field => [ "executionid", ""]
+  }
+
+  mutate {
+    add_field => [ "command", ""]
+  }
+
+  mutate {
+    add_field => [ "repository", ""]
+  }
+
+  mutate {
+    add_field => [ "jobname", "gitcommandexecution" ]
+  }
+
+  mutate {
+    add_field => [ "application", "" ]
+  }
+
+  mutate {
+    split => ["gitinfo", "__"]
+  }
+
+  mutate {
+    replace => [ "project", "%{[gitinfo][0]}"]
+  }
+
+  mutate {
+    add_field => [ "userinfo", "%{[gitinfo][1]}" ]
+  }
+
+  mutate {
+    split => ["userinfo", "--s--"]
+  }
+
+  mutate {
+    replace => [ "application", "%{[userinfo][0]}"]
+  }
+
+  mutate {
+    replace => [ "executionid", "%{[userinfo][1]}"]
+  }
+
+  mutate {
+    replace => [ "repository", "%{[userinfo][2]}"]
+  }
+
+  mutate {
+    replace => [ "command", "%{[userinfo][3]}"]
+  }
+
+  mutate {
+    remove_field => ["fields", "source", "log", "ecs", "message", "agent", "prospector", "beat", "tags", "gitinfo","host"]
+  }
+}
+
+
+output {
+opensearch {
+hosts => [<%= @elastic_addr %>]
+index => "%{project}_git-%{+YYYY.MM.dd}"
+<% if node['elastic']['opensearch_security']['enabled'] %>
+    user => "<%=node['elastic']['opensearch_security']['logstash']['username']%>"
+    password => "<%=node['elastic']['opensearch_security']['logstash']['password']%>"
+    <% if node['elastic']['opensearch_security']['https']['enabled'] %>
+    cacert => "<%= @hops_ca %>"
+    ssl => true
+    <% end %>
+<% end %>
+}
+}
+
diff --git a/templates/default/pipelines.yml.erb b/templates/default/pipelines.yml.erb
index 5a017811..2bbd20c8 100644
--- a/templates/default/pipelines.yml.erb
+++ b/templates/default/pipelines.yml.erb
@@ -6,6 +6,8 @@
   path.config: "<%= node['logstash']['base_dir'] %>/config/kube_jobs.conf"
 - pipeline.id: jupyter
   path.config: "<%= node['logstash']['base_dir'] %>/config/jupyter.conf"
+- pipeline.id: git
+  path.config: "<%= node['logstash']['base_dir'] %>/config/git.conf"
 # this is needed to be able to send the same event to both the services and
 # the managed_cloud_services pipelines, we only set the services pipeline here,
 # the cloud one is added at runtime if needed