Skip to content

Commit

Permalink
add ingest-stats api (idaholab#611) needed for automated testing (ida…
Browse files Browse the repository at this point in the history
  • Loading branch information
mmguero committed Nov 4, 2024
1 parent 9756b44 commit 0dd04ce
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 4 deletions.
48 changes: 45 additions & 3 deletions api/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from collections import defaultdict, OrderedDict
from collections.abc import Iterable
from datetime import datetime
from datetime import datetime, timezone
from flask import Flask, jsonify, request
from requests.auth import HTTPBasicAuth
from urllib.parse import urlparse
Expand Down Expand Up @@ -239,14 +239,14 @@

if databaseMode == malcolm_utils.DatabaseMode.ElasticsearchRemote:
import elasticsearch as DatabaseImport
from elasticsearch_dsl import Search as SearchClass
from elasticsearch_dsl import Search as SearchClass, A as AggregationClass

DatabaseClass = DatabaseImport.Elasticsearch
if opensearchHttpAuth:
DatabaseInitArgs['basic_auth'] = opensearchHttpAuth
else:
import opensearchpy as DatabaseImport
from opensearchpy import Search as SearchClass
from opensearchpy import Search as SearchClass, A as AggregationClass

DatabaseClass = DatabaseImport.OpenSearch
if opensearchHttpAuth:
Expand Down Expand Up @@ -1074,6 +1074,48 @@ def ready():
)


@app.route(
    f"{('/' + app.config['MALCOLM_API_PREFIX']) if app.config['MALCOLM_API_PREFIX'] else ''}/ingest-stats",
    methods=['GET'],
)
def ingest_stats():
    """Provide an aggregation of each log source (host.name) with its latest event.ingested
    time. This can be used to know the most recent time a document was written from each
    network sensor.

    Hosts whose bucket has no max(event.ingested) value (i.e. the aggregation reports
    null because none of that host's documents carry the field) are omitted from the
    result rather than causing the request to fail.

    Parameters
    ----------
    request : Request
        Uses 'doctype' from request arguments
    Returns
    -------
    fields
        A dict where key is host.name and value is max(event.ingested) for that host,
        rendered as an ISO 8601 UTC timestamp truncated to whole seconds
    """
    global databaseClient
    global SearchClass
    global AggregationClass

    # size=0: only the aggregation results are needed, not the matching documents
    s = SearchClass(
        using=databaseClient,
        index=index_from_args(get_request_arguments(request)),
    ).extra(size=0)

    # NOTE(review): no `size` is passed to the terms aggregation, so the backend's default
    # bucket limit applies (documented as 10 for OpenSearch/Elasticsearch) - confirm whether
    # deployments with more log sources than that need a larger value here.
    hostAgg = AggregationClass('terms', field='host.name')
    maxIngestAgg = AggregationClass('max', field='event.ingested')
    s.aggs.bucket('host_names', hostAgg).metric('max_event_ingested', maxIngestAgg)
    response = s.execute()

    latestIngestByHost = {}
    for bucket in response.aggregations.host_names.buckets:
        maxIngestMs = bucket.max_event_ingested.value
        if maxIngestMs is None:
            # the max aggregation had nothing to aggregate for this host; dividing None
            # below would raise TypeError and turn the whole response into an error
            continue
        # the aggregation value is treated as epoch milliseconds, hence the division by 1000
        latestIngestByHost[bucket.key] = (
            datetime.fromtimestamp(maxIngestMs / 1000, timezone.utc).replace(microsecond=0).isoformat()
        )

    return jsonify(latestIngestByHost)


@app.route(
f"{('/' + app.config['MALCOLM_API_PREFIX']) if app.config['MALCOLM_API_PREFIX'] else ''}/ping", methods=['GET']
)
Expand Down
17 changes: 17 additions & 0 deletions docs/api-ingest-stats.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Document Ingest Statistics

`GET` - /mapi/ingest-stats

Executes an OpenSearch [bucket aggregation](https://opensearch.org/docs/latest/opensearch/bucket-agg/) query for the `host.name` field and its maximum (i.e., most recent) `event.ingested` UTC time value for all of Malcolm's indexed network traffic metadata.

This can be used to know the most recent time a log was indexed for each network sensor.

Example output:

```
{
"malcolm": "2024-11-04T14:58:57+00:00",
"sensor_a": "2024-11-04T14:57:41+00:00",
"sensor_b": "2024-11-04T14:58:59+00:00"
}
```
3 changes: 2 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# <a name="API"></a>API

* [Field Aggregations](api-aggregations.md)
* [Document Ingest Statistics](api-ingest-stats.md)
* [Document Lookup](api-document-lookup.md)
* [Event Logging](api-event-logging.md)
* [Field Aggregations](api-aggregations.md)
* [Fields](api-fields.md)
* [Indices](api-indices.md)
* [Ping](api-ping.md)
Expand Down

0 comments on commit 0dd04ce

Please sign in to comment.