cgar_log2csv

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
cgar_log2csv

Reads `cgar_collect` log and prints data as CSV to stdout.


If new controllers or controller files need to be added:
-------------------------------------------------------

1. Extend global dictionary `ATTRIBUTE_MAP`.

2. Add support for the controller file in `convert()`


Ideas/ToDos:
-----------
    - Be more flexible with date and time.
    - Add converters (Byte to KiB, etc.).
    - Check if the tool can handle logs where the content (added/removed cgroups/controllers) has changed.
    - Sometimes, when used in a pipe, a cosmetic message still appears:
        Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
        BrokenPipeError: [Errno 32] Broken pipe
      It would be nice to get rid of it.


Changelog:
----------
    05.02.2023      v0.1beta        - first bits are done
    06.02.2023      v0.2            - fixed bug where complex attributes (like `memory.pressure`)
                                      have not been resolved
                                    - introduced global cgroup attribute mapping for easier extensibility
                                    - fixed classic BrokenPipeError error
                                    - dared to remove beta status
    08.02.2023      v0.3            - rework (removed 'controller' argument and instead enhanced
                                      syntax of `-f` to allow fields of attribute files)
                                    - introduced regex for cgroup arguments
                                    - added support for 'memory.{min,high,max,swap.high,swap.max}'
                                    - added support for memory conversion (`--unit-mem`)
                                    - fixed a bug if `-l` is used
                                    - fixed bug in `ATTRIBUTE_MAP`
                                    - fixed a few cosmetic bugs in output like missing parenthesis
    08.02.2023      v0.4            - now using a generator to read the log file
    08.02.2023      v0.4.1          - fixed bug if a cgroup is missing a controller file
    09.02.2023      v0.4.2          - fixed a comment
                                    - reduced number of digits on memory calculations from 2 to 1
    11.02.2023      v0.5            - added support for xz compressed log files

"""

import argparse
import datetime
import json
import lzma
import os
import re
import sys


# Version of this tool; shown as the epilog of the `--help` output.
__version__ = '0.5'


# Maps every supported controller file (key) to the list of resolved column
# names it contributes (value). Single-value files map to themselves; files
# holding several entries (like `memory.pressure` or `memory.stat`) are
# expanded to one `<file>::<sub key>` name per entry, so that in the end we
# need exactly one name (table column) per value.
# The `convert()` function must return the resolved columns!
ATTRIBUTE_MAP = {
    'memory.current': ['memory.current'],
    'memory.swap.current': ['memory.swap.current'],
    'memory.pressure': [
        f'memory.pressure::{scope}_{metric}'
        for scope in ('some', 'full')
        for metric in ('avg10', 'avg60', 'avg300', 'total')
    ],
    'memory.min': ['memory.min'],
    'memory.high': ['memory.high'],
    'memory.max': ['memory.max'],
    'memory.swap.high': ['memory.swap.high'],
    'memory.swap.max': ['memory.swap.max'],
    'memory.stat': [
        f'memory.stat::{entry}'
        for entry in (
            'anon',
            'file',
            'kernel_stack',
            'pagetables',
            'percpu',
            'sock',
            'shmem',
            'file_mapped',
            'file_dirty',
            'file_writeback',
            'swapcached',
            'anon_thp',
            'file_thp',
            'shmem_thp',
            'inactive_anon',
            'active_anon',
            'inactive_file',
            'active_file',
            'unevictable',
            'slab_reclaimable',
            'slab_unreclaimable',
            'slab',
            'workingset_refault_anon',
            'workingset_refault_file',
            'workingset_activate_anon',
            'workingset_activate_file',
            'workingset_restore_anon',
            'workingset_restore_file',
            'workingset_nodereclaim',
            'pgfault',
            'pgmajfault',
            'pgrefill',
            'pgscan',
            'pgsteal',
            'pgactivate',
            'pgdeactivate',
            'pglazyfree',
            'pglazyfreed',
            'thp_fault_alloc',
            'thp_collapse_alloc',
        )
    ],
}


def argument_parse():
    """Evaluates the command line arguments.

    Returns:
        The `argparse.Namespace` with all parsed options. In addition to the
        declared options it carries `time_range`, a tuple `(start, end)` of
        naive `datetime.datetime` objects built from `--start-time` and
        `--end-time` (`None` for a bound that was not given).

    Exits with status 1 if a time argument does not match
    `%Y-%m-%dT%H:%M:%S` and with status 2 if the logfile is not a readable
    regular file. Diagnostics go to stderr so they never mix with CSV output.
    """

    # All supported attributes (with and without their sub keys). Sorted, so
    # that argparse lists the valid choices in a stable order on errors.
    supported = set(ATTRIBUTE_MAP.keys())
    supported.update(item for sublist in ATTRIBUTE_MAP.values() for item in sublist)
    supported = sorted(supported)

    parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
                       description='Extracts cgroup data from cgar collector log and prints them as CSV on stdout.',
                       add_help=True,
                       epilog=f'v{__version__}')

    # Global flags and arguments.
    parser.add_argument('--start-time',
                        metavar='TIME',
                        dest='start_time',
                        type=str,
                        action='store',
                        required=False,
                        help='only print data from this time forward (no timezone info)')
    parser.add_argument('--end-time',
                        metavar='TIME',
                        dest='end_time',
                        type=str,
                        action='store',
                        required=False,
                        help='only print data to this time (no timezone info)')
    parser.add_argument('-l', '--log',
                        dest='logfile',
                        type=str,
                        action='store',
                        required=False,
                        default='/var/log/cgar/cgar',
                        help='logfile to parse (default: "/var/log/cgar/cgar")')
    parser.add_argument('-f', '--filter',
                        metavar='FILTER',
                        dest='filter',
                        type=str,
                        action='append',
                        choices=supported,
                        required=False,
                        help='print only these attributes')
    parser.add_argument('--unit-mem',
                        metavar='UNIT',
                        dest='memory_unit',
                        type=str,
                        action='store',
                        choices=['B', 'kiB', 'MiB', 'GiB'],
                        default='B',
                        required=False,
                        help='display memory in this unit (default: B(ytes))')
    parser.add_argument('-s', '--separator',
                        dest='separator',
                        type=str,
                        action='store',
                        default=',',
                        required=False,
                        help='separator for the output (default: ,)')
    parser.add_argument('-r', '--regex',
                        dest='use_regex',
                        action='store_true',
                        default=False,
                        required=False,
                        help='use regular expressions for CGROUP argument')
    parser.add_argument('--quote-strings',
                        dest='quotestrings',
                        action='store_true',
                        default=False,
                        required=False,
                        help='if strings in CSV output shall be quoted (default: false)')
    parser.add_argument('--group-by',
                        dest='group_by',
                        default='cgroup',
                        choices=['cgroup', 'attribute'],
                        required=False,
                        help='how cgroup attribute columns shall be grouped (default: cgroup)')
    parser.add_argument('cgroups',
                        metavar='CGROUP',
                        nargs='+',
                        type=str,
                        help='extract data for this cgroup')

    args = parser.parse_args()

    # Convert times into datetime objects.
    time_format = '%Y-%m-%dT%H:%M:%S'

    def parse_time(text):
        return datetime.datetime.strptime(text, time_format) if text else None

    try:
        args.time_range = (parse_time(args.start_time), parse_time(args.end_time))
    except ValueError as err:
        print(f'Error in time range: {err}', file=sys.stderr)
        sys.exit(1)

    # Check if the logfile is available: it has to be a regular file AND
    # readable. (The negation must cover both conditions.)
    if not (os.path.isfile(args.logfile) and os.access(args.logfile, os.R_OK)):
        print(f'Logfile "{args.logfile}" does not exist or is not readable!', file=sys.stderr)
        sys.exit(2)

    return args


def get_log_line(filename: str, format: str='text') -> str:
    """Generator to walk through the given file line by line.

    The first bytes of the file are probed for the xz magic number; if it is
    found the file is decompressed on the fly, otherwise it is read as plain
    text. In both cases the lines are yielded as `str`.

    Args:
        filename: Path of the log file (plain text or xz compressed).
        format: Unused; only kept so existing callers keep working.

    Yields:
        Each line of the file as `str`, including its line ending.

    If the file cannot be opened or read, a message is printed to stderr and
    the process exits with status 2.
    """

    xz_magic = b'\xfd7zXZ\x00'

    try:
        # Check if the file is either text or xz.
        with open(filename, 'rb') as f:
            is_xz = f.read(len(xz_magic)) == xz_magic
        open_file = lzma.open if is_xz else open

        # Explicit text mode: for `lzma.open` a plain 'r' means *binary*,
        # which would make compressed logs yield bytes instead of str.
        with open_file(filename, 'rt', encoding='utf-8') as f:
            for line in f:
                yield line
    except Exception as err:
        # stderr, because stdout carries the CSV data.
        print(f'Error reading "{filename}": {err}', file=sys.stderr)
        sys.exit(2)


def convert(attr: str, value: str, memory_unit='B') -> dict:
    """Takes a cgroup attribute value pair and returns a dict
    with the attribute columns and normalized values.

    Args:
        attr: Name of the controller file, e.g. `memory.stat`.
        value: Raw content of that controller file as found in the log.
        memory_unit: One of 'B', 'kiB', 'MiB' or 'GiB'. Memory values are
            divided accordingly; for anything but 'B' the result is a float
            rounded to one digit. Unknown units fall back to 'B' with a
            warning on stderr.

    Returns:
        A dict mapping resolved column names (`<attr>` or `<attr>::<sub key>`)
        to their converted values, or `None` if the attribute is unknown or
        the value could not be parsed (a message is printed to stderr).
    """

    # Set memory conversion.
    divisor = {'B': 1, 'kiB': 1024, 'MiB': 1024*1024, 'GiB': 1024*1024*1024}.get(memory_unit)
    if divisor is None:
        print(f'Unknown memory unit "{memory_unit}"! Using "B(yte)".', file=sys.stderr)
        divisor = 1

    def memconv(raw):
        if divisor == 1:
            return int(raw)
        return float(f'{int(raw)/divisor:.1f}')

    try:
        if attr in ['memory.current', 'memory.swap.current', 'memory.min']:
            return {attr: memconv(value)}

        if attr == 'memory.pressure':
            # some avg10=0.00 avg60=0.00 avg300=0.00 total=0
            # full avg10=0.00 avg60=0.00 avg300=0.00 total=0
            # Parse each line by its leading keyword, so the "full" columns
            # really come from the "full" line and a trailing newline or
            # blank line does not break the conversion.
            pressure = {}
            for line in value.splitlines():
                fields = line.split()
                if fields:
                    pressure[fields[0]] = dict(item.split('=', 1) for item in fields[1:])
            return {f'{attr}::{scope}_{metric}': float(pressure[scope][metric])
                    for scope in ('some', 'full')
                    for metric in ('avg10', 'avg60', 'avg300', 'total')}

        if attr in ['memory.high', 'memory.max', 'memory.swap.high', 'memory.swap.max']:
            # These limits are either a byte value or the literal "max".
            if isinstance(value, str) and value.strip() == 'max':
                return {attr: 'max'}
            return {attr: memconv(value)}

        if attr == 'memory.stat':
            # anon 1187840
            # file 3383296
            # kernel_stack 0
            # ...
            # NOTE(review): every entry is passed through the memory
            # conversion, including ones that look like event counters
            # (pgfault, workingset_*, ...) -- confirm whether that is wanted.
            tokens = value.split()
            return {f'{attr}::{name}': memconv(raw)
                    for name, raw in zip(tokens[::2], tokens[1::2])}

        print(f'Conversion of cgroup attribute found unknown attribute: {attr}', file=sys.stderr)
        return None

    except Exception as err:
        print(f'Conversion of cgroup attribute value "{attr} = {value}" failed: {err}', file=sys.stderr)
        return None


def print_csv(cols: list, separator: str = ',', quote_string : bool = True) -> None:
    """Writes the given values as one CSV row to stdout.

    Args:
        cols: The cells of the row, in output order.
        separator: String placed between two cells.
        quote_string: If true, cells of type `str` are wrapped in double
            quotes; all other cells are rendered with `str()`.
    """

    def render(cell):
        # Only real strings get quotes, numbers are printed as they are.
        if quote_string and isinstance(cell, str):
            return f'"{cell}"'
        return str(cell)

    print(separator.join(map(render, cols)))


def main():
    """Reads the log given on the command line and prints it as CSV.

    The header row is printed first (`timestamp` plus one
    `<cgroup>::<attribute>` column per requested combination), followed by
    one row per log entry inside the requested time range. Values missing
    in an entry are printed as `-`.

    Exit status: 0 on success or if the reader of stdout went away (broken
    pipe), 3 on any other runtime error. Helpers called from here may exit
    with their own status codes.
    """

    # Parse command line arguments.
    arguments = argument_parse()

    try:

        # Do a pre-parsing of the logfile if regex (`-r`) are used.
        # This is necessary to extract the header names matching the regex,
        # because the headers must be printed first.
        expanded_cgroups = set()
        if arguments.use_regex:
            patterns = [re.compile(cgroup_regex) for cgroup_regex in arguments.cgroups]
            for line in get_log_line(arguments.logfile):
                if not line.strip():
                    continue   # tolerate blank lines in the log
                for dataset in json.loads(line).values():

                    # Expand cgroup arguments (regex) with current dataset.
                    expanded_cgroups.update(cg for cg in dataset.keys()
                                            if any(p.search(cg) for p in patterns))

        # We always need resolved attributes from now on (eg. `memory.stat::anon`... instead of `memory.stat`),
        # therefore we convert the given filter into a list of resolved attributes as well as a list of the
        # controller files associated with them. Without any `-f` all known attributes are printed.
        # Lists (de-duplicated below) instead of sets keep the column order stable between runs.
        resolved_attributes = []
        controller_files = []
        for selected in arguments.filter or ATTRIBUTE_MAP:
            if '::' in selected:
                resolved_attributes.append(selected)
                controller_files.append(selected.split('::')[0])
            else:
                resolved_attributes.extend(ATTRIBUTE_MAP[selected])
                controller_files.append(selected)
        resolved_attributes = list(dict.fromkeys(resolved_attributes))
        controller_files = list(dict.fromkeys(controller_files))

        # Depending on the use of regex, the required cgroups are coming either directly
        # from the cgroup argument (in the given order, duplicates removed) or the
        # (further above) resolved regular expressions for them.
        if arguments.use_regex:
            required_cgroups = sorted(expanded_cgroups)
        else:
            required_cgroups = list(dict.fromkeys(arguments.cgroups))

        if arguments.group_by == 'attribute':
            headers = [(cg, attrib) for attrib in resolved_attributes for cg in required_cgroups]
        else:
            headers = [(cg, attrib) for cg in required_cgroups for attrib in resolved_attributes]

        # Print the CSV header first.
        print_csv(['timestamp'] + ['::'.join(h) for h in headers], separator=arguments.separator, quote_string=arguments.quotestrings)

        start_time, end_time = arguments.time_range

        # Iterate over log (one JSON object each line).
        for line in get_log_line(arguments.logfile):
            if not line.strip():
                continue   # tolerate blank lines in the log

            # Usually each line has only one timestamp and dataset, but
            # maybe this will change.
            for timestamp, dataset in json.loads(line).items():

                # Check if entry is in given time range.
                timestamp_short = datetime.datetime.strptime(timestamp[:19], '%Y-%m-%dT%H:%M:%S')
                if start_time and timestamp_short < start_time:
                    continue
                if end_time and timestamp_short > end_time:
                    continue

                cgroup_data = {}

                for cgroup in required_cgroups:

                    values = cgroup_data[cgroup] = {}

                    # Skip if the requested cgroup is not in the data set.
                    if cgroup not in dataset:
                        continue
                    controller_data = dataset[cgroup]
                    if not isinstance(controller_data, dict):
                        continue

                    # Get the values for all requested controller files.
                    for controller_file in controller_files:
                        if controller_file not in controller_data:
                            continue
                        converted = convert(controller_file, controller_data[controller_file], memory_unit=arguments.memory_unit)
                        if converted:   # `None` means the conversion failed
                            values.update(converted)

                # Print a value row ('-' = attribute not found in current dataset).
                row = [timestamp] + [cgroup_data[cg].get(attrib, '-') for cg, attrib in headers]
                print_csv(row, separator=arguments.separator, quote_string=arguments.quotestrings)

        # Force the flush while we are still inside the `try`, so a broken
        # pipe is handled below instead of surfacing at interpreter shutdown.
        sys.stdout.flush()

    except BrokenPipeError:
        # Handle BrokenPipe issue: https://docs.python.org/3/library/signal.html#note-on-sigpipe
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(0)

    except Exception as err:
        print(f'Sadly a runtime error ocurred: {err}', file=sys.stderr)
        sys.exit(3)

    # Bye.
    sys.exit(0)


if __name__ == "__main__":
    main()