Merge pull request #1341 from fedspendingtransparency/stg
Mid-Sprint 67 Production Deployment
nmonga91 authored Jul 18, 2018
2 parents 607d406 + 2bffa1a commit 08e5865
Showing 42 changed files with 883 additions and 92 deletions.
108 changes: 108 additions & 0 deletions usaspending_api/accounts/tests/unit/test_account_download_filter.py
@@ -0,0 +1,108 @@
import pytest

from model_mommy import mommy

from usaspending_api.accounts.models import AppropriationAccountBalances
from usaspending_api.accounts.v2.filters.account_download import account_download_filter
from usaspending_api.awards.models import FinancialAccountsByAwards
from usaspending_api.financial_activities.models import FinancialAccountsByProgramActivityObjectClass


@pytest.mark.django_db
def test_fyq_filter():
""" Ensure the fiscal year and quarter filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount')

# Create file A models
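    # Federal fiscal year 1700 Q1 covers 1699-10-01 through 1699-12-31, so only
    # the first record below should match the fy=1700 / quarter=1 filter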
    mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas1,
               reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
    mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas2,
               reporting_period_start='1700-01-01', reporting_period_end='1700-03-31')

    queryset = account_download_filter('account_balances', AppropriationAccountBalances, {
        'fy': 1700,
        'quarter': 1
    })
    assert queryset.count() == 1


@pytest.mark.django_db
def test_federal_account_filter():
""" Ensure the fiscal year and quarter filter is working """
# Create FederalAccount models
fed_acct1 = mommy.make('accounts.FederalAccount')
fed_acct2 = mommy.make('accounts.FederalAccount')

# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', federal_account=fed_acct1)
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', federal_account=fed_acct2)

# Create file A models
mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas1,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas2,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')

queryset = account_download_filter('account_balances', AppropriationAccountBalances, {
'federal_account': fed_acct1.id,
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1


@pytest.mark.django_db
def test_cgac_agency_filter():
""" Ensure the CGAC agency filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='NOT')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC')

# Create file B models
mommy.make('financial_activities.FinancialAccountsByProgramActivityObjectClass',
treasury_account_id=tas1.treasury_account_identifier, reporting_period_start='1699-10-01',
reporting_period_end='1699-12-31')
mommy.make('financial_activities.FinancialAccountsByProgramActivityObjectClass',
treasury_account_id=tas2.treasury_account_identifier, reporting_period_start='1699-10-01',
reporting_period_end='1699-12-31')

# Create ToptierAgency models
mommy.make('references.ToptierAgency', toptier_agency_id=-9999, cgac_code='CGC')
mommy.make('references.ToptierAgency', toptier_agency_id=-9998, cgac_code='NOT')

# Filter by ToptierAgency (CGAC)
queryset = account_download_filter('program_activity_object_class', FinancialAccountsByProgramActivityObjectClass, {
'agency': '-9999',
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1


@pytest.mark.django_db
def test_frec_agency_filter():
""" Ensure the FREC agency filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC', fr_entity_code='FAKE')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC', fr_entity_code='FREC')

# Create file C models
mommy.make('awards.FinancialAccountsByAwards', treasury_account_id=tas1.treasury_account_identifier,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
mommy.make('awards.FinancialAccountsByAwards', treasury_account_id=tas2.treasury_account_identifier,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')

# Create ToptierAgency models
mommy.make('references.ToptierAgency', toptier_agency_id=-9999, cgac_code='FREC')
mommy.make('references.ToptierAgency', toptier_agency_id=-9998, cgac_code='FAKE')
mommy.make('references.ToptierAgency', toptier_agency_id=-9997, cgac_code='CGC')

# Filter by ToptierAgency (FREC)
queryset = account_download_filter('award_financial', FinancialAccountsByAwards, {
'agency': '-9999',
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1
4 changes: 3 additions & 1 deletion usaspending_api/accounts/v2/filters/account_download.py
@@ -19,7 +19,9 @@ def account_download_filter(account_type, download_table, filters, account_level
     if filters.get('agency', False) and filters['agency'] != 'all':
         agency = ToptierAgency.objects.filter(toptier_agency_id=filters['agency']).first()
         if agency:
-            query_filters['{}__agency_id'.format(tas_id)] = agency.cgac_code
+            # Agency is FREC if the cgac_code is 4 digits, CGAC otherwise
+            agency_filter_type = "fr_entity_code" if len(agency.cgac_code) == 4 else "agency_id"
+            query_filters['{}__{}'.format(tas_id, agency_filter_type)] = agency.cgac_code
         else:
             raise InvalidParameterException('Agency with that ID does not exist')

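The one-line length check above is the entire dispatch: FREC identifiers are four characters while CGAC codes are three. As a standalone illustration of the same rule (a minimal sketch; the function name is illustrative, not from the codebase):

```
def agency_tas_filter_key(tas_id, code):
    """Pick the TAS column an agency code should match against.
    FREC identifiers are 4 characters; CGAC codes are 3."""
    column = "fr_entity_code" if len(code) == 4 else "agency_id"
    return "{}__{}".format(tas_id, column)

# agency_tas_filter_key("treasury_account_identifier", "1601")  # -> "treasury_account_identifier__fr_entity_code"
# agency_tas_filter_key("treasury_account_identifier", "097")   # -> "treasury_account_identifier__agency_id"
```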
@@ -12,6 +12,7 @@
     "account_level": "treasury_account",
     "filters": {
         "agency": "3",
+        "federal_account": "15",
         "submission_type": "object_class_program_activity",
         "fy": "2018",
         "quarter": "2"
@@ -21,14 +22,14 @@
 ```
 
 ### Request Parameters Description
-* `account_level` - *required* - the account level: `treasury_account` or `federal_account` (must be `treasury_account` for Beta)
+* `account_level` - *required* - the account level: `treasury_account` or `federal_account`
 * `filters` - *required* - a JSON filter object with the following fields
-  * `agency` - *optional* - agency database id to include, `all` is also an option to include all agencies
-  * `federal_account` - *optional* - federal account id to include (based on the agency filter), out of scope for Beta
-  * `submission_type` - *required* - the file type requested: `account_balances` (File A) or `program_activity_object_class` (File B)
+  * `agency` - *optional* - agency database id, `all` is also an option to include all agencies
+  * `federal_account` - *optional* - federal account id
+  * `submission_type` - *required* - the file type requested: `account_balances` (File A), `program_activity_object_class` (File B), or `award_financial` (File C)
 * `fy` - *required* - fiscal year
 * `quarter` - *required* - fiscal quarter (1-4)
-* `file_format` - *optional* - must be `csv` for Beta
+* `file_format` - *optional* - must be `csv`
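For illustration, a request to this route could look like the following sketch (the host and URL path are assumptions based on the v2 API layout; they are not shown in this diff). The `submission_type` value here uses the File B identifier as spelled in the parameter list above and in the new tests:

```
import requests

payload = {
    "account_level": "treasury_account",
    "filters": {
        "agency": "3",
        "federal_account": "15",
        "submission_type": "program_activity_object_class",
        "fy": "2018",
        "quarter": "2"
    },
    "file_format": "csv"
}

# Hypothetical endpoint path, assumed from the v2 API layout
response = requests.post("https://api.usaspending.gov/api/v2/download/accounts/", json=payload)
print(response.json())
```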

### Response (JSON)

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2018-07-17 16:16
from __future__ import unicode_literals

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('awards', '0032_auto_20180614_1929'),
    ]

    operations = [
        migrations.RunSQL(sql="CREATE INDEX faba_fain_and_uri ON financial_accounts_by_awards(fain, uri, award_id) "
                              "WHERE fain IS NOT NULL AND uri IS NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_fain_and_uri;"),
        migrations.RunSQL(sql="CREATE INDEX faba_fain ON financial_accounts_by_awards(fain, uri, award_id) WHERE fain "
                              "IS NOT NULL AND uri IS NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_fain;"),
        migrations.RunSQL(sql="CREATE INDEX faba_uri ON financial_accounts_by_awards(fain, uri, award_id) WHERE fain "
                              "IS NULL AND uri IS NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_uri;"),
        migrations.RunSQL(sql="CREATE INDEX faba_piid ON financial_accounts_by_awards(piid, award_id) WHERE piid IS "
                              "NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_piid;"),
    ]
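Each of these indexes is partial on `award_id IS NULL`, so they cover only the still-unlinked File C rows and stay small as rows get linked. A hypothetical lookup of the shape the `faba_fain` index serves (the helper name is illustrative, not from this commit):

```
from django.db import connection

def count_unlinked_fain_rows(fain):
    # File C rows carrying only a fain that have not been linked to an award yet
    sql = ("SELECT COUNT(*) FROM financial_accounts_by_awards "
           "WHERE fain = %s AND uri IS NULL AND award_id IS NULL")
    with connection.cursor() as cursor:
        cursor.execute(sql, [fain])
        return cursor.fetchone()[0]
```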
@@ -8,6 +8,7 @@
 from django.db.models import Count
 from django.conf import settings
 
+from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages
 from usaspending_api.common.helpers.generic_helper import fy, timer, upper_case_dict_values
 from usaspending_api.etl.broker_etl_helpers import dictfetchall
 from usaspending_api.awards.models import TransactionFABS, TransactionNormalized, Award
@@ -304,7 +305,7 @@ def send_deletes_to_elasticsearch(ids_to_delete):
                 writer.write(row + '\n')
     else:
         # Write to file in S3 bucket directly
-        aws_region = os.environ.get('AWS_REGION')
+        aws_region = os.environ.get('USASPENDING_AWS_REGION')
         elasticsearch_bucket_name = os.environ.get('FPDS_BUCKET_NAME')
         s3_bucket = boto.s3.connect_to_region(aws_region).get_bucket(elasticsearch_bucket_name)
         conn = s3_bucket.new_key(file_name)
@@ -367,6 +368,10 @@ def handle(self, *args, **options):
             # Update AwardCategories based on changed FABS records
             with timer('updating award category variables', logger.info):
                 update_award_categories(tuple(award_update_id_list))
+
+            # Check the linkages from file C to FABS records and update any that are missing
+            with timer('updating C->D linkages', logger.info):
+                update_c_to_d_linkages('assistance')
         else:
             logger.info('Nothing to insert...')

13 changes: 11 additions & 2 deletions usaspending_api/broker/management/commands/fpds_nightly_loader.py
@@ -10,6 +10,7 @@
 from django.db.models import Count
 from django.conf import settings
 
+from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages
 from usaspending_api.common.helpers.generic_helper import fy, timer, upper_case_dict_values
 from usaspending_api.etl.broker_etl_helpers import dictfetchall
 from usaspending_api.awards.models import TransactionFPDS, TransactionNormalized, Award
@@ -62,11 +63,11 @@ def get_fpds_data(date):
             ids_to_delete += unique_key_list
     else:
         # Connect to AWS
-        aws_region = os.environ.get('AWS_REGION')
+        aws_region = os.environ.get('USASPENDING_AWS_REGION')
         fpds_bucket_name = os.environ.get('FPDS_BUCKET_NAME')
 
         if not (aws_region or fpds_bucket_name):
-            raise Exception('Missing required environment variables: AWS_REGION, FPDS_BUCKET_NAME')
+            raise Exception('Missing required environment variables: USASPENDING_AWS_REGION, FPDS_BUCKET_NAME')
 
         s3client = boto3.client('s3', region_name=aws_region)
         s3resource = boto3.resource('s3', region_name=aws_region)
@@ -352,17 +353,25 @@ def handle(self, *args, **options):
             logger.info('Nothing to delete...')
 
         if total_rows > 0:
+            # Add FPDS records
             with timer('inserting new FPDS data', logger.info):
                 self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)
 
+            # Update Awards based on changed FPDS records
             with timer('updating awards to reflect their latest associated transaction info', logger.info):
                 update_awards(tuple(award_update_id_list))
 
+            # Update FPDS-specific Awards based on the info in child transactions
             with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
                 update_contract_awards(tuple(award_update_id_list))
 
+            # Update AwardCategories based on changed FPDS records
             with timer('updating award category variables', logger.info):
                 update_award_categories(tuple(award_update_id_list))
+
+            # Check the linkages from file C to FPDS records and update any that are missing
+            with timer('updating C->D linkages', logger.info):
+                update_c_to_d_linkages('contract')
         else:
             logger.info('Nothing to insert...')

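Both loaders wrap each phase in the `timer` helper from `generic_helper`. Its implementation is not part of this diff; assuming it is a plain context manager that takes a message and a logging function, a minimal sketch might be:

```
from contextlib import contextmanager
from datetime import datetime

@contextmanager
def timer(message='', logging_function=print):
    # Log wall-clock time around the wrapped block (sketch, not the project's code)
    start = datetime.now()
    logging_function('Beginning {}...'.format(message))
    try:
        yield
    finally:
        elapsed = (datetime.now() - start).total_seconds()
        logging_function('Finished {} in {} seconds'.format(message, elapsed))
```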
39 changes: 39 additions & 0 deletions usaspending_api/broker/management/sql/load_historic_duns.sql
@@ -0,0 +1,39 @@
DROP TABLE IF EXISTS historic_parent_duns_new;

CREATE TABLE historic_parent_duns_new AS (
    SELECT
        broker_historic_duns.awardee_or_recipient_uniqu AS awardee_or_recipient_uniqu,
        broker_historic_duns.legal_business_name AS legal_business_name,
        broker_historic_duns.ultimate_parent_unique_ide AS ultimate_parent_unique_ide,
        broker_historic_duns.ultimate_parent_legal_enti AS ultimate_parent_legal_enti,
        broker_historic_duns.duns_id AS broker_historic_duns_id,
        broker_historic_duns.year AS year
    FROM
        dblink('broker_server', '(
            SELECT
                hduns.awardee_or_recipient_uniqu,
                hduns.legal_business_name,
                hduns.ultimate_parent_unique_ide,
                hduns.ultimate_parent_legal_enti,
                hduns.duns_id,
                hduns.year
            FROM
                historic_parent_duns as hduns)') AS broker_historic_duns
        (
            awardee_or_recipient_uniqu text,
            legal_business_name text,
            ultimate_parent_unique_ide text,
            ultimate_parent_legal_enti text,
            duns_id text,
            year int
        )
);
CREATE INDEX historic_parent_duns_awardee_idx_new ON historic_parent_duns_new USING btree (awardee_or_recipient_uniqu);
CREATE INDEX historic_parent_duns_year_idx_new ON historic_parent_duns_new USING btree (year);

BEGIN;
DROP TABLE IF EXISTS historic_parent_duns CASCADE;
ALTER TABLE historic_parent_duns_new RENAME TO historic_parent_duns;
ALTER INDEX historic_parent_duns_awardee_idx_new RENAME TO historic_parent_duns_awardee_idx;
ALTER INDEX historic_parent_duns_year_idx_new RENAME TO historic_parent_duns_year_idx;
COMMIT;
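The script does the slow work (building and indexing `historic_parent_duns_new`) outside any transaction, then swaps the table in under a short transaction, so readers always see a complete table under the old name. A hypothetical post-swap sanity check:

```
from django.db import connection

with connection.cursor() as cursor:
    cursor.execute("SELECT COUNT(*) FROM historic_parent_duns")
    print('historic_parent_duns rows:', cursor.fetchone()[0])

    # The renamed indexes should now be attached to the swapped-in table
    cursor.execute("SELECT indexname FROM pg_indexes WHERE tablename = 'historic_parent_duns'")
    print([row[0] for row in cursor.fetchall()])
```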
2 changes: 1 addition & 1 deletion usaspending_api/common/csv_helpers.py
@@ -4,7 +4,7 @@
 
 
 # Used by bulk and baby download
-def sqs_queue(region_name=settings.CSV_AWS_REGION, QueueName=settings.CSV_SQS_QUEUE_NAME):
+def sqs_queue(region_name=settings.USASPENDING_AWS_REGION, QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME):
     # Look the queue up by name (the boto3 equivalent of boto's get_queue)
     sqs = boto3.resource('sqs', region_name=region_name)
     queue = sqs.get_queue_by_name(QueueName=QueueName)
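A hypothetical caller, assuming the function returns the queue it looks up (the tail of the function is elided above):

```
queue = sqs_queue()  # region and queue name come from settings
queue.send_message(MessageBody='12345')  # e.g. enqueue a download-job id
```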
61 changes: 61 additions & 0 deletions usaspending_api/common/helpers/etl_helpers.py
@@ -0,0 +1,61 @@
from datetime import datetime
import logging

from django.db import connection

from usaspending_api.common.exceptions import InvalidParameterException
from usaspending_api.common.helpers.sql_helpers import read_sql_file


logger = logging.getLogger('console')
ETL_SQL_FILE_PATH = 'usaspending_api/etl/management/sql/'


def get_unlinked_count(file_name):
    file_path = ETL_SQL_FILE_PATH + file_name
    sql_commands = read_sql_file(file_path=file_path)

    if len(sql_commands) != 1:
        raise InvalidParameterException('Invalid number of commands in specified file. File should contain 1 SQL '
                                        'command to get the count of unlinked records.')

    with connection.cursor() as cursor:
        cursor.execute(sql_commands[0])
        result = cursor.fetchall()[0][0]

    return int(result)


def update_c_to_d_linkages(type):
    logger.info('Starting File C to D linkage updates for %s records' % type)

    if type.lower() == 'contract':
        file_names = ['update_file_c_linkages_piid.sql']
        unlinked_count_file_name = 'check_contract_file_c_linkages.sql'
    elif type.lower() == 'assistance':
        file_names = ['update_file_c_linkages_fain.sql', 'update_file_c_linkages_uri.sql',
                      'update_file_c_linkages_fain_and_uri.sql']
        unlinked_count_file_name = 'check_assistance_file_c_linkages.sql'
    else:
        raise InvalidParameterException('Invalid type provided to process C to D linkages.')

    file_paths = [ETL_SQL_FILE_PATH + file_name for file_name in file_names]

    starting_unlinked_count = get_unlinked_count(file_name=unlinked_count_file_name)
    logger.info('Current count of unlinked %s records: %s' % (type, str(starting_unlinked_count)))

    total_start = datetime.now()
    for file_name in file_paths:
        start = datetime.now()
        logger.info('Running %s' % file_name)
        sql_commands = read_sql_file(file_path=file_name)
        for command in sql_commands:
            with connection.cursor() as cursor:
                cursor.execute(command)
        logger.info('Finished %s in %s seconds' % (file_name, str(datetime.now() - start)))

    ending_unlinked_count = get_unlinked_count(file_name=unlinked_count_file_name)
    logger.info('Count of unlinked %s records after updates: %s' % (type, str(ending_unlinked_count)))

    logger.info('Finished all queries in %s seconds' % str(datetime.now() - total_start))
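Standalone usage mirrors what the two nightly loaders above do after inserting new transactions:

```
from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages

update_c_to_d_linkages('contract')    # relink File C rows to FPDS awards by piid
update_c_to_d_linkages('assistance')  # relink File C rows to FABS awards by fain/uri
```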
23 changes: 23 additions & 0 deletions usaspending_api/common/helpers/sql_helpers.py
@@ -0,0 +1,23 @@
import os
import logging

from usaspending_api.common.exceptions import InvalidParameterException


logger = logging.getLogger('console')


def read_sql_file(file_path):
    # Read in SQL file and extract commands into a list
    _, file_extension = os.path.splitext(file_path)

    if file_extension != '.sql':
        raise InvalidParameterException("Invalid file provided. A file with extension '.sql' is required.")

    # Open and read the file as a single buffer
    with open(file_path, 'r') as fd:
        sql_file = fd.read()

    # Return all SQL commands (split on ';'), trimmed of whitespace; filtering on the
    # stripped value drops empty strings left by a trailing semicolon or newline
    return [command.strip() for command in sql_file.split(';') if command.strip()]
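A small usage sketch; note that the split on ';' is purely lexical, so a file whose string literals contain semicolons would be mis-split:

```
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.sql', delete=False) as tmp:
    tmp.write('SELECT 1;\nSELECT 2;')

print(read_sql_file(tmp.name))  # -> ['SELECT 1', 'SELECT 2']
```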
2 changes: 1 addition & 1 deletion usaspending_api/common/threaded_data_loader.py
@@ -90,7 +90,7 @@ def load_from_file(self, filepath, encoding='utf-8', remote_file=False):
             process.start()
 
         if remote_file:
-            aws_region = os.environ.get('AWS_REGION')
+            aws_region = os.environ.get('USASPENDING_AWS_REGION')
             with smart_open.smart_open(filepath, 'r', encoding=encoding, region_name=aws_region) as csv_file:
                 row_queue = self.csv_file_to_queue(csv_file, row_queue)
         else: