Merge pull request #1341 from fedspendingtransparency/stg
Mid-Sprint 67 Production Deployment
nmonga91 authored Jul 18, 2018
2 parents 607d406 + 2bffa1a commit 08e5865
Showing 42 changed files with 883 additions and 92 deletions.
108 changes: 108 additions & 0 deletions usaspending_api/accounts/tests/unit/test_account_download_filter.py
@@ -0,0 +1,108 @@
import pytest

from model_mommy import mommy

from usaspending_api.accounts.models import AppropriationAccountBalances
from usaspending_api.accounts.v2.filters.account_download import account_download_filter
from usaspending_api.awards.models import FinancialAccountsByAwards
from usaspending_api.financial_activities.models import FinancialAccountsByProgramActivityObjectClass


@pytest.mark.django_db
def test_fyq_filter():
""" Ensure the fiscal year and quarter filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount')

# Create file A models
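    # Federal fiscal year 1700 Q1 covers 1699-10-01 through 1699-12-31, so only
    # the first record below should match the fy=1700 / quarter=1 filter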
    mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas1,
               reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
    mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas2,
               reporting_period_start='1700-01-01', reporting_period_end='1700-03-31')

    queryset = account_download_filter('account_balances', AppropriationAccountBalances, {
        'fy': 1700,
        'quarter': 1
    })
    assert queryset.count() == 1


@pytest.mark.django_db
def test_federal_account_filter():
""" Ensure the fiscal year and quarter filter is working """
# Create FederalAccount models
fed_acct1 = mommy.make('accounts.FederalAccount')
fed_acct2 = mommy.make('accounts.FederalAccount')

# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', federal_account=fed_acct1)
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', federal_account=fed_acct2)

# Create file A models
mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas1,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
mommy.make('accounts.AppropriationAccountBalances', treasury_account_identifier=tas2,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')

queryset = account_download_filter('account_balances', AppropriationAccountBalances, {
'federal_account': fed_acct1.id,
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1


@pytest.mark.django_db
def test_cgac_agency_filter():
""" Ensure the CGAC agency filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='NOT')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC')

# Create file B models
mommy.make('financial_activities.FinancialAccountsByProgramActivityObjectClass',
treasury_account_id=tas1.treasury_account_identifier, reporting_period_start='1699-10-01',
reporting_period_end='1699-12-31')
mommy.make('financial_activities.FinancialAccountsByProgramActivityObjectClass',
treasury_account_id=tas2.treasury_account_identifier, reporting_period_start='1699-10-01',
reporting_period_end='1699-12-31')

# Create ToptierAgency models
mommy.make('references.ToptierAgency', toptier_agency_id=-9999, cgac_code='CGC')
mommy.make('references.ToptierAgency', toptier_agency_id=-9998, cgac_code='NOT')

# Filter by ToptierAgency (CGAC)
queryset = account_download_filter('program_activity_object_class', FinancialAccountsByProgramActivityObjectClass, {
'agency': '-9999',
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1


@pytest.mark.django_db
def test_frec_agency_filter():
""" Ensure the FREC agency filter is working """
# Create TAS models
tas1 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC', fr_entity_code='FAKE')
tas2 = mommy.make('accounts.TreasuryAppropriationAccount', agency_id='CGC', fr_entity_code='FREC')

# Create file C models
mommy.make('awards.FinancialAccountsByAwards', treasury_account_id=tas1.treasury_account_identifier,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')
mommy.make('awards.FinancialAccountsByAwards', treasury_account_id=tas2.treasury_account_identifier,
reporting_period_start='1699-10-01', reporting_period_end='1699-12-31')

# Create ToptierAgency models
mommy.make('references.ToptierAgency', toptier_agency_id=-9999, cgac_code='FREC')
mommy.make('references.ToptierAgency', toptier_agency_id=-9998, cgac_code='FAKE')
mommy.make('references.ToptierAgency', toptier_agency_id=-9997, cgac_code='CGC')

# Filter by ToptierAgency (FREC)
queryset = account_download_filter('award_financial', FinancialAccountsByAwards, {
'agency': '-9999',
'fy': 1700,
'quarter': 1
})
assert queryset.count() == 1
4 changes: 3 additions & 1 deletion usaspending_api/accounts/v2/filters/account_download.py
@@ -19,7 +19,9 @@ def account_download_filter(account_type, download_table, filters, account_level
     if filters.get('agency', False) and filters['agency'] != 'all':
         agency = ToptierAgency.objects.filter(toptier_agency_id=filters['agency']).first()
         if agency:
-            query_filters['{}__agency_id'.format(tas_id)] = agency.cgac_code
+            # Agency is FREC if the cgac_code is 4 digits, CGAC otherwise
+            agency_filter_type = "fr_entity_code" if len(agency.cgac_code) == 4 else "agency_id"
+            query_filters['{}__{}'.format(tas_id, agency_filter_type)] = agency.cgac_code
         else:
             raise InvalidParameterException('Agency with that ID does not exist')

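The one-line length check above is the entire dispatch: FREC identifiers are four characters while CGAC codes are three. As a standalone illustration of the same rule (a minimal sketch; the function name is illustrative, not from the codebase):

```
def agency_tas_filter_key(tas_id, code):
    """Pick the TAS column an agency code should match against.
    FREC identifiers are 4 characters; CGAC codes are 3."""
    column = "fr_entity_code" if len(code) == 4 else "agency_id"
    return "{}__{}".format(tas_id, column)

# agency_tas_filter_key("treasury_account_identifier", "1601")  # -> "treasury_account_identifier__fr_entity_code"
# agency_tas_filter_key("treasury_account_identifier", "097")   # -> "treasury_account_identifier__agency_id"
```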
@@ -12,6 +12,7 @@
     "account_level": "treasury_account",
     "filters": {
         "agency": "3",
+        "federal_account": "15",
         "submission_type": "object_class_program_activity",
         "fy": "2018",
         "quarter": "2"
@@ -21,14 +22,14 @@
 ```
 
 ### Request Parameters Description
-* `account_level` - *required* - the account level: `treasury_account` or `federal_account` (must be `treasury_account` for Beta)
+* `account_level` - *required* - the account level: `treasury_account` or `federal_account`
 * `filters` - *required* - a JSON filter object with the following fields
-  * `agency` - *optional* - agency database id to include, `all` is also an option to include all agencies
-  * `federal_account` - *optional* - federal account id to include (based on the agency filter), out of scope for Beta
-  * `submission_type` - *required* - the file type requested: `account_balances` (File A) or `program_activity_object_class` (File B)
+  * `agency` - *optional* - agency database id, `all` is also an option to include all agencies
+  * `federal_account` - *optional* - federal account id
+  * `submission_type` - *required* - the file type requested: `account_balances` (File A), `program_activity_object_class` (File B), or `award_financial` (File C)
 * `fy` - *required* - fiscal year
 * `quarter` - *required* - fiscal quarter (1-4)
-* `file_format` - *optional* - must be `csv` for Beta
+* `file_format` - *optional* - must be `csv`
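For illustration, a request to this route could look like the following sketch (the host and URL path are assumptions based on the v2 API layout; they are not shown in this diff). The `submission_type` value here uses the File B identifier as spelled in the parameter list above and in the new tests:

```
import requests

payload = {
    "account_level": "treasury_account",
    "filters": {
        "agency": "3",
        "federal_account": "15",
        "submission_type": "program_activity_object_class",
        "fy": "2018",
        "quarter": "2"
    },
    "file_format": "csv"
}

# Hypothetical endpoint path, assumed from the v2 API layout
response = requests.post("https://api.usaspending.gov/api/v2/download/accounts/", json=payload)
print(response.json())
```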

### Response (JSON)

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2018-07-17 16:16
from __future__ import unicode_literals

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('awards', '0032_auto_20180614_1929'),
    ]

    operations = [
        migrations.RunSQL(sql="CREATE INDEX faba_fain_and_uri ON financial_accounts_by_awards(fain, uri, award_id) "
                              "WHERE fain IS NOT NULL AND uri IS NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_fain_and_uri;"),
        migrations.RunSQL(sql="CREATE INDEX faba_fain ON financial_accounts_by_awards(fain, uri, award_id) WHERE fain "
                              "IS NOT NULL AND uri IS NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_fain;"),
        migrations.RunSQL(sql="CREATE INDEX faba_uri ON financial_accounts_by_awards(fain, uri, award_id) WHERE fain "
                              "IS NULL AND uri IS NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_uri;"),
        migrations.RunSQL(sql="CREATE INDEX faba_piid ON financial_accounts_by_awards(piid, award_id) WHERE piid IS "
                              "NOT NULL AND award_id IS NULL;",
                          reverse_sql="DROP INDEX faba_piid;"),
    ]
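Each of these indexes is partial on `award_id IS NULL`, so they cover only the still-unlinked File C rows and stay small as rows get linked. A hypothetical lookup of the shape the `faba_fain` index serves (the helper name is illustrative, not from this commit):

```
from django.db import connection

def count_unlinked_fain_rows(fain):
    # File C rows carrying only a fain that have not been linked to an award yet
    sql = ("SELECT COUNT(*) FROM financial_accounts_by_awards "
           "WHERE fain = %s AND uri IS NULL AND award_id IS NULL")
    with connection.cursor() as cursor:
        cursor.execute(sql, [fain])
        return cursor.fetchone()[0]
```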
@@ -8,6 +8,7 @@
 from django.db.models import Count
 from django.conf import settings
 
+from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages
 from usaspending_api.common.helpers.generic_helper import fy, timer, upper_case_dict_values
 from usaspending_api.etl.broker_etl_helpers import dictfetchall
 from usaspending_api.awards.models import TransactionFABS, TransactionNormalized, Award
@@ -304,7 +305,7 @@ def send_deletes_to_elasticsearch(ids_to_delete):
                 writer.write(row + '\n')
     else:
         # Write to file in S3 bucket directly
-        aws_region = os.environ.get('AWS_REGION')
+        aws_region = os.environ.get('USASPENDING_AWS_REGION')
         elasticsearch_bucket_name = os.environ.get('FPDS_BUCKET_NAME')
         s3_bucket = boto.s3.connect_to_region(aws_region).get_bucket(elasticsearch_bucket_name)
         conn = s3_bucket.new_key(file_name)
@@ -367,6 +368,10 @@ def handle(self, *args, **options):
             # Update AwardCategories based on changed FABS records
             with timer('updating award category variables', logger.info):
                 update_award_categories(tuple(award_update_id_list))
+
+            # Check the linkages from file C to FABS records and update any that are missing
+            with timer('updating C->D linkages', logger.info):
+                update_c_to_d_linkages('assistance')
         else:
             logger.info('Nothing to insert...')

13 changes: 11 additions & 2 deletions usaspending_api/broker/management/commands/fpds_nightly_loader.py
@@ -10,6 +10,7 @@
 from django.db.models import Count
 from django.conf import settings
 
+from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages
 from usaspending_api.common.helpers.generic_helper import fy, timer, upper_case_dict_values
 from usaspending_api.etl.broker_etl_helpers import dictfetchall
 from usaspending_api.awards.models import TransactionFPDS, TransactionNormalized, Award
@@ -62,11 +63,11 @@ def get_fpds_data(date):
             ids_to_delete += unique_key_list
     else:
         # Connect to AWS
-        aws_region = os.environ.get('AWS_REGION')
+        aws_region = os.environ.get('USASPENDING_AWS_REGION')
         fpds_bucket_name = os.environ.get('FPDS_BUCKET_NAME')
 
         if not (aws_region or fpds_bucket_name):
-            raise Exception('Missing required environment variables: AWS_REGION, FPDS_BUCKET_NAME')
+            raise Exception('Missing required environment variables: USASPENDING_AWS_REGION, FPDS_BUCKET_NAME')
 
         s3client = boto3.client('s3', region_name=aws_region)
         s3resource = boto3.resource('s3', region_name=aws_region)
@@ -352,17 +353,25 @@ def handle(self, *args, **options):
             logger.info('Nothing to delete...')
 
         if total_rows > 0:
+            # Add FPDS records
             with timer('inserting new FPDS data', logger.info):
                 self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)
 
+            # Update Awards based on changed FPDS records
             with timer('updating awards to reflect their latest associated transaction info', logger.info):
                 update_awards(tuple(award_update_id_list))
 
+            # Update FPDS-specific Awards based on the info in child transactions
             with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
                 update_contract_awards(tuple(award_update_id_list))
 
+            # Update AwardCategories based on changed FPDS records
             with timer('updating award category variables', logger.info):
                 update_award_categories(tuple(award_update_id_list))
+
+            # Check the linkages from file C to FPDS records and update any that are missing
+            with timer('updating C->D linkages', logger.info):
+                update_c_to_d_linkages('contract')
         else:
             logger.info('Nothing to insert...')

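Both loaders wrap each phase in the `timer` helper from `generic_helper`. Its implementation is not part of this diff; assuming it is a plain context manager that takes a message and a logging function, a minimal sketch might be:

```
from contextlib import contextmanager
from datetime import datetime

@contextmanager
def timer(message='', logging_function=print):
    # Log wall-clock time around the wrapped block (sketch, not the project's code)
    start = datetime.now()
    logging_function('Beginning {}...'.format(message))
    try:
        yield
    finally:
        elapsed = (datetime.now() - start).total_seconds()
        logging_function('Finished {} in {} seconds'.format(message, elapsed))
```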
39 changes: 39 additions & 0 deletions usaspending_api/broker/management/sql/load_historic_duns.sql
@@ -0,0 +1,39 @@
DROP TABLE IF EXISTS historic_parent_duns_new;

CREATE TABLE historic_parent_duns_new AS (
    SELECT
        broker_historic_duns.awardee_or_recipient_uniqu AS awardee_or_recipient_uniqu,
        broker_historic_duns.legal_business_name AS legal_business_name,
        broker_historic_duns.ultimate_parent_unique_ide AS ultimate_parent_unique_ide,
        broker_historic_duns.ultimate_parent_legal_enti AS ultimate_parent_legal_enti,
        broker_historic_duns.duns_id AS broker_historic_duns_id,
        broker_historic_duns.year AS year
    FROM
        dblink('broker_server', '(
            SELECT
                hduns.awardee_or_recipient_uniqu,
                hduns.legal_business_name,
                hduns.ultimate_parent_unique_ide,
                hduns.ultimate_parent_legal_enti,
                hduns.duns_id,
                hduns.year
            FROM
                historic_parent_duns as hduns)') AS broker_historic_duns
        (
            awardee_or_recipient_uniqu text,
            legal_business_name text,
            ultimate_parent_unique_ide text,
            ultimate_parent_legal_enti text,
            duns_id text,
            year int
        )
);
CREATE INDEX historic_parent_duns_awardee_idx_new ON historic_parent_duns_new USING btree (awardee_or_recipient_uniqu);
CREATE INDEX historic_parent_duns_year_idx_new ON historic_parent_duns_new USING btree (year);

BEGIN;
DROP TABLE IF EXISTS historic_parent_duns CASCADE;
ALTER TABLE historic_parent_duns_new RENAME TO historic_parent_duns;
ALTER INDEX historic_parent_duns_awardee_idx_new RENAME TO historic_parent_duns_awardee_idx;
ALTER INDEX historic_parent_duns_year_idx_new RENAME TO historic_parent_duns_year_idx;
COMMIT;
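The script does the slow work (building and indexing `historic_parent_duns_new`) outside any transaction, then swaps the table in under a short transaction, so readers always see a complete table under the old name. A hypothetical post-swap sanity check:

```
from django.db import connection

with connection.cursor() as cursor:
    cursor.execute("SELECT COUNT(*) FROM historic_parent_duns")
    print('historic_parent_duns rows:', cursor.fetchone()[0])

    # The renamed indexes should now be attached to the swapped-in table
    cursor.execute("SELECT indexname FROM pg_indexes WHERE tablename = 'historic_parent_duns'")
    print([row[0] for row in cursor.fetchall()])
```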
2 changes: 1 addition & 1 deletion usaspending_api/common/csv_helpers.py
@@ -4,7 +4,7 @@
 
 
 # Used by bulk and baby download
-def sqs_queue(region_name=settings.CSV_AWS_REGION, QueueName=settings.CSV_SQS_QUEUE_NAME):
+def sqs_queue(region_name=settings.USASPENDING_AWS_REGION, QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME):
     # Look the queue up by name (the boto3 equivalent of boto's get_queue)
     sqs = boto3.resource('sqs', region_name=region_name)
     queue = sqs.get_queue_by_name(QueueName=QueueName)
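A hypothetical caller, assuming the function returns the queue it looks up (the tail of the function is elided above):

```
queue = sqs_queue()  # region and queue name come from settings
queue.send_message(MessageBody='12345')  # e.g. enqueue a download-job id
```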
61 changes: 61 additions & 0 deletions usaspending_api/common/helpers/etl_helpers.py
@@ -0,0 +1,61 @@
from datetime import datetime
import logging

from django.db import connection

from usaspending_api.common.exceptions import InvalidParameterException
from usaspending_api.common.helpers.sql_helpers import read_sql_file


logger = logging.getLogger('console')
ETL_SQL_FILE_PATH = 'usaspending_api/etl/management/sql/'


def get_unlinked_count(file_name):
    file_path = ETL_SQL_FILE_PATH + file_name
    sql_commands = read_sql_file(file_path=file_path)

    if len(sql_commands) != 1:
        raise InvalidParameterException('Invalid number of commands in specified file. File should contain 1 SQL '
                                        'command to get the count of unlinked records.')

    with connection.cursor() as cursor:
        cursor.execute(sql_commands[0])
        result = cursor.fetchall()[0][0]

    return int(result)


def update_c_to_d_linkages(type):
    logger.info('Starting File C to D linkage updates for %s records' % type)

    if type.lower() == 'contract':
        file_names = ['update_file_c_linkages_piid.sql']
        unlinked_count_file_name = 'check_contract_file_c_linkages.sql'
    elif type.lower() == 'assistance':
        file_names = ['update_file_c_linkages_fain.sql', 'update_file_c_linkages_uri.sql',
                      'update_file_c_linkages_fain_and_uri.sql']
        unlinked_count_file_name = 'check_assistance_file_c_linkages.sql'
    else:
        raise InvalidParameterException('Invalid type provided to process C to D linkages.')

    file_paths = [ETL_SQL_FILE_PATH + file_name for file_name in file_names]

    starting_unlinked_count = get_unlinked_count(file_name=unlinked_count_file_name)
    logger.info('Current count of unlinked %s records: %s' % (type, str(starting_unlinked_count)))

    total_start = datetime.now()
    for file_name in file_paths:
        start = datetime.now()
        logger.info('Running %s' % file_name)
        sql_commands = read_sql_file(file_path=file_name)
        for command in sql_commands:
            with connection.cursor() as cursor:
                cursor.execute(command)
        logger.info('Finished %s in %s seconds' % (file_name, str(datetime.now() - start)))

    ending_unlinked_count = get_unlinked_count(file_name=unlinked_count_file_name)
    logger.info('Count of unlinked %s records after updates: %s' % (type, str(ending_unlinked_count)))

    logger.info('Finished all queries in %s seconds' % str(datetime.now() - total_start))
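Standalone usage mirrors what the two nightly loaders above do after inserting new transactions:

```
from usaspending_api.common.helpers.etl_helpers import update_c_to_d_linkages

update_c_to_d_linkages('contract')    # relink File C rows to FPDS awards by piid
update_c_to_d_linkages('assistance')  # relink File C rows to FABS awards by fain/uri
```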
23 changes: 23 additions & 0 deletions usaspending_api/common/helpers/sql_helpers.py
@@ -0,0 +1,23 @@
import os
import logging

from usaspending_api.common.exceptions import InvalidParameterException


logger = logging.getLogger('console')


def read_sql_file(file_path):
    # Read in SQL file and extract commands into a list
    _, file_extension = os.path.splitext(file_path)

    if file_extension != '.sql':
        raise InvalidParameterException("Invalid file provided. A file with extension '.sql' is required.")

    # Open and read the file as a single buffer
    with open(file_path, 'r') as fd:
        sql_file = fd.read()

    # Return all SQL commands (split on ';'), trimmed of whitespace; filtering on the
    # stripped value drops empty strings left by a trailing semicolon or newline
    return [command.strip() for command in sql_file.split(';') if command.strip()]
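A small usage sketch; note that the split on ';' is purely lexical, so a file whose string literals contain semicolons would be mis-split:

```
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.sql', delete=False) as tmp:
    tmp.write('SELECT 1;\nSELECT 2;')

print(read_sql_file(tmp.name))  # -> ['SELECT 1', 'SELECT 2']
```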
2 changes: 1 addition & 1 deletion usaspending_api/common/threaded_data_loader.py
@@ -90,7 +90,7 @@ def load_from_file(self, filepath, encoding='utf-8', remote_file=False):
             process.start()
 
         if remote_file:
-            aws_region = os.environ.get('AWS_REGION')
+            aws_region = os.environ.get('USASPENDING_AWS_REGION')
             with smart_open.smart_open(filepath, 'r', encoding=encoding, region_name=aws_region) as csv_file:
                 row_queue = self.csv_file_to_queue(csv_file, row_queue)
         else: