diff --git a/usaspending_api/api_contracts/contracts/v2/references/total_budgetary_resources.md b/usaspending_api/api_contracts/contracts/v2/references/total_budgetary_resources.md new file mode 100644 index 0000000000..04d0bce958 --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/references/total_budgetary_resources.md @@ -0,0 +1,45 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Total Government Budgetary Resources [/api/v2/references/total_budgetary_resources/{?fiscal_year,fiscal_period}] + +This endpoint is used to provide information on the federal budgetary resources of the government. + +## GET + +This endpoint returns federal budgetary resources by fiscal year and fiscal period. + ++ Parameters + + + `fiscal_year` (optional, number) + The fiscal year. + + `fiscal_period` (optional, number) + The fiscal period. If this optional parameter is provided then `fiscal_year` is a required parameter + ++ Response 200 (application/json) + + + Attributes (object) + + `results` (required, array[FederalBudgetaryResources], fixed-type) + + Body + + { + "results": [ + { + "total_budgetary_resources": 8361447130497.72, + "fiscal_year": 2020, + "fiscal_period": 6 + }, + { + "total_budgetary_resources": 234525.72, + "fiscal_year": 2020, + "fiscal_period": 5 + } + ] + } + +# Data Structures + +## FederalBudgetaryResources (object) ++ `total_budgetary_resources` (required, number) ++ `fiscal_year` (required, number) ++ `fiscal_period` (required, number) diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/differences.md b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/differences.md new file mode 100644 index 0000000000..71b32cfc52 --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/differences.md @@ -0,0 +1,93 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agency Reporting Differences 
[/api/v2/reporting/agencies/{toptier_code}/differences/{?fiscal_year,fiscal_period,page,limit,order,sort}] + +This endpoint is used to power USAspending.gov's About the Data \| Agencies reported balance and spending differences over a submission period + +## GET + +This endpoint returns an overview of government agency obligation differences data. + ++ Parameters + + `toptier_code`: `020` (required, string) + The specific agency code. + + `fiscal_year`: 2020 (required, number) + The fiscal year. + + `fiscal_period`: 10 (required, number) + The fiscal period. Valid values: 2-12 (2 = November ... 12 = September) + For retrieving quarterly data, provide the period which equals 'quarter * 3' (e.g. Q2 = P6) + + `page` (optional, number) + The page of results to return based on the limit. + + Default: 1 + + `limit` (optional, number) + The number of results to include per page. + + Default: 10 + + `order` (optional, enum[string]) + The direction (`asc` or `desc`) that the `sort` field will be sorted in. + + Default: `desc` + + Members + + `asc` + + `desc` + + `sort` (optional, enum[string]) + A data field that will be used to sort the response array. + + Default: `tas` + + Members + + `difference` + + `file_a_obligation` + + `file_b_obligation` + + `tas` + ++ Response 200 (application/json) + + + Attributes (object) + + `page_metadata` (required, PaginationMetadata, fixed-type) + + `results` (required, array[ObligationDifferences], fixed-type) + + `messages` (required, array[string], fixed-type) + An array of warnings or instructional directives to aid consumers of this endpoint with development and debugging. 
+ + Body + + { + + "page_metadata": { + "page": 1, + "total": 10, + "limit": 2, + "next": 2, + "previous": null, + "hasNext": true, + "hasPrevious": false + }, + "results": [ + { + "tas": "011-X-8345-000", + "file_a_obligation": 47425.37, + "file_b_obligation": 240066.32, + "difference": -192640.95 + }, + { + "tas": "011-X-8245-000", + "file_a_obligation": 428508.11, + "file_b_obligation": 2358478.83, + "difference": -1929970.72 + } + ], + "messages": [] + } + +# Data Structures + +## PaginationMetadata (object) ++ `page` (required, number) ++ `next` (required, number, nullable) ++ `previous` (required, number, nullable) ++ `hasNext` (required, boolean) ++ `hasPrevious` (required, boolean) ++ `total` (required, number) ++ `limit` (required, number) + +## ObligationDifferences (object) ++ `tas` (required, string) ++ `file_a_obligation` (required, number) ++ `file_b_obligation` (required, number) ++ `difference` (required, number) diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/discrepancies.md b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/discrepancies.md new file mode 100644 index 0000000000..7a2145101b --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/discrepancies.md @@ -0,0 +1,82 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agency Reporting Discrepancies [/api/v2/reporting/agencies/{agency_code}/discrepancies/{?fiscal_year,fiscal_period,page,limit,order,sort}] + +This endpoint is used to power USAspending.gov's About the Data \| Agencies TAS discrepancies over a submission period + +## GET + +This endpoint returns an overview of government agency TAS discrepancies data. + ++ Parameters + + `agency_code`: `020` (required, string) + The specific agency code. + + `fiscal_year`: 2020 (required, number) + The fiscal year. + + `fiscal_period`: 10 (required, number) + The fiscal period. Valid values: 2-12 (2 = November ... 
12 = September) + For retrieving quarterly data, provide the period which equals 'quarter * 3' (e.g. Q2 = P6) + + `page` (optional, number) + The page of results to return based on the limit. + + Default: 1 + + `limit` (optional, number) + The number of results to include per page. + + Default: 10 + + `order` (optional, enum[string]) + The direction (`asc` or `desc`) that the `sort` field will be sorted in. + + Default: `desc` + + Members + + `asc` + + `desc` + + `sort` (optional, enum[string]) + A data field that will be used to sort the response array. + + Default: `amount` + + Members + + `amount` + + `tas` + ++ Response 200 (application/json) + + + Attributes (object) + + `page_metadata` (required, PaginationMetadata, fixed-type) + + `results` (required, array[TASDiscrepancies], fixed-type) + + Body + + { + + "page_metadata": { + "page": 1, + "next": 2, + "previous": 0, + "hasNext": false, + "hasPrevious": false, + "total": 2, + "limit": 10 + }, + "results": [ + { + "tas": "210-1503", + "amount": 234543543 + }, + { + "tas": "012-0212", + "amount": 43637623 + } + ] + } + +# Data Structures + +## PaginationMetadata (object) ++ `page` (required, number) ++ `next` (required, number, nullable) ++ `previous` (required, number, nullable) ++ `hasNext` (required, boolean) ++ `hasPrevious` (required, boolean) ++ `total` (required, number) ++ `limit` (required, number) + +## TASDiscrepancies (object) ++ `tas` (required, string) ++ `amount` (required, number) diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/overview.md b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/overview.md new file mode 100644 index 0000000000..8315042cb8 --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/overview.md @@ -0,0 +1,119 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agency Reporting Overview [/api/v2/reporting/agencies/{agency_code}/overview/{?page,limit,order,sort}] + +This 
endpoint is used to power USAspending.gov's About the Data \| Agencies agency details table. + +## GET + +This endpoint returns an overview of government agency submission data. + ++ Parameters + + `agency_code`: `020` (required, string) + The specific agency. + + `page` (optional, number) + The page of results to return based on the limit. + + Default: 1 + + `limit` (optional, number) + The number of results to include per page. + + Default: 10 + + `order` (optional, enum[string]) + The direction (`asc` or `desc`) that the `sort` field will be sorted in. + + Default: `desc` + + Members + + `asc` + + `desc` + + `sort` (optional, enum[string]) + A data field that will be used to sort the response array. + + Default: `current_total_budget_authority_amount` + + Members + + `current_total_budget_authority_amount` + + `fiscal_period` + + `fiscal_year` + + `missing_tas_accounts_count` + + `obligation_difference` + + `recent_publication_date_certified` + + `recent_publication_date` + + `tas_obligation_not_in_gtas_total` + ++ Response 200 (application/json) + + + Attributes (object) + + `page_metadata` (required, PaginationMetadata, fixed-type) + + `results` (required, array[AgencyData], fixed-type) + + `messages` (optional, array[string]) + An array of warnings or instructional directives to aid consumers of this endpoint with development and debugging. 
+ + + Body + + { + "messages": [], + "page_metadata": { + "page": 1, + "next": 2, + "previous": 0, + "hasNext": false, + "hasPrevious": false, + "total": 2, + "limit": 10 + }, + "results": [ + { + "fiscal_year": 2020, + "fiscal_period": 12, + "current_total_budget_authority_amount": 8361447130497.72, + "recent_publication_date": "2020-01-10T11:59:21Z", + "recent_publication_date_certified": false, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 66432, + "tas_accounts_total": 2342, + "tas_obligation_not_in_gtas_total": 343345, + "missing_tas_accounts_count": 10 + }, + "obligation_difference": 436376232652.87 + }, + { + "fiscal_year": 2020, + "fiscal_period": 9, + "current_total_budget_authority_amount": 8361447130497.72, + "recent_publication_date": null, + "recent_publication_date_certified": true, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 66432, + "tas_accounts_total": 23903, + "tas_obligation_not_in_gtas_total": 11543, + "missing_tas_accounts_count": 10 + }, + "obligation_difference": 436376232652.87 + } + ] + } + +# Data Structures + +## PaginationMetadata (object) ++ `page` (required, number) ++ `next` (required, number, nullable) ++ `previous` (required, number, nullable) ++ `hasNext` (required, boolean) ++ `hasPrevious` (required, boolean) ++ `total` (required, number) ++ `limit` (required, number) + +## TASTotals (object) ++ `gtas_obligation_total` (required, number) ++ `tas_accounts_total` (required, number) ++ `tas_obligation_not_in_gtas_total` (required, number) ++ `missing_tas_accounts_count` (required, number) + +## AgencyData (object) ++ `fiscal_year` (required, number) ++ `fiscal_period` (required, number) ++ `current_total_budget_authority_amount` (required, number) ++ `recent_publication_date` (required, string, nullable) ++ `recent_publication_date_certified` (required, boolean) ++ `recent_publication_date_certified` (required, boolean) ++ `tas_account_discrepancies_totals` (required, array[TASTotals], 
fixed-type) ++ `obligation_difference` (required, number) + The difference in file A and file B obligations. diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/agencies/overview.md b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/overview.md new file mode 100644 index 0000000000..6b9fcce32c --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/overview.md @@ -0,0 +1,129 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agencies Reporting Overview [/api/v2/reporting/agencies/overview/{?fiscal_year,fiscal_period,search,page,limit,order,sort}] + +This endpoint is used to power USAspending.gov's About the Data \| Agencies Overview table. This data can be used to better understand the ways agencies submit data. + +## GET + +This endpoint returns an overview list of government agencies submission data. + ++ Parameters + + + `fiscal_year`: 2020 (required, number) + The fiscal year. + + `fiscal_period`: 10 (required, number) + The fiscal period. Valid values: 2-12 (2 = November ... 12 = September) + For retrieving quarterly data, provide the period which equals 'quarter * 3' (e.g. Q2 = P6) + + `search` (optional, string) + The agency name to filter on. + + `page` (optional, number) + The page of results to return based on the limit. + + Default: 1 + + `limit` (optional, number) + The number of results to include per page. + + Default: 10 + + `order` (optional, enum[string]) + The direction (`asc` or `desc`) that the `sort` field will be sorted in. + + Default: `desc` + + Members + + `asc` + + `desc` + + `sort` (optional, enum[string]) + A data field that will be used to sort the response array. 
+ + Default: `current_total_budget_authority_amount` + + Members + + `agency_code` + + `current_total_budget_authority_amount` + + `missing_tas_accounts_total` + + `agency_name` + + `obligation_difference` + + `recent_publication_date` + + `recent_publication_date_certified` + + `tas_obligation_not_in_gtas_total` + ++ Response 200 (application/json) + + + Attributes (object) + + `page_metadata` (required, PaginationMetadata, fixed-type) + + `results` (required, array[AgencyData], fixed-type) + + `messages` (optional, array[string]) + An array of warnings or instructional directives to aid consumers of this endpoint with development and debugging. + + + Body + + { + "page_metadata": { + "page": 1, + "next": 2, + "previous": 0, + "hasNext": false, + "hasPrevious": false, + "total": 2, + "limit": 10 + }, + "results": [ + { + "agency_name": "Department of Health and Human Services", + "abbreviation": "DHHS", + "agency_code": "020", + "agency_id": 123, + "current_total_budget_authority_amount": 8361447130497.72, + "recent_publication_date": "2020-01-10T11:59:21Z", + "recent_publication_date_certified": false, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 55234, + "tas_accounts_total": 23923, + "tas_obligation_not_in_gtas_total": 11543, + "missing_tas_accounts_count": 20 + }, + "obligation_difference": 436376232652.87 + }, + { + "agency_name": "Department of Treasury", + "abbreviation": "DOT", + "agency_code": "021", + "agency_id": 789, + "current_total_budget_authority_amount": 8361447130497.72, + "recent_publication_date": null, + "recent_publication_date_certified": true, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 66432, + "tas_accounts_total": 23913, + "tas_obligation_not_in_gtas_total": 11543, + "missing_tas_accounts_count": 10 + }, + "obligation_difference": 436376232652.87 + } + ] + } + +# Data Structures + +## PaginationMetadata (object) ++ `page` (required, number) ++ `next` (required, number, nullable) ++ `previous` 
(required, number, nullable) ++ `hasNext` (required, boolean) ++ `hasPrevious` (required, boolean) ++ `total` (required, number) ++ `limit` (required, number) + +## TASTotals (object) ++ `gtas_obligation_total` (required, number) ++ `tas_accounts_total` (required, number) ++ `tas_obligation_not_in_gtas_total` (required, number) ++ `missing_tas_accounts_count` (required, number) + +## AgencyData (object) ++ `agency_name` (required, string) ++ `abbreviation` (required, string) ++ `agency_code` (required, string) ++ `agency_id` (required, number, nullable) ++ `current_total_budget_authority_amount` (required, number) ++ `recent_publication_date` (required, string, nullable) ++ `recent_publication_date_certified` (required, boolean) ++ `tas_account_discrepancies_totals` (required, array[TASTotals], fixed-type) ++ `obligation_difference` (required, number) + The difference in File A and File B obligations. diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/agencies/publish_dates.md b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/publish_dates.md new file mode 100644 index 0000000000..c90dc843a3 --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/agencies/publish_dates.md @@ -0,0 +1,116 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agency Reporting Publish Dates [/api/v2/reporting/agencies/publish_dates/{?fiscal_year,search,page,limit,order,sort}] + +This endpoint is used to power USAspending.gov's About the Data \| Agencies submission publication dates. + +## GET + ++ Parameters + + `fiscal_year`: 2020 (required, number) + The fiscal year. + + `search` (optional, string) + The agency name to filter on. + + `page` (optional, number) + The page of results to return based on the limit. + + Default: 1 + + `limit` (optional, number) + The number of results to include per page. + + Default: 10 + + `order` (optional, enum[string]) + The direction (`asc` or `desc`) that the `sort` field will be sorted in. 
+ + Default: `desc` + + Members + + `asc` + + `desc` + + `sort` (optional, enum[string]) + A data field that will be used to sort the response array. + + Default: `current_total_budget_authority_amount` + + Members + + `agency_name` + + `abbreviation` + + `agency_code` + + `current_total_budget_authority_amount` + + `publication_date` + When using publication_date, provide the desired fiscal period (2-12) after a comma + example: &sort=publication_date,10 + ++ Response 200 (application/json) + + + Attributes (object) + + `page_metadata` (required, PaginationMetadata, fixed-type) + + `results` (required, array[Agency], fixed-type) + + Body + + { + "page_metadata": { + "page": 1, + "next": 2, + "previous": 0, + "hasNext": false, + "hasPrevious": false, + "total": 2, + "limit": 10 + }, + "results": [ + { + "agency_name": "Department of Health and Human Services", + "abbreviation": "DHHS", + "agency_code": "020", + "current_total_budget_authority_amount": 8361447130497.72, + "periods": [{ + "period": 2, + "quarter": 1, + "submission_dates": { + "publication_date" : "2020-01-20T11:59:21Z", + "certification_date" : "2020-01-21T10:58:21Z" + }, + "quarterly": false + }] + }, + { + "agency_name": "Department of Treasury", + "abbreviation": "DOT", + "agency_code": "021", + "current_total_budget_authority_amount": 8361447130497.72, + "periods": [{ + "period": 2, + "quarter": 1, + "submission_dates": { + "publication_date" : "2020-01-20T11:59:21Z", + "certification_date" : "2020-01-21T10:58:21Z" + }, + "quarterly": false + }] + } + ] + } + +# Data Structures + +## PaginationMetadata (object) ++ `page` (required, number) ++ `next` (required, number, nullable) ++ `previous` (required, number, nullable) ++ `hasNext` (required, boolean) ++ `hasPrevious` (required, boolean) ++ `total` (required, number) ++ `limit` (required, number) + +## SubmissionDates (object) ++ `publication_date` (required, string, nullable) ++ `certification_date` (required, string, nullable) + +## Period 
(object) ++ `period` (required, number) ++ `quarter` (required, number) ++ `submission_dates` (required, array[SubmissionDates], nullable) ++ `quarterly` (required, boolean) + +## Agency (object) ++ `agency_name` (required, string) ++ `abbreviation` (required, string) ++ `agency_code` (required, string) ++ `current_total_budget_authority_amount` (required, number) ++ `periods` (required, array[Period], fixed-type) diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/submission_history/agency_code/fiscal_year/fiscal_period.md b/usaspending_api/api_contracts/contracts/v2/reporting/submission_history/agency_code/fiscal_year/fiscal_period.md new file mode 100644 index 0000000000..b1611ebb3e --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/submission_history/agency_code/fiscal_year/fiscal_period.md @@ -0,0 +1,48 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Agencies Reporting Publish Dates History [/api/v2/reporting/submission_history/{agency_code}/{fiscal_year}/{fiscal_period}] + +This endpoint is used to power USAspending.gov's About the Data \| Agencies submission history modal. + +## GET + +This endpoint returns the history of publication and certification dates for a single agency's submission. + ++ Parameters + + `agency_code`: `020` (required, string) + The specific agency code. + + `fiscal_year`: 2020 (required, number) + The fiscal year of the submission + + `fiscal_period`: 10 (required, number) + The fiscal period of the submission. valid values: 2-12 (2 = November ... 12 = September) + For retrieving quarterly submissions, provide the period which equals 'quarter * 3' (e.g. 
Q2 = P6) + ++ Response 200 (application/json) + + + Attributes (object) + + `results` (required, array[SubmissionHistory], fixed-type) + + Body + + { + "results": [ + { + "publication_date": "2020-10-11T11:59:21Z", + "certification_date": "2020-10-22T11:59:21Z" + }, + { + "publication_date": "2020-07-10T11:59:21Z", + "certification_date": "2020-07-11T11:59:21Z" + }, + { + "publication_date": "2020-07-10T11:59:21Z", + "certification_date": null + } + ] + } + +# Data Structures + +## SubmissionHistory (object) ++ `publication_date` (required, string, nullable) ++ `certification_date` (required, string, nullable) diff --git a/usaspending_api/api_docs/markdown/endpoints.md b/usaspending_api/api_docs/markdown/endpoints.md index 0c00f640e5..3b40f9ece2 100644 --- a/usaspending_api/api_docs/markdown/endpoints.md +++ b/usaspending_api/api_docs/markdown/endpoints.md @@ -120,6 +120,7 @@ The currently available endpoints are listed in the following table. |[/api/v2/recipient/state//](/api/v2/recipient/state/51/)|GET| Returns basic information about the specified state | |[/api/v2/recipient/state/](/api/v2/recipient/state/)|GET| Returns basic information about the specified state | |[/api/v2/recipient/state/awards//](/api/v2/recipient/state/awards/51/)|GET| Returns award breakdown based on FIPS | +|[/api/v2/reporting/agencies//differences/](/api/v2/reporting/agencies/097/differences/)|GET| Returns About the Data information about differences in account balance and spending obligations for a specific agency/year/period | |[/api/v2/references/agency//](/api/v2/references/agency/479/)|GET| Returns basic information about a federal agency | |[/api/v2/references/award_types/](/api/v2/references/award_types/)|GET| Returns a map of award types by award grouping. | |[/api/v2/references/cfda/totals//](/api/v2/references/cfda/totals/10.555/)|GET| Provides total values for provided CFDA | @@ -140,7 +141,8 @@ The currently available endpoints are listed in the following table. 
|[/api/v2/references/naics/](/api/v2/references/naics/)|GET| Returns all Tier 1 (2-digit) NAICS and related, relevant data. | |[/api/v2/references/submission_periods/](/api/v2/references/submission_periods/)|GET| Returns a list of all available submission periods with essential information about start and end dates. | |[/api/v2/references/toptier_agencies/](/api/v2/references/toptier_agencies/)|GET| Returns all toptier agencies and related, relevant data. | -|[/api/v2/reporting/placeholder/](/api/v2/reporting/placeholder/)|POST| Temp Placeholder. Ignore and rmove | +|[/api/v2/reporting/agencies//overview/](/api/v2/reporting/agencies/020/overview/)|GET| Returns a list of submission data for the provided agency. | +|[/api/v2/reporting/agencies/overview/](/api/v2/reporting/agencies/overview/)|GET| Returns About the Data information about all agencies with submissions in a provided fiscal year and period| |[/api/v2/search/new_awards_over_time/](/api/v2/search/new_awards_over_time/)|POST| Returns a list of time periods with the new awards in the appropriate period within the provided time range | |[/api/v2/search/spending_by_award/](/api/v2/search/spending_by_award/)|POST| Returns the fields of the filtered awards | |[/api/v2/search/spending_by_award_count/](/api/v2/search/spending_by_award_count/)|POST| Returns the number of awards in each award type (Contracts, IDV, Loans, Direct Payments, Grants, and Other) | diff --git a/usaspending_api/common/elasticsearch/json_helpers.py b/usaspending_api/common/elasticsearch/json_helpers.py deleted file mode 100644 index 4c267101e2..0000000000 --- a/usaspending_api/common/elasticsearch/json_helpers.py +++ /dev/null @@ -1,33 +0,0 @@ -import json -import re - - -def json_str_to_dict(string: str) -> dict: - if not str: - return {} - - try: - return json.loads(string) - except json.decoder.JSONDecodeError: - pass # Give the unicode_escape a chance to succeed - - try: - return json.loads(string.encode("unicode_escape")) - except 
json.decoder.JSONDecodeError: - - # Try to parse the string with Regex before throwing error - key_count_regex = r"\"[^\"]*\"\s?:" - grouping_regex = r"\"([^\"]*)\"\s?:\s?\"([^\"]*(?:(?:\w|\s)?(?:\"|\')?(?:\w|\s)?)*[^\"]*)(?:(?:\"\,)|(?:\"\}))" - - key_count_matches = re.findall(key_count_regex, string) - grouping_matches = re.findall(grouping_regex, string) - - # Need to verify the correct number of elements in case grouping regex didn't work - if ( - isinstance(key_count_matches, list) - and isinstance(grouping_matches, list) - and len(key_count_matches) == len(grouping_matches) - ): - return {key: value for key, value in grouping_matches} - else: - raise json.decoder.JSONDecodeError(f"Unable to parse '{string}' even using regex") diff --git a/usaspending_api/common/helpers/sql_helpers.py b/usaspending_api/common/helpers/sql_helpers.py index 416bb13b69..a5ad0fba59 100644 --- a/usaspending_api/common/helpers/sql_helpers.py +++ b/usaspending_api/common/helpers/sql_helpers.py @@ -304,3 +304,14 @@ def get_connection(model=Award, read_only=True): else: _connection = connections[router.db_for_write(model)] return _connection + + +def close_all_django_db_conns() -> None: + """ + Helper function to close all DB connections + Sometimes we have to kill any DB connections before forking processes + as Django will want to share the single connection with all processes + and we don't want to have any deadlock/SSL problems due to that. 
+ """ + + connections.close_all() diff --git a/usaspending_api/common/tests/integration/test_decorators.py b/usaspending_api/common/tests/integration/test_decorators.py index f57af8ee44..52ec36e8b7 100644 --- a/usaspending_api/common/tests/integration/test_decorators.py +++ b/usaspending_api/common/tests/integration/test_decorators.py @@ -1,13 +1,8 @@ -# Stdlib imports -from time import perf_counter +import pytest -# Core Django imports from django.db import connection +from time import perf_counter -# Third-party app imports -import pytest - -# Imports from your apps from usaspending_api.common.helpers.decorators import set_db_timeout @@ -17,14 +12,14 @@ def test_statement_timeout_successfully_times_out(): Test the django statement timeout setting """ - test_timeout_in_seconds = 1 + test_timeout_in_seconds = 0.5 pg_sleep_in_seconds = 10 @set_db_timeout(test_timeout_in_seconds) def test_timeout_success(): with connection.cursor() as cursor: # pg_sleep takes in a parameter corresponding to seconds - cursor.execute("SELECT pg_sleep(%d)" % pg_sleep_in_seconds) + cursor.execute(f"SELECT pg_sleep({pg_sleep_in_seconds:.2f})") start = perf_counter() try: @@ -43,14 +38,14 @@ def test_statement_timeout_successfully_runs_within_timeout(): Test the django statement timeout setting """ - test_timeout_in_seconds = 2 - pg_sleep_in_seconds = 1 + test_timeout_in_seconds = 1 + pg_sleep_in_seconds = 0.5 @set_db_timeout(test_timeout_in_seconds) def test_timeout_success(): with connection.cursor() as cursor: # pg_sleep takes in a parameter corresponding to seconds - cursor.execute("SELECT pg_sleep(%d)" % pg_sleep_in_seconds) + cursor.execute(f"SELECT pg_sleep({pg_sleep_in_seconds:.2f})") try: start = perf_counter() @@ -66,16 +61,17 @@ def test_statement_timeout_no_decorator(): """Test the django statement timeout setting""" start = perf_counter() - pg_sleep_in_seconds = 5 + pg_sleep_in_seconds = 2 + tiny_offset = 0.001 # allows for slight rounding or timing differences between tools 
def test_timeout_success(): with connection.cursor() as cursor: # pg_sleep takes in a parameter corresponding to seconds - cursor.execute("SELECT pg_sleep({:d})".format(pg_sleep_in_seconds)) + cursor.execute(f"SELECT pg_sleep({pg_sleep_in_seconds:.2f})") try: test_timeout_success() except Exception: assert False else: - assert (perf_counter() - start) >= pg_sleep_in_seconds + assert (perf_counter() - start) >= (pg_sleep_in_seconds - tiny_offset) diff --git a/usaspending_api/conftest_helpers.py b/usaspending_api/conftest_helpers.py index 51305c9c27..d21964fdfe 100644 --- a/usaspending_api/conftest_helpers.py +++ b/usaspending_api/conftest_helpers.py @@ -1,11 +1,7 @@ from builtins import Exception - -import json - from datetime import datetime, timezone -from typing import Optional, List from django.conf import settings -from django.core.serializers.json import DjangoJSONEncoder +from django.core.serializers.json import json, DjangoJSONEncoder from django.db import connection, DEFAULT_DB_ALIAS from elasticsearch import Elasticsearch from pathlib import Path @@ -21,8 +17,11 @@ from usaspending_api.common.helpers.text_helpers import generate_random_string from usaspending_api.etl.elasticsearch_loader_helpers import ( create_award_type_aliases, - transform_covid19_faba_data, + execute_sql_statement, TaskSpec, + transform_award_data, + transform_covid19_faba_data, + transform_transaction_data, ) from usaspending_api.etl.management.commands.es_configure import retrieve_index_template @@ -46,6 +45,20 @@ def __init__(self, index_type): "verbose": False, "write_alias": self.index_name + "-alias", } + self.worker = TaskSpec( + base_table=None, + base_table_id=None, + execute_sql_func=execute_sql_statement, + field_for_es_id="award_id" if self.index_type == "award" else "transaction_id", + index=self.index_name, + is_incremental=None, + name=f"{self.index_type} test worker", + partition_number=None, + primary_key="award_id" if self.index_type == "award" else "transaction_id", 
+ sql=None, + transform_func=None, + view=None, + ) def delete_index(self): self.client.indices.delete(self.index_name, ignore_unavailable=True) @@ -87,7 +100,11 @@ def _add_contents(self, **options): cursor.execute(f"SELECT * FROM {view_name};") records = ordered_dictionary_fetcher(cursor) cursor.execute(f"DROP VIEW {view_name};") - if self.index_type == "covid19_faba": + if self.index_type == "award": + records = transform_award_data(self.worker, records) + elif self.index_type == "transaction": + records = transform_transaction_data(self.worker, records) + elif self.index_type == "covid19_faba": records = transform_covid19_faba_data( TaskSpec( name="worker", @@ -103,15 +120,17 @@ def _add_contents(self, **options): ), records, ) + for record in records: # Special cases where we convert array of JSON to an array of strings to avoid nested types routing_key = options.get("routing", settings.ES_ROUTING_FIELD) routing_value = record.get(routing_key) - es_id_value = record.get(es_id) - if self.index_type == "transaction": - record["federal_accounts"] = self.convert_json_arrays_to_list(record["federal_accounts"]) - if self.index_type == "covid19_faba": + + if "_id" in record: es_id_value = record.pop("_id") + else: + es_id_value = record.get(es_id) + self.client.index( index=self.index_name, body=json.dumps(record, cls=DjangoJSONEncoder), @@ -123,20 +142,7 @@ def _add_contents(self, **options): @classmethod def _generate_index_name(cls): - return "test-{}-{}".format( - datetime.now(timezone.utc).strftime("%Y-%m-%d-%H-%M-%S-%f"), generate_random_string() - ) - - @staticmethod - def convert_json_arrays_to_list(json_array: Optional[List[dict]]) -> Optional[List[str]]: - if json_array is None: - return None - result = [] - for j in json_array: - for key, value in j.items(): - j[key] = "" if value is None else str(j[key]) - result.append(json.dumps(j, sort_keys=True)) - return result + return 
f"test-{datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-%f')}-{generate_random_string()}" def ensure_broker_server_dblink_exists(): diff --git a/usaspending_api/database_scripts/etl/award_delta_view.sql b/usaspending_api/database_scripts/etl/award_delta_view.sql index d00ab4f5b5..e51863fa43 100644 --- a/usaspending_api/database_scripts/etl/award_delta_view.sql +++ b/usaspending_api/database_scripts/etl/award_delta_view.sql @@ -1,99 +1,102 @@ DROP VIEW IF EXISTS award_delta_view; + CREATE VIEW award_delta_view AS SELECT - vw_es_award_search.award_id, - vw_es_award_search.generated_unique_award_id, - vw_es_award_search.display_award_id, + "award_id", + "generated_unique_award_id", + "display_award_id", - vw_es_award_search.category, - vw_es_award_search.type, - vw_es_award_search.type_description, - vw_es_award_search.piid, - vw_es_award_search.fain, - vw_es_award_search.uri, - vw_es_award_search.total_obligation, - vw_es_award_search.description, - vw_es_award_search.award_amount, - vw_es_award_search.total_subsidy_cost, - vw_es_award_search.total_loan_value, - vw_es_award_search.update_date, + "category", + "type", + "type_description", + "piid", + "fain", + "uri", + "total_obligation", + "description", + "award_amount", + "total_subsidy_cost", + "total_loan_value", + "update_date", - vw_es_award_search.recipient_name, - vw_es_award_search.recipient_unique_id, - vw_es_award_search.recipient_hash, - vw_es_award_search.recipient_agg_key, + "recipient_name", + "recipient_unique_id", + "recipient_hash", + "recipient_levels", - vw_es_award_search.parent_recipient_unique_id, - vw_es_award_search.business_categories, + "parent_recipient_unique_id", + "business_categories", - vw_es_award_search.action_date, - vw_es_award_search.fiscal_year, - vw_es_award_search.last_modified_date, - vw_es_award_search.period_of_performance_start_date, - vw_es_award_search.period_of_performance_current_end_date, - vw_es_award_search.date_signed, - 
vw_es_award_search.ordering_period_end_date, + "action_date", + "fiscal_year", + "last_modified_date", + "period_of_performance_start_date", + "period_of_performance_current_end_date", + "date_signed", + "ordering_period_end_date", - vw_es_award_search.original_loan_subsidy_cost, - vw_es_award_search.face_value_loan_guarantee, + "original_loan_subsidy_cost", + "face_value_loan_guarantee", - vw_es_award_search.awarding_agency_id, - vw_es_award_search.funding_agency_id, - vw_es_award_search.awarding_toptier_agency_name, - vw_es_award_search.funding_toptier_agency_name, - vw_es_award_search.awarding_subtier_agency_name, - vw_es_award_search.funding_subtier_agency_name, - vw_es_award_search.awarding_toptier_agency_code, - vw_es_award_search.funding_toptier_agency_code, - vw_es_award_search.awarding_subtier_agency_code, - vw_es_award_search.funding_subtier_agency_code, - vw_es_award_search.funding_toptier_agency_agg_key, - vw_es_award_search.funding_subtier_agency_agg_key, + "awarding_agency_id", + "funding_agency_id", + "funding_toptier_agency_id", + "funding_subtier_agency_id", + "awarding_toptier_agency_name", + "funding_toptier_agency_name", + "awarding_subtier_agency_name", + "funding_subtier_agency_name", + "awarding_toptier_agency_code", + "funding_toptier_agency_code", + "awarding_subtier_agency_code", + "funding_subtier_agency_code", - vw_es_award_search.recipient_location_country_code, - vw_es_award_search.recipient_location_country_name, - vw_es_award_search.recipient_location_state_code, - vw_es_award_search.recipient_location_county_code, - vw_es_award_search.recipient_location_county_name, - vw_es_award_search.recipient_location_congressional_code, - vw_es_award_search.recipient_location_zip5, - vw_es_award_search.recipient_location_city_name, + "recipient_location_country_code", + "recipient_location_country_name", + "recipient_location_state_code", + "recipient_location_state_name", + "recipient_location_state_fips", + 
"recipient_location_state_population", + "recipient_location_county_code", + "recipient_location_county_name", + "recipient_location_county_population", + "recipient_location_congressional_code", + "recipient_location_congressional_population", + "recipient_location_zip5", + "recipient_location_city_name", - vw_es_award_search.pop_country_code, - vw_es_award_search.pop_country_name, - vw_es_award_search.pop_state_code, - vw_es_award_search.pop_county_code, - vw_es_award_search.pop_county_name, - vw_es_award_search.pop_zip5, - vw_es_award_search.pop_congressional_code, - vw_es_award_search.pop_city_name, - vw_es_award_search.pop_city_code, + "pop_country_code", + "pop_country_name", + "pop_state_code", + "pop_state_name", + "pop_state_fips", + "pop_state_population", + "pop_county_code", + "pop_county_name", + "pop_county_population", + "pop_zip5", + "pop_congressional_code", + "pop_congressional_population", + "pop_city_name", + "pop_city_code", - vw_es_award_search.cfda_number, - vw_es_award_search.cfda_program_title as cfda_title, - - vw_es_award_search.sai_number, - vw_es_award_search.type_of_contract_pricing, - vw_es_award_search.extent_competed, - vw_es_award_search.type_set_aside, + "cfda_number", + "cfda_program_title" as cfda_title, - vw_es_award_search.product_or_service_code, - vw_es_award_search.product_or_service_description, - vw_es_award_search.naics_code, - vw_es_award_search.naics_description, + "sai_number", + "type_of_contract_pricing", + "extent_competed", + "type_set_aside", - vw_es_award_search.recipient_location_county_agg_key, - vw_es_award_search.recipient_location_congressional_agg_key, - vw_es_award_search.recipient_location_state_agg_key, + "product_or_service_code", + "product_or_service_description", + "naics_code", + "naics_description", - vw_es_award_search.pop_county_agg_key, - vw_es_award_search.pop_congressional_agg_key, - vw_es_award_search.pop_state_agg_key, + "tas_paths", + "tas_components", + "disaster_emergency_fund_codes", + 
"total_covid_outlay", + "total_covid_obligation" - vw_es_award_search.tas_paths, - vw_es_award_search.tas_components, - vw_es_award_search.disaster_emergency_fund_codes, - vw_es_award_search.total_covid_outlay, - vw_es_award_search.total_covid_obligation -FROM vw_es_award_search -; +FROM "vw_es_award_search"; diff --git a/usaspending_api/database_scripts/etl/transaction_delta_view.sql b/usaspending_api/database_scripts/etl/transaction_delta_view.sql index 73fd7bef5a..f74cecffbf 100644 --- a/usaspending_api/database_scripts/etl/transaction_delta_view.sql +++ b/usaspending_api/database_scripts/etl/transaction_delta_view.sql @@ -1,107 +1,105 @@ --- Needs to be present in the Postgres DB if data needs to be retrieved for Elasticsearch DROP VIEW IF EXISTS transaction_delta_view; CREATE VIEW transaction_delta_view AS SELECT - "transaction_id", - "award_id", - "modification_number", - "detached_award_proc_unique", - "afa_generated_unique", - "generated_unique_award_id", - "fain", - "uri", - "piid", - CASE - WHEN "detached_award_proc_unique" IS NOT NULL THEN 'CONT_TX_' || "detached_award_proc_unique" - WHEN "afa_generated_unique" IS NOT NULL THEN 'ASST_TX_' || "afa_generated_unique" - ELSE NULL - END AS generated_unique_transaction_id, - CASE - WHEN "type" IN ('02', '03', '04', '05', '06', '10', '07', '08', '09', '11') AND "fain" IS NOT NULL THEN "fain" - WHEN "piid" IS NOT NULL THEN "piid" -- contracts. 
Did it this way to easily handle IDV contracts - ELSE "uri" - END AS display_award_id, - "action_date", - "fiscal_action_date", - "last_modified_date", - "fiscal_year", - "award_certified_date", - "award_fiscal_year", - "update_date", - "award_update_date", - "etl_update_date", - "period_of_performance_start_date", - "period_of_performance_current_end_date", - "type", - "type_description", - "award_category", - "transaction_description", - "award_amount", - "generated_pragmatic_obligation", - "federal_action_obligation", - "original_loan_subsidy_cost", - "face_value_loan_guarantee", - "business_categories", - "naics_code", - "naics_description", - "product_or_service_code", - "product_or_service_description", - "type_of_contract_pricing", - "type_set_aside", - "extent_competed", - "ordering_period_end_date", - "cfda_number", - "cfda_title", - "pop_country_name", - "pop_country_code", - "pop_state_code", - "pop_county_code", - "pop_county_name", - "pop_zip5", - "pop_congressional_code", - "pop_city_name", - "recipient_location_country_code", - "recipient_location_country_name", - "recipient_location_state_code", - "recipient_location_county_code", - "recipient_location_county_name", - "recipient_location_congressional_code", - "recipient_location_zip5", - "recipient_location_city_name", - "recipient_hash", - "recipient_name", - "recipient_unique_id", - "parent_recipient_hash", - "parent_recipient_name", - "parent_recipient_unique_id", - "awarding_agency_id", - "funding_agency_id", - "awarding_toptier_agency_name", - "funding_toptier_agency_name", - "awarding_subtier_agency_name", - "funding_subtier_agency_name", - "awarding_toptier_agency_abbreviation", - "funding_toptier_agency_abbreviation", - "awarding_subtier_agency_abbreviation", - "funding_subtier_agency_abbreviation", - "tas_paths", - "tas_components", - "federal_accounts", - "disaster_emergency_fund_codes", - "recipient_location_county_agg_key", - "recipient_location_congressional_agg_key", - 
"recipient_location_state_agg_key", - "pop_county_agg_key", - "pop_congressional_agg_key", - "pop_state_agg_key", - "pop_country_agg_key", - "awarding_toptier_agency_agg_key", - "funding_toptier_agency_agg_key", - "awarding_subtier_agency_agg_key", - "funding_subtier_agency_agg_key", - "psc_agg_key", - "naics_agg_key", - "recipient_agg_key" + "transaction_id", + "award_id", + "modification_number", + "detached_award_proc_unique", + "afa_generated_unique", + "generated_unique_award_id", + "piid", + "fain", + "uri", + CASE + WHEN "detached_award_proc_unique" IS NOT NULL THEN 'CONT_TX_' || "detached_award_proc_unique" + WHEN "afa_generated_unique" IS NOT NULL THEN 'ASST_TX_' || "afa_generated_unique" + ELSE NULL + END AS generated_unique_transaction_id, + CASE + WHEN "type" IN ('02', '03', '04', '05', '06', '10', '07', '08', '09', '11') AND "fain" IS NOT NULL THEN "fain" + WHEN "piid" IS NOT NULL THEN "piid" -- contracts. Did it this way to easily handle IDV contracts + ELSE "uri" + END AS display_award_id, + "action_date", + "fiscal_action_date", + "last_modified_date", + "fiscal_year", + "award_certified_date", + "award_fiscal_year", + "update_date", + "award_update_date", + "etl_update_date", + "period_of_performance_start_date", + "period_of_performance_current_end_date", + "ordering_period_end_date", + "type", + "type_description", + "award_category", + "transaction_description", + "award_amount", + "generated_pragmatic_obligation", + "federal_action_obligation", + "original_loan_subsidy_cost", + "face_value_loan_guarantee", + "business_categories", + "naics_code", + "naics_description", + "product_or_service_code", + "product_or_service_description", + "type_of_contract_pricing", + "type_set_aside", + "extent_competed", + "cfda_number", + "cfda_title", + "pop_country_name", + "pop_country_code", + "pop_state_name", + "pop_state_code", + "pop_state_fips", + "pop_state_population", + "pop_county_code", + "pop_county_name", + "pop_county_population", + "pop_zip5", 
+ "pop_congressional_code", + "pop_congressional_population", + "pop_city_name", + "recipient_location_country_code", + "recipient_location_country_name", + "recipient_location_state_name", + "recipient_location_state_code", + "recipient_location_state_fips", + "recipient_location_state_population", + "recipient_location_county_code", + "recipient_location_county_name", + "recipient_location_county_population", + "recipient_location_congressional_code", + "recipient_location_congressional_population", + "recipient_location_zip5", + "recipient_location_city_name", + "recipient_hash", + "recipient_name", + "recipient_levels", + "recipient_unique_id", + "parent_recipient_hash", + "parent_recipient_name", + "parent_recipient_unique_id", + "awarding_agency_id", + "funding_agency_id", + "awarding_toptier_agency_id", + "funding_toptier_agency_id", + "awarding_toptier_agency_name", + "funding_toptier_agency_name", + "awarding_subtier_agency_name", + "funding_subtier_agency_name", + "awarding_toptier_agency_abbreviation", + "funding_toptier_agency_abbreviation", + "awarding_subtier_agency_abbreviation", + "funding_subtier_agency_abbreviation", + "tas_paths", + "tas_components", + "federal_accounts", + "disaster_emergency_fund_codes" FROM "universal_transaction_matview" WHERE "action_date" >= '2007-10-01'; diff --git a/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json index 84ed4e5f87..6418669c88 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json @@ -23,23 +23,8 @@ " 0::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN 
recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fpds.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fpds.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id 
FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " transaction_fpds.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fpds.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fpds.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fpds.place_of_perform_city_name) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " NULL::text AS 
cfda_program_title,", " NULL::text AS cfda_number,", @@ -115,77 +94,6 @@ " psc.description AS product_or_service_description,", " transaction_fpds.naics AS naics_code,", " transaction_fpds.naics_description,", - " CASE", - " WHEN", - " transaction_fpds.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fpds.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN transaction_fpds.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', 
rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - - " CASE", - " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fpds.place_of_performance_state 
IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", @@ -287,7 +195,6 @@ ") AND recipient_name IS NOT NULL", "AND recipient_level != 'P'", "GROUP BY recipient_hash, recipient_unique_id", - "LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json index aeb215e8aa..4b895bb51d 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json @@ -23,23 +23,8 @@ " 0::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " 
'\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS 
recipient_location_congressional_code,", " transaction_fabs.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fabs.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fabs.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", @@ -115,78 +94,6 @@ " NULL::text AS naics_code,", " NULL::text AS naics_description,", "", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " 
'\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - "", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', 
transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", @@ -285,7 +192,6 @@ ") AND recipient_name IS NOT NULL", "AND recipient_level != 'P'", "GROUP BY recipient_hash, recipient_unique_id", - "LIMIT 1", ") 
recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json index 960a2fc7ee..4619bee7ec 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json @@ -23,23 +23,8 @@ " 0::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " 
'\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " transaction_fabs.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fabs.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", 
" pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fabs.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", @@ -114,78 +93,6 @@ " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", " NULL::text AS naics_description,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, 
'^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - "", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", @@ -285,7 +192,6 @@ ") AND recipient_name IS NOT NULL", "AND recipient_level != 'P'", "GROUP BY recipient_hash, recipient_unique_id", - "LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json index 2ec0ad1a58..a53e83e07b 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json @@ -23,23 +23,8 @@ " 0::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, 
transaction_fpds.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fpds.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fpds.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - 
" )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " transaction_fpds.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fpds.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fpds.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fpds.place_of_perform_city_name) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " 
POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " NULL::text AS cfda_program_title,", " NULL::text AS cfda_number,", @@ -114,78 +93,6 @@ " psc.description AS product_or_service_description,", " transaction_fpds.naics AS naics_code,", " transaction_fpds.naics_description,", - " CASE", - " WHEN", - " transaction_fpds.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fpds.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN 
transaction_fpds.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - "", - " CASE", - " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE 
NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fpds.place_of_performance_state IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", @@ -287,7 +194,6 @@ ") AND recipient_name IS NOT NULL", "AND recipient_level != 'P'", "GROUP BY recipient_hash, recipient_unique_id", - "LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json index 1d1e2017df..d16e6c7f9d 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json @@ -23,23 +23,8 @@ " COALESCE(awards.total_loan_value, 0)::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', 
transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " transaction_fabs.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fabs.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fabs.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", @@ -115,78 +94,6 @@ " NULL::text AS naics_code,", " NULL::text AS naics_description,", "", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', 
rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - "", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS 
smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", 
@@ -285,7 +192,6 @@ " ) AND recipient_name IS NOT NULL", " AND recipient_level != 'P'", " GROUP BY recipient_hash, recipient_unique_id", - " LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json index 9535c2d131..6556f80b29 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json @@ -23,23 +23,8 @@ " 0::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -65,24 +50,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS 
funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -92,6 +61,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " transaction_fabs.legal_entity_zip5 AS recipient_location_zip5,", " TRIM(TRAILING FROM transaction_fabs.legal_entity_city_name) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS 
recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -102,6 +76,11 @@ " transaction_fabs.place_of_performance_zip5 AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", @@ -114,78 +93,6 @@ " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", " NULL::text AS naics_description,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS 
recipient_location_county_agg_key,", - " CASE", - " WHEN", - " transaction_fabs.legal_entity_state_code IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - "", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", - " 
'\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", @@ -285,7 +192,6 @@ ") AND recipient_name IS NOT NULL", "AND recipient_level != 'P'", "GROUP BY recipient_hash, recipient_unique_id", - "LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json index 0cc420793b..b254c11b34 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json +++ 
b/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json @@ -33,23 +33,8 @@ " COALESCE(awards.total_loan_value, 0)::NUMERIC(23, 2) AS total_loan_value,", "", " recipient_profile.recipient_hash,", + " recipient_profile.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", - " CASE", - " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", - " THEN", - " CONCAT(", - " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu),", - " '\",\"hash\":\"\",\"levels\":\"\"}'", - " )", - " ELSE", - " CONCAT(", - " '{\"name\":\"', recipient_lookup.recipient_name,", - " '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu),", - " '\",\"hash\":\"', recipient_profile.recipient_hash,", - " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", - " )", - " END AS recipient_agg_key,", " COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) AS recipient_unique_id,", " COALESCE(transaction_fpds.ultimate_parent_unique_ide, transaction_fabs.ultimate_parent_unique_ide) AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -75,24 +60,8 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', TFA.name,", - " '\",\"code\":\"', TFA.toptier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT 
a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT(", - " '{\"name\":\"', SFA.name,", - " '\",\"code\":\"', SFA.subtier_code,", - " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", - " )", - " ELSE NULL", - " END AS funding_subtier_agency_agg_key,", + " (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1) AS funding_toptier_agency_id,", + " latest_transaction.funding_agency_id AS funding_subtier_agency_id,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -102,6 +71,11 @@ " LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS recipient_location_congressional_code,", " COALESCE(transaction_fpds.legal_entity_zip5, transaction_fabs.legal_entity_zip5) AS recipient_location_zip5,", " TRIM(TRAILING FROM COALESCE(transaction_fpds.legal_entity_city_name, transaction_fabs.legal_entity_city_name)) AS recipient_location_city_name,", + " RL_STATE_LOOKUP.name AS recipient_location_state_name,", + " RL_STATE_LOOKUP.fips AS recipient_location_state_fips,", + " RL_STATE_POPULATION.latest_population AS recipient_location_state_population,", + " RL_COUNTY_POPULATION.latest_population AS recipient_location_county_population,", + " RL_DISTRICT_POPULATION.latest_population AS 
recipient_location_congressional_population,", "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", @@ -112,6 +86,11 @@ " COALESCE(transaction_fpds.place_of_performance_zip5, transaction_fabs.place_of_performance_zip5) AS pop_zip5,", " LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM COALESCE(transaction_fpds.place_of_perform_city_name, transaction_fabs.place_of_performance_city)) AS pop_city_name,", + " POP_STATE_LOOKUP.name AS pop_state_name,", + " POP_STATE_LOOKUP.fips AS pop_state_fips,", + " POP_STATE_POPULATION.latest_population AS pop_state_population,", + " POP_COUNTY_POPULATION.latest_population AS pop_county_population,", + " POP_DISTRICT_POPULATION.latest_population AS pop_congressional_population,", "", " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", @@ -125,78 +104,6 @@ " transaction_fpds.naics AS naics_code,", " transaction_fpds.naics_description,", "", - " CASE", - " WHEN", - " COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, 
'0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN", - " COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", - " AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - " ", - " CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND 
pop_country_lookup.country_code IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na, transaction_fabs.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_congressional_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL", - " THEN CONCAT(", - " '{\"country_code\":\"', 
pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - "", " TREASURY_ACCT.tas_paths,", " TREASURY_ACCT.tas_components,", " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", @@ -300,7 +207,6 @@ " ) AND recipient_name IS NOT NULL", " AND recipient_level != 'P'", " GROUP BY recipient_hash, recipient_unique_id", - " LIMIT 1", ") recipient_profile ON TRUE", "LEFT JOIN (", " -- Get awards with COVID-related data", diff --git a/usaspending_api/database_scripts/matview_generator/universal_transaction_matview.json b/usaspending_api/database_scripts/matview_generator/universal_transaction_matview.json index c218baafff..4ee7a2c795 100644 --- a/usaspending_api/database_scripts/matview_generator/universal_transaction_matview.json +++ b/usaspending_api/database_scripts/matview_generator/universal_transaction_matview.json @@ -57,15 +57,21 @@ "", " pop_country_lookup.country_name AS pop_country_name,", " pop_country_lookup.country_code AS pop_country_code,", + " POP_STATE_LOOKUP.name AS pop_state_name,", " COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) AS pop_state_code,", " LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') AS pop_county_code,", " COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na, transaction_fabs.place_of_perform_county_na) AS pop_county_name,", " COALESCE(transaction_fpds.place_of_performance_zip5, transaction_fabs.place_of_performance_zip5) AS pop_zip5,", " 
LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", + " POP_DISTRICT_POPULATION.latest_population as pop_congressional_population,", + " POP_COUNTY_POPULATION.latest_population as pop_county_population,", + " POP_STATE_LOOKUP.fips as pop_state_fips,", + " POP_STATE_POPULATION.latest_population as pop_state_population,", " TRIM(TRAILING FROM COALESCE(transaction_fpds.place_of_perform_city_name, transaction_fabs.place_of_performance_city)) AS pop_city_name,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", + " RL_STATE_LOOKUP.name as recipient_location_state_name,", " COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) AS recipient_location_state_code,", " LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') AS recipient_location_county_code,", " COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name, transaction_fabs.legal_entity_county_name) AS recipient_location_county_name,", @@ -78,16 +84,17 @@ " WHEN COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) IS NOT NULL THEN CONCAT('duns-', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu))", " ELSE CONCAT('name-', COALESCE(transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) END", " ))::uuid) AS recipient_hash,", + " RECIPIENT_HASH_AND_LEVELS.recipient_levels,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", 
- " COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) AS recipient_unique_id,", + " COALESCE(recipient_lookup.duns, transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) AS recipient_unique_id,", " PRL.recipient_hash AS parent_recipient_hash,", " UPPER(PRL.legal_business_name) AS parent_recipient_name,", " COALESCE(transaction_fpds.ultimate_parent_unique_ide, transaction_fabs.ultimate_parent_unique_ide) AS parent_recipient_unique_id,", "", + " (SELECT a.id FROM agency a WHERE a.toptier_agency_id = TAA.toptier_agency_id AND a.toptier_flag = TRUE) as awarding_toptier_agency_id,", + " (SELECT a.id FROM agency a WHERE a.toptier_agency_id = TFA.toptier_agency_id AND a.toptier_flag = TRUE) as funding_toptier_agency_id,", " transaction_normalized.awarding_agency_id,", " transaction_normalized.funding_agency_id,", - " AA.toptier_agency_id AS awarding_toptier_agency_id,", - " FA.toptier_agency_id AS funding_toptier_agency_id,", " TAA.name AS awarding_toptier_agency_name,", " TFA.name AS funding_toptier_agency_name,", " SAA.name AS awarding_subtier_agency_name,", @@ -102,129 +109,10 @@ " TREASURY_ACCT.tas_components,", " FEDERAL_ACCT.federal_accounts,", " FEDERAL_ACCT.defc AS disaster_emergency_fund_codes,", - "", - " CASE", - " WHEN COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', 
LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name, transaction_fabs.legal_entity_county_name),", - " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_county_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_congressional_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', rl_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", - " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", - " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS recipient_location_state_agg_key,", - 
" CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co) IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", - " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na, transaction_fabs.place_of_perform_county_na),", - " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_county_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", - " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", - " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS 
pop_congressional_agg_key,", - " CASE", - " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", - " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", - " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", - " )", - " ELSE NULL", - " END AS pop_state_agg_key,", - " CASE", - " WHEN pop_country_lookup.country_code IS NOT NULL", - " THEN CONCAT('{\"country_code\":\"', pop_country_lookup.country_code,", - " '\",\"country_name\":\"', pop_country_lookup.country_name, '\"}'", - " )", - " ELSE NULL", - " END AS pop_country_agg_key,", - " CASE", - " WHEN TAA.name IS NOT NULL", - " THEN CONCAT('{\"name\":\"', TAA.name,", - " '\",\"abbreviation\":\"', TAA.abbreviation,", - " '\",\"id\":\"', (SELECT a.id FROM agency a WHERE a.toptier_agency_id = TAA.toptier_agency_id AND a.toptier_flag = TRUE), '\"}'", - " )", - " ELSE NULL", - " END AS awarding_toptier_agency_agg_key,", - " CASE", - " WHEN TFA.name IS NOT NULL", - " THEN CONCAT('{\"name\":\"', TFA.name,", - " '\",\"abbreviation\":\"', TFA.abbreviation,", - " '\",\"id\":\"', (SELECT a.id FROM agency a WHERE a.toptier_agency_id = TFA.toptier_agency_id AND a.toptier_flag = TRUE), '\"}'", - " )", - " ELSE NULL", - " END AS funding_toptier_agency_agg_key,", - " CASE", - " WHEN SAA.name IS NOT NULL", - " THEN CONCAT('{\"name\":\"', SAA.name,", - " '\",\"abbreviation\":\"', SAA.abbreviation,", - " '\",\"id\":\"', transaction_normalized.awarding_agency_id, '\"}'", - " )", - " ELSE NULL", - " END AS awarding_subtier_agency_agg_key,", - " CASE", - " WHEN SFA.name IS NOT NULL", - " THEN CONCAT('{\"name\":\"', SFA.name,", - " '\",\"abbreviation\":\"', SFA.abbreviation,", - " '\",\"id\":\"', transaction_normalized.funding_agency_id, '\"}'", - " )", - " 
ELSE NULL", - " END AS funding_subtier_agency_agg_key,", - " CASE", - " WHEN transaction_fpds.product_or_service_code IS NOT NULL", - " THEN CONCAT(", - " '{\"code\":\"', transaction_fpds.product_or_service_code,", - " '\",\"description\":\"', psc.description, '\"}'", - " )", - " ELSE NULL", - " END AS psc_agg_key,", - " CASE", - " WHEN transaction_fpds.naics IS NOT NULL", - " THEN CONCAT('{\"code\":\"', transaction_fpds.naics, '\",\"description\":\"', naics.description, '\"}')", - " ELSE NULL", - " END AS naics_agg_key,", - " CASE", - " WHEN RECIPIENT_HASH_AND_LEVEL.recipient_hash IS NULL or RECIPIENT_HASH_AND_LEVEL.recipient_level IS NULL", - " THEN CONCAT('{\"hash_with_level\": \"\",\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)), '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu), '\"}')", - " ELSE", - " CONCAT(", - " '{\"hash_with_level\":\"', CONCAT(RECIPIENT_HASH_AND_LEVEL.recipient_hash, '-', RECIPIENT_HASH_AND_LEVEL.recipient_level),", - " '\",\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)),", - " '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu), '\"}'", - " )", - " END AS recipient_agg_key", + " RL_STATE_LOOKUP.fips as recipient_location_state_fips,", + " RL_DISTRICT_POPULATION.latest_population as recipient_location_congressional_population,", + " RL_COUNTY_POPULATION.latest_population as recipient_location_county_population,", + " RL_STATE_POPULATION.latest_population as recipient_location_state_population", "FROM", " transaction_normalized", "LEFT OUTER JOIN", @@ -271,54 +159,49 @@ " faba.award_id", ") tas ON (tas.award_id = transaction_normalized.award_id)", "LEFT OUTER JOIN", - " (SELECT DISTINCT ON 
(state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS rl_county_lookup on", - " rl_county_lookup.state_alpha = COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) and", + " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS rl_county_lookup ON", + " rl_county_lookup.state_alpha = COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) AND", " rl_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", "LEFT OUTER JOIN", - " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", - " pop_county_lookup.state_alpha = COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) and", + " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup ON", + " pop_county_lookup.state_alpha = COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) AND", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", "LEFT OUTER JOIN", - " ref_country_code AS pop_country_lookup on (", + " ref_country_code AS pop_country_lookup ON (", " pop_country_lookup.country_code = COALESCE(transaction_fpds.place_of_perform_country_c, transaction_fabs.place_of_perform_country_c, 'USA')", " OR 
pop_country_lookup.country_name = COALESCE(transaction_fpds.place_of_perform_country_c, transaction_fabs.place_of_perform_country_c))", "LEFT OUTER JOIN", - " ref_country_code AS rl_country_lookup on (", + " ref_country_code AS rl_country_lookup ON (", " rl_country_lookup.country_code = COALESCE(transaction_fpds.legal_entity_country_code, transaction_fabs.legal_entity_country_code, 'USA')", " OR rl_country_lookup.country_name = COALESCE(transaction_fpds.legal_entity_country_code, transaction_fabs.legal_entity_country_code))", "LEFT JOIN recipient_lookup PRL ON (PRL.duns = COALESCE(transaction_fpds.ultimate_parent_unique_ide, transaction_fabs.ultimate_parent_unique_ide))", "LEFT JOIN LATERAL (", - " SELECT recipient_hash, recipient_level, recipient_unique_id", + " SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", " FROM recipient_profile", " WHERE (", " recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(CASE WHEN COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) IS NOT NULL THEN CONCAT('duns-', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu)) ELSE CONCAT('name-', COALESCE(transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) END))::uuid)", - " or recipient_unique_id = recipient_lookup.duns) AND", - " recipient_name NOT IN (", - " 'MULTIPLE RECIPIENTS',", - " 'REDACTED DUE TO PII',", - " 'MULTIPLE FOREIGN RECIPIENTS',", - " 'PRIVATE INDIVIDUAL',", - " 'INDIVIDUAL RECIPIENT',", - " 'MISCELLANEOUS FOREIGN AWARDEES'", - " ) AND recipient_name IS NOT NULL", - " ORDER BY CASE", - " WHEN recipient_level = 'C' then 0", - " WHEN recipient_level = 'R' then 1", - " ELSE 2", - " END ASC", - " LIMIT 1", - ") RECIPIENT_HASH_AND_LEVEL ON TRUE", + " OR recipient_unique_id = recipient_lookup.duns", + " ) AND recipient_name NOT IN (", + " 'MULTIPLE RECIPIENTS',", + " 'REDACTED DUE TO 
PII',", + " 'MULTIPLE FOREIGN RECIPIENTS',", + " 'PRIVATE INDIVIDUAL',", + " 'INDIVIDUAL RECIPIENT',", + " 'MISCELLANEOUS FOREIGN AWARDEES'", + " ) AND recipient_name IS NOT NULL", + " GROUP BY recipient_hash, recipient_unique_id", + ") RECIPIENT_HASH_AND_LEVELS ON TRUE", "LEFT JOIN (", - " SELECT code, name, fips, MAX(id)", - " FROM state_data", + " SELECT code, name, fips, MAX(id)", + " FROM state_data", " GROUP BY code, name, fips", ") POP_STATE_LOOKUP ON (POP_STATE_LOOKUP.code = COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code))", "LEFT JOIN ref_population_county POP_STATE_POPULATION ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", "LEFT JOIN ref_population_county POP_COUNTY_POPULATION ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", "LEFT JOIN ref_population_cong_district POP_DISTRICT_POPULATION ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", "LEFT JOIN (", - " SELECT code, name, fips, MAX(id)", - " FROM state_data", + " SELECT code, name, fips, MAX(id)", + " FROM state_data", " GROUP BY code, name, fips", ") RL_STATE_LOOKUP ON (RL_STATE_LOOKUP.code = COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code))", "LEFT JOIN ref_population_county RL_STATE_POPULATION ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", diff --git 
a/usaspending_api/disaster/tests/fixtures/disaster_account_data.py b/usaspending_api/disaster/tests/fixtures/disaster_account_data.py index d9bb518456..61447c88f4 100644 --- a/usaspending_api/disaster/tests/fixtures/disaster_account_data.py +++ b/usaspending_api/disaster/tests/fixtures/disaster_account_data.py @@ -17,7 +17,7 @@ def disaster_account_data(): ag1 = mommy.make("references.Agency", id=1, toptier_agency=ta1, subtier_agency=sa1, toptier_flag=True) ag2 = mommy.make("references.Agency", id=2, toptier_agency=ta2, subtier_agency=sa2, toptier_flag=True) - ag3 = mommy.make("references.Agency", id=3, toptier_agency=ta2, subtier_agency=sa3) + ag3 = mommy.make("references.Agency", id=3, toptier_agency=ta2, subtier_agency=sa3, toptier_flag=False) mommy.make("references.Agency", id=4, toptier_agency=ta3, subtier_agency=sa4, toptier_flag=True) dsws1 = mommy.make( diff --git a/usaspending_api/disaster/tests/fixtures/overview_data.py b/usaspending_api/disaster/tests/fixtures/overview_data.py index 154a84f306..624968a5e5 100644 --- a/usaspending_api/disaster/tests/fixtures/overview_data.py +++ b/usaspending_api/disaster/tests/fixtures/overview_data.py @@ -22,6 +22,7 @@ EARLY_GTAS_BUDGETARY_RESOURCES = 0.20 EARLY_GTAS_OUTLAY = 0.02 +EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE = 0.05 UNOBLIGATED_GTAS_BUDGETARY_RESOURCES = 1.5 @@ -62,6 +63,7 @@ def late_gtas(defc_codes): total_budgetary_resources_cpe=LATE_GTAS_BUDGETARY_RESOURCES, budget_authority_appropriation_amount_cpe=LATE_GTAS_APPROPRIATION, other_budgetary_resources_amount_cpe=0.1, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, gross_outlay_amount_by_tas_cpe=LATE_GTAS_OUTLAY, ) @@ -78,6 +80,7 @@ def quarterly_gtas(defc_codes): total_budgetary_resources_cpe=QUARTERLY_GTAS_BUDGETARY_RESOURCES, budget_authority_appropriation_amount_cpe=0.25, other_budgetary_resources_amount_cpe=0.0, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, 
gross_outlay_amount_by_tas_cpe=0.02, ) @@ -93,6 +96,7 @@ def early_gtas(defc_codes): total_budgetary_resources_cpe=EARLY_GTAS_BUDGETARY_RESOURCES, budget_authority_appropriation_amount_cpe=0.19, other_budgetary_resources_amount_cpe=0.0, + budget_authority_unobligated_balance_brought_forward_cpe=EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE, gross_outlay_amount_by_tas_cpe=0.02, ) @@ -108,6 +112,7 @@ def non_covid_gtas(defc_codes): total_budgetary_resources_cpe=0.32, budget_authority_appropriation_amount_cpe=0.31, other_budgetary_resources_amount_cpe=0.0, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, gross_outlay_amount_by_tas_cpe=0.13, ) @@ -123,6 +128,7 @@ def unobligated_balance_gtas(defc_codes): total_budgetary_resources_cpe=1.5, budget_authority_appropriation_amount_cpe=0.74, other_budgetary_resources_amount_cpe=0.74, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, gross_outlay_amount_by_tas_cpe=0.0, ) @@ -138,6 +144,7 @@ def other_budget_authority_gtas(defc_codes): total_budgetary_resources_cpe=0.85, budget_authority_appropriation_amount_cpe=0.69, other_budgetary_resources_amount_cpe=0.14, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, gross_outlay_amount_by_tas_cpe=0.02, ) @@ -168,6 +175,7 @@ def _year_2_gtas(code): total_budgetary_resources_cpe=YEAR_TWO_GTAS_BUDGETARY_RESOURCES, budget_authority_appropriation_amount_cpe=YEAR_TWO_GTAS_APPROPRIATION, other_budgetary_resources_amount_cpe=0.0, + budget_authority_unobligated_balance_brought_forward_cpe=0.0, gross_outlay_amount_by_tas_cpe=YEAR_TWO_OUTLAY, ) diff --git a/usaspending_api/disaster/tests/integration/test_overview.py b/usaspending_api/disaster/tests/integration/test_overview.py index d7aba958fa..373bcd91a1 100644 --- a/usaspending_api/disaster/tests/integration/test_overview.py +++ b/usaspending_api/disaster/tests/integration/test_overview.py @@ -10,6 +10,7 @@ LATE_GTAS_OUTLAY, EARLY_GTAS_BUDGETARY_RESOURCES, EARLY_GTAS_OUTLAY, + 
EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE, LATE_GTAS_APPROPRIATION, UNOBLIGATED_GTAS_BUDGETARY_RESOURCES, YEAR_TWO_GTAS_BUDGETARY_RESOURCES, @@ -30,6 +31,9 @@ QUARTERLY_GTAS_BUDGETARY_RESOURCES = Decimal(f"{QUARTERLY_GTAS_BUDGETARY_RESOURCES}") EARLY_GTAS_BUDGETARY_RESOURCES = Decimal(f"{EARLY_GTAS_BUDGETARY_RESOURCES}") EARLY_GTAS_OUTLAY = Decimal(f"{EARLY_GTAS_OUTLAY}") +EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE = Decimal( + f"{EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE}" +) UNOBLIGATED_GTAS_BUDGETARY_RESOURCES = Decimal(f"{UNOBLIGATED_GTAS_BUDGETARY_RESOURCES}") YEAR_TWO_GTAS_BUDGETARY_RESOURCES = Decimal(f"{YEAR_TWO_GTAS_BUDGETARY_RESOURCES}") YEAR_TWO_GTAS_UNOBLIGATED_BALANCE = Decimal(f"{YEAR_TWO_GTAS_UNOBLIGATED_BALANCE}") @@ -43,11 +47,13 @@ def test_basic_data_set(client, monkeypatch, helpers, defc_codes, basic_ref_data resp = client.get(OVERVIEW_URL) assert resp.data == { "funding": BASIC_FUNDING, - "total_budget_authority": EARLY_GTAS_BUDGETARY_RESOURCES, + "total_budget_authority": EARLY_GTAS_BUDGETARY_RESOURCES + - EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE, "spending": { "award_obligations": Decimal("0.0"), "award_outlays": Decimal("0"), - "total_obligations": EARLY_GTAS_BUDGETARY_RESOURCES, + "total_obligations": EARLY_GTAS_BUDGETARY_RESOURCES + - EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE, "total_outlays": EARLY_GTAS_OUTLAY, }, } @@ -96,8 +102,14 @@ def test_exclude_gtas_for_incompleted_period( helpers.reset_dabs_cache() resp = client.get(OVERVIEW_URL) assert resp.data["funding"] == [{"amount": Decimal("0.2"), "def_code": "M"}] - assert resp.data["total_budget_authority"] == EARLY_GTAS_BUDGETARY_RESOURCES - assert resp.data["spending"]["total_obligations"] == EARLY_GTAS_BUDGETARY_RESOURCES + assert ( + resp.data["total_budget_authority"] + == EARLY_GTAS_BUDGETARY_RESOURCES - 
EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE + ) + assert ( + resp.data["spending"]["total_obligations"] + == EARLY_GTAS_BUDGETARY_RESOURCES - EARLY_GTAS_BUDGET_AUTHORITY_UNOBLIGATED_BALANCE_BROUGHT_FORWARD_CPE + ) assert resp.data["spending"]["total_outlays"] == EARLY_GTAS_OUTLAY diff --git a/usaspending_api/disaster/tests/integration/test_recipient_loans.py b/usaspending_api/disaster/tests/integration/test_recipient_loans.py index c7a370a18c..30ffd01a33 100644 --- a/usaspending_api/disaster/tests/integration/test_recipient_loans.py +++ b/usaspending_api/disaster/tests/integration/test_recipient_loans.py @@ -16,7 +16,7 @@ def test_correct_response_defc_no_results( ): setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) - resp = helpers.post_for_spending_endpoint(client, url, def_codes=["N"]) + resp = helpers.post_for_spending_endpoint(client, url, def_codes=["N"], sort="obligation") expected_results = [] assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results @@ -26,7 +26,7 @@ def test_correct_response_defc_no_results( def test_correct_response_single_defc(client, monkeypatch, helpers, elasticsearch_award_index, awards_and_transactions): setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) - resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L"]) + resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L"], sort="obligation") expected_results = [ { "code": "987654321", @@ -66,7 +66,7 @@ def test_correct_response_multiple_defc( ): setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) - resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L", "M"]) + resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L", "M"], sort="obligation") expected_results = [ { "code": "987654321", @@ -157,7 +157,7 @@ def test_correct_response_with_query(client, monkeypatch, helpers, elasticsearch assert resp.status_code == 
status.HTTP_200_OK assert resp.json()["results"] == expected_results - resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L", "M"], query="rec") + resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L", "M"], query="rec", sort="obligation") expected_results = [ { "code": "987654321", diff --git a/usaspending_api/disaster/tests/integration/test_recipient_spending.py b/usaspending_api/disaster/tests/integration/test_recipient_spending.py index 6560bd4f51..8c4d629c90 100644 --- a/usaspending_api/disaster/tests/integration/test_recipient_spending.py +++ b/usaspending_api/disaster/tests/integration/test_recipient_spending.py @@ -86,20 +86,20 @@ def test_correct_response_multiple_defc( "outlay": 1.0, }, { - "code": "096354360", + "code": "DUNS Number not provided", "award_count": 1, "description": "MULTIPLE RECIPIENTS", "id": None, - "obligation": 20000.0, - "outlay": 10000.0, + "obligation": 2000000.0, + "outlay": 1000000.0, }, { - "code": "DUNS Number not provided", + "code": "096354360", "award_count": 1, "description": "MULTIPLE RECIPIENTS", "id": None, - "obligation": 2000000.0, - "outlay": 1000000.0, + "obligation": 20000.0, + "outlay": 10000.0, }, ] assert resp.status_code == status.HTTP_200_OK diff --git a/usaspending_api/disaster/v2/views/agency/loans.py b/usaspending_api/disaster/v2/views/agency/loans.py index 20786c7900..1333852a3c 100644 --- a/usaspending_api/disaster/v2/views/agency/loans.py +++ b/usaspending_api/disaster/v2/views/agency/loans.py @@ -1,3 +1,4 @@ +import json import logging from decimal import Decimal @@ -6,7 +7,6 @@ from django.views.decorators.csrf import csrf_exempt from typing import List from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.disaster.v2.views.disaster_base import ( DisasterBase, LoansPaginationMixin, @@ -139,9 +139,9 @@ def build_elasticsearch_result(self, info_buckets: 
List[dict]) -> List[dict]: return results def _build_json_result(self, bucket: dict): - info = json_str_to_dict(bucket.get("key")) + info = json.loads(bucket.get("key")) return { - "id": int(info["id"]), + "id": info["id"], "code": info["code"], "description": info["name"], # the count of distinct awards contributing to the totals diff --git a/usaspending_api/disaster/v2/views/agency/spending.py b/usaspending_api/disaster/v2/views/agency/spending.py index 0ea2b04b20..cefaa92c49 100644 --- a/usaspending_api/disaster/v2/views/agency/spending.py +++ b/usaspending_api/disaster/v2/views/agency/spending.py @@ -1,3 +1,4 @@ +import json import logging from decimal import Decimal @@ -10,7 +11,6 @@ from typing import List from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.common.helpers.generic_helper import get_pagination_metadata from usaspending_api.disaster.v2.views.disaster_base import ( DisasterBase, @@ -233,9 +233,9 @@ def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: return results def _build_json_result(self, bucket: dict): - info = json_str_to_dict(bucket.get("key")) + info = json.loads(bucket.get("key")) return { - "id": int(info["id"]), + "id": info["id"], "code": info["code"], "description": info["name"], # the count of distinct awards contributing to the totals diff --git a/usaspending_api/disaster/v2/views/cfda/loans.py b/usaspending_api/disaster/v2/views/cfda/loans.py index f5817d46f6..4190e8f3f0 100644 --- a/usaspending_api/disaster/v2/views/cfda/loans.py +++ b/usaspending_api/disaster/v2/views/cfda/loans.py @@ -22,7 +22,6 @@ class CfdaLoansViewSet(ElasticsearchLoansPaginationMixin, ElasticsearchDisasterB def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: results = [] - cfda_prefetch_pks = [bucket.get("key") for bucket in info_buckets] prefetched_cfdas = { cfda["program_number"]: cfda for 
cfda in Cfda.objects.filter(program_number__in=cfda_prefetch_pks).values() diff --git a/usaspending_api/disaster/v2/views/overview.py b/usaspending_api/disaster/v2/views/overview.py index ba8132f838..e0040f884d 100644 --- a/usaspending_api/disaster/v2/views/overview.py +++ b/usaspending_api/disaster/v2/views/overview.py @@ -26,9 +26,8 @@ def get(self, request): request_values = self._parse_and_validate(request.GET) self.defc = request_values["def_codes"].split(",") - funding = self.funding() + funding, self.total_budget_authority = self.funding() - self.total_budget_authority = Decimal(sum([elem["amount"] for elem in funding])) return Response( {"funding": funding, "total_budget_authority": self.total_budget_authority, "spending": self.spending()} ) @@ -49,14 +48,25 @@ def _parse_and_validate(self, request): return TinyShield(models).block(request) def funding(self): - return list( + funding = list( latest_gtas_of_each_year_queryset() .filter(disaster_emergency_fund_code__in=self.defc) .values("disaster_emergency_fund_code") - .annotate(def_code=F("disaster_emergency_fund_code"), amount=Sum("total_budgetary_resources_cpe"),) - .values("def_code", "amount") + .annotate( + def_code=F("disaster_emergency_fund_code"), + amount=Sum("total_budgetary_resources_cpe"), + unobligated_balance=Sum("budget_authority_unobligated_balance_brought_forward_cpe"), + ) + .values("def_code", "amount", "unobligated_balance") ) + total_budget_authority = self.sum_values(funding, "amount") - self.sum_values(funding, "unobligated_balance") + + for entry in funding: + del entry["unobligated_balance"] + + return funding, total_budget_authority + def spending(self): remaining_balances = self.remaining_balances() award_obligations = self.award_obligations() @@ -105,3 +115,7 @@ def total_outlays(self): .aggregate(total=Sum("gross_outlay_amount_by_tas_cpe"))["total"] or 0.0 ) + + @staticmethod + def sum_values(list_of_objects: list, key_to_extract: str) -> Decimal: + return 
Decimal(sum([elem[key_to_extract] for elem in list_of_objects])) diff --git a/usaspending_api/disaster/v2/views/recipient/loans.py b/usaspending_api/disaster/v2/views/recipient/loans.py index 6ffd8f73f8..a6c6d3efe5 100644 --- a/usaspending_api/disaster/v2/views/recipient/loans.py +++ b/usaspending_api/disaster/v2/views/recipient/loans.py @@ -1,8 +1,7 @@ -import re +import json from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.disaster.v2.views.elasticsearch_base import ( ElasticsearchDisasterBase, ElasticsearchLoansPaginationMixin, @@ -26,11 +25,11 @@ class RecipientLoansViewSet(ElasticsearchLoansPaginationMixin, ElasticsearchDisa def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: results = [] for bucket in info_buckets: - info = json_str_to_dict(bucket.get("key")) + info = json.loads(bucket.get("key")) # Build a list of hash IDs to handle multiple levels recipient_hash = info.get("hash") - recipient_levels = sorted(list(re.sub("[{},]", "", info.get("levels", "")))) + recipient_levels = sorted(info.get("levels") or []) if recipient_hash and recipient_levels: recipient_hash_list = [f"{recipient_hash}-{level}" for level in recipient_levels] else: diff --git a/usaspending_api/disaster/v2/views/recipient/spending.py b/usaspending_api/disaster/v2/views/recipient/spending.py index 77784ea0ea..d85c9f56db 100644 --- a/usaspending_api/disaster/v2/views/recipient/spending.py +++ b/usaspending_api/disaster/v2/views/recipient/spending.py @@ -1,8 +1,7 @@ -import re +import json from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.disaster.v2.views.elasticsearch_base import ( ElasticsearchDisasterBase, ElasticsearchSpendingPaginationMixin, @@ -26,11 +25,11 @@ class RecipientSpendingViewSet(ElasticsearchSpendingPaginationMixin, Elasticsear def build_elasticsearch_result(self, info_buckets: List[dict]) -> 
List[dict]: results = [] for bucket in info_buckets: - info = json_str_to_dict(bucket.get("key")) + info = json.loads(bucket.get("key")) # Build a list of hash IDs to handle multiple levels recipient_hash = info.get("hash") - recipient_levels = sorted(list(re.sub("[{},]", "", info.get("levels", "")))) + recipient_levels = sorted(info.get("levels") or []) if recipient_hash and recipient_levels: recipient_hash_list = [f"{recipient_hash}-{level}" for level in recipient_levels] else: diff --git a/usaspending_api/disaster/v2/views/spending_by_geography.py b/usaspending_api/disaster/v2/views/spending_by_geography.py index b81196abd2..67e292bb88 100644 --- a/usaspending_api/disaster/v2/views/spending_by_geography.py +++ b/usaspending_api/disaster/v2/views/spending_by_geography.py @@ -1,3 +1,5 @@ +import json + from decimal import Decimal from enum import Enum from typing import Optional, List, Dict @@ -7,7 +9,6 @@ from elasticsearch_dsl import A, Q as ES_Q from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.common.elasticsearch.search_wrappers import AwardSearch from usaspending_api.common.exceptions import UnprocessableEntityException from usaspending_api.common.query_with_filters import QueryWithFilters @@ -156,8 +157,8 @@ def build_elasticsearch_result(self, response: dict) -> Dict[str, dict]: shape_code = None population = None else: - geo_info = json_str_to_dict(bucket.get("key")) - state_code = geo_info["state_code"] + geo_info = json.loads(bucket.get("key")) + state_code = geo_info["state_code"] or "" population = int(geo_info["population"]) if geo_info["population"] else None if self.geo_layer == GeoLayer.STATE: @@ -166,7 +167,7 @@ def build_elasticsearch_result(self, response: dict) -> Dict[str, dict]: display_name = display_name.title() elif self.geo_layer == GeoLayer.COUNTY: state_fips = geo_info["state_fips"] or code_to_state.get(state_code, 
{}).get("fips", "") - display_name = geo_info["county_name"].title() + display_name = (geo_info["county_name"] or "").title() shape_code = f"{state_fips}{geo_info['county_code']}" else: state_fips = geo_info["state_fips"] or code_to_state.get(state_code, {}).get("fips", "") diff --git a/usaspending_api/etl/elasticsearch_loader_helpers/aggregate_key_functions.py b/usaspending_api/etl/elasticsearch_loader_helpers/aggregate_key_functions.py new file mode 100644 index 0000000000..0207aee0ad --- /dev/null +++ b/usaspending_api/etl/elasticsearch_loader_helpers/aggregate_key_functions.py @@ -0,0 +1,181 @@ +import json +import logging + +from typing import Optional, List + + +logger = logging.getLogger("script") + + +def award_recipient_agg_key(record: dict) -> str: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record["recipient_hash"] is None or record["recipient_levels"] is None: + return json.dumps( + {"name": record["recipient_name"], "unique_id": record["recipient_unique_id"], "hash": "", "levels": ""} + ) + return json.dumps( + { + "name": record["recipient_name"], + "unique_id": record["recipient_unique_id"], + "hash": str(record["recipient_hash"]), + "levels": record["recipient_levels"], + } + ) + + +def transaction_recipient_agg_key(record: dict) -> str: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record["recipient_hash"] is None or record["recipient_levels"] is None: + return json.dumps( + {"name": record["recipient_name"], "unique_id": record["recipient_unique_id"], "hash_with_level": ""} + ) + return json.dumps( + { + "name": record["recipient_name"], + "unique_id": record["recipient_unique_id"], + "hash_with_level": f"{record['recipient_hash']}-{_return_one_level(record['recipient_levels'])}", + } + ) + + +def _return_one_level(levels: List[str]) -> Optional[str]: + """Return the most-desirable recipient level""" + for level in ("C", "R", "P"): # Child, "Recipient," or Parent + if level in levels: + return level + 
else: + return None + + +def awarding_subtier_agency_agg_key(record: dict) -> Optional[str]: + return _agency_agg_key("awarding", "subtier", record) + + +def awarding_toptier_agency_agg_key(record: dict) -> Optional[str]: + return _agency_agg_key("awarding", "toptier", record) + + +def funding_subtier_agency_agg_key(record: dict) -> Optional[str]: + return _agency_agg_key("funding", "subtier", record) + + +def funding_toptier_agency_agg_key(record: dict) -> Optional[str]: + return _agency_agg_key("funding", "toptier", record) + + +def _agency_agg_key(agency_type, agency_tier, record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record[f"{agency_type}_{agency_tier}_agency_name"] is None: + return None + result = {"name": record[f"{agency_type}_{agency_tier}_agency_name"]} + if f"{agency_type}_{agency_tier}_agency_abbreviation" in record: + result["abbreviation"] = record[f"{agency_type}_{agency_tier}_agency_abbreviation"] + if f"{agency_type}_{agency_tier}_agency_code" in record: + result["code"] = record[f"{agency_type}_{agency_tier}_agency_code"] + result["id"] = record[f"{agency_type}_toptier_agency_id"] + return json.dumps(result) + + +def naics_agg_key(record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record["naics_code"] is None: + return None + return json.dumps({"code": record["naics_code"], "description": record["naics_description"]}) + + +def psc_agg_key(record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record["product_or_service_code"] is None: + return None + return json.dumps( + {"code": record["product_or_service_code"], "description": record["product_or_service_description"]} + ) + + +def pop_county_agg_key(record: dict) -> Optional[str]: + return _county_agg_key("pop", record) + + +def recipient_location_county_agg_key(record: dict) -> Optional[str]: + return _county_agg_key("recipient_location", record) + 
+ +def _county_agg_key(location_type, record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record[f"{location_type}_state_code"] is None or record[f"{location_type}_county_code"] is None: + return None + return json.dumps( + { + "country_code": record[f"{location_type}_country_code"], + "state_code": record[f"{location_type}_state_code"], + "state_fips": record[f"{location_type}_state_fips"], + "county_code": record[f"{location_type}_county_code"], + "county_name": record[f"{location_type}_county_name"], + "population": record[f"{location_type}_county_population"], + } + ) + + +def pop_congressional_agg_key(record: dict) -> Optional[str]: + return _congressional_agg_key("pop", record) + + +def recipient_location_congressional_agg_key(record: dict) -> Optional[str]: + return _congressional_agg_key("recipient_location", record) + + +def _congressional_agg_key(location_type, record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record[f"{location_type}_state_code"] is None or record[f"{location_type}_congressional_code"] is None: + return None + return json.dumps( + { + "country_code": record[f"{location_type}_country_code"], + "state_code": record[f"{location_type}_state_code"], + "state_fips": record[f"{location_type}_state_fips"], + "congressional_code": record[f"{location_type}_congressional_code"], + "population": record[f"{location_type}_congressional_population"], + } + ) + + +def pop_state_agg_key(record: dict) -> Optional[str]: + return _state_agg_key("pop", record) + + +def recipient_location_state_agg_key(record: dict) -> Optional[str]: + return _state_agg_key("recipient_location", record) + + +def _state_agg_key(location_type, record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record[f"{location_type}_state_code"] is None: + return None + return json.dumps( + { + "country_code": 
record[f"{location_type}_country_code"], + "state_code": record[f"{location_type}_state_code"], + "state_name": record[f"{location_type}_state_name"], + "population": record[f"{location_type}_state_population"], + } + ) + + +def pop_country_agg_key(record: dict) -> Optional[str]: + return _country_agg_key("pop", record) + + +def recipient_location_country_agg_key(record: dict) -> Optional[str]: + return _country_agg_key("recipient_location", record) + + +def _country_agg_key(location_type, record: dict) -> Optional[str]: + """Dictionary key order impacts Elasticsearch behavior!!!""" + if record[f"{location_type}_country_code"] is None: + return None + return json.dumps( + { + "country_code": record[f"{location_type}_country_code"], + "country_name": record[f"{location_type}_country_name"], + } + ) diff --git a/usaspending_api/etl/elasticsearch_loader_helpers/controller.py b/usaspending_api/etl/elasticsearch_loader_helpers/controller.py index 72dd2561b5..7bd0e224fe 100644 --- a/usaspending_api/etl/elasticsearch_loader_helpers/controller.py +++ b/usaspending_api/etl/elasticsearch_loader_helpers/controller.py @@ -23,6 +23,7 @@ TaskSpec, toggle_refresh_on, ) +from usaspending_api.common.helpers.sql_helpers import close_all_django_db_conns logger = logging.getLogger("script") @@ -44,6 +45,7 @@ class Controller: def __init__(self, config): self.config = config + self.tasks = [] def prepare_for_etl(self) -> None: if self.config["process_deletes"]: @@ -95,6 +97,8 @@ def complete_process(self) -> None: logger.info(format_log("Closing old indices and adding aliases")) swap_aliases(client, self.config) + close_all_django_db_conns() + if self.config["is_incremental_load"]: toggle_refresh_on(client, self.config["index_name"]) logger.info( @@ -173,7 +177,11 @@ def extract_transform_load(task: TaskSpec) -> None: f"Prematurely ending partition #{task.partition_number} due to error in another process" logger.warning(format_log(msg, name=task.name)) return - success, fail = 
load_data(task, records, client) + if len(records) > 0: + success, fail = load_data(task, records, client) + else: + logger.info(format_log("No records to index", name=task.name)) + success, fail = 0, 0 with total_doc_success.get_lock(): total_doc_success.value += success with total_doc_fail.get_lock(): diff --git a/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py b/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py index c72fdc23e2..5e5b966bb4 100644 --- a/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py +++ b/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py @@ -4,6 +4,7 @@ from time import perf_counter from typing import Callable, Dict, List, Optional +from usaspending_api.etl.elasticsearch_loader_helpers import aggregate_key_functions as funcs from usaspending_api.etl.elasticsearch_loader_helpers.utilities import ( convert_postgres_json_array_to_list, format_log, @@ -16,14 +17,71 @@ def transform_award_data(worker: TaskSpec, records: List[dict]) -> List[dict]: converters = {} - return transform_data(worker, records, converters, settings.ES_ROUTING_FIELD) + agg_key_creations = { + "funding_subtier_agency_agg_key": funcs.funding_subtier_agency_agg_key, + "funding_toptier_agency_agg_key": funcs.funding_toptier_agency_agg_key, + "pop_congressional_agg_key": funcs.pop_congressional_agg_key, + "pop_county_agg_key": funcs.pop_county_agg_key, + "pop_state_agg_key": funcs.pop_state_agg_key, + "recipient_agg_key": funcs.award_recipient_agg_key, + "recipient_location_congressional_agg_key": funcs.recipient_location_congressional_agg_key, + "recipient_location_county_agg_key": funcs.recipient_location_county_agg_key, + "recipient_location_state_agg_key": funcs.recipient_location_state_agg_key, + } + drop_fields = [ + "recipient_levels", + "funding_toptier_agency_id", + "funding_subtier_agency_id", + "recipient_location_state_name", + "recipient_location_state_fips", + 
"recipient_location_state_population", + "recipient_location_county_population", + "recipient_location_congressional_population", + "pop_state_name", + "pop_state_fips", + "pop_state_population", + "pop_county_population", + "pop_congressional_population", + ] + return transform_data(worker, records, converters, agg_key_creations, drop_fields, settings.ES_ROUTING_FIELD) def transform_transaction_data(worker: TaskSpec, records: List[dict]) -> List[dict]: converters = { "federal_accounts": convert_postgres_json_array_to_list, } - return transform_data(worker, records, converters, settings.ES_ROUTING_FIELD) + agg_key_creations = { + "awarding_subtier_agency_agg_key": funcs.awarding_subtier_agency_agg_key, + "awarding_toptier_agency_agg_key": funcs.awarding_toptier_agency_agg_key, + "funding_subtier_agency_agg_key": funcs.funding_subtier_agency_agg_key, + "funding_toptier_agency_agg_key": funcs.funding_toptier_agency_agg_key, + "naics_agg_key": funcs.naics_agg_key, + "pop_congressional_agg_key": funcs.pop_congressional_agg_key, + "pop_country_agg_key": funcs.pop_country_agg_key, + "pop_county_agg_key": funcs.pop_county_agg_key, + "pop_state_agg_key": funcs.pop_state_agg_key, + "psc_agg_key": funcs.psc_agg_key, + "recipient_agg_key": funcs.transaction_recipient_agg_key, + "recipient_location_congressional_agg_key": funcs.recipient_location_congressional_agg_key, + "recipient_location_county_agg_key": funcs.recipient_location_county_agg_key, + "recipient_location_state_agg_key": funcs.recipient_location_state_agg_key, + } + drop_fields = [ + "pop_state_name", + "pop_state_fips", + "pop_state_population", + "pop_county_population", + "pop_congressional_population", + "recipient_location_state_name", + "recipient_location_state_fips", + "recipient_location_state_population", + "recipient_location_county_population", + "recipient_location_congressional_population", + "recipient_levels", + "awarding_toptier_agency_id", + "funding_toptier_agency_id", + ] + return 
transform_data(worker, records, converters, agg_key_creations, drop_fields, settings.ES_ROUTING_FIELD) def transform_covid19_faba_data(worker: TaskSpec, records: List[dict]) -> List[dict]: @@ -68,14 +126,22 @@ def transform_covid19_faba_data(worker: TaskSpec, records: List[dict]) -> List[d def transform_data( - worker: TaskSpec, records: List[dict], converters: Dict[str, Callable], routing_field: Optional[str] = None + worker: TaskSpec, + records: List[dict], + converters: Dict[str, Callable], + agg_key_creations: Dict[str, Callable], + drop_fields: List[str], + routing_field: Optional[str] = None, ) -> List[dict]: logger.info(format_log(f"Transforming data", name=worker.name, action="Transform")) + start = perf_counter() for record in records: for field, converter in converters.items(): record[field] = converter(record[field]) + for key, transform_func in agg_key_creations.items(): + record[key] = transform_func(record) # Route all documents with the same recipient to the same shard # This allows for accuracy and early-termination of "top N" recipient category aggregation queries @@ -92,6 +158,10 @@ def transform_data( # so docs must be deleted before UPSERTed. 
(More info in streaming_post_to_es(...)) record["_id"] = record[worker.field_for_es_id] + # Removing data which were used for creating aggregate keys and aren't necessary standalone + for key in drop_fields: + record.pop(key) + duration = perf_counter() - start logger.info(format_log(f"Transformation operation took {duration:.2f}s", name=worker.name, action="Transform")) return records diff --git a/usaspending_api/etl/elasticsearch_loader_helpers/utilities.py b/usaspending_api/etl/elasticsearch_loader_helpers/utilities.py index 3cda1f81b9..279e865fdc 100644 --- a/usaspending_api/etl/elasticsearch_loader_helpers/utilities.py +++ b/usaspending_api/etl/elasticsearch_loader_helpers/utilities.py @@ -8,7 +8,6 @@ from random import choice from typing import Any, Generator, List, Optional - from usaspending_api.common.helpers.sql_helpers import get_database_dsn_string logger = logging.getLogger("script") @@ -48,8 +47,6 @@ def convert_postgres_json_array_to_list(json_array: dict) -> Optional[List]: return None result = [] for j in json_array: - for key, value in j.items(): - j[key] = "" if value is None else str(j[key]) result.append(json.dumps(j, sort_keys=True)) return result diff --git a/usaspending_api/etl/management/commands/elasticsearch_indexer.py b/usaspending_api/etl/management/commands/elasticsearch_indexer.py index 88d6f59e77..c96efcfcf2 100644 --- a/usaspending_api/etl/management/commands/elasticsearch_indexer.py +++ b/usaspending_api/etl/management/commands/elasticsearch_indexer.py @@ -111,7 +111,7 @@ def handle(self, *args, **options): start_msg = "target index: {index_name} | Starting from: {starting_date}" logger.info(format_log(start_msg.format(**config))) - ensure_view_exists(config["sql_view"]) + ensure_view_exists(config["sql_view"], force=True) error_addition = "" loader = Controller(config) diff --git a/usaspending_api/etl/management/commands/es_rapidloader.py b/usaspending_api/etl/management/commands/es_rapidloader.py deleted file mode 100644 index 
a1dfb972d5..0000000000 --- a/usaspending_api/etl/management/commands/es_rapidloader.py +++ /dev/null @@ -1,1185 +0,0 @@ -import json -import logging -import os -import pandas as pd -import subprocess - -from collections import defaultdict -from datetime import datetime, timezone -from django.conf import settings -from django.core.management import call_command -from django.core.management.base import BaseCommand -from elasticsearch import helpers, TransportError, Elasticsearch -from elasticsearch_dsl import Search, Q as ES_Q -from multiprocessing import Process, Queue -from pathlib import Path -from time import perf_counter, sleep -from typing import Optional -from typing import Tuple - -from usaspending_api.awards.v2.lookups.elasticsearch_lookups import INDEX_ALIASES_TO_AWARD_TYPES -from usaspending_api.broker.helpers.last_load_date import get_last_load_date -from usaspending_api.broker.helpers.last_load_date import update_last_load_date -from usaspending_api.common.csv_helpers import count_rows_in_delimited_file -from usaspending_api.common.elasticsearch.client import instantiate_elasticsearch_client -from usaspending_api.common.elasticsearch.elasticsearch_sql_helpers import ensure_view_exists -from usaspending_api.common.helpers.date_helper import datetime_command_line_argument_type, fy as parse_fiscal_year -from usaspending_api.common.helpers.fiscal_year_helpers import create_fiscal_year_list -from usaspending_api.common.helpers.s3_helpers import retrieve_s3_bucket_object_list, access_s3_object -from usaspending_api.common.helpers.sql_helpers import get_database_dsn_string -from usaspending_api.etl.elasticsearch_loader_helpers import ( - check_awards_for_deletes, - chunks, - execute_sql_statement, -) - - -logger = logging.getLogger("script") - - -class Command(BaseCommand): - """ETL script for indexing transaction data into Elasticsearch - - HIGHLEVEL PROCESS OVERVIEW - 1. Generate the full list of fiscal years to process as jobs - 2. Iterate by job - a. 
Download a CSV file by year (one at a time) - i. Continue to download a CSV file until all years are downloaded - b. Upload a CSV to Elasticsearch - i. Continue to upload a CSV file until all years are uploaded to ES - c. Delete CSV file - TO RELOAD ALL data: - python3 manage.py es_rapidloader --index-name --create-new-index all - - Running with --new-index will trigger several actions: - 0. A view will be created in the source database for the ETL queries - 1. A new index will be created from the value provided by --index-name (obviously) - 2. A new index template will be loaded into the cluster to set mapping and index metadata - 3. All aliases used by the API queries will be re-assigned to the new index - 4. An alias for incremental indexes will be applied to the new index - 5. If any previous indexes existed with the API aliases, they will be deleted. - """ - - help = """Hopefully the code comments are helpful enough to figure this out....""" - - def add_arguments(self, parser): - parser.add_argument( - "fiscal_years", - nargs="+", - type=str, - metavar="fiscal-years", - help="Provide a list of fiscal years to process. For convenience, provide 'all' for FY2008 to current FY", - ) - parser.add_argument( - "--process-deletes", - action="store_true", - help="When this flag is set, the script will include the process to " - "obtain records of deleted transactions from S3 and remove from the index", - ) - parser.add_argument( - "--dir", - default=str(Path(__file__).resolve().parent), - type=str, - help="Set for a custom location of output files", - dest="directory", - ) - parser.add_argument( - "--skip-counts", - action="store_true", - help="When this flag is set, the ETL process will skip the record counts to reduce operation time", - ) - parser.add_argument( - "--index-name", - type=str, - help="Provide name for new index about to be created. 
Only used when --create-new-index is provided", - ) - parser.add_argument( - "--create-new-index", - action="store_true", - help="It needs a new unique index name and set aliases used by API logic to the new index", - ) - parser.add_argument( - "--snapshot", - action="store_true", - help="Create a new Elasticsearch snapshot of the current index state which is stored in S3", - ) - parser.add_argument( - "--start-datetime", - type=datetime_command_line_argument_type(naive=False), - help="Processes transactions updated on or after the UTC date/time provided. yyyy-mm-dd hh:mm:ss is always " - "a safe format. Wrap in quotes if date/time contains spaces.", - ) - parser.add_argument( - "--skip-delete-index", - action="store_true", - help="When creating a new index skip the step that deletes the old indexes and swaps the aliases. " - "Only used when --create-new-index is provided.", - ) - parser.add_argument( - "--load-type", - type=str, - help="Select which type of load to perform, current options are transactions or awards.", - choices=["transactions", "awards"], - default="transactions", - ) - parser.add_argument( - "--idle-wait-time", - type=int, - help="Time in seconds the ES index process should wait before looking for a new CSV data file.", - default=60, - ) - - def handle(self, *args, **options): - elasticsearch_client = instantiate_elasticsearch_client() - config = process_cli_parameters(options, elasticsearch_client) - - start = perf_counter() - logger.info(format_log(f"Starting script\n{'=' * 56}")) - start_msg = "target index: {index_name} | FY(s): {fiscal_years} | Starting from: {starting_date}" - logger.info(format_log(start_msg.format(**config))) - - if config["load_type"] == "transactions": - ensure_view_exists(settings.ES_TRANSACTIONS_ETL_VIEW_NAME) - elif config["load_type"] == "awards": - ensure_view_exists(settings.ES_AWARDS_ETL_VIEW_NAME) - - loader = Rapidloader(config, elasticsearch_client) - loader.run_load_steps() - loader.complete_process() - - 
logger.info(format_log("---------------------------------------------------------------")) - logger.info(format_log(f"Script completed in {perf_counter() - start:.2f}s")) - logger.info(format_log("---------------------------------------------------------------")) - - -def process_cli_parameters(options: dict, es_client) -> dict: - default_datetime = datetime.strptime(f"{settings.API_SEARCH_MIN_DATE}+0000", "%Y-%m-%d%z") - simple_args = ( - "skip_delete_index", - "process_deletes", - "create_new_index", - "snapshot", - "index_name", - "directory", - "skip_counts", - "load_type", - ) - config = set_config(simple_args, options) - - config["fiscal_years"] = fiscal_years_for_processing(options) - config["directory"] = Path(config["directory"]).resolve() - - if config["create_new_index"] and not config["index_name"]: - raise SystemExit("Fatal error: --create-new-index requires --index-name.") - elif config["create_new_index"]: - config["index_name"] = config["index_name"].lower() - config["starting_date"] = default_datetime - check_new_index_name_is_ok( - config["index_name"], - settings.ES_AWARDS_NAME_SUFFIX if config["load_type"] == "awards" else settings.ES_TRANSACTIONS_NAME_SUFFIX, - ) - elif options["start_datetime"]: - config["starting_date"] = options["start_datetime"] - else: - # Due to the queries used for fetching postgres data, - # `starting_date` needs to be present and a date before: - # - The earliest records in S3. - # - When all transaction records in the USAspending SQL database were updated. 
- # And keep it timezone-award for S3 - config["starting_date"] = get_last_load_date(f"es_{options['load_type']}", default=default_datetime) - - config["max_query_size"] = settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW - if options["load_type"] == "awards": - config["max_query_size"] = settings.ES_AWARDS_MAX_RESULT_WINDOW - - config["is_incremental_load"] = not bool(config["create_new_index"]) and ( - config["starting_date"] != default_datetime - ) - - if config["is_incremental_load"]: - write_alias = settings.ES_TRANSACTIONS_WRITE_ALIAS - if config["load_type"] == "awards": - write_alias = settings.ES_AWARDS_WRITE_ALIAS - if config["index_name"]: - logger.info(format_log(f"Ignoring provided index name, using alias '{write_alias}' for incremental load")) - config["index_name"] = write_alias - if not es_client.cat.aliases(name=write_alias): - logger.error(format_log(f"Write alias '{write_alias}' is missing")) - raise SystemExit(1) - # Force manual refresh for atomic transaction-like delete/re-add consistency during incremental load. - # Turned back on at end. - toggle_refresh_off(es_client, config["index_name"]) - else: - if es_client.indices.exists(config["index_name"]): - logger.error(format_log(f"Data load into existing index. Change index name or run an incremental load")) - raise SystemExit(1) - - if not config["directory"].is_dir(): - logger.error(format_log(f"Provided directory does not exist")) - raise SystemExit(1) - elif config["starting_date"] < default_datetime: - logger.error(format_log(f"--start-datetime is too early. 
Set no earlier than {default_datetime}")) - raise SystemExit(1) - elif not config["is_incremental_load"] and config["process_deletes"]: - logger.error(format_log("Skipping deletions for ths load, --deleted overwritten to False")) - config["process_deletes"] = False - - config["ingest_wait"] = options["idle_wait_time"] - - return config - - -def set_config(copy_args: list, arg_parse_options: dict) -> dict: - """Set values based on env vars and when the script started""" - root_index = settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX - if arg_parse_options["load_type"] == "awards": - root_index = settings.ES_AWARDS_QUERY_ALIAS_PREFIX - config = { - "aws_region": settings.USASPENDING_AWS_REGION, - "s3_bucket": settings.DELETED_TRANSACTION_JOURNAL_FILES, - "root_index": root_index, - "processing_start_datetime": datetime.now(timezone.utc), - "verbose": arg_parse_options["verbosity"] > 1, # convert the management command's levels of verbosity to a bool - } - - config.update({k: v for k, v in arg_parse_options.items() if k in copy_args}) - return config - - -def fiscal_years_for_processing(options: list) -> list: - if "all" in options["fiscal_years"]: - return create_fiscal_year_list(start_year=parse_fiscal_year(settings.API_SEARCH_MIN_DATE)) - return [int(x) for x in options["fiscal_years"]] - - -def check_new_index_name_is_ok(provided_name: str, suffix: str) -> None: - if not provided_name.endswith(suffix): - raise SystemExit(f"new index name doesn't end with the expected pattern: '{suffix}'") - - -class Rapidloader: - def __init__(self, config, elasticsearch_client): - """Set values based on env vars and when the script started""" - self.config = config - self.elasticsearch_client = elasticsearch_client - - def run_load_steps(self) -> None: - download_queue = Queue() # Queue for jobs which need a csv downloaded - es_ingest_queue = Queue(20) # Queue for jobs which have a csv and are ready for ES ingest - - updated_record_count = get_updated_record_count(self.config) - 
logger.info(format_log(f"Found {updated_record_count:,} {self.config['load_type']} records to index")) - - if updated_record_count == 0: - jobs = 0 - else: - download_queue, jobs = self.create_download_jobs() - - logger.info(format_log(f"There are {jobs} jobs to process")) - - process_list = [ - Process( - name="Download Process", - target=download_db_records, - args=(download_queue, es_ingest_queue, self.config), - ), - Process( - name="ES Index Process", - target=es_data_loader, - args=(self.elasticsearch_client, download_queue, es_ingest_queue, self.config), - ), - ] - - if updated_record_count != 0: # only run if there are data to process - process_list[0].start() # Start Download process - - if self.config["process_deletes"]: - process_list.append( - Process( - name="S3 Deleted Records Scrapper Process", - target=deleted_transactions if self.config["load_type"] == "transactions" else deleted_awards, - args=(self.elasticsearch_client, self.config), - ) - ) - process_list[-1].start() # start S3 csv fetch proces - while process_list[-1].is_alive(): - logger.info(format_log("Waiting to start ES ingest until S3 deletes are complete")) - sleep(7) # add a brief pause to make sure the deletes are processed in ES - - if updated_record_count != 0: - process_list[1].start() # start ES ingest process - - while True: - sleep(10) - if process_guarddog(process_list): - raise SystemExit("Fatal error: review logs to determine why process died.") - elif all([not x.is_alive() for x in process_list]): - logger.info(format_log("All ETL processes completed execution with no error codes")) - break - - def create_download_jobs(self) -> Tuple[Queue, int]: - download_queue = Queue() - for job_number, fiscal_year in enumerate(self.config["fiscal_years"], start=1): - index = self.config["index_name"] - filename = str(self.config["directory"] / f"{fiscal_year}_{self.config['load_type']}.csv") - - new_job = DataJob(job_number, index, fiscal_year, filename) - - if Path(filename).exists(): - 
Path(filename).unlink() - download_queue.put(new_job) - return download_queue, job_number - - def complete_process(self) -> None: - if self.config["create_new_index"]: - set_final_index_config(self.elasticsearch_client, self.config["index_name"]) - if self.config["skip_delete_index"]: - logger.info(format_log("Skipping deletion of old indices")) - else: - logger.info(format_log("Closing old indices and adding aliases")) - swap_aliases(self.elasticsearch_client, self.config["index_name"], self.config["load_type"]) - - if self.config["snapshot"]: - logger.info(format_log("Taking snapshot")) - take_snapshot(self.elasticsearch_client, self.config["index_name"], settings.ES_REPOSITORY) - - if self.config["is_incremental_load"]: - toggle_refresh_on(self.elasticsearch_client, self.config["index_name"]) - logger.info( - format_log(f"Storing datetime {self.config['processing_start_datetime']} for next incremental load") - ) - update_last_load_date(f"es_{self.config['load_type']}", self.config["processing_start_datetime"]) - - -VIEW_COLUMNS = [ - "transaction_id", - "detached_award_proc_unique", - "afa_generated_unique", - "generated_unique_transaction_id", - "display_award_id", - "update_date", - "modification_number", - "generated_unique_award_id", - "award_id", - "piid", - "fain", - "uri", - "transaction_description", - "product_or_service_code", - "product_or_service_description", - "psc_agg_key", - "naics_code", - "naics_description", - "naics_agg_key", - "type_description", - "award_category", - "recipient_unique_id", - "recipient_name", - "recipient_hash", - "recipient_agg_key", - "parent_recipient_unique_id", - "parent_recipient_name", - "parent_recipient_hash", - "action_date", - "fiscal_action_date", - "period_of_performance_start_date", - "period_of_performance_current_end_date", - "ordering_period_end_date", - "fiscal_year", - "award_fiscal_year", - "award_amount", - "federal_action_obligation", - "face_value_loan_guarantee", - "original_loan_subsidy_cost", - 
"generated_pragmatic_obligation", - "awarding_agency_id", - "funding_agency_id", - "awarding_toptier_agency_name", - "funding_toptier_agency_name", - "awarding_subtier_agency_name", - "funding_subtier_agency_name", - "awarding_toptier_agency_abbreviation", - "funding_toptier_agency_abbreviation", - "awarding_subtier_agency_abbreviation", - "funding_subtier_agency_abbreviation", - "awarding_toptier_agency_agg_key", - "funding_toptier_agency_agg_key", - "awarding_subtier_agency_agg_key", - "funding_subtier_agency_agg_key", - "cfda_number", - "cfda_title", - "type_of_contract_pricing", - "type_set_aside", - "extent_competed", - "type", - "pop_country_code", - "pop_country_name", - "pop_state_code", - "pop_county_code", - "pop_county_name", - "pop_zip5", - "pop_congressional_code", - "pop_city_name", - "pop_county_agg_key", - "pop_congressional_agg_key", - "pop_state_agg_key", - "pop_country_agg_key", - "recipient_location_country_code", - "recipient_location_country_name", - "recipient_location_state_code", - "recipient_location_county_code", - "recipient_location_county_name", - "recipient_location_zip5", - "recipient_location_congressional_code", - "recipient_location_city_name", - "recipient_location_county_agg_key", - "recipient_location_congressional_agg_key", - "recipient_location_state_agg_key", - "tas_paths", - "tas_components", - "federal_accounts", - "business_categories", - "disaster_emergency_fund_codes", -] -AWARD_VIEW_COLUMNS = [ - "award_id", - "generated_unique_award_id", - "display_award_id", - "category", - "type", - "type_description", - "piid", - "fain", - "uri", - "total_obligation", - "description", - "award_amount", - "total_subsidy_cost", - "total_loan_value", - "update_date", - "recipient_name", - "recipient_hash", - "recipient_agg_key", - "recipient_unique_id", - "parent_recipient_unique_id", - "business_categories", - "action_date", - "fiscal_year", - "last_modified_date", - "period_of_performance_start_date", - 
"period_of_performance_current_end_date", - "date_signed", - "ordering_period_end_date", - "original_loan_subsidy_cost", - "face_value_loan_guarantee", - "awarding_agency_id", - "funding_agency_id", - "awarding_toptier_agency_name", - "funding_toptier_agency_name", - "awarding_subtier_agency_name", - "funding_subtier_agency_name", - "awarding_toptier_agency_code", - "funding_toptier_agency_code", - "awarding_subtier_agency_code", - "funding_subtier_agency_code", - "funding_toptier_agency_agg_key", - "funding_subtier_agency_agg_key", - "recipient_location_country_code", - "recipient_location_country_name", - "recipient_location_state_code", - "recipient_location_county_code", - "recipient_location_county_name", - "recipient_location_congressional_code", - "recipient_location_zip5", - "recipient_location_city_name", - "recipient_location_county_agg_key", - "recipient_location_congressional_agg_key", - "recipient_location_state_agg_key", - "pop_country_code", - "pop_country_name", - "pop_state_code", - "pop_county_code", - "pop_county_name", - "pop_zip5", - "pop_congressional_code", - "pop_city_name", - "pop_city_code", - "pop_county_agg_key", - "pop_congressional_agg_key", - "pop_state_agg_key", - "cfda_number", - "cfda_title", - "sai_number", - "type_of_contract_pricing", - "extent_competed", - "type_set_aside", - "product_or_service_code", - "product_or_service_description", - "naics_code", - "naics_description", - "tas_paths", - "tas_components", - "disaster_emergency_fund_codes", - "total_covid_obligation", - "total_covid_outlay", -] - -COUNT_FY_SQL = """ -SELECT COUNT(*) AS count -FROM "{view}" -WHERE "fiscal_year" = {fy} AND "{update_date_col}" >= '{update_date}' -""" - -COUNT_SQL = """ -SELECT COUNT(*) AS count -FROM "{view}" -WHERE "{update_date_col}" >= '{update_date}' -""" - -COPY_SQL = """"COPY ( - SELECT * - FROM "{view}" - WHERE "fiscal_year" = {fy} AND "{update_date_col}" >= '{update_date}' -) TO STDOUT DELIMITER ',' CSV HEADER" > '{filename}' -""" - -# 
============================================================================== -# Other Globals -# ============================================================================== - -AWARD_DESC_CATEGORIES = { - "loans": "loans", - "grant": "grants", - "insurance": "other", - "other": "other", - "contract": "contracts", - "direct payment": "directpayments", -} - -UNIVERSAL_TRANSACTION_ID_NAME = "generated_unique_transaction_id" -UNIVERSAL_AWARD_ID_NAME = "generated_unique_award_id" - - -class DataJob: - def __init__(self, *args): - self.name = args[0] - self.index = args[1] - self.fy = args[2] - self.csv = args[3] - self.count = None - - -def convert_postgres_array_as_string_to_list(array_as_string: str) -> Optional[list]: - """ - Postgres arrays are stored in CSVs as strings. Elasticsearch is able to handle lists of items, but needs to - be passed a list instead of a string. In the case of an empty array, return null. - For example, "{this,is,a,postgres,array}" -> ["this", "is", "a", "postgres", "array"]. - """ - return array_as_string[1:-1].split(",") if len(array_as_string) > 2 else None - - -def convert_postgres_json_array_as_string_to_list(json_array_as_string: str) -> Optional[dict]: - """ - Postgres JSON arrays (jsonb) are stored in CSVs as strings. Since we want to avoid nested types - in Elasticsearch the JSON arrays are converted to dictionaries to make parsing easier and then - converted back into a formatted string. - """ - if json_array_as_string is None or len(json_array_as_string) == 0: - return None - result = [] - json_array = json.loads(json_array_as_string) - for j in json_array: - for key, value in j.items(): - j[key] = "" if value is None else str(j[key]) - result.append(json.dumps(j, sort_keys=True)) - return result - - -def process_guarddog(process_list): - """ - pass in a list of multiprocess Process objects. 
- If one errored then terminate the others and return True - """ - for proc in process_list: - # If exitcode is None, process is still running. exit code 0 is normal - if proc.exitcode not in (None, 0): - msg = f"Script proccess failed!!! {proc.name} exited with error {proc.exitcode}. Terminating all processes." - logger.error(format_log(msg)) - [x.terminate() for x in process_list] - return True - return False - - -def configure_sql_strings(config, filename, deleted_ids): - """ - Populates the formatted strings defined globally in this file to create the desired SQL - """ - if config["load_type"] == "awards": - view = settings.ES_AWARDS_ETL_VIEW_NAME - update_date_col = "update_date" - else: - view = settings.ES_TRANSACTIONS_ETL_VIEW_NAME - update_date_col = "etl_update_date" - - copy_sql = COPY_SQL.format( - fy=config["fiscal_year"], - update_date_col=update_date_col, - update_date=config["starting_date"], - filename=filename, - view=view, - ) - - count_sql = COUNT_FY_SQL.format( - fy=config["fiscal_year"], update_date_col=update_date_col, update_date=config["starting_date"], view=view - ) - - return copy_sql, count_sql - - -def get_updated_record_count(config): - if config["load_type"] == "awards": - view_name = settings.ES_AWARDS_ETL_VIEW_NAME - update_date_col = "update_date" - else: - view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME - update_date_col = "etl_update_date" - - count_sql = COUNT_SQL.format(update_date_col=update_date_col, update_date=config["starting_date"], view=view_name) - - return execute_sql_statement(count_sql, True, config["verbose"])[0]["count"] - - -def download_db_records(fetch_jobs, done_jobs, config): - # There was recurring issue with .empty() returning true when the queue - # actually contained multiple jobs. 
Potentially caused by a race condition - # Funny story: adding the log statement was enough to prevent the issue - # Decided to be safe and added short pause to guarentee no race condition - sleep(5) - logger.info(format_log(f"Queue has items: {not fetch_jobs.empty()}", process="Download")) - while not fetch_jobs.empty(): - if done_jobs.full(): - logger.info(format_log(f"Paused downloading new CSVs so ES indexing can catch up", process="Download")) - sleep(60) - else: - start = perf_counter() - job = fetch_jobs.get_nowait() - logger.info(format_log(f"Preparing to download '{job.csv}'", process="Download")) - - sql_config = { - "starting_date": config["starting_date"], - "fiscal_year": job.fy, - "process_deletes": config["process_deletes"], - "load_type": config["load_type"], - } - copy_sql, count_sql = configure_sql_strings(sql_config, job.csv, []) - - if os.path.isfile(job.csv): - os.remove(job.csv) - - job.count = download_csv(count_sql, copy_sql, job.csv, job.name, config["skip_counts"], config["verbose"]) - done_jobs.put(job) - logger.info( - format_log(f"CSV '{job.csv}' copy took {perf_counter() - start:.2f}s", job=job.name, process="Download") - ) - sleep(1) - - # This "Null Job" is used to notify the other (ES data load) process this is the final job - done_jobs.put(DataJob(None, None, None, None)) - logger.info(format_log(f"PostgreSQL COPY operations complete", process="Download")) - return - - -def download_csv(count_sql, copy_sql, filename, job_id, skip_counts, verbose): - - # Execute Copy SQL to download records to CSV - # It is preferable to not use shell=True, but this command works. 
Limited user-input so risk is low - subprocess.Popen(f"psql {get_database_dsn_string()} -c {copy_sql}", shell=True).wait() - download_count = count_rows_in_delimited_file(filename, has_header=True, safe=False) - logger.info(format_log(f"Wrote {download_count:,} to this file: {filename}", job=job_id, process="Download")) - - # If --skip_counts is disabled, execute count_sql and compare this count to the download_count - if not skip_counts: - sql_count = execute_sql_statement(count_sql, True, verbose)[0]["count"] - if sql_count != download_count: - msg = f'Mismatch between CSV "{filename}" and DB!!! Expected: {sql_count:,} | Actual: {download_count:,}' - logger.error(format_log(msg, job=job_id, process="Download")) - raise SystemExit(1) - else: - logger.info(format_log(f"Skipping count comparison checks (sql vs download)", job=job_id, process="Download")) - - return download_count - - -def csv_chunk_gen(filename, chunksize, job_id, load_type): - logger.info(format_log(f"Opening {filename} (batch size = {chunksize:,})", job=job_id, process="ES Index")) - # Need a specific converter to handle converting strings to correct data types (e.g. string -> array) - converters = { - "business_categories": convert_postgres_array_as_string_to_list, - "tas_paths": convert_postgres_array_as_string_to_list, - "tas_components": convert_postgres_array_as_string_to_list, - "federal_accounts": convert_postgres_json_array_as_string_to_list, - "disaster_emergency_fund_codes": convert_postgres_array_as_string_to_list, - } - # Panda's data type guessing causes issues for Elasticsearch. 
Explicitly cast using dictionary - column_list = AWARD_VIEW_COLUMNS if load_type == "awards" else VIEW_COLUMNS - dtype = {k: str for k in column_list if k not in converters} - for file_df in pd.read_csv(filename, dtype=dtype, converters=converters, header=0, chunksize=chunksize): - file_df = file_df.where(cond=(pd.notnull(file_df)), other=None) - # Route all documents with the same recipient to the same shard - # This allows for accuracy and early-termination of "top N" recipient category aggregation queries - # Recipient is are highest-cardinality category with over 2M unique values to aggregate against, - # and this is needed for performance - # ES helper will pop any "meta" fields like "routing" from provided data dict and use them in the action - file_df["routing"] = file_df[settings.ES_ROUTING_FIELD] - - # Explicitly setting the ES _id field to match the postgres PK value allows - # bulk index operations to be upserts without creating duplicate documents - file_df["_id"] = file_df[f"{'award' if load_type == 'awards' else 'transaction'}_id"] - yield file_df.to_dict(orient="records") - - -def es_data_loader(client, fetch_jobs, done_jobs, config): - if config["create_new_index"]: - # ensure template for index is present and the latest version - call_command("es_configure", "--template-only", f"--load-type={config['load_type']}") - while True: - if not done_jobs.empty(): - job = done_jobs.get_nowait() - if job.name is None: - break - - logger.info(format_log(f"Starting new job", job=job.name, process="ES Index")) - post_to_elasticsearch(client, job, config) - if os.path.exists(job.csv): - os.remove(job.csv) - else: - logger.info(format_log(f"No Job. 
Sleeping {config['ingest_wait']}s", process="ES Index")) - sleep(int(config["ingest_wait"])) - - logger.info(format_log(f"Completed Elasticsearch data load", process="ES Index")) - return - - -def streaming_post_to_es( - client, chunk, index_name: str, type: str, job_id=None, delete_before_index=True, delete_key="_id" -): - """ - Called this repeatedly with successive chunks of data to pump into an Elasticsearch index. - - Args: - client: Elasticsearch client - chunk (List[dict]): list of dictionary objects holding field_name:value data - index_name (str): name of targetted index - type (str): indexed data type (e.g. awards or transactions) - job_id (str): name of ES ETL job being run, used in logging - delete_before_index (bool): When true, attempts to delete given documents by a unique key before indexing them. - NOTE: For incremental loads, we must "delete-before-index" due to the fact that on many of our indices, - we have different values for _id and routing key. - Not doing this exposed a bug in our approach to expedite incremental UPSERTS aimed at allowing ES to - overwrite documents when it encountered one already existing by a given _id. The problem is that the - index operation uses the routing key to target only 1 shard for its index/overwrite. If the routing key - value changes between two incremental loads of the same doc with the same _id, it may get routed to a - different shard and won't overwrite the original doc, leaving duplicates across all shards in the index. - delete_key (str): The column (field) name used for value lookup in the given chunk to derive documents to be - deleted, if delete_before_index is True. Currently defaulting to "_id", taking advantage of the fact - that we are explicitly setting "_id" in the documents to-be-indexed, which is a unique key for each doc - (e.g. the PK of the DB row) - - Returns: (succeeded, failed) tuple, which counts successful index doc writes vs. 
failed doc writes - """ - success, failed = 0, 0 - try: - if delete_before_index: - value_list = [doc[delete_key] for doc in chunk] - delete_docs_by_unique_key(client, delete_key, value_list, job_id, index_name) - for ok, item in helpers.parallel_bulk(client, chunk, index=index_name): - success = [success, success + 1][ok] - failed = [failed + 1, failed][ok] - - except Exception as e: - logger.exception(f"Fatal error: \n\n{str(e)[:5000]}...\n\n{'*' * 80}") - raise SystemExit(1) - - logger.info(format_log(f"Success: {success:,} | Fail: {failed:,}", job=job_id, process="ES Index")) - return success, failed - - -def put_alias(client, index, alias_name, alias_body): - client.indices.put_alias(index, alias_name, body=alias_body) - - -def create_aliases(client, index, load_type, silent=False): - for award_type, award_type_codes in INDEX_ALIASES_TO_AWARD_TYPES.items(): - if load_type == "awards": - prefix = settings.ES_AWARDS_QUERY_ALIAS_PREFIX - else: - prefix = settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX - - alias_name = f"{prefix}-{award_type}" - if silent is False: - logger.info( - format_log( - f"Putting alias '{alias_name}' on {index} with award codes {award_type_codes}", - process="ES Alias Put", - ) - ) - alias_body = {"filter": {"terms": {"type": award_type_codes}}} - put_alias(client, index, alias_name, alias_body) - - # ensure the new index is added to the alias used for incremental loads. - # If the alias is on multiple indexes, the loads will fail! 
- write_alias = settings.ES_AWARDS_WRITE_ALIAS if load_type == "awards" else settings.ES_TRANSACTIONS_WRITE_ALIAS - logger.info(format_log(f"Putting alias '{write_alias}' on {index}", process="ES Alias Put")) - put_alias( - client, index, write_alias, {}, - ) - - -def set_final_index_config(client, index): - es_settingsfile = str(settings.APP_DIR / "etl" / "es_config_objects.json") - with open(es_settingsfile) as f: - settings_dict = json.load(f) - final_index_settings = settings_dict["final_index_settings"] - - current_settings = client.indices.get(index)[index]["settings"]["index"] - - client.indices.put_settings(final_index_settings, index) - client.indices.refresh(index) - for setting, value in final_index_settings.items(): - message = f'Changed "{setting}" from {current_settings.get(setting)} to {value}' - logger.info(format_log(message, process="ES Settings")) - - -def toggle_refresh_off(client, index): - client.indices.put_settings({"refresh_interval": "-1"}, index) - message = ( - f'Set "refresh_interval": "-1" to turn auto refresh off during incremental load. Manual refreshes will ' - f"occur for each batch completion." 
- ) - logger.info(format_log(message, process="ES Settings")) - - -def toggle_refresh_on(client, index): - response = client.indices.get(index) - aliased_index_name = list(response.keys())[0] - current_refresh_interval = response[aliased_index_name]["settings"]["index"]["refresh_interval"] - es_settingsfile = str(settings.APP_DIR / "etl" / "es_config_objects.json") - with open(es_settingsfile) as f: - settings_dict = json.load(f) - final_refresh_interval = settings_dict["final_index_settings"]["refresh_interval"] - client.indices.put_settings({"refresh_interval": final_refresh_interval}, index) - message = f'Changed "refresh_interval" from {current_refresh_interval} to {final_refresh_interval}' - logger.info(format_log(message, process="ES Settings")) - - -def swap_aliases(client, index, load_type): - if client.indices.get_alias(index, "*"): - logger.info(format_log(f"Removing old aliases for index '{index}'", process="ES Alias Drop")) - client.indices.delete_alias(index, "_all") - if load_type == "awards": - alias_patterns = settings.ES_AWARDS_QUERY_ALIAS_PREFIX + "*" - else: - alias_patterns = settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX + "*" - old_indexes = [] - - try: - old_indexes = list(client.indices.get_alias("*", alias_patterns).keys()) - for old_index in old_indexes: - client.indices.delete_alias(old_index, "_all") - logger.info(format_log(f"Removing aliases from '{old_index}'", process="ES Alias Drop")) - except Exception: - logger.exception(format_log(f"No aliases found for {alias_patterns}", process="ES Alias Drop")) - - create_aliases(client, index, load_type=load_type) - - try: - if old_indexes: - client.indices.delete(index=old_indexes, ignore_unavailable=False) - logger.info(format_log(f"Deleted index(es) '{old_indexes}'", process="ES Alias Drop")) - except Exception: - logger.exception(format_log(f"Unable to delete indexes: {old_indexes}", process="ES Alias Drop")) - - -def post_to_elasticsearch(client, job, config, chunksize=250000): - 
logger.info(format_log(f"Populating ES Index '{job.index}'", job=job.name, process="ES Index")) - start = perf_counter() - try: - does_index_exist = client.indices.exists(job.index) - except Exception as e: - print(e) - raise SystemExit(1) - if not does_index_exist: - logger.info(format_log(f"Creating index '{job.index}'", job=job.name, process="ES Index")) - client.indices.create(index=job.index) - client.indices.refresh(job.index) - - csv_generator = csv_chunk_gen(job.csv, chunksize, job.name, config["load_type"]) - for count, chunk in enumerate(csv_generator): - if len(chunk) == 0: - logger.info(format_log(f"No documents to add/delete for chunk #{count}", job=job.name, process="ES Index")) - continue - - # Only delete before adding/inserting/indexing new docs on incremental loads, not full reindexes - is_incremental = config["is_incremental_load"] and str(config["is_incremental_load"]).lower() == "true" - - iteration = perf_counter() - current_rows = f"({count * chunksize + 1:,}-{count * chunksize + len(chunk):,})" - logger.info( - format_log(f"ES Stream #{count} rows [{current_rows}/{job.count:,}]", job=job.name, process="ES Index") - ) - streaming_post_to_es( - client, chunk, job.index, config["load_type"], job.name, delete_before_index=is_incremental - ) - if is_incremental: - # refresh_interval is off during incremental loads. 
- # Manually refresh after delete + insert complete for search consistency - client.indices.refresh(job.index) - logger.info( - format_log( - f"Iteration group #{count} took {perf_counter() - iteration:.2f}s", job=job.name, process="ES Index" - ) - ) - - logger.info( - format_log(f"Elasticsearch Index loading took {perf_counter() - start:.2f}s", job=job.name, process="ES Index") - ) - - -def deleted_transactions(client, config): - deleted_ids = gather_deleted_ids(config) - id_list = [{"key": deleted_id, "col": UNIVERSAL_TRANSACTION_ID_NAME} for deleted_id in deleted_ids] - delete_from_es(client, id_list, None, config, None) - - -def deleted_awards(client, config): - """ - so we have to find all the awards connected to these transactions, - if we can't find the awards in the database, then we have to delete them from es - """ - deleted_ids = gather_deleted_ids(config) - id_list = [{"key": deleted_id, "col": UNIVERSAL_TRANSACTION_ID_NAME} for deleted_id in deleted_ids] - award_ids = get_deleted_award_ids(client, id_list, config, settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX + "-*") - if (len(award_ids)) == 0: - logger.info(format_log(f"No related awards require deletion", process="ES Delete")) - return - deleted_award_ids = check_awards_for_deletes(award_ids) - if len(deleted_award_ids) != 0: - award_id_list = [ - {"key": deleted_award["generated_unique_award_id"], "col": UNIVERSAL_AWARD_ID_NAME} - for deleted_award in deleted_award_ids - ] - delete_from_es(client, award_id_list, None, config, None) - else: - logger.info(format_log(f"No related awards require deletion", process="ES Delete")) - return - - -def take_snapshot(client, index, repository): - snapshot_name = f"{index}-{str(datetime.now().date())}" - try: - client.snapshot.create(repository, snapshot_name, body={"indices": index}) - logger.info( - format_log( - f"Taking snapshot INDEX: '{index}' SNAPSHOT: '{snapshot_name}' REPO: '{repository}'", - process="ES Snapshot", - ) - ) - except TransportError: - 
logger.exception(format_log(f"SNAPSHOT FAILED", process="ES Snapshot")) - raise SystemExit(1) - - -def gather_deleted_ids(config): - """ - Connect to S3 and gather all of the transaction ids stored in CSV files - generated by the broker when transactions are removed from the DB. - """ - - if not config["process_deletes"]: - logger.info(format_log(f"Skipping the S3 CSV fetch for deleted transactions", process="ES Delete")) - return - - logger.info(format_log(f"Gathering all deleted transactions from S3", process="ES Delete")) - start = perf_counter() - - bucket_objects = retrieve_s3_bucket_object_list(bucket_name=config["s3_bucket"]) - logger.info( - format_log(f"{len(bucket_objects):,} files found in bucket '{config['s3_bucket']}'", process="ES Delete") - ) - - if config["verbose"]: - logger.info(format_log(f"CSV data from {config['starting_date']} to now", process="ES Delete")) - - filtered_csv_list = [ - x - for x in bucket_objects - if (x.key.endswith(".csv") and not x.key.startswith("staging") and x.last_modified >= config["starting_date"]) - ] - - if config["verbose"]: - logger.info(format_log(f"Found {len(filtered_csv_list)} csv files", process="ES Delete")) - - deleted_ids = {} - - for obj in filtered_csv_list: - object_data = access_s3_object(bucket_name=config["s3_bucket"], obj=obj) - - # Ingests the CSV into a dataframe. 
pandas thinks some ids are dates, so disable parsing - data = pd.read_csv(object_data, dtype=str) - - if "detached_award_proc_unique" in data: - new_ids = ["CONT_TX_" + x.upper() for x in data["detached_award_proc_unique"].values] - elif "afa_generated_unique" in data: - new_ids = ["ASST_TX_" + x.upper() for x in data["afa_generated_unique"].values] - else: - logger.info(format_log(f"[Missing valid col] in {obj.key}", process="ES Delete")) - - for uid in new_ids: - if uid in deleted_ids: - if deleted_ids[uid]["timestamp"] < obj.last_modified: - deleted_ids[uid]["timestamp"] = obj.last_modified - else: - deleted_ids[uid] = {"timestamp": obj.last_modified} - - if config["verbose"]: - for uid, deleted_dict in deleted_ids.items(): - logger.info(format_log(f"id: {uid} last modified: {deleted_dict['timestamp']}", process="ES Delete")) - - logger.info( - format_log( - f"Gathering {len(deleted_ids):,} deleted transactions took {perf_counter() - start:.2f}s", - process="ES Delete", - ) - ) - return deleted_ids - - -def filter_query(column, values, query_type="match_phrase"): - queries = [{query_type: {column: str(i)}} for i in values] - return {"query": {"bool": {"should": [queries]}}} - - -def delete_query(response): - return {"query": {"ids": {"values": [i["_id"] for i in response["hits"]["hits"]]}}} - - -def delete_from_es(client, id_list, job_id, config, index=None): - """ - id_list = [{key:'key1',col:'tranaction_id'}, - {key:'key2',col:'generated_unique_transaction_id'}], - ...] - or - id_list = [{key:'key1',col:'award_id'}, - {key:'key2',col:'generated_unique_award_id'}], - ...] 
- """ - start = perf_counter() - - logger.info(format_log(f"Deleting up to {len(id_list):,} document(s)", job=job_id, process="ES Delete")) - - if index is None: - index = f"{config['root_index']}-*" - start_ = client.count(index=index)["count"] - logger.info(format_log(f"Starting amount of indices ----- {start_:,}", job=job_id, process="ES Delete")) - col_to_items_dict = defaultdict(list) - for l in id_list: - col_to_items_dict[l["col"]].append(l["key"]) - - for column, values in col_to_items_dict.items(): - logger.info(format_log(f"Deleting {len(values):,} of '{column}'", job=job_id, process="ES Delete")) - values_generator = chunks(values, 1000) - for v in values_generator: - # IMPORTANT: This delete routine looks at just 1 index at a time. If there are duplicate records across - # multiple indexes, those duplicates will not be caught by this routine. It is left as is because at the - # time of this comment, we are migrating to using a single index. - body = filter_query(column, v) - response = client.search(index=index, body=json.dumps(body), size=config["max_query_size"]) - delete_body = delete_query(response) - try: - client.delete_by_query( - index=index, body=json.dumps(delete_body), refresh=True, size=config["max_query_size"] - ) - except Exception: - logger.exception(format_log(f"", job=job_id, process="ES Delete")) - raise SystemExit(1) - - end_ = client.count(index=index)["count"] - msg = f"ES Deletes took {perf_counter() - start:.2f}s. Deleted {start_ - end_:,} records" - logger.info(format_log(msg, job=job_id, process="ES Delete")) - return - - -def delete_docs_by_unique_key(client: Elasticsearch, key: str, value_list: list, job_id: str, index) -> int: - """ - Bulk delete a batch of documents whose field identified by ``key`` matches any value provided in the - ``values_list``. 
- - Args: - client (Elasticsearch): elasticsearch-dsl client for making calls to an ES cluster - key (str): name of filed in targeted elasticearch index that shoudld have a unique value for - every doc in the index. Ideally the field or sub-field provided is of ``keyword`` type. - value_list (list): if key field has these values, the document will be deleted - job_id (str): name of ES ETL job being run, used in logging - index (str): name of index (or alias) to target for the ``_delete_by_query`` ES operation. - - NOTE: This delete routine looks at just the index name given. If there are duplicate records across - multiple indexes, an alias or wildcard should be provided for ``index`` param that covers multiple - indices, or this will need to be run once per index. - - Returns: Number of ES documents deleted - """ - start = perf_counter() - - logger.info(format_log(f"Deleting up to {len(value_list):,} document(s)", process="ES Delete", job=job_id)) - assert index, "index name must be provided" - - deleted = 0 - is_error = False - try: - # 65,536 is max number of terms that can be added to an ES terms filter query - values_generator = chunks(value_list, 50000) - for chunk_of_values in values_generator: - # Creates an Elasticsearch query criteria for the _delete_by_query call - q = ES_Q("terms", **{key: chunk_of_values}) - # Invoking _delete_by_query as per the elasticsearch-dsl docs: - # https://elasticsearch-dsl.readthedocs.io/en/latest/search_dsl.html#delete-by-query - response = Search(using=client, index=index).filter(q).delete() - chunk_deletes = response["deleted"] - deleted += chunk_deletes - except Exception: - is_error = True - logger.exception(format_log(f"", job=job_id, process="ES Delete")) - raise SystemExit(1) - finally: - error_text = " before encountering an error" if is_error else "" - msg = f"ES Deletes took {perf_counter() - start:.2f}s. 
Deleted {deleted:,} records{error_text}" - logger.info(format_log(msg, process="ES Delete", job=job_id)) - - return deleted - - -def get_deleted_award_ids(client, id_list, config, index=None): - """ - id_list = [{key:'key1',col:'transaction_id'}, - {key:'key2',col:'generated_unique_transaction_id'}], - ...] - """ - if index is None: - index = f"{config['root_index']}-*" - col_to_items_dict = defaultdict(list) - for l in id_list: - col_to_items_dict[l["col"]].append(l["key"]) - awards = [] - for column, values in col_to_items_dict.items(): - values_generator = chunks(values, 1000) - for v in values_generator: - body = filter_query(column, v) - response = client.search(index=index, body=json.dumps(body), size=config["max_query_size"]) - if response["hits"]["total"]["value"] != 0: - awards = [x["_source"]["generated_unique_award_id"] for x in response["hits"]["hits"]] - return awards - - -def format_log(msg, process=None, job=None): - inner_str = f"[{process if process else 'main'}] {f'(#{job})' if job else ''}" - return f"{inner_str:<18} | {msg}" diff --git a/usaspending_api/etl/management/commands/load_submission.py b/usaspending_api/etl/management/commands/load_submission.py index db0150d9a2..a282d1fb5c 100644 --- a/usaspending_api/etl/management/commands/load_submission.py +++ b/usaspending_api/etl/management/commands/load_submission.py @@ -157,6 +157,29 @@ def signal_handler(signal, frame): def get_broker_submission(self): self.db_cursor.execute( f""" + with publish_certify_history as ( + select + distinct_pairings.submission_id, + jsonb_agg( + jsonb_build_object( + 'published_date', ph.updated_at::timestamptz, + 'certified_date', ch.updated_at::timestamptz + ) + ) AS history + from + (select distinct + submission_id, + publish_history_id, + certify_history_id + from published_files_history + where submission_id = %s) as distinct_pairings + left join + publish_history as ph + on distinct_pairings.publish_history_id = ph.publish_history_id + left join + 
certify_history as ch + on distinct_pairings.certify_history_id = ch.certify_history_id + group by distinct_pairings.submission_id) select s.submission_id, ( @@ -176,11 +199,13 @@ def get_broker_submission(self): s.reporting_fiscal_period, s.is_quarter_format, s.d2_submission, - s.publish_status_id + s.publish_status_id, + pch.history from submission as s - where - s.submission_id = %s + join + publish_certify_history as pch + on pch.submission_id = s.submission_id """, [self.submission_id], ) diff --git a/usaspending_api/etl/submission_loader_helpers/submission_attributes.py b/usaspending_api/etl/submission_loader_helpers/submission_attributes.py index d578719d51..4ab8ee93da 100644 --- a/usaspending_api/etl/submission_loader_helpers/submission_attributes.py +++ b/usaspending_api/etl/submission_loader_helpers/submission_attributes.py @@ -35,7 +35,7 @@ def attempt_submission_update_only(submission_data): if submission.certified_date != submission_data["certified_date"]: SubmissionAttributes.objects.filter(submission_id=submission_id).update( - certified_date=submission_data["certified_date"] + certified_date=submission_data["certified_date"], history=submission_data["history"] ) return True diff --git a/usaspending_api/etl/tests/integration/test_load_multiple_submissions.py b/usaspending_api/etl/tests/integration/test_load_multiple_submissions.py index b73be7da39..71466a8cf3 100644 --- a/usaspending_api/etl/tests/integration/test_load_multiple_submissions.py +++ b/usaspending_api/etl/tests/integration/test_load_multiple_submissions.py @@ -159,8 +159,8 @@ def setUp(self): submission_id, updated_at ) (values - (1, 1, '2000-01-01'), (2, 2, '2000-01-02'), (3, 3, '2000-01-03'), (4, 4, '2000-01-04'), - (5, 5, '2000-01-05'), (6, 6, '2000-01-06'), (7, 7, '2000-01-07') + (1, 1, '1999-01-01'), (2, 2, '2000-01-02'), (3, 3, '2000-01-03'), (4, 4, '2000-01-04'), + (5, 5, '2000-01-05'), (6, 6, '2000-01-06'), (7, 7, '2000-01-07'), (8, 1, '2000-01-01') ) """ ) @@ -177,6 +177,22 @@ 
def setUp(self): """ ) + cursor.execute( + """ + insert into published_files_history ( + published_files_history_id, + submission_id, + publish_history_id, + certify_history_id, + updated_at + ) (values + (1, 1, 1, NULL, '1999-01-01'), (2, 2, 2, NULL, '2000-01-02'), (3, 3, 3, 3, '2000-01-03'), + (4, 4, 4, NULL, '2000-01-04'), (5, 5, 5, 5, '2000-01-05'), (6, 6, 6, NULL, '2000-01-06'), + (7, 7, 7, 7, '2000-01-07'), (8, 1, 8, 1, '2000-01-01') + ) + """ + ) + cursor.execute( """ insert into certified_appropriation ( @@ -352,6 +368,10 @@ def test_all_the_things(self): "is_final_balances_for_fy": False, "published_date": datetime(2000, 1, 1, 0, 0, tzinfo=timezone.utc), "submission_window_id": 2000041, + "history": [ + {"certified_date": None, "published_date": "1999-01-01T00:00:00+00:00"}, + {"certified_date": "2000-02-01T00:00:00+00:00", "published_date": "2000-01-01T00:00:00+00:00"}, + ], } cursor.execute( diff --git a/usaspending_api/etl/tests/integration/test_load_submission_mgmt_cmd.py b/usaspending_api/etl/tests/integration/test_load_submission_mgmt_cmd.py index d01ec3a62c..43e132df14 100644 --- a/usaspending_api/etl/tests/integration/test_load_submission_mgmt_cmd.py +++ b/usaspending_api/etl/tests/integration/test_load_submission_mgmt_cmd.py @@ -1,6 +1,7 @@ import copy import pytest +from datetime import datetime, timedelta from django.core.management import call_command from django.db import connections from django.db.models import Q @@ -10,8 +11,11 @@ from usaspending_api.etl.submission_loader_helpers.object_class import reset_object_class_cache from usaspending_api.etl.transaction_loaders.data_load_helpers import format_insert_or_update_column_sql +earlier_time = datetime.now() - timedelta(days=1) +current_time = datetime.now() -@pytest.mark.usefixtures("broker_db_setup") + +@pytest.mark.usefixtures("broker_db_setup", "broker_server_dblink_setup") class TestWithMultipleDatabases(TestCase): databases = "__all__" @@ -47,7 +51,12 @@ def setUpTestData(cls): 
broker_objects_to_insert = { "tas_lookup": {"broker_object": _assemble_broker_tas_lookup_records(), "conflict_column": "tas_id"}, "submission": {"broker_object": _assemble_broker_submission_records(), "conflict_column": "submission_id"}, + "publish_history": {"broker_object": _assemble_publish_history(), "conflict_column": "publish_history_id"}, "certify_history": {"broker_object": _assemble_certify_history(), "conflict_column": "certify_history_id"}, + "published_files_history": { + "broker_object": _assemble_published_files_history(), + "conflict_column": "published_files_history_id", + }, "certified_award_financial": { "broker_object": _assemble_certified_award_financial_records(), "conflict_column": "certified_award_financial_id", @@ -428,12 +437,35 @@ def _assemble_certified_award_financial_records() -> list: ] +def _assemble_publish_history(): + base_record = { + "created_at": earlier_time, + "updated_at": earlier_time, + "publish_history_id": 1, + "submission_id": -9999, + "user_id": None, + } + return [base_record] + + def _assemble_certify_history(): base_record = { - "created_at": None, - "updated_at": None, + "created_at": current_time, + "updated_at": current_time, "certify_history_id": 1, "submission_id": -9999, "user_id": None, } return [base_record] + + +def _assemble_published_files_history(): + base_record = { + "created_at": None, + "updated_at": None, + "published_files_history_id": 1, + "publish_history_id": 1, + "certify_history_id": None, + "submission_id": -9999, + } + return [base_record] diff --git a/usaspending_api/etl/tests/test_es_rapidloader.py b/usaspending_api/etl/tests/test_es_rapidloader.py index 7aaa972332..6a0dec519f 100644 --- a/usaspending_api/etl/tests/test_es_rapidloader.py +++ b/usaspending_api/etl/tests/test_es_rapidloader.py @@ -1,4 +1,5 @@ import pytest +from django.conf import settings from collections import OrderedDict from datetime import datetime, timezone @@ -12,6 +13,9 @@ check_awards_for_deletes, 
get_deleted_award_ids, Controller, + execute_sql_statement, + transform_award_data, + transform_transaction_data, ) @@ -91,22 +95,44 @@ def award_data_fixture(db): mommy.make("awards.FinancialAccountsByAwards", financial_accounts_by_awards_id=1, award_id=1, treasury_account_id=1) -config = { - "query_alias_prefix": "award-query", - "processing_start_datetime": datetime(2019, 12, 13, 16, 10, 33, 729108, tzinfo=timezone.utc), - "verbose": False, - "load_type": "awards", - "process_deletes": False, +award_config = { + "create_new_index": True, + "data_type": "award", + "data_transform_func": transform_award_data, "directory": Path(__file__).resolve().parent, - "skip_counts": False, + "fiscal_years": [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], "index_name": f"test-{datetime.now(timezone.utc).strftime('%Y-%m-%d-%H-%M-%S-%f')}-{generate_random_string()}", - "create_new_index": True, + "is_incremental_load": False, + "max_query_size": 10000, + "process_deletes": False, + "processing_start_datetime": datetime(2019, 12, 13, 16, 10, 33, 729108, tzinfo=timezone.utc), + "query_alias_prefix": "award-query", + "skip_counts": False, "snapshot": False, - "fiscal_years": [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], "starting_date": datetime(2007, 10, 1, 0, 0, tzinfo=timezone.utc), - "max_query_size": 10000, - "is_incremental_load": False, - "ingest_wait": 0.001, + "unique_key_field": "award_id", + "verbose": False, +} + +transaction_config = { + "base_table": "transaction_normalized", + "base_table_id": "id", + "create_award_type_aliases": True, + "data_transform_func": transform_transaction_data, + "data_type": "transaction", + "execute_sql_func": execute_sql_statement, + "extra_null_partition": False, + "field_for_es_id": "transaction_id", + "initial_datetime": datetime(2019, 12, 13, 16, 10, 33, 729108, tzinfo=timezone.utc), + "max_query_size": 50000, + "optional_predicate": """WHERE "update_date" >= 
'{starting_date}'""", + "primary_key": "transaction_id", + "query_alias_prefix": "transaction-query", + "required_index_name": settings.ES_TRANSACTIONS_NAME_SUFFIX, + "sql_view": settings.ES_TRANSACTIONS_ETL_VIEW_NAME, + "stored_date_key": "es_transactions", + "unique_key_field": "generated_unique_transaction_id", + "write_alias": settings.ES_TRANSACTIONS_WRITE_ALIAS, } ################################################################################ @@ -125,11 +151,11 @@ def test_es_award_loader_class(award_data_fixture, elasticsearch_award_index, mo "usaspending_api.etl.elasticsearch_loader_helpers.utilities.execute_sql_statement", mock_execute_sql ) elasticsearch_client = instantiate_elasticsearch_client() - loader = Controller(config, elasticsearch_client) + loader = Controller(award_config, elasticsearch_client) assert loader.__class__.__name__ == "Controller" loader.run_load_steps() - assert elasticsearch_client.indices.exists(config["index_name"]) - elasticsearch_client.indices.delete(index=config["index_name"], ignore_unavailable=False) + assert elasticsearch_client.indices.exists(award_config["index_name"]) + elasticsearch_client.indices.delete(index=award_config["index_name"], ignore_unavailable=False) @pytest.mark.skip @@ -137,14 +163,12 @@ def test_es_transaction_loader_class(award_data_fixture, elasticsearch_transacti monkeypatch.setattr( "usaspending_api.etl.elasticsearch_loader_helpers.utilities.execute_sql_statement", mock_execute_sql ) - config["query_alias_prefix"] = "transaction-query" - config["load_type"] = "transactions" elasticsearch_client = instantiate_elasticsearch_client() - loader = Controller(config, elasticsearch_client) + loader = Controller(transaction_config, elasticsearch_client) assert loader.__class__.__name__ == "Controller" loader.run_load_steps() - assert elasticsearch_client.indices.exists(config["index_name"]) - elasticsearch_client.indices.delete(index=config["index_name"], ignore_unavailable=False) + assert 
elasticsearch_client.indices.exists(transaction_config["index_name"]) + elasticsearch_client.indices.delete(index=transaction_config["index_name"], ignore_unavailable=False) # SQL method is being mocked here since the `execute_sql_statement` used @@ -166,11 +190,9 @@ def test_award_delete_sql(award_data_fixture, monkeypatch, db): assert awards == [OrderedDict([("generated_unique_award_id", "CONT_AWD_WHATEVER")])] -def test_get_award_ids(award_data_fixture, elasticsearch_transaction_index): - elasticsearch_transaction_index.update_index() - id_list = [{"key": 1, "col": "transaction_id"}] - config["query_alias_prefix"] = "transaction-query" - config["load_type"] = "transactions" - client = elasticsearch_transaction_index.client - ids = get_deleted_award_ids(client, id_list, config, index=elasticsearch_transaction_index.index_name) +def test_get_award_ids(award_data_fixture, elasticsearch_award_index): + elasticsearch_award_index.update_index() + id_list = [{"key": 1, "col": "award_id"}] + client = elasticsearch_award_index.client + ids = get_deleted_award_ids(client, id_list, award_config, index=elasticsearch_award_index.index_name) assert ids == ["CONT_AWD_IND12PB00323"] diff --git a/usaspending_api/etl/tests/unit/__init__.py b/usaspending_api/etl/tests/unit/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/usaspending_api/recipient/v2/views/recipients.py b/usaspending_api/recipient/v2/views/recipients.py index b721d91b9c..827fc689d3 100644 --- a/usaspending_api/recipient/v2/views/recipients.py +++ b/usaspending_api/recipient/v2/views/recipients.py @@ -1,3 +1,4 @@ +import json import logging import uuid @@ -12,7 +13,6 @@ from usaspending_api.awards.v2.lookups.lookups import loan_type_mapping from usaspending_api.broker.helpers.get_business_categories import get_business_categories from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from 
usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch from usaspending_api.common.exceptions import InvalidParameterException from usaspending_api.common.query_with_filters import QueryWithFilters @@ -285,12 +285,12 @@ def obtain_recipient_totals(recipient_id, children=False, year="latest"): for bucket in recipient_info_buckets: result = {} if children: - recipient_info = json_str_to_dict(bucket.get("key")) + recipient_info = json.loads(bucket.get("key")) hash_with_level = recipient_info.get("hash_with_level") or None result = { "recipient_hash": hash_with_level[:-2] if hash_with_level else None, - "recipient_unique_id": recipient_info.get("unique_id") or None, - "recipient_name": recipient_info.get("name") or None, + "recipient_unique_id": recipient_info.get("unique_id"), + "recipient_name": recipient_info.get("name"), } loan_info = bucket.get("filter_loans", {}) result.update( @@ -436,7 +436,7 @@ def get(self, request, duns): ) # Add state/provinces to each result - child_hashes = [result["recipient_id"][:-2] for result in results] + child_hashes = [result["recipient_id"][:-2] for result in results if result is not None] states_qs = RecipientLookup.objects.filter(recipient_hash__in=child_hashes).values("recipient_hash", "state") state_map = {str(state_result["recipient_hash"]): state_result["state"] for state_result in list(states_qs)} for result in results: diff --git a/usaspending_api/references/management/commands/load_gtas.py b/usaspending_api/references/management/commands/load_gtas.py index 0fdac78b3f..0786f380ca 100644 --- a/usaspending_api/references/management/commands/load_gtas.py +++ b/usaspending_api/references/management/commands/load_gtas.py @@ -7,19 +7,36 @@ from usaspending_api.etl.broker_etl_helpers import dictfetchall from usaspending_api.references.models import GTASSF133Balances -logger = logging.getLogger("console") +logger = logging.getLogger("script") DERIVED_COLUMNS = { + 
"budget_authority_unobligated_balance_brought_forward_cpe": [1000], "obligations_incurred_total_cpe": [2190], "budget_authority_appropriation_amount_cpe": [1160, 1180, 1260, 1280], + "borrowing_authority_amount": [1340, 1440], + "contract_authority_amount": [1540, 1640], + "spending_authority_from_offsetting_collections_amount": [1750, 1850], "other_budgetary_resources_amount_cpe": [1340, 1440, 1540, 1640, 1750, 1850], + "obligations_incurred": [2190], + "deobligations_or_recoveries_or_refunds_from_prior_year_cpe": [1021, 1033], "unobligated_balance_cpe": [2490], "total_budgetary_resources_cpe": [1910], } + INVERTED_DERIVED_COLUMNS = { "gross_outlay_amount_by_tas_cpe": [3020], } +# The before_year list of items is applied to records before the change_year fiscal year. +# The year_and_after list is applied to the change_year and subsequent fiscal years. +DERIVED_COLUMNS_DYNAMIC = { + "adjustments_to_unobligated_balance_brought_forward_cpe": { + "before_year": list(range(1010, 1043)), + "year_and_after": list(range(1010, 1066)), + "change_year": 2021, + } +} + class Command(mixins.ETLMixin, BaseCommand): help = "Update GTAS aggregations used as domain data" @@ -72,17 +89,23 @@ def broker_fetch_sql(self): """ def column_statements(self): - return "\n".join( - [ - f"""COALESCE(SUM(CASE WHEN line IN ({','.join([str(elem) for elem in val])}) THEN sf.amount ELSE 0 END), 0.0) AS {key},""" - for key, val in DERIVED_COLUMNS.items() - ] - ) + "\n".join( - [ - f"""COALESCE(SUM(CASE WHEN line IN ({','.join([str(elem) for elem in val])}) THEN sf.amount * -1 ELSE 0 END), 0.0) AS {key},""" - for key, val in INVERTED_DERIVED_COLUMNS.items() - ] - ) + simple_fields = [ + f"COALESCE(SUM(CASE WHEN line IN ({','.join([str(elem) for elem in val])}) THEN sf.amount ELSE 0 END), 0.0) AS {key}," + for key, val in DERIVED_COLUMNS.items() + ] + inverted_fields = [ + f"COALESCE(SUM(CASE WHEN line IN ({','.join([str(elem) for elem in val])}) THEN sf.amount * -1 ELSE 0 END), 0.0) AS {key}," + 
for key, val in INVERTED_DERIVED_COLUMNS.items() + ] + year_specific_fields = [ + f"""COALESCE(SUM(CASE + WHEN line IN ({','.join([str(elem) for elem in val["before_year"]])}) AND fiscal_year < {val["change_year"]} THEN sf.amount * -1 + WHEN line IN ({','.join([str(elem) for elem in val["year_and_after"]])}) AND fiscal_year >= {val["change_year"]} THEN sf.amount * -1 + ELSE 0 + END), 0.0) AS {key},""" + for key, val in DERIVED_COLUMNS_DYNAMIC.items() + ] + return "\n".join(simple_fields + inverted_fields + year_specific_fields) def tas_fk_sql(self): return """UPDATE gtas_sf133_balances diff --git a/usaspending_api/references/migrations/0050_auto_20201208_1549.py b/usaspending_api/references/migrations/0050_auto_20201208_1549.py new file mode 100644 index 0000000000..1964af0a20 --- /dev/null +++ b/usaspending_api/references/migrations/0050_auto_20201208_1549.py @@ -0,0 +1,55 @@ +# Generated by Django 2.2.14 on 2020-12-08 15:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('references', '0049_auto_20200727_1735'), + ] + + operations = [ + migrations.AddField( + model_name='gtassf133balances', + name='adjustments_to_unobligated_balance_brought_forward_cpe', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + model_name='gtassf133balances', + name='borrowing_authority_amount', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + model_name='gtassf133balances', + name='budget_authority_unobligated_balance_brought_forward_cpe', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + model_name='gtassf133balances', + name='contract_authority_amount', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + 
model_name='gtassf133balances', + name='deobligations_or_recoveries_or_refunds_from_prior_year_cpe', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + model_name='gtassf133balances', + name='obligations_incurred', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + migrations.AddField( + model_name='gtassf133balances', + name='spending_authority_from_offsetting_collections_amount', + field=models.DecimalField(decimal_places=2, default=0, max_digits=23), + preserve_default=False, + ), + ] diff --git a/usaspending_api/references/models/gtas_sf133_balances.py b/usaspending_api/references/models/gtas_sf133_balances.py index 22ade5b951..968a02a2c9 100644 --- a/usaspending_api/references/models/gtas_sf133_balances.py +++ b/usaspending_api/references/models/gtas_sf133_balances.py @@ -4,9 +4,16 @@ class GTASSF133Balances(models.Model): fiscal_year = models.IntegerField() fiscal_period = models.IntegerField() + budget_authority_unobligated_balance_brought_forward_cpe = models.DecimalField(max_digits=23, decimal_places=2) + adjustments_to_unobligated_balance_brought_forward_cpe = models.DecimalField(max_digits=23, decimal_places=2) obligations_incurred_total_cpe = models.DecimalField(max_digits=23, decimal_places=2) budget_authority_appropriation_amount_cpe = models.DecimalField(max_digits=23, decimal_places=2) + borrowing_authority_amount = models.DecimalField(max_digits=23, decimal_places=2) + contract_authority_amount = models.DecimalField(max_digits=23, decimal_places=2) + spending_authority_from_offsetting_collections_amount = models.DecimalField(max_digits=23, decimal_places=2) other_budgetary_resources_amount_cpe = models.DecimalField(max_digits=23, decimal_places=2) + obligations_incurred = models.DecimalField(max_digits=23, decimal_places=2) + deobligations_or_recoveries_or_refunds_from_prior_year_cpe = 
models.DecimalField(max_digits=23, decimal_places=2) gross_outlay_amount_by_tas_cpe = models.DecimalField(max_digits=23, decimal_places=2) unobligated_balance_cpe = models.DecimalField(max_digits=23, decimal_places=2) total_budgetary_resources_cpe = models.DecimalField(max_digits=23, decimal_places=2) diff --git a/usaspending_api/references/tests/data/broker_gtas.json b/usaspending_api/references/tests/data/broker_gtas.json index b2cf9f3d0e..9b84ddca70 100644 --- a/usaspending_api/references/tests/data/broker_gtas.json +++ b/usaspending_api/references/tests/data/broker_gtas.json @@ -1,11 +1,18 @@ { -"SELECT\n fiscal_year,\n period as fiscal_period,\n COALESCE(SUM(CASE WHEN line IN (2190) THEN sf.amount ELSE 0 END), 0.0) AS obligations_incurred_total_cpe,\nCOALESCE(SUM(CASE WHEN line IN (1160,1180,1260,1280) THEN sf.amount ELSE 0 END), 0.0) AS budget_authority_appropriation_amount_cpe,\nCOALESCE(SUM(CASE WHEN line IN (1340,1440,1540,1640,1750,1850) THEN sf.amount ELSE 0 END), 0.0) AS other_budgetary_resources_amount_cpe,\nCOALESCE(SUM(CASE WHEN line IN (2490) THEN sf.amount ELSE 0 END), 0.0) AS unobligated_balance_cpe,\nCOALESCE(SUM(CASE WHEN line IN (1910) THEN sf.amount ELSE 0 END), 0.0) AS total_budgetary_resources_cpe,COALESCE(SUM(CASE WHEN line IN (3020) THEN sf.amount * -1 ELSE 0 END), 0.0) AS gross_outlay_amount_by_tas_cpe,\n disaster_emergency_fund_code,\n CONCAT(\n CASE WHEN sf.allocation_transfer_agency is not null THEN CONCAT(sf.allocation_transfer_agency, '-') ELSE null END,\n sf.agency_identifier, '-',\n CASE WHEN sf.beginning_period_of_availa is not null THEN CONCAT(sf.beginning_period_of_availa, '/', sf.ending_period_of_availabil) ELSE sf.availability_type_code END,\n '-', sf.main_account_code, '-', sf.sub_account_code)\n as tas_rendering_label\n FROM\n sf_133 sf\n GROUP BY\n fiscal_year,\n fiscal_period,\n disaster_emergency_fund_code,\n tas_rendering_label\n ORDER BY\n fiscal_year,\n fiscal_period;": [ +"SELECT fiscal_year, period as fiscal_period, 
COALESCE(SUM(CASE WHEN line IN (1000) THEN sf.amount ELSE 0 END), 0.0) AS budget_authority_unobligated_balance_brought_forward_cpe, COALESCE(SUM(CASE WHEN line IN (2190) THEN sf.amount ELSE 0 END), 0.0) AS obligations_incurred_total_cpe, COALESCE(SUM(CASE WHEN line IN (1160,1180,1260,1280) THEN sf.amount ELSE 0 END), 0.0) AS budget_authority_appropriation_amount_cpe, COALESCE(SUM(CASE WHEN line IN (1340,1440) THEN sf.amount ELSE 0 END), 0.0) AS borrowing_authority_amount, COALESCE(SUM(CASE WHEN line IN (1540,1640) THEN sf.amount ELSE 0 END), 0.0) AS contract_authority_amount, COALESCE(SUM(CASE WHEN line IN (1750,1850) THEN sf.amount ELSE 0 END), 0.0) AS spending_authority_from_offsetting_collections_amount, COALESCE(SUM(CASE WHEN line IN (1340,1440,1540,1640,1750,1850) THEN sf.amount ELSE 0 END), 0.0) AS other_budgetary_resources_amount_cpe, COALESCE(SUM(CASE WHEN line IN (2190) THEN sf.amount ELSE 0 END), 0.0) AS obligations_incurred, COALESCE(SUM(CASE WHEN line IN (1021,1033) THEN sf.amount ELSE 0 END), 0.0) AS deobligations_or_recoveries_or_refunds_from_prior_year_cpe, COALESCE(SUM(CASE WHEN line IN (2490) THEN sf.amount ELSE 0 END), 0.0) AS unobligated_balance_cpe, COALESCE(SUM(CASE WHEN line IN (1910) THEN sf.amount ELSE 0 END), 0.0) AS total_budgetary_resources_cpe,COALESCE(SUM(CASE WHEN line IN (3020) THEN sf.amount * -1 ELSE 0 END), 0.0) AS gross_outlay_amount_by_tas_cpe,COALESCE(SUM(CASE WHEN line IN (1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042) AND fiscal_year < 2021 THEN sf.amount * -1 WHEN line IN (1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065) AND fiscal_year >= 2021 THEN sf.amount * -1 ELSE 0 END), 0.0) 
AS adjustments_to_unobligated_balance_brought_forward_cpe, disaster_emergency_fund_code, CONCAT( CASE WHEN sf.allocation_transfer_agency is not null THEN CONCAT(sf.allocation_transfer_agency, '-') ELSE null END, sf.agency_identifier, '-', CASE WHEN sf.beginning_period_of_availa is not null THEN CONCAT(sf.beginning_period_of_availa, '/', sf.ending_period_of_availabil) ELSE sf.availability_type_code END, '-', sf.main_account_code, '-', sf.sub_account_code) as tas_rendering_label FROM sf_133 sf GROUP BY fiscal_year, fiscal_period, disaster_emergency_fund_code, tas_rendering_label ORDER BY fiscal_year, fiscal_period;": [ { "fiscal_year": 1600, "fiscal_period": "-1", + "budget_authority_unobligated_balance_brought_forward_cpe": "-11", + "adjustments_to_unobligated_balance_brought_forward_cpe": "-11", "obligations_incurred_total_cpe": "-10", "budget_authority_appropriation_amount_cpe": "-11", + "borrowing_authority_amount": "-11", + "contract_authority_amount": "-11", + "spending_authority_from_offsetting_collections_amount": "-11", "other_budgetary_resources_amount_cpe": "-11", + "obligations_incurred": "-11", + "deobligations_or_recoveries_or_refunds_from_prior_year_cpe": "-11", "gross_outlay_amount_by_tas_cpe": "-11", "unobligated_balance_cpe": "-11", "total_budgetary_resources_cpe": "11", @@ -14,9 +21,16 @@ { "fiscal_year": 1600, "fiscal_period": "-2", + "budget_authority_unobligated_balance_brought_forward_cpe": "-12", + "adjustments_to_unobligated_balance_brought_forward_cpe": "-12", "obligations_incurred_total_cpe": "-9", "budget_authority_appropriation_amount_cpe": "-12", + "borrowing_authority_amount": "-12", + "contract_authority_amount": "-12", + "spending_authority_from_offsetting_collections_amount": "-12", "other_budgetary_resources_amount_cpe": "-12", + "obligations_incurred": "-12", + "deobligations_or_recoveries_or_refunds_from_prior_year_cpe": "-12", "gross_outlay_amount_by_tas_cpe": "-12", "unobligated_balance_cpe": "-12", 
"total_budgetary_resources_cpe": "12", @@ -25,9 +39,16 @@ { "fiscal_year": 1601, "fiscal_period": "-1", + "budget_authority_unobligated_balance_brought_forward_cpe": "-13", + "adjustments_to_unobligated_balance_brought_forward_cpe": "-13", "obligations_incurred_total_cpe": "-8", "budget_authority_appropriation_amount_cpe": "-13", + "borrowing_authority_amount": "-13", + "contract_authority_amount": "-13", + "spending_authority_from_offsetting_collections_amount": "-13", "other_budgetary_resources_amount_cpe": "-13", + "obligations_incurred": "-13", + "deobligations_or_recoveries_or_refunds_from_prior_year_cpe": "-13", "gross_outlay_amount_by_tas_cpe": "-13", "unobligated_balance_cpe": "-13", "total_budgetary_resources_cpe": "13", diff --git a/usaspending_api/references/tests/integration/test_load_gtas_mgmt_cmd.py b/usaspending_api/references/tests/integration/test_load_gtas_mgmt_cmd.py index 3bbf5d4478..fdf444bdac 100644 --- a/usaspending_api/references/tests/integration/test_load_gtas_mgmt_cmd.py +++ b/usaspending_api/references/tests/integration/test_load_gtas_mgmt_cmd.py @@ -27,9 +27,9 @@ def test_program_activity_fresh_load(monkeypatch): expected_results = { "count": 3, "row_tuples": [ - (1600, -1, -10.00, -11.00, -11.00, -11.00, 11), - (1600, -2, -9.00, -12.00, -12.00, -12.00, 12), - (1601, -1, -8.00, -13.00, -13.00, -13.00, 13), + (1600, -1, -11.00, -11.00, -10.00, -11.00, -11.00, -11.00, -11.00, -11.00, -11.00, -11.00, -11.00, 11), + (1600, -2, -12.00, -12.00, -9.00, -12.00, -12.00, -12.00, -12.00, -12.00, -12.00, -12.00, -12.00, 12), + (1601, -1, -13.00, -13.00, -8.00, -13.00, -13.00, -13.00, -13.00, -13.00, -13.00, -13.00, -13.00, 13), ], } @@ -39,9 +39,16 @@ def test_program_activity_fresh_load(monkeypatch): GTASSF133Balances.objects.values_list( "fiscal_year", "fiscal_period", + "budget_authority_unobligated_balance_brought_forward_cpe", + "adjustments_to_unobligated_balance_brought_forward_cpe", "obligations_incurred_total_cpe", 
"budget_authority_appropriation_amount_cpe", + "borrowing_authority_amount", + "contract_authority_amount", + "spending_authority_from_offsetting_collections_amount", "other_budgetary_resources_amount_cpe", + "obligations_incurred", + "deobligations_or_recoveries_or_refunds_from_prior_year_cpe", "unobligated_balance_cpe", "total_budgetary_resources_cpe", ) diff --git a/usaspending_api/reporting/management/sql/populate_reporting_agency_missing_tas.sql b/usaspending_api/reporting/management/sql/populate_reporting_agency_missing_tas.sql new file mode 100644 index 0000000000..06f1fea202 --- /dev/null +++ b/usaspending_api/reporting/management/sql/populate_reporting_agency_missing_tas.sql @@ -0,0 +1,42 @@ +DELETE FROM public.reporting_agency_missing_tas; +ALTER SEQUENCE reporting_agency_missing_tas_reporting_agency_missing_tas_i_seq RESTART WITH 1; + +INSERT INTO public.reporting_agency_missing_tas ( + toptier_code, + fiscal_year, + fiscal_period, + tas_rendering_label, + obligated_amount +) + +WITH missing AS ( + SELECT + gtas.id + FROM appropriation_account_balances AS aab + INNER JOIN submission_attributes AS sa + ON aab.submission_id = sa.submission_id + RIGHT OUTER JOIN gtas_sf133_balances AS gtas + ON sa.reporting_fiscal_period = gtas.fiscal_period + AND sa.reporting_fiscal_year = gtas.fiscal_year + AND aab.treasury_account_identifier = gtas.treasury_account_identifier + WHERE + aab.submission_id IS NULL +) +SELECT + ta.toptier_code, + gtas.fiscal_year, + gtas.fiscal_period, + taa.tas_rendering_label, + SUM(gtas.obligations_incurred_total_cpe) AS obligated_amount +FROM gtas_sf133_balances AS gtas +INNER JOIN missing + ON gtas.id = missing.id +INNER JOIN treasury_appropriation_account AS taa + ON gtas.treasury_account_identifier = taa.treasury_account_identifier +INNER JOIN toptier_agency AS ta + ON taa.funding_toptier_agency_id = ta.toptier_agency_id +GROUP BY ta.toptier_code, + gtas.fiscal_year, + gtas.fiscal_period, + taa.tas_rendering_label +; diff --git 
a/usaspending_api/reporting/migrations/0003_auto_20201204_1531.py b/usaspending_api/reporting/migrations/0003_auto_20201204_1531.py new file mode 100644 index 0000000000..23774509f2 --- /dev/null +++ b/usaspending_api/reporting/migrations/0003_auto_20201204_1531.py @@ -0,0 +1,31 @@ +# Generated by Django 2.2.17 on 2020-12-04 21:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('reporting', '0002_auto_20201123_2051'), + ] + + operations = [ + migrations.CreateModel( + name='ReportingAgencyMissingTas', + fields=[ + ('reporting_agency_missing_tas_id', models.AutoField(primary_key=True, serialize=False)), + ('toptier_code', models.TextField()), + ('fiscal_year', models.IntegerField()), + ('fiscal_period', models.IntegerField()), + ('tas_rendering_label', models.TextField()), + ('obligated_amount', models.DecimalField(decimal_places=2, max_digits=23)), + ], + options={ + 'db_table': 'reporting_agency_missing_tas', + }, + ), + migrations.AddIndex( + model_name='reportingagencymissingtas', + index=models.Index(fields=['fiscal_year', 'fiscal_period', 'toptier_code'], name='rpt_agency_missing_tas_grp_idx'), + ), + ] diff --git a/usaspending_api/reporting/models.py b/usaspending_api/reporting/models.py index a0b0aac343..5c1e82dc59 100644 --- a/usaspending_api/reporting/models.py +++ b/usaspending_api/reporting/models.py @@ -23,6 +23,26 @@ class Meta: ] +class ReportingAgencyMissingTas(models.Model): + """ + Model representing missing reporting data for appropriation and object class program activity values grouped by TAS and + period + """ + + reporting_agency_missing_tas_id = models.AutoField(primary_key=True) + toptier_code = models.TextField() + fiscal_year = models.IntegerField() + fiscal_period = models.IntegerField() + tas_rendering_label = models.TextField() + obligated_amount = models.DecimalField(max_digits=23, decimal_places=2) + + class Meta: + db_table = "reporting_agency_missing_tas" + indexes = [ 
+ models.Index(fields=["fiscal_year", "fiscal_period", "toptier_code"], name="rpt_agency_missing_tas_grp_idx") + ] + + class ReportingAgencyOverview(models.Model): """ Model representing reporting data for appropriation and object class program activity values grouped by TAS and diff --git a/usaspending_api/reporting/tests/integration/test_agencies_overview.py b/usaspending_api/reporting/tests/integration/test_agencies_overview.py new file mode 100644 index 0000000000..d1b6f34f96 --- /dev/null +++ b/usaspending_api/reporting/tests/integration/test_agencies_overview.py @@ -0,0 +1,357 @@ +import pytest +from model_mommy import mommy +from rest_framework import status + +from usaspending_api.common.helpers.fiscal_year_helpers import ( + current_fiscal_year, + calculate_last_completed_fiscal_quarter, + get_final_period_of_quarter, +) + +url = "/api/v2/reporting/agencies/overview/" + + +@pytest.fixture +def setup_test_data(db): + """ Insert data into DB for testing """ + sub = mommy.make( + "submissions.SubmissionAttributes", submission_id=1, reporting_fiscal_year=2019, reporting_fiscal_period=6 + ) + sub2 = mommy.make( + "submissions.SubmissionAttributes", + submission_id=2, + reporting_fiscal_year=current_fiscal_year(), + reporting_fiscal_period=get_final_period_of_quarter( + calculate_last_completed_fiscal_quarter(current_fiscal_year()) + ), + ) + mommy.make("references.Agency", id=1, toptier_agency_id=1, toptier_flag=True) + mommy.make("references.Agency", id=2, toptier_agency_id=2, toptier_flag=True) + mommy.make("references.Agency", id=3, toptier_agency_id=3, toptier_flag=True) + agencies = [ + mommy.make( + "references.ToptierAgency", toptier_agency_id=1, toptier_code="123", abbreviation="ABC", name="Test Agency" + ), + mommy.make( + "references.ToptierAgency", + toptier_agency_id=2, + toptier_code="987", + abbreviation="XYZ", + name="Test Agency 2", + ), + mommy.make( + "references.ToptierAgency", + toptier_agency_id=3, + toptier_code="001", + 
abbreviation="AAA", + name="Test Agency 3", + ), + ] + + treas_accounts = [ + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=1, + funding_toptier_agency_id=agencies[0].toptier_agency_id, + tas_rendering_label="tas-1-overview", + ), + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=2, + funding_toptier_agency_id=agencies[2].toptier_agency_id, + tas_rendering_label="tas-2-overview", + ), + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=3, + funding_toptier_agency_id=agencies[1].toptier_agency_id, + tas_rendering_label="tas-3-overview", + ), + ] + approps = [ + {"sub_id": sub.submission_id, "treasury_account": treas_accounts[0], "total_resources": 50}, + {"sub_id": sub.submission_id, "treasury_account": treas_accounts[1], "total_resources": 12}, + {"sub_id": sub2.submission_id, "treasury_account": treas_accounts[1], "total_resources": 29}, + {"sub_id": sub2.submission_id, "treasury_account": treas_accounts[2], "total_resources": 15.5}, + ] + for approp in approps: + mommy.make( + "accounts.AppropriationAccountBalances", + submission_id=approp["sub_id"], + treasury_account_identifier=approp["treasury_account"], + total_budgetary_resources_amount_cpe=approp["total_resources"], + ) + + reporting_tases = [ + { + "year": sub.reporting_fiscal_year, + "period": sub.reporting_fiscal_period, + "label": treas_accounts[0].tas_rendering_label, + "toptier_code": agencies[0].toptier_code, + "diff": 29.5, + }, + { + "year": sub2.reporting_fiscal_year, + "period": sub2.reporting_fiscal_period, + "label": treas_accounts[1].tas_rendering_label, + "toptier_code": agencies[2].toptier_code, + "diff": -1.3, + }, + { + "year": sub2.reporting_fiscal_year, + "period": sub2.reporting_fiscal_period, + "label": treas_accounts[2].tas_rendering_label, + "toptier_code": agencies[1].toptier_code, + "diff": 20.5, + }, + ] + for reporting_tas in reporting_tases: + mommy.make( + 
"reporting.ReportingAgencyTas", + fiscal_year=reporting_tas["year"], + fiscal_period=reporting_tas["period"], + tas_rendering_label=reporting_tas["label"], + toptier_code=reporting_tas["toptier_code"], + diff_approp_ocpa_obligated_amounts=reporting_tas["diff"], + appropriation_obligated_amount=100, + ) + + mommy.make( + "reporting.ReportingAgencyOverview", + reporting_agency_overview_id=1, + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + total_dollars_obligated_gtas=1788370.03, + total_budgetary_resources=22478810.97, + total_diff_approp_ocpa_obligated_amounts=84931.95, + ) + mommy.make( + "reporting.ReportingAgencyOverview", + reporting_agency_overview_id=2, + toptier_code=987, + fiscal_year=current_fiscal_year(), + fiscal_period=get_final_period_of_quarter(calculate_last_completed_fiscal_quarter(current_fiscal_year())), + total_dollars_obligated_gtas=18.6, + total_budgetary_resources=100, + total_diff_approp_ocpa_obligated_amounts=0, + ) + mommy.make( + "reporting.ReportingAgencyOverview", + reporting_agency_overview_id=3, + toptier_code="001", + fiscal_year=current_fiscal_year(), + fiscal_period=get_final_period_of_quarter(calculate_last_completed_fiscal_quarter(current_fiscal_year())), + total_dollars_obligated_gtas=20.0, + total_budgetary_resources=10.0, + total_diff_approp_ocpa_obligated_amounts=10.0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + tas_rendering_label="TAS 1", + obligated_amount=10.0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + tas_rendering_label="TAS 2", + obligated_amount=1.0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=987, + fiscal_year=2020, + fiscal_period=12, + tas_rendering_label="TAS 2", + obligated_amount=12.0, + ) + + +def test_basic_success(setup_test_data, client): + resp = client.get(url) + assert resp.status_code == status.HTTP_200_OK + 
response = resp.json() + assert len(response["results"]) == 2 + expected_results = [ + { + "agency_name": "Test Agency 2", + "abbreviation": "XYZ", + "agency_code": "987", + "agency_id": 2, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + }, + { + "agency_name": "Test Agency 3", + "abbreviation": "AAA", + "agency_code": "001", + "agency_id": 3, + "current_total_budget_authority_amount": 10.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 20.0, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 0.0, + "missing_tas_accounts_count": 0, + }, + "obligation_difference": 10.0, + }, + ] + assert response["results"] == expected_results + + +def test_filter(setup_test_data, client): + resp = client.get(url + "?filter=Test Agency 2") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 1 + expected_results = [ + { + "agency_name": "Test Agency 2", + "abbreviation": "XYZ", + "agency_code": "987", + "agency_id": 2, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + } + ] + assert response["results"] == expected_results + + +def test_pagination(setup_test_data, client): + resp = client.get(url + "?limit=1") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert 
len(response["results"]) == 1 + expected_results = [ + { + "agency_name": "Test Agency 2", + "abbreviation": "XYZ", + "agency_code": "987", + "agency_id": 2, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + } + ] + assert response["results"] == expected_results + + resp = client.get(url + "?limit=1&page=2") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 1 + expected_results = [ + { + "agency_name": "Test Agency 3", + "abbreviation": "AAA", + "agency_code": "001", + "agency_id": 3, + "current_total_budget_authority_amount": 10.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 20.0, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 0.0, + "missing_tas_accounts_count": 0, + }, + "obligation_difference": 10.0, + } + ] + assert response["results"] == expected_results + + resp = client.get(url + "?sort=obligation_difference&order=desc") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 2 + expected_results = [ + { + "agency_name": "Test Agency 3", + "abbreviation": "AAA", + "agency_code": "001", + "agency_id": 3, + "current_total_budget_authority_amount": 10.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 20.0, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 0.0, + "missing_tas_accounts_count": 0, + }, + "obligation_difference": 10.0, + }, + { + "agency_name": "Test Agency 2", + "abbreviation": "XYZ", + 
"agency_code": "987", + "agency_id": 2, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + }, + ] + assert response["results"] == expected_results + + +def test_fiscal_year_period_selection(setup_test_data, client): + resp = client.get(url + "?fiscal_year=2019&fiscal_period=6") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 1 + + expected_results = [ + { + "agency_name": "Test Agency", + "abbreviation": "ABC", + "agency_code": "123", + "agency_id": 1, + "current_total_budget_authority_amount": 22478810.97, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 1788370.03, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 11.0, + "missing_tas_accounts_count": 2, + }, + "obligation_difference": 84931.95, + } + ] + assert response["results"] == expected_results diff --git a/usaspending_api/reporting/tests/integration/test_agency_code_overview.py b/usaspending_api/reporting/tests/integration/test_agency_code_overview.py new file mode 100644 index 0000000000..0cb40ea3b1 --- /dev/null +++ b/usaspending_api/reporting/tests/integration/test_agency_code_overview.py @@ -0,0 +1,249 @@ +import pytest +from model_mommy import mommy +from rest_framework import status + + +url = "/api/v2/reporting/agencies/123/overview/" + + +@pytest.fixture +def setup_test_data(db): + """ Insert data into DB for testing """ + sub = mommy.make( + "submissions.SubmissionAttributes", submission_id=1, reporting_fiscal_year=2019, reporting_fiscal_period=6 + ) + sub2 = mommy.make( + "submissions.SubmissionAttributes", 
submission_id=2, reporting_fiscal_year=2020, reporting_fiscal_period=12, + ) + agency = mommy.make("references.ToptierAgency", toptier_code="123", abbreviation="ABC", name="Test Agency") + + treas_accounts = [ + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=1, + funding_toptier_agency_id=agency.toptier_agency_id, + tas_rendering_label="tas-1-overview", + ), + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=2, + funding_toptier_agency_id=agency.toptier_agency_id, + tas_rendering_label="tas-2-overview", + ), + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=3, + funding_toptier_agency_id=agency.toptier_agency_id, + tas_rendering_label="tas-3-overview", + ), + ] + approps = [ + {"sub_id": sub.submission_id, "treasury_account": treas_accounts[0], "total_resources": 50}, + {"sub_id": sub.submission_id, "treasury_account": treas_accounts[1], "total_resources": 12}, + {"sub_id": sub2.submission_id, "treasury_account": treas_accounts[1], "total_resources": 29}, + {"sub_id": sub2.submission_id, "treasury_account": treas_accounts[2], "total_resources": 15.5}, + ] + for approp in approps: + mommy.make( + "accounts.AppropriationAccountBalances", + submission_id=approp["sub_id"], + treasury_account_identifier=approp["treasury_account"], + total_budgetary_resources_amount_cpe=approp["total_resources"], + ) + + reporting_tases = [ + { + "year": sub.reporting_fiscal_year, + "period": sub.reporting_fiscal_period, + "label": treas_accounts[0].tas_rendering_label, + "toptier_code": agency.toptier_code, + "diff": 29.5, + }, + { + "year": sub.reporting_fiscal_year, + "period": sub.reporting_fiscal_period, + "label": treas_accounts[1].tas_rendering_label, + "toptier_code": agency.toptier_code, + "diff": -1.3, + }, + { + "year": sub2.reporting_fiscal_year, + "period": sub2.reporting_fiscal_period, + "label": treas_accounts[2].tas_rendering_label, + "toptier_code": 
agency.toptier_code, + "diff": 20.5, + }, + ] + for reporting_tas in reporting_tases: + mommy.make( + "reporting.ReportingAgencyTas", + fiscal_year=reporting_tas["year"], + fiscal_period=reporting_tas["period"], + tas_rendering_label=reporting_tas["label"], + toptier_code=reporting_tas["toptier_code"], + diff_approp_ocpa_obligated_amounts=reporting_tas["diff"], + appropriation_obligated_amount=100, + ) + + mommy.make( + "reporting.ReportingAgencyOverview", + reporting_agency_overview_id=1, + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + total_dollars_obligated_gtas=1788370.03, + total_budgetary_resources=22478810.97, + total_diff_approp_ocpa_obligated_amounts=84931.95, + ) + mommy.make( + "reporting.ReportingAgencyOverview", + reporting_agency_overview_id=2, + toptier_code=123, + fiscal_year=2020, + fiscal_period=12, + total_dollars_obligated_gtas=18.6, + total_budgetary_resources=100, + total_diff_approp_ocpa_obligated_amounts=0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + tas_rendering_label="TAS 1", + obligated_amount=10.0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=123, + fiscal_year=2019, + fiscal_period=6, + tas_rendering_label="TAS 2", + obligated_amount=1.0, + ) + mommy.make( + "reporting.ReportingAgencyMissingTas", + toptier_code=123, + fiscal_year=2020, + fiscal_period=12, + tas_rendering_label="TAS 2", + obligated_amount=12.0, + ) + + +def test_basic_success(setup_test_data, client): + resp = client.get(url) + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 2 + expected_results = [ + { + "fiscal_year": 2019, + "fiscal_period": 6, + "current_total_budget_authority_amount": 22478810.97, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 1788370.03, + "tas_accounts_total": 200.00, + 
"tas_obligation_not_in_gtas_total": 11.0, + "missing_tas_accounts_count": 2, + }, + "obligation_difference": 84931.95, + }, + { + "fiscal_year": 2020, + "fiscal_period": 12, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + }, + ] + assert response["results"] == expected_results + + +def test_pagination(setup_test_data, client): + resp = client.get(url + "?sort=current_total_budget_authority_amount&order=asc") + assert resp.status_code == status.HTTP_200_OK + response = resp.json() + assert len(response["results"]) == 2 + expected_results = [ + { + "fiscal_year": 2020, + "fiscal_period": 12, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + }, + { + "fiscal_year": 2019, + "fiscal_period": 6, + "current_total_budget_authority_amount": 22478810.97, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 1788370.03, + "tas_accounts_total": 200.00, + "tas_obligation_not_in_gtas_total": 11.0, + "missing_tas_accounts_count": 2, + }, + "obligation_difference": 84931.95, + }, + ] + assert response["results"] == expected_results + + resp = client.get(url + "?limit=1") + response = resp.json() + assert len(response["results"]) == 1 + expected_results = [ + { + "fiscal_year": 2019, + "fiscal_period": 6, + "current_total_budget_authority_amount": 22478810.97, + "recent_publication_date": 
None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 1788370.03, + "tas_accounts_total": 200.00, + "tas_obligation_not_in_gtas_total": 11.0, + "missing_tas_accounts_count": 2, + }, + "obligation_difference": 84931.95, + } + ] + assert response["results"] == expected_results + + resp = client.get(url + "?limit=1&page=2") + response = resp.json() + assert len(response["results"]) == 1 + expected_results = [ + { + "fiscal_year": 2020, + "fiscal_period": 12, + "current_total_budget_authority_amount": 100.0, + "recent_publication_date": None, + "recent_publication_date_certified": False, + "tas_account_discrepancies_totals": { + "gtas_obligation_total": 18.6, + "tas_accounts_total": 100.00, + "tas_obligation_not_in_gtas_total": 12.0, + "missing_tas_accounts_count": 1, + }, + "obligation_difference": 0.0, + } + ] + assert response["results"] == expected_results diff --git a/usaspending_api/reporting/tests/integration/test_differences_endpoint.py b/usaspending_api/reporting/tests/integration/test_differences_endpoint.py new file mode 100644 index 0000000000..e8c29a5d93 --- /dev/null +++ b/usaspending_api/reporting/tests/integration/test_differences_endpoint.py @@ -0,0 +1,198 @@ +import pytest +from decimal import Decimal + +from model_mommy import mommy +from rest_framework import status + +from usaspending_api.common.helpers.fiscal_year_helpers import current_fiscal_year + +URL = "/api/v2/reporting/agencies/{code}/differences/{filter}" + + +@pytest.fixture +def differences_data(): + ta1 = mommy.make("references.ToptierAgency", toptier_code="001") + tas1 = mommy.make("accounts.TreasuryAppropriationAccount", funding_toptier_agency=ta1) + mommy.make("accounts.AppropriationAccountBalances", treasury_account_identifier=tas1) + mommy.make( + "reporting.ReportingAgencyTas", + toptier_code=ta1.toptier_code, + fiscal_year=2020, + fiscal_period=3, + tas_rendering_label="TAS-1", + 
appropriation_obligated_amount=123.4, + object_class_pa_obligated_amount=120, + diff_approp_ocpa_obligated_amounts=3.4, + ) + mommy.make( + "reporting.ReportingAgencyTas", + toptier_code=ta1.toptier_code, + fiscal_year=2020, + fiscal_period=3, + tas_rendering_label="TAS-2", + appropriation_obligated_amount=500, + object_class_pa_obligated_amount=1000, + diff_approp_ocpa_obligated_amounts=-500, + ) + mommy.make( + "reporting.ReportingAgencyTas", + toptier_code=ta1.toptier_code, + fiscal_year=2020, + fiscal_period=3, + tas_rendering_label="TAS-3", + appropriation_obligated_amount=100, + object_class_pa_obligated_amount=100, + diff_approp_ocpa_obligated_amounts=0, + ) + + +@pytest.mark.django_db +def test_happy_path(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=3")) + assert resp.status_code == status.HTTP_200_OK + assert resp.data["page_metadata"] == { + "page": 1, + "next": None, + "previous": None, + "hasNext": False, + "hasPrevious": False, + "total": 2, + "limit": 10, + } + assert resp.data["results"] == [ + { + "tas": "TAS-2", + "file_a_obligation": Decimal(500), + "file_b_obligation": Decimal(1000), + "difference": Decimal(-500), + }, + { + "tas": "TAS-1", + "file_a_obligation": Decimal("123.4"), + "file_b_obligation": Decimal(120), + "difference": Decimal("3.4"), + }, + ] + + +@pytest.mark.django_db +def test_sort_by_file_a_obligation_ascending(client, differences_data): + resp = client.get( + URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=3&sort=file_a_obligation&order=asc") + ) + assert resp.status_code == status.HTTP_200_OK + assert resp.data["page_metadata"] == { + "page": 1, + "next": None, + "previous": None, + "hasNext": False, + "hasPrevious": False, + "total": 2, + "limit": 10, + } + assert resp.data["results"] == [ + { + "tas": "TAS-1", + "file_a_obligation": Decimal("123.4"), + "file_b_obligation": Decimal(120), + "difference": Decimal("3.4"), + }, + { + "tas": "TAS-2", 
+ "file_a_obligation": Decimal(500), + "file_b_obligation": Decimal(1000), + "difference": Decimal(-500), + }, + ] + + +@pytest.mark.django_db +def test_limit_one(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=3&limit=1")) + assert resp.status_code == status.HTTP_200_OK + assert resp.data["page_metadata"] == { + "page": 1, + "next": 2, + "previous": None, + "hasNext": True, + "hasPrevious": False, + "total": 2, + "limit": 1, + } + assert resp.data["results"] == [ + { + "tas": "TAS-2", + "file_a_obligation": Decimal(500), + "file_b_obligation": Decimal(1000), + "difference": Decimal(-500), + } + ] + + +@pytest.mark.django_db +def test_limit_one_page_two(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=3&limit=1&page=2")) + assert resp.status_code == status.HTTP_200_OK + assert resp.data["page_metadata"] == { + "page": 2, + "next": None, + "previous": 1, + "hasNext": False, + "hasPrevious": True, + "total": 2, + "limit": 1, + } + assert resp.data["results"] == [ + { + "tas": "TAS-1", + "file_a_obligation": Decimal("123.4"), + "file_b_obligation": Decimal(120), + "difference": Decimal("3.4"), + }, + ] + + +@pytest.mark.django_db +def test_no_results(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=2")) + assert resp.status_code == status.HTTP_200_OK + assert resp.data["page_metadata"] == { + "page": 1, + "next": None, + "previous": None, + "hasNext": False, + "hasPrevious": False, + "total": 0, + "limit": 10, + } + assert resp.data["results"] == [] + + +@pytest.mark.django_db +def test_missing_fiscal_year(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_period=3")) + assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + +@pytest.mark.django_db +def test_missing_fiscal_period(client, differences_data): + resp = 
client.get(URL.format(code="001", filter="?fiscal_year=2020")) + assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + +@pytest.mark.django_db +def test_invalid_code(client, differences_data): + resp = client.get(URL.format(code="002", filter="?fiscal_year=2020&fiscal_period=3")) + assert resp.status_code == status.HTTP_404_NOT_FOUND + + +@pytest.mark.django_db +def test_invalid_period(client, differences_data): + resp = client.get(URL.format(code="001", filter="?fiscal_year=2020&fiscal_period=13")) + assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + +@pytest.mark.django_db +def test_invalid_year(client, differences_data): + resp = client.get(URL.format(code="001", filter=f"?fiscal_year={current_fiscal_year() + 1}&fiscal_period=3")) + assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY diff --git a/usaspending_api/reporting/tests/integration/test_populate_reporting_agency_missing_tas.py b/usaspending_api/reporting/tests/integration/test_populate_reporting_agency_missing_tas.py new file mode 100644 index 0000000000..d81da867bd --- /dev/null +++ b/usaspending_api/reporting/tests/integration/test_populate_reporting_agency_missing_tas.py @@ -0,0 +1,129 @@ +import pytest +from decimal import Decimal + +from django.conf import settings +from django.db import connection +from model_mommy import mommy + +from usaspending_api.reporting.models import ReportingAgencyMissingTas + + +@pytest.fixture +def setup_test_data(db): + """ Insert data into DB for testing """ + sub = [ + mommy.make( + "submissions.SubmissionAttributes", submission_id=1, reporting_fiscal_year=2019, reporting_fiscal_period=3 + ), + mommy.make( + "submissions.SubmissionAttributes", submission_id=2, reporting_fiscal_year=2019, reporting_fiscal_period=4 + ), + ] + agency = mommy.make("references.ToptierAgency", toptier_agency_id=1, toptier_code="123") + + treas_accounts = [ + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=1, + 
funding_toptier_agency_id=agency.toptier_agency_id, + tas_rendering_label="tas-1", + ), + mommy.make( + "accounts.TreasuryAppropriationAccount", + treasury_account_identifier=2, + funding_toptier_agency_id=agency.toptier_agency_id, + tas_rendering_label="tas-2", + ), + ] + + approps = [ + {"sub_id": sub[0].submission_id, "treasury_account": treas_accounts[0]}, + {"sub_id": sub[0].submission_id, "treasury_account": treas_accounts[1]}, + {"sub_id": sub[1].submission_id, "treasury_account": treas_accounts[1]}, + ] + for approp in approps: + mommy.make( + "accounts.AppropriationAccountBalances", + submission_id=approp["sub_id"], + treasury_account_identifier=approp["treasury_account"], + ) + + gtas_rows = [ + { + "treasury_account_identifier": approps[0]["treasury_account"], + "fiscal_year": 2019, + "fiscal_period": 3, + "obligations_incurred_total_cpe": 1, + }, + { + "treasury_account_identifier": approps[1]["treasury_account"], + "fiscal_year": 2019, + "fiscal_period": 4, + "obligations_incurred_total_cpe": 2, + }, + { + "treasury_account_identifier": approps[0]["treasury_account"], + "fiscal_year": 2019, + "fiscal_period": 5, + "obligations_incurred_total_cpe": 3, + }, + { + "treasury_account_identifier": approps[0]["treasury_account"], + "fiscal_year": 2020, + "fiscal_period": 3, + "obligations_incurred_total_cpe": 4, + }, + { + "treasury_account_identifier": approps[1]["treasury_account"], + "fiscal_year": 2020, + "fiscal_period": 3, + "obligations_incurred_total_cpe": 5, + }, + { + "treasury_account_identifier": approps[1]["treasury_account"], + "fiscal_year": 2020, + "fiscal_period": 3, + "obligations_incurred_total_cpe": 6, + }, + ] + for gtas in gtas_rows: + mommy.make( + "references.GTASSF133Balances", + treasury_account_identifier=gtas["treasury_account_identifier"], + fiscal_year=gtas["fiscal_year"], + fiscal_period=gtas["fiscal_period"], + obligations_incurred_total_cpe=gtas["obligations_incurred_total_cpe"], + ) + + +def test_run_script(setup_test_data): 
+ """ Test that the populate_reporting_agency_missing_tas script acts as expected """ + sql_path = settings.APP_DIR / "reporting" / "management" / "sql" / "populate_reporting_agency_missing_tas.sql" + + with open(sql_path) as f: + test_sql = f.read() + + with connection.cursor() as cursor: + cursor.execute(test_sql) + results = ReportingAgencyMissingTas.objects.filter().all() + + # Expected results: GTAS rows 3, 4 and 5 & 6 summed + assert len(results) == 3 + + assert results[0].toptier_code == "123" + assert results[0].fiscal_year == 2019 + assert results[0].fiscal_period == 5 + assert results[0].tas_rendering_label == "tas-1" + assert results[0].obligated_amount == Decimal("3") + + assert results[1].toptier_code == "123" + assert results[1].fiscal_year == 2020 + assert results[1].fiscal_period == 3 + assert results[1].tas_rendering_label == "tas-1" + assert results[1].obligated_amount == Decimal("4") + + assert results[2].toptier_code == "123" + assert results[2].fiscal_year == 2020 + assert results[2].fiscal_period == 3 + assert results[2].tas_rendering_label == "tas-2" + assert results[2].obligated_amount == Decimal("11") diff --git a/usaspending_api/reporting/v2/urls.py b/usaspending_api/reporting/v2/urls.py index 183687a38f..40592c7572 100644 --- a/usaspending_api/reporting/v2/urls.py +++ b/usaspending_api/reporting/v2/urls.py @@ -1,6 +1,5 @@ -from django.conf.urls import url -from usaspending_api.reporting.v2.views.placeholder import Placeholder +from django.conf.urls import url, include urlpatterns = [ - url(r"^placeholder/$", Placeholder.as_view()), + url(r"^agencies/", include("usaspending_api.reporting.v2.views.agencies.urls")), ] diff --git a/usaspending_api/reporting/v2/views/agencies/agency_code/overview.py b/usaspending_api/reporting/v2/views/agencies/agency_code/overview.py new file mode 100644 index 0000000000..bac995fe13 --- /dev/null +++ b/usaspending_api/reporting/v2/views/agencies/agency_code/overview.py @@ -0,0 +1,138 @@ +from django.db.models 
class AgencyOverview(AgencyBase):
    """Return a paginated, sortable overview of one agency's submission data.

    One result is produced for every ``ReportingAgencyOverview`` row whose
    ``toptier_code`` equals ``self.toptier_code``.  Each row is enriched, via
    correlated subqueries on the same fiscal year / fiscal period / toptier code,
    with submission dates and TAS discrepancy totals.
    """

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/overview.md"

    # Sortable fields that live under "tas_account_discrepancies_totals" in each result
    # instead of at the top level of the result dictionary.
    nested_sort_keys = ("missing_tas_accounts_count", "tas_obligation_not_in_gtas_total")

    # "missing_tas_accounts_total" used to be the only accepted spelling even though no
    # result field carries that name (sorting by it raised KeyError).  It stays accepted
    # for backward compatibility and is translated to the field that actually exists.
    sort_key_aliases = {"missing_tas_accounts_total": "missing_tas_accounts_count"}

    def get(self, request, toptier_code):
        """Build the response: page metadata, the requested page of results and messages."""
        results = self.get_agency_overview()
        page_metadata = get_pagination_metadata(len(results), self.pagination.limit, self.pagination.page)
        results = results[self.pagination.lower_limit : self.pagination.upper_limit]
        return Response(
            {"page_metadata": page_metadata, "results": results, "messages": self.standard_response_messages}
        )

    def get_agency_overview(self):
        """Query the overview rows for this agency and return them formatted and sorted."""
        # NOTE(review): these filters feed scalar subqueries; if more than one submission can
        # match a single (year, period, toptier_code) the database will reject the query.
        # Confirm uniqueness or limit the subqueries to one row.
        agency_filters = [
            Q(reporting_fiscal_year=OuterRef("fiscal_year")),
            Q(reporting_fiscal_period=OuterRef("fiscal_period")),
            Q(toptier_code=OuterRef("toptier_code")),
        ]
        result_list = (
            ReportingAgencyOverview.objects.filter(toptier_code=self.toptier_code)
            .annotate(
                recent_publication_date=Subquery(
                    SubmissionAttributes.objects.filter(*agency_filters).values("published_date")
                ),
                recent_publication_date_certified=Subquery(
                    SubmissionAttributes.objects.filter(*agency_filters).values("certified_date")
                ),
                # SUM / COUNT are expressed with Func so each subquery selects a single aggregate value
                tas_obligations=Subquery(
                    ReportingAgencyTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(the_sum=Func(F("appropriation_obligated_amount"), function="SUM"))
                    .values("the_sum"),
                    output_field=DecimalField(max_digits=23, decimal_places=2),
                ),
                tas_obligation_not_in_gtas_total=Subquery(
                    ReportingAgencyMissingTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(the_sum=Func(F("obligated_amount"), function="SUM"))
                    .values("the_sum"),
                    output_field=DecimalField(max_digits=23, decimal_places=2),
                ),
                missing_tas_accounts=Subquery(
                    ReportingAgencyMissingTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(count=Func(F("tas_rendering_label"), function="COUNT"))
                    .values("count"),
                    output_field=IntegerField(),
                ),
            )
            .values(
                "fiscal_year",
                "fiscal_period",
                "total_dollars_obligated_gtas",
                "total_budgetary_resources",
                "total_diff_approp_ocpa_obligated_amounts",
                "recent_publication_date",
                "recent_publication_date_certified",
                "tas_obligations",
                "tas_obligation_not_in_gtas_total",
                "missing_tas_accounts",
            )
        )
        return self.format_results(result_list)

    def format_results(self, result_list):
        """Convert query rows into response dictionaries, sorted per ``self.pagination``.

        ``recent_publication_date_certified`` is reported as a boolean: True when the
        annotated certified date is not None.  Rows whose sort value is None are grouped
        together (last when ascending, first when descending) instead of raising
        ``TypeError`` during comparison.
        """
        results = [
            {
                "fiscal_year": result["fiscal_year"],
                "fiscal_period": result["fiscal_period"],
                "current_total_budget_authority_amount": result["total_budgetary_resources"],
                "recent_publication_date": result["recent_publication_date"],
                "recent_publication_date_certified": result["recent_publication_date_certified"] is not None,
                "tas_account_discrepancies_totals": {
                    "gtas_obligation_total": result["total_dollars_obligated_gtas"],
                    "tas_accounts_total": result["tas_obligations"],
                    "tas_obligation_not_in_gtas_total": result["tas_obligation_not_in_gtas_total"] or 0.0,
                    "missing_tas_accounts_count": result["missing_tas_accounts"],
                },
                "obligation_difference": result["total_diff_approp_ocpa_obligated_amounts"],
            }
            for result in result_list
        ]
        requested_key = self.pagination.sort_key
        sort_key = self.sort_key_aliases.get(requested_key, requested_key)
        is_nested = sort_key in self.nested_sort_keys

        def sort_value(row):
            container = row["tas_account_discrepancies_totals"] if is_nested else row
            value = container[sort_key]
            # Leading flag keeps None from ever being compared with "<" against a real value
            return (value is None, value)

        return sorted(results, key=sort_value, reverse=self.pagination.sort_order == "desc")

    @cached_property
    def pagination(self):
        """Validate the paging/sorting query parameters and return them as a ``Pagination``."""
        sortable_columns = [
            "fiscal_year",
            "current_total_budget_authority_amount",
            "missing_tas_accounts_count",
            "missing_tas_accounts_total",  # legacy alias, see sort_key_aliases
            "obligation_difference",
            "recent_publication_date",
            "recent_publication_date_certified",
            "tas_obligation_not_in_gtas_total",
        ]
        default_sort_column = "current_total_budget_authority_amount"
        model = customize_pagination_with_sort_columns(sortable_columns, default_sort_column)
        request_data = TinyShield(model).block(self.request.query_params)
        return Pagination(
            page=request_data["page"],
            limit=request_data["limit"],
            lower_limit=(request_data["page"] - 1) * request_data["limit"],
            upper_limit=(request_data["page"] * request_data["limit"]),
            sort_key=request_data.get("sort", default_sort_column),
            sort_order=request_data["order"],
        )
class AgenciesOverview(AgencyBase):
    """Return list of all agencies and the overview of their spending data for a provided fiscal year and period"""

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/reporting/agencies/overview.md"

    # Sortable fields that live under "tas_account_discrepancies_totals" in each result
    # instead of at the top level of the result dictionary.
    nested_sort_keys = ("missing_tas_accounts_count", "tas_obligation_not_in_gtas_total")

    def get(self, request):
        """Build the response: page metadata, the requested page of results and messages."""
        results = self.get_agency_overview()
        page_metadata = get_pagination_metadata(len(results), self.pagination.limit, self.pagination.page)
        results = results[self.pagination.lower_limit : self.pagination.upper_limit]
        return Response(
            {"page_metadata": page_metadata, "results": results, "messages": self.standard_response_messages}
        )

    def get_agency_overview(self):
        """Query overview rows for ``self.fiscal_year`` / ``self.fiscal_period`` and format them.

        Rows whose toptier agency cannot be resolved (or whose name does not contain the
        optional ``filter`` text) end up with a NULL ``agency_name`` and are excluded.
        """
        agency_filters = [Q(toptier_code=OuterRef("toptier_code"))]
        if self.filter is not None:
            agency_filters.append(Q(name__icontains=self.filter))
        result_list = (
            ReportingAgencyOverview.objects.filter(fiscal_year=self.fiscal_year, fiscal_period=self.fiscal_period)
            .annotate(
                agency_name=Subquery(ToptierAgency.objects.filter(*agency_filters).values("name")),
                abbreviation=Subquery(ToptierAgency.objects.filter(*agency_filters).values("abbreviation")),
                recent_publication_date=Subquery(
                    SubmissionAttributes.objects.filter(
                        reporting_fiscal_year=OuterRef("fiscal_year"),
                        reporting_fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    ).values("published_date")
                ),
                recent_publication_date_certified=Subquery(
                    SubmissionAttributes.objects.filter(
                        reporting_fiscal_year=OuterRef("fiscal_year"),
                        reporting_fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    ).values("certified_date")
                ),
                # SUM / COUNT are expressed with Func so each subquery selects a single aggregate value
                tas_obligations=Subquery(
                    ReportingAgencyTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(the_sum=Func(F("appropriation_obligated_amount"), function="SUM"))
                    .values("the_sum"),
                    output_field=DecimalField(max_digits=23, decimal_places=2),
                ),
                tas_obligation_not_in_gtas_total=Subquery(
                    ReportingAgencyMissingTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(the_sum=Func(F("obligated_amount"), function="SUM"))
                    .values("the_sum"),
                    output_field=DecimalField(max_digits=23, decimal_places=2),
                ),
                missing_tas_accounts=Subquery(
                    ReportingAgencyMissingTas.objects.filter(
                        fiscal_year=OuterRef("fiscal_year"),
                        fiscal_period=OuterRef("fiscal_period"),
                        toptier_code=OuterRef("toptier_code"),
                    )
                    .annotate(count=Func(F("tas_rendering_label"), function="COUNT"))
                    .values("count"),
                    output_field=IntegerField(),
                ),
            )
            .exclude(agency_name__isnull=True)
            .values(
                "agency_name",
                "abbreviation",
                "toptier_code",
                "total_dollars_obligated_gtas",
                "total_budgetary_resources",
                "total_diff_approp_ocpa_obligated_amounts",
                "recent_publication_date",
                "recent_publication_date_certified",
                "tas_obligations",
                "tas_obligation_not_in_gtas_total",
                "missing_tas_accounts",
            )
        )
        return self.format_results(result_list)

    @staticmethod
    def _agency_ids_by_toptier_code(toptier_codes):
        """Map each toptier code to the id of its ``toptier_flag=True`` Agency, using one query.

        When several agencies match a code the lowest id wins.
        NOTE(review): this mirrors ``.first()`` only if Agency has no default ordering - confirm.
        """
        agency_ids = {}
        if not toptier_codes:
            return agency_ids
        matches = (
            Agency.objects.filter(toptier_agency__toptier_code__in=toptier_codes, toptier_flag=True)
            .order_by("id")
            .values_list("toptier_agency__toptier_code", "id")
        )
        for toptier_code, agency_id in matches:
            agency_ids.setdefault(toptier_code, agency_id)
        return agency_ids

    def format_results(self, result_list):
        """Convert query rows into response dictionaries, sorted per ``self.pagination``.

        ``agency_id`` is None when no toptier Agency row exists for a code (previously this
        raised ``AttributeError``).  Rows whose sort value is None are grouped together
        (last when ascending, first when descending) instead of raising ``TypeError``.
        """
        rows = list(result_list)
        # Resolve every agency id up front rather than issuing one query per result row
        agency_ids = self._agency_ids_by_toptier_code({row["toptier_code"] for row in rows})
        results = [
            {
                "agency_name": row["agency_name"],
                "abbreviation": row["abbreviation"],
                "agency_code": row["toptier_code"],
                "agency_id": agency_ids.get(row["toptier_code"]),
                "current_total_budget_authority_amount": row["total_budgetary_resources"],
                "recent_publication_date": row["recent_publication_date"],
                "recent_publication_date_certified": row["recent_publication_date_certified"] is not None,
                "tas_account_discrepancies_totals": {
                    "gtas_obligation_total": row["total_dollars_obligated_gtas"],
                    "tas_accounts_total": row["tas_obligations"],
                    "tas_obligation_not_in_gtas_total": row["tas_obligation_not_in_gtas_total"] or 0.0,
                    "missing_tas_accounts_count": row["missing_tas_accounts"],
                },
                "obligation_difference": row["total_diff_approp_ocpa_obligated_amounts"],
            }
            for row in rows
        ]
        sort_key = self.pagination.sort_key
        is_nested = sort_key in self.nested_sort_keys

        def sort_value(item):
            container = item["tas_account_discrepancies_totals"] if is_nested else item
            value = container[sort_key]
            # Leading flag keeps None from ever being compared with "<" against a real value
            return (value is None, value)

        return sorted(results, key=sort_value, reverse=self.pagination.sort_order == "desc")

    @cached_property
    def pagination(self):
        """Validate the paging/sorting query parameters and return them as a ``Pagination``."""
        sortable_columns = [
            "agency_code",
            "current_total_budget_authority_amount",
            "missing_tas_accounts_count",
            "agency_name",
            "obligation_difference",
            "recent_publication_date",
            "recent_publication_date_certified",
            "tas_obligation_not_in_gtas_total",
        ]
        default_sort_column = "current_total_budget_authority_amount"
        model = customize_pagination_with_sort_columns(sortable_columns, default_sort_column)
        request_data = TinyShield(model).block(self.request.query_params)
        return Pagination(
            page=request_data["page"],
            limit=request_data["limit"],
            lower_limit=(request_data["page"] - 1) * request_data["limit"],
            upper_limit=(request_data["page"] * request_data["limit"]),
            sort_key=request_data.get("sort", default_sort_column),
            sort_order=request_data["order"],
        )

    @cached_property
    def fiscal_period(self):
        """
        This is the fiscal period we want to limit our queries to when querying CPE values for
        self.fiscal_year.  If it's prior to Q1 submission window close date, we will return
        quarter 1 anyhow and just show what we have (which will likely be incomplete).

        A ``fiscal_period`` query parameter, when present, is returned exactly as received.
        NOTE(review): that value is an unvalidated string - consider validating it as an integer.
        """
        requested_period = self.request.query_params.get("fiscal_period")
        if requested_period is not None:
            return requested_period
        # Only compute the fallback when the caller did not choose a period
        return get_final_period_of_quarter(calculate_last_completed_fiscal_quarter(self.fiscal_year)) or 3

    @property
    def filter(self):
        """Optional ``filter`` query parameter, matched case-insensitively against agency names."""
        return self.request.query_params.get("filter")
class Differences(AgencyBase):
    """
    Obtain the differences between file A obligations and file B obligations for a specific agency/period
    """

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/reporting/agencies/agency_code/differences.md"

    @staticmethod
    def validate_fiscal_period(request_data):
        """Raise ``UnprocessableEntityException`` unless ``fiscal_period`` is within 2-12."""
        fiscal_period = request_data["fiscal_period"]
        if not 2 <= fiscal_period <= 12:
            raise UnprocessableEntityException("fiscal_period must be in the range 2-12")

    @staticmethod
    def _parse_and_validate_request(request_dict) -> dict:
        """Validate paging/sorting parameters plus the required ``fiscal_year`` and ``fiscal_period``."""
        sortable_columns = ["difference", "file_a_obligation", "file_b_obligation", "tas"]
        default_sort_column = "tas"
        models = customize_pagination_with_sort_columns(sortable_columns, default_sort_column)
        models.extend(
            [
                {"key": "fiscal_year", "name": "fiscal_year", "type": "integer", "optional": False},
                {"key": "fiscal_period", "name": "fiscal_period", "type": "integer", "optional": False},
            ]
        )

        validated_request_data = TinyShield(models).block(request_dict)
        return validated_request_data

    @staticmethod
    def format_results(rows, pagination):
        """Rename the row fields to their response names and sort by ``pagination.sort_key``.

        Rows whose sort value is None are grouped together (last when ascending, first when
        descending) rather than raising ``TypeError`` during comparison.
        """
        formatted_results = [
            {
                "tas": row["tas_rendering_label"],
                "file_a_obligation": row["appropriation_obligated_amount"],
                "file_b_obligation": row["object_class_pa_obligated_amount"],
                "difference": row["diff_approp_ocpa_obligated_amounts"],
            }
            for row in rows
        ]

        def sort_value(item):
            value = item[pagination.sort_key]
            # Leading flag keeps None from ever being compared with "<" against a real value
            return (value is None, value)

        return sorted(formatted_results, key=sort_value, reverse=pagination.sort_order == "desc")

    @cache_response()
    def get(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        """Return the requested page of TAS rows whose file A / file B obligation difference is non-zero."""
        request_data = self._parse_and_validate_request(request.query_params)
        # The agency is resolved before the period check, so that lookup runs first
        request_data["toptier_agency"] = self.toptier_agency
        self.validate_fiscal_period(request_data)
        pagination = Pagination(
            page=request_data["page"],
            limit=request_data["limit"],
            lower_limit=(request_data["page"] - 1) * request_data["limit"],
            upper_limit=(request_data["page"] * request_data["limit"]),
            sort_key=request_data.get("sort", "tas"),
            sort_order=request_data["order"],
        )
        results = self.get_differences_queryset(request_data)
        formatted_results = self.format_results(results, pagination)
        # Count the list that is actually paginated instead of going back to the queryset
        page_metadata = get_pagination_metadata(len(formatted_results), pagination.limit, pagination.page)
        return Response(
            {
                "page_metadata": page_metadata,
                "results": formatted_results[pagination.lower_limit : pagination.upper_limit],
                "messages": self.standard_response_messages,
            }
        )

    def get_differences_queryset(self, request_data):
        """Select this agency's ``ReportingAgencyTas`` rows for the period, skipping zero differences.

        The fiscal year comes from ``self.fiscal_year`` rather than from ``request_data``.
        """
        filters = [
            Q(toptier_code=request_data["toptier_agency"].toptier_code),
            Q(fiscal_year=self.fiscal_year),
            Q(fiscal_period=request_data["fiscal_period"]),
            ~Q(diff_approp_ocpa_obligated_amounts=0),
        ]
        results = (ReportingAgencyTas.objects.filter(*filters)).values(
            "tas_rendering_label",
            "appropriation_obligated_amount",
            "object_class_pa_obligated_amount",
            "diff_approp_ocpa_obligated_amounts",
        )
        return results
models.IntegerField() + funding_toptier_agency_id = models.IntegerField() + funding_subtier_agency_id = models.IntegerField() awarding_toptier_agency_name = models.TextField() funding_toptier_agency_name = models.TextField() awarding_subtier_agency_name = models.TextField() @@ -62,8 +64,6 @@ class BaseAwardSearchModel(models.Model): funding_toptier_agency_code = models.TextField() awarding_subtier_agency_code = models.TextField() funding_subtier_agency_code = models.TextField() - funding_toptier_agency_agg_key = models.TextField() - funding_subtier_agency_agg_key = models.TextField() recipient_location_country_code = models.TextField() recipient_location_country_name = models.TextField() @@ -73,6 +73,11 @@ class BaseAwardSearchModel(models.Model): recipient_location_zip5 = models.TextField() recipient_location_congressional_code = models.TextField() recipient_location_city_name = models.TextField() + recipient_location_state_name = models.TextField() + recipient_location_state_fips = models.TextField() + recipient_location_state_population = models.IntegerField() + recipient_location_county_population = models.IntegerField() + recipient_location_congressional_population = models.IntegerField() pop_country_code = models.TextField() pop_country_name = models.TextField() @@ -83,6 +88,11 @@ class BaseAwardSearchModel(models.Model): pop_zip5 = models.TextField() pop_congressional_code = models.TextField() pop_city_name = models.TextField() + pop_state_name = models.TextField() + pop_state_fips = models.TextField() + pop_state_population = models.IntegerField() + pop_county_population = models.IntegerField() + pop_congressional_population = models.IntegerField() cfda_program_title = models.TextField() cfda_number = models.TextField() @@ -96,14 +106,6 @@ class BaseAwardSearchModel(models.Model): naics_code = models.TextField() naics_description = models.TextField() - recipient_location_county_agg_key = models.TextField() - recipient_location_congressional_agg_key = 
models.TextField() - recipient_location_state_agg_key = models.TextField() - - pop_county_agg_key = models.TextField() - pop_congressional_agg_key = models.TextField() - pop_state_agg_key = models.TextField() - tas_paths = ArrayField(models.TextField(), default=list) tas_components = ArrayField(models.TextField(), default=list) disaster_emergency_fund_codes = ArrayField(models.TextField(), default=list) diff --git a/usaspending_api/search/models/universal_transaction_matview.py b/usaspending_api/search/models/universal_transaction_matview.py index bb5093853b..d00c701c35 100644 --- a/usaspending_api/search/models/universal_transaction_matview.py +++ b/usaspending_api/search/models/universal_transaction_matview.py @@ -53,25 +53,36 @@ class UniversalTransactionView(models.Model): pop_country_code = models.TextField() pop_country_name = models.TextField() + pop_state_name = models.TextField() pop_state_code = models.TextField() + pop_state_fips = models.TextField() + pop_state_population = models.IntegerField() pop_county_code = models.TextField() pop_county_name = models.TextField() + pop_county_population = models.IntegerField() pop_zip5 = models.TextField() pop_congressional_code = models.TextField() + pop_congressional_population = models.IntegerField() pop_city_name = models.TextField() recipient_location_country_code = models.TextField() recipient_location_country_name = models.TextField() recipient_location_state_code = models.TextField() + recipient_location_state_name = models.TextField() + recipient_location_state_fips = models.TextField() + recipient_location_state_population = models.IntegerField() recipient_location_county_code = models.TextField() recipient_location_county_name = models.TextField() + recipient_location_county_population = models.IntegerField() recipient_location_zip5 = models.TextField() recipient_location_congressional_code = models.TextField() + recipient_location_congressional_population = models.IntegerField() 
recipient_location_city_name = models.TextField() recipient_hash = models.UUIDField() recipient_name = models.TextField() recipient_unique_id = models.TextField() + recipient_levels = ArrayField(models.TextField(), default=None) parent_recipient_hash = models.UUIDField() parent_recipient_name = models.TextField() parent_recipient_unique_id = models.TextField() @@ -94,20 +105,6 @@ class UniversalTransactionView(models.Model): tas_components = ArrayField(models.TextField(), default=None) federal_accounts = JSONField() disaster_emergency_fund_codes = ArrayField(models.TextField(), default=None) - recipient_location_county_agg_key = models.TextField() - recipient_location_congressional_agg_key = models.TextField() - recipient_location_state_agg_key = models.TextField() - pop_county_agg_key = models.TextField() - pop_congressional_agg_key = models.TextField() - pop_state_agg_key = models.TextField() - pop_country_agg_key = models.TextField() - awarding_toptier_agency_agg_key = models.TextField() - funding_toptier_agency_agg_key = models.TextField() - awarding_subtier_agency_agg_key = models.TextField() - funding_subtier_agency_agg_key = models.TextField() - psc_agg_key = models.TextField() - naics_agg_key = models.TextField() - recipient_agg_key = models.TextField() class Meta: managed = False diff --git a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_awarding_subagency.py b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_awarding_subagency.py index ead9b425d5..3c99f82872 100644 --- a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_awarding_subagency.py +++ b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_awarding_subagency.py @@ -39,7 +39,7 @@ def test_correct_response(client, monkeypatch, elasticsearch_transaction_index, "limit": 10, "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False}, 
"results": [ - {"amount": 10.0, "name": "Awarding Subtier Agency 5", "code": "SA5", "id": 1005}, + {"amount": 10.0, "name": "Awarding Subtier Agency 5", "code": "SA5", "id": 1003}, {"amount": 5.0, "name": "Awarding Subtier Agency 1", "code": "SA1", "id": 1001}, ], "messages": [get_time_period_message()], @@ -76,7 +76,7 @@ def test_filtering_subtier_with_toptier( "category": "awarding_subagency", "limit": 10, "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False}, - "results": [{"amount": 10.0, "name": "Awarding Subtier Agency 5", "code": "SA5", "id": 1005}], + "results": [{"amount": 10.0, "name": "Awarding Subtier Agency 5", "code": "SA5", "id": 1003}], "messages": [get_time_period_message()], } diff --git a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_funding_subagency.py b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_funding_subagency.py index 2b31d983b4..b5c97a0e17 100644 --- a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_funding_subagency.py +++ b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_funding_subagency.py @@ -39,7 +39,7 @@ def test_correct_response(client, monkeypatch, elasticsearch_transaction_index, "limit": 10, "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False}, "results": [ - {"amount": 10.0, "name": "Funding Subtier Agency 6", "code": "SA6", "id": 1006}, + {"amount": 10.0, "name": "Funding Subtier Agency 6", "code": "SA6", "id": 1002}, {"amount": 5.0, "name": "Funding Subtier Agency 4", "code": "SA4", "id": 1004}, ], "messages": [get_time_period_message()], diff --git a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_recipient.py b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_recipient.py index aa33bbd666..147680884e 100644 --- 
a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_recipient.py +++ b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_recipient.py @@ -111,7 +111,7 @@ def test_top_1_fails_with_es_transactions_routed_dangerously(client, monkeypatch results = [] for bucket in response["aggregations"]["results"]["buckets"]: results.append({"key": bucket["key"], "sum": bucket["sum_agg"]["value"]}) - print(results) + assert len(results) == 1 assert results[0]["key"] == str( recipient1 diff --git a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_agency_types.py b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_agency_types.py index 669677cb7a..bc2aa9a218 100644 --- a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_agency_types.py +++ b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_agency_types.py @@ -1,10 +1,11 @@ +import json + from abc import ABCMeta from decimal import Decimal from django.db.models import QuerySet, F from enum import Enum from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.search.helpers.spending_by_category_helpers import fetch_agency_tier_id_by_agency from usaspending_api.search.v2.views.spending_by_category_views.spending_by_category import ( Category, @@ -30,14 +31,14 @@ def build_elasticsearch_result(self, response: dict) -> List[dict]: results = [] agency_info_buckets = response.get("group_by_agg_key", {}).get("buckets", []) for bucket in agency_info_buckets: - agency_info = json_str_to_dict(bucket.get("key")) + agency_info = json.loads(bucket.get("key")) results.append( { "amount": int(bucket.get("sum_field", {"value": 0})["value"]) / Decimal("100"), "name": agency_info.get("name"), - "code": agency_info.get("abbreviation") or None, - "id": int(agency_info.get("id")) if len(agency_info.get("id")) > 0 else None, + "code": 
agency_info.get("abbreviation"), + "id": agency_info.get("id"), } ) diff --git a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_federal_account.py b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_federal_account.py index de5c0c3d37..319d19663f 100644 --- a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_federal_account.py +++ b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_federal_account.py @@ -1,10 +1,11 @@ +import json + from abc import ABCMeta from decimal import Decimal from django.db.models import QuerySet from enum import Enum from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.search.v2.views.spending_by_category_views.spending_by_category import ( Category, AbstractSpendingByCategoryViewSet, @@ -26,12 +27,12 @@ def build_elasticsearch_result(self, response: dict) -> List[dict]: results = [] account_info_buckets = response.get("group_by_agg_key", {}).get("buckets", []) for bucket in account_info_buckets: - account_info = json_str_to_dict(bucket.get("key")) + account_info = json.loads(bucket.get("key")) results.append( { "amount": int(bucket.get("sum_field", {"value": 0})["value"]) / Decimal("100"), - "id": int(account_info.get("id")) if account_info.get("id") else None, - "code": account_info.get("federal_account_code") or None, + "id": account_info.get("id"), + "code": account_info.get("federal_account_code"), "name": account_info.get("account_title"), } ) diff --git a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_industry_codes.py b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_industry_codes.py index dd3f693b7a..3221d082ce 100644 --- a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_industry_codes.py +++ b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_industry_codes.py @@ -1,10 +1,11 @@ 
+import json + from abc import ABCMeta from decimal import Decimal from django.db.models import QuerySet, F from enum import Enum from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.references.models import Cfda from usaspending_api.search.helpers.spending_by_category_helpers import ( fetch_cfda_id_title_by_number, @@ -40,14 +41,14 @@ def build_elasticsearch_result(self, response: dict) -> List[dict]: industry_code_info = {"code": bucket.get("key")} cfda_code_list.append(industry_code_info["code"]) else: - industry_code_info = json_str_to_dict(bucket.get("key")) + industry_code_info = json.loads(bucket.get("key")) results.append( { "amount": int(bucket.get("sum_field", {"value": 0})["value"]) / Decimal("100"), "code": industry_code_info.get("code"), - "id": int(industry_code_info.get("id")) if industry_code_info.get("id") else None, - "name": industry_code_info.get("description") or None, + "id": industry_code_info.get("id"), + "name": industry_code_info.get("description"), } ) diff --git a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_locations.py b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_locations.py index 45cc2f3898..0582a8d8c2 100644 --- a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_locations.py +++ b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_locations.py @@ -1,10 +1,11 @@ +import json + from abc import ABCMeta from decimal import Decimal from django.db.models import QuerySet, F from enum import Enum from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.search.helpers.spending_by_category_helpers import ( fetch_country_name_from_code, fetch_state_name_from_code, @@ -33,14 +34,14 @@ def build_elasticsearch_result(self, response: dict) -> List[dict]: results = [] location_info_buckets = 
response.get("group_by_agg_key", {}).get("buckets", []) for bucket in location_info_buckets: - location_info = json_str_to_dict(bucket.get("key")) + location_info = json.loads(bucket.get("key")) if self.location_type == LocationType.CONGRESSIONAL_DISTRICT: if location_info.get("congressional_code") == "90": congressional_code = "MULTIPLE DISTRICTS" else: - congressional_code = location_info.get("congressional_code") - name = f"{location_info.get('state_code')}-{congressional_code}" + congressional_code = location_info.get("congressional_code") or "" + name = f"{location_info.get('state_code') or ''}-{congressional_code}" else: name = location_info.get(f"{self.location_type.value}_name") diff --git a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_recipient_duns.py b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_recipient_duns.py index ecd89ba502..939b4b4ae8 100644 --- a/usaspending_api/search/v2/views/spending_by_category_views/spending_by_recipient_duns.py +++ b/usaspending_api/search/v2/views/spending_by_category_views/spending_by_recipient_duns.py @@ -1,8 +1,9 @@ +import json + from decimal import Decimal from django.db.models import QuerySet, F, Case, When, Value, IntegerField from typing import List -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.common.recipient_lookups import combine_recipient_hash_and_level from usaspending_api.recipient.models import RecipientProfile from usaspending_api.recipient.v2.lookups import SPECIAL_CASES @@ -60,10 +61,11 @@ def _get_recipient_id(row: list) -> str: ) def build_elasticsearch_result(self, response: dict) -> List[dict]: + results = [] location_info_buckets = response.get("group_by_agg_key", {}).get("buckets", []) for bucket in location_info_buckets: - recipient_info = json_str_to_dict(bucket.get("key")) + recipient_info = json.loads(bucket.get("key")) results.append( { diff --git 
a/usaspending_api/search/v2/views/spending_by_geography.py b/usaspending_api/search/v2/views/spending_by_geography.py index a3f4e43363..2a3c6a7ccf 100644 --- a/usaspending_api/search/v2/views/spending_by_geography.py +++ b/usaspending_api/search/v2/views/spending_by_geography.py @@ -1,4 +1,5 @@ import copy +import json import logging from decimal import Decimal @@ -17,7 +18,6 @@ from usaspending_api.awards.v2.filters.sub_award import subaward_filter from usaspending_api.common.api_versioning import api_transformations, API_TRANSFORM_FUNCTIONS from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch from usaspending_api.common.helpers.generic_helper import get_generic_filters_message from usaspending_api.common.query_with_filters import QueryWithFilters @@ -345,21 +345,26 @@ def build_elasticsearch_result(self, response: dict) -> Dict[str, dict]: results = {} geo_info_buckets = response.get("group_by_agg_key", {}).get("buckets", []) for bucket in geo_info_buckets: - geo_info = json_str_to_dict(bucket.get("key")) + geo_info = json.loads(bucket.get("key")) if self.geo_layer == GeoLayer.STATE: - display_name = geo_info.get("state_name").title() - shape_code = geo_info.get("state_code").upper() + display_name = (geo_info.get("state_name") or "").title() + shape_code = (geo_info.get("state_code") or "").upper() elif self.geo_layer == GeoLayer.COUNTY: - display_name = geo_info["county_name"].title() - shape_code = f"{geo_info['state_fips']}{geo_info['county_code']}" + state_fips = geo_info.get("state_fips") or "" + county_code = geo_info.get("county_code") or "" + display_name = (geo_info.get("county_name") or "").title() + shape_code = f"{state_fips}{county_code}" else: - display_name = f"{geo_info['state_code']}-{geo_info['congressional_code']}".upper() - shape_code = 
f"{geo_info['state_fips']}{geo_info['congressional_code']}" + state_code = geo_info.get("state_code") or "" + state_fips = geo_info.get("state_fips") or "" + congressional_code = geo_info.get("congressional_code") or "" + display_name = f"{state_code}-{congressional_code}".upper() + shape_code = f"{state_fips}{congressional_code}" per_capita = None aggregated_amount = int(bucket.get("sum_field", {"value": 0})["value"]) / Decimal("100") - population = int(geo_info["population"]) if geo_info["population"] else None + population = geo_info.get("population") if population: per_capita = (Decimal(aggregated_amount) / Decimal(population)).quantize(Decimal(".01")) diff --git a/usaspending_api/submissions/migrations/0015_submissionattributes_history.py b/usaspending_api/submissions/migrations/0015_submissionattributes_history.py new file mode 100644 index 0000000000..fce338eb83 --- /dev/null +++ b/usaspending_api/submissions/migrations/0015_submissionattributes_history.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.17 on 2020-12-03 16:42 + +import django.contrib.postgres.fields.jsonb +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('submissions', '0014_auto_20200901_1710'), + ] + + operations = [ + migrations.AddField( + model_name='submissionattributes', + name='history', + field=django.contrib.postgres.fields.jsonb.JSONField(null=True), + ), + ] diff --git a/usaspending_api/submissions/models/submission_attributes.py b/usaspending_api/submissions/models/submission_attributes.py index d9a42e510f..e73584de9a 100644 --- a/usaspending_api/submissions/models/submission_attributes.py +++ b/usaspending_api/submissions/models/submission_attributes.py @@ -1,4 +1,5 @@ from django.db import models +from django.contrib.postgres.fields import JSONField class SubmissionAttributes(models.Model): @@ -17,6 +18,7 @@ class SubmissionAttributes(models.Model): is_final_balances_for_fy = models.BooleanField(default=False) create_date = 
models.DateTimeField(auto_now_add=True, blank=True, null=True) update_date = models.DateTimeField(auto_now=True, null=True) + history = JSONField(null=True) class Meta: db_table = "submission_attributes"