diff --git a/.gitignore b/.gitignore index b1385cfe01..150a7fdbca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.pyc *.swp *.env +.env .cache .coverage .DS_Store diff --git a/usaspending_api/api_contracts/contracts/v2/disaster/federal_account/loans.md b/usaspending_api/api_contracts/contracts/v2/disaster/federal_account/loans.md index 2c693fe250..e1ae9a651d 100644 --- a/usaspending_api/api_contracts/contracts/v2/disaster/federal_account/loans.md +++ b/usaspending_api/api_contracts/contracts/v2/disaster/federal_account/loans.md @@ -25,8 +25,7 @@ Returns loan spending details of Federal Accounts receiving supplemental funding { "filter": { - "def_codes": ["L", "M", "N", "O", "P"], - "award_type_codes": ["07", "08"] + "def_codes": ["L", "M", "N", "O", "P"] }, "pagination": { "limit": 10, diff --git a/usaspending_api/api_contracts/contracts/v2/disaster/object_class/loans.md b/usaspending_api/api_contracts/contracts/v2/disaster/object_class/loans.md index d10fcba0ac..48923bbbf1 100644 --- a/usaspending_api/api_contracts/contracts/v2/disaster/object_class/loans.md +++ b/usaspending_api/api_contracts/contracts/v2/disaster/object_class/loans.md @@ -25,8 +25,7 @@ Returns loan spending details of Object Classes receiving supplemental funding b { "filter": { - "def_codes": ["L", "M", "N", "O", "P"], - "award_type_codes": ["07", "08"] + "def_codes": ["L", "M", "N", "O", "P"] }, "pagination": { "limit": 10, diff --git a/usaspending_api/api_contracts/contracts/v2/reporting/placeholder.md b/usaspending_api/api_contracts/contracts/v2/reporting/placeholder.md new file mode 100644 index 0000000000..2891c541f8 --- /dev/null +++ b/usaspending_api/api_contracts/contracts/v2/reporting/placeholder.md @@ -0,0 +1,21 @@ +FORMAT: 1A +HOST: https://api.usaspending.gov + +# Placeholder [/api/v2/reporting/placeholder/] + +Description of the endpoint as a whole not taking into account the different HTTP methods. + +## GET + +Description of the endpoint using the above HTTP method. 
+ + ++ Response 200 (application/json) + + Attributes + + `status` (required, string) + + + Body + + { + "status": "success" + } diff --git a/usaspending_api/api_docs/markdown/endpoints.md b/usaspending_api/api_docs/markdown/endpoints.md index ade9a420b5..0c00f640e5 100644 --- a/usaspending_api/api_docs/markdown/endpoints.md +++ b/usaspending_api/api_docs/markdown/endpoints.md @@ -140,6 +140,7 @@ The currently available endpoints are listed in the following table. |[/api/v2/references/naics/](/api/v2/references/naics/)|GET| Returns all Tier 1 (2-digit) NAICS and related, relevant data. | |[/api/v2/references/submission_periods/](/api/v2/references/submission_periods/)|GET| Returns a list of all available submission periods with essential information about start and end dates. | |[/api/v2/references/toptier_agencies/](/api/v2/references/toptier_agencies/)|GET| Returns all toptier agencies and related, relevant data. | +|[/api/v2/reporting/placeholder/](/api/v2/reporting/placeholder/)|POST| Temp Placeholder. 
Ignore and remove | |[/api/v2/search/new_awards_over_time/](/api/v2/search/new_awards_over_time/)|POST| Returns a list of time periods with the new awards in the appropriate period within the provided time range | |[/api/v2/search/spending_by_award/](/api/v2/search/spending_by_award/)|POST| Returns the fields of the filtered awards | |[/api/v2/search/spending_by_award_count/](/api/v2/search/spending_by_award_count/)|POST| Returns the number of awards in each award type (Contracts, IDV, Loans, Direct Payments, Grants, and Other) | diff --git a/usaspending_api/common/elasticsearch/search_wrappers.py b/usaspending_api/common/elasticsearch/search_wrappers.py index 14e4c86621..c1c652892e 100644 --- a/usaspending_api/common/elasticsearch/search_wrappers.py +++ b/usaspending_api/common/elasticsearch/search_wrappers.py @@ -100,3 +100,7 @@ class TransactionSearch(_Search): class AwardSearch(_Search): _index_name = f"{settings.ES_AWARDS_QUERY_ALIAS_PREFIX}*" + + +class AccountSearch(_Search): + _index_name = f"{settings.ES_COVID19_FABA_QUERY_ALIAS_PREFIX}*" diff --git a/usaspending_api/common/helpers/fiscal_year_helpers.py b/usaspending_api/common/helpers/fiscal_year_helpers.py index 9dfa130c3b..5ad124e0bc 100644 --- a/usaspending_api/common/helpers/fiscal_year_helpers.py +++ b/usaspending_api/common/helpers/fiscal_year_helpers.py @@ -111,6 +111,19 @@ def generate_fiscal_date_range(min_date: datetime, max_date: datetime, frequency ) current_date = current_date + relativedelta(months=interval) + # check if max_date is in new period + final_period = { + "fiscal_year": generate_fiscal_year(max_date), + "fiscal_quarter": generate_fiscal_quarter(max_date), + "fiscal_month": generate_fiscal_month(max_date), + } + if final_period["fiscal_year"] > date_range[-1]["fiscal_year"]: + date_range.append(final_period) + elif interval == 3 and final_period["fiscal_quarter"] != date_range[-1]["fiscal_quarter"]: + date_range.append(final_period) + elif interval == 1 and final_period != 
date_range[-1]: + date_range.append(final_period) + return date_range diff --git a/usaspending_api/common/helpers/s3_helpers.py b/usaspending_api/common/helpers/s3_helpers.py index ec08c06e77..c301576ec3 100644 --- a/usaspending_api/common/helpers/s3_helpers.py +++ b/usaspending_api/common/helpers/s3_helpers.py @@ -54,4 +54,4 @@ def multipart_upload(bucketname, regionname, source_path, keyname): bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)), 5242880) config = boto3.s3.transfer.TransferConfig(multipart_chunksize=bytes_per_chunk) transfer = boto3.s3.transfer.S3Transfer(s3client, config) - transfer.upload_file(source_path, bucketname, Path(keyname).name) + transfer.upload_file(source_path, bucketname, Path(keyname).name, extra_args={"ACL": "bucket-owner-full-control"}) diff --git a/usaspending_api/common/management/commands/matview_runner.py b/usaspending_api/common/management/commands/matview_runner.py index 7ee48c35c3..dd2ed5151d 100644 --- a/usaspending_api/common/management/commands/matview_runner.py +++ b/usaspending_api/common/management/commands/matview_runner.py @@ -83,17 +83,17 @@ def handle(self, *args, **options): if not self.no_cleanup: self.cleanup() + @staticmethod + def clean_or_create_dir(dir_path): + if dir_path.exists(): + logger.warning(f"Clearing dir {dir_path}") + recursive_delete(dir_path) + dir_path.mkdir() + def generate_matview_sql(self): """Convert JSON definition files to SQL""" - if self.matview_dir.exists(): - logger.warning("Clearing dir {}".format(self.matview_dir)) - recursive_delete(self.matview_dir) - self.matview_dir.mkdir() - - if self.matview_chunked_dir.exists(): - logger.warning("Clearing dir {}".format(self.matview_chunked_dir)) - recursive_delete(self.matview_chunked_dir) - self.matview_chunked_dir.mkdir() + self.clean_or_create_dir(self.matview_dir) + self.clean_or_create_dir(self.matview_chunked_dir) # IF using this for operations, DO NOT LEAVE hardcoded `python3` in the command # Create main list of 
Matview SQL files @@ -102,7 +102,12 @@ def generate_matview_sql(self): # Create SQL files for Chunked Universal Transaction Matviews for matview, config in self.chunked_matviews.items(): - exec_str = f"python3 {CHUNKED_MATVIEW_GENERATOR_FILE} --quiet --file {config['json_filepath']} --chunk-count {self.chunk_count}" + exec_str = ( + f"python3 {CHUNKED_MATVIEW_GENERATOR_FILE} --quiet" + f" --file {config['json_filepath']}" + f" --chunk-count {self.chunk_count}" + f" --dest={self.matview_chunked_dir}" + ) subprocess.call(exec_str, shell=True) def cleanup(self): @@ -133,7 +138,10 @@ def create_views(self): if "universal_transaction_matview" in self.chunked_matviews: logger.info("Inserting data from universal_transaction_matview chunks into single table.") call_command( - "combine_universal_transaction_matview_chunks", chunk_count=self.chunk_count, index_concurrency=20, + "combine_universal_transaction_matview_chunks", + chunk_count=self.chunk_count, + index_concurrency=20, + matview_dir=self.matview_chunked_dir, ) for view in OVERLAY_VIEWS: diff --git a/usaspending_api/common/query_with_filters.py b/usaspending_api/common/query_with_filters.py index c41f4f6195..d225b3fb24 100644 --- a/usaspending_api/common/query_with_filters.py +++ b/usaspending_api/common/query_with_filters.py @@ -19,7 +19,9 @@ class _Keywords(_Filter): underscore_name = "keywords" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: keyword_queries = [] fields = [ "recipient_name", @@ -46,7 +48,9 @@ class _KeywordSearch(_Filter): underscore_name = "keyword_search" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: keyword_queries 
= [] fields = [ "recipient_name", @@ -98,7 +102,9 @@ class _TimePeriods(_Filter): underscore_name = "time_period" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[dict], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[dict], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: time_period_query = [] for v in filter_values: @@ -126,7 +132,9 @@ class _AwardTypeCodes(_Filter): underscore_name = "award_type_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: award_type_codes_query = [] for v in filter_values: @@ -139,7 +147,9 @@ class _Agencies(_Filter): underscore_name = "agencies" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[dict], query_type: _QueryType) -> List[ES_Q]: + def generate_elasticsearch_query( + cls, filter_values: List[dict], query_type: _QueryType, nested_path: str = "" + ) -> List[ES_Q]: awarding_agency_query = [] funding_agency_query = [] @@ -166,7 +176,9 @@ class _RecipientSearchText(_Filter): underscore_name = "recipient_search_text" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: recipient_search_query = [] fields = ["recipient_name"] @@ -190,7 +202,7 @@ class _RecipientId(_Filter): underscore_name = "recipient_id" @classmethod - def generate_elasticsearch_query(cls, filter_value: str, query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query(cls, filter_value: str, query_type: _QueryType, nested_path: str = "") -> ES_Q: recipient_hash = filter_value[:-2] if filter_value.endswith("P"): return ES_Q("match", parent_recipient_hash=recipient_hash) @@ -208,7 
+220,7 @@ class _RecipientScope(_Filter): underscore_name = "recipient_scope" @classmethod - def generate_elasticsearch_query(cls, filter_value: str, query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query(cls, filter_value: str, query_type: _QueryType, nested_path: str = "") -> ES_Q: recipient_scope_query = ES_Q("match", recipient_location_country_code="USA") if filter_value == "domestic": @@ -221,7 +233,9 @@ class _RecipientLocations(_Filter): underscore_name = "recipient_locations" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[dict], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[dict], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: recipient_locations_query = [] for v in filter_values: @@ -249,7 +263,9 @@ class _RecipientTypeNames(_Filter): underscore_name = "recipient_type_names" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: recipient_type_query = [] for v in filter_values: @@ -262,7 +278,9 @@ class _PlaceOfPerformanceScope(_Filter): underscore_name = "place_of_performance_scope" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: pop_scope_query = ES_Q("match", pop_country_code="USA") if filter_values == "domestic": @@ -275,7 +293,9 @@ class _PlaceOfPerformanceLocations(_Filter): underscore_name = "place_of_performance_locations" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[dict], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[dict], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: pop_locations_query 
= [] for v in filter_values: @@ -303,7 +323,9 @@ class _AwardAmounts(_Filter): underscore_name = "award_amounts" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[dict], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[dict], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: award_amounts_query = [] for v in filter_values: lower_bound = v.get("lower_bound") @@ -316,7 +338,9 @@ class _AwardIds(_Filter): underscore_name = "award_ids" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: award_ids_query = [] for v in filter_values: @@ -335,7 +359,9 @@ class _ProgramNumbers(_Filter): underscore_name = "program_numbers" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: programs_numbers_query = [] for v in filter_values: @@ -348,7 +374,9 @@ class _ContractPricingTypeCodes(_Filter): underscore_name = "contract_pricing_type_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: contract_pricing_query = [] for v in filter_values: @@ -361,7 +389,9 @@ class _SetAsideTypeCodes(_Filter): underscore_name = "set_aside_type_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: set_aside_query = [] for v in filter_values: @@ -374,7 +404,9 @@ class 
_ExtentCompetedTypeCodes(_Filter): underscore_name = "extent_competed_type_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: extent_competed_query = [] for v in filter_values: @@ -389,19 +421,23 @@ class _DisasterEmergencyFundCodes(_Filter): underscore_name = "def_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: + if nested_path is None: + nested_path = "" def_codes_query = [] + def_code_field = f"{nested_path}{'.' if nested_path else ''}disaster_emergency_fund_code{'s' if query_type != _QueryType.ACCOUNTS else ''}" for v in filter_values: - def_codes_query.append(ES_Q("match", disaster_emergency_fund_codes=v)) - if query_type == _QueryType.AWARDS: - return ES_Q("bool", should=def_codes_query, minimum_should_match=1) - - return ES_Q( - "bool", - should=def_codes_query, - minimum_should_match=1, - must=ES_Q("range", action_date={"gte": "2020-04-01"}), - ) + def_codes_query.append(ES_Q("match", **{def_code_field: v})) + if query_type == _QueryType.TRANSACTIONS: + return ES_Q( + "bool", + should=def_codes_query, + minimum_should_match=1, + must=ES_Q("range", action_date={"gte": "2020-04-01"}), + ) + return ES_Q("bool", should=def_codes_query, minimum_should_match=1) class _QueryText(_Filter): @@ -410,9 +446,9 @@ class _QueryText(_Filter): underscore_name = "query" @classmethod - def generate_elasticsearch_query(cls, filter_values: dict, query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query(cls, filter_values: dict, query_type: _QueryType, nested_path: str = "") -> ES_Q: query_text = filter_values["text"] - query_fields = filter_values["fields"] + query_fields = 
[f"{nested_path}{'.' if nested_path else ''}{field}" for field in filter_values["fields"]] return ES_Q("multi_match", query=query_text, type="phrase_prefix", fields=query_fields) @@ -422,11 +458,31 @@ class _NonzeroFields(_Filter): underscore_name = "nonzero_fields" @classmethod - def generate_elasticsearch_query(cls, filter_values: List[str], query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: + non_zero_queries = [] + for field in filter_values: + field_name = f"{nested_path}{'.' if nested_path else ''}{field}" + non_zero_queries.append(ES_Q("range", **{field_name: {"gt": 0}})) + non_zero_queries.append(ES_Q("range", **{field_name: {"lt": 0}})) + return ES_Q("bool", should=non_zero_queries, minimum_should_match=1) + + +class _NonzeroNestedSumFields(_Filter): + """Query for when the sum of nested fields should not be zero""" + + underscore_name = "nonzero_sum_fields" + + @classmethod + def generate_elasticsearch_query( + cls, filter_values: List[str], query_type: _QueryType, nested_path: str = "" + ) -> ES_Q: non_zero_queries = [] for field in filter_values: - non_zero_queries.append(ES_Q("range", **{field: {"gt": 0}})) - non_zero_queries.append(ES_Q("range", **{field: {"lt": 0}})) + field_name = f"{nested_path}{'.' 
if nested_path else ''}{field}" + non_zero_queries.append(ES_Q("range", **{field_name: {"gt": 0}})) + non_zero_queries.append(ES_Q("range", **{field_name: {"lt": 0}})) return ES_Q("bool", should=non_zero_queries, minimum_should_match=1) @@ -458,11 +514,18 @@ class QueryWithFilters: _NonzeroFields.underscore_name: _NonzeroFields, } + nested_filter_lookup = { + f"nested_{_DisasterEmergencyFundCodes.underscore_name}": _DisasterEmergencyFundCodes, + f"nested_{_QueryText.underscore_name}": _QueryText, + f"nested_{_NonzeroFields.underscore_name}": _NonzeroFields, + } + unsupported_filters = ["legal_entities"] @classmethod - def _generate_elasticsearch_query(cls, filters: dict, query_type: _QueryType) -> ES_Q: + def _generate_elasticsearch_query(cls, filters: dict, query_type: _QueryType, nested_path: str = "") -> ES_Q: must_queries = [] + nested_must_queries = [] # Create a copy of the filters so that manipulating the filters for the purpose of building the ES query # does not affect the source dictionary @@ -477,17 +540,28 @@ def _generate_elasticsearch_query(cls, filters: dict, query_type: _QueryType) -> msg = "API request included '{}' key. 
No filtering will occur with provided value '{}'" logger.warning(msg.format(filter_type, filter_values)) continue - elif filter_type not in cls.filter_lookup.keys(): + elif filter_type not in cls.filter_lookup.keys() and filter_type not in cls.nested_filter_lookup.keys(): raise InvalidParameterException(f"Invalid filter: {filter_type} does not exist.") # Generate the query for a filter - query = cls.filter_lookup[filter_type].generate_query(filter_values, query_type) + if "nested_" in filter_type: + query = cls.nested_filter_lookup[filter_type].generate_query(filter_values, query_type, nested_path) + list_pointer = nested_must_queries + else: + query = cls.filter_lookup[filter_type].generate_query(filter_values, query_type) + list_pointer = must_queries # Handle the possibility of multiple queries from one filter if isinstance(query, list): - must_queries.extend(query) + list_pointer.extend(query) else: - must_queries.append(query) + list_pointer.append(query) + + nested_query = ES_Q("nested", path="financial_accounts_by_award", query=ES_Q("bool", must=nested_must_queries)) + if must_queries and nested_must_queries: + must_queries.append(nested_query) + elif nested_must_queries: + must_queries = nested_query return ES_Q("bool", must=must_queries) @classmethod @@ -532,3 +606,7 @@ def generate_awards_elasticsearch_query(cls, filters: dict) -> ES_Q: @classmethod def generate_transactions_elasticsearch_query(cls, filters: dict) -> ES_Q: return cls._generate_elasticsearch_query(filters, _QueryType.TRANSACTIONS) + + @classmethod + def generate_accounts_elasticsearch_query(cls, filters: dict) -> ES_Q: + return cls._generate_elasticsearch_query(filters, _QueryType.ACCOUNTS, "financial_accounts_by_award") diff --git a/usaspending_api/common/sqs/sqs_handler.py b/usaspending_api/common/sqs/sqs_handler.py index 88ce46e78d..0130bc4c3c 100644 --- a/usaspending_api/common/sqs/sqs_handler.py +++ b/usaspending_api/common/sqs/sqs_handler.py @@ -245,6 +245,6 @@ def 
get_sqs_queue(region_name=settings.USASPENDING_AWS_REGION, queue_name=settin return _FakeFileBackedSQSQueue.instance() else: # stuff that's in get_queue - sqs = boto3.resource("sqs", region_name) + sqs = boto3.resource("sqs", endpoint_url=f"https://sqs.{region_name}.amazonaws.com", region_name=region_name) queue = sqs.get_queue_by_name(QueueName=queue_name) return queue diff --git a/usaspending_api/common/tests/unit/test_fiscal_year_helpers.py b/usaspending_api/common/tests/unit/test_fiscal_year_helpers.py index b4fb392557..cac7ea6498 100644 --- a/usaspending_api/common/tests/unit/test_fiscal_year_helpers.py +++ b/usaspending_api/common/tests/unit/test_fiscal_year_helpers.py @@ -216,3 +216,58 @@ def test_get_quarter_from_period(): assert fyh.get_quarter_from_period("1") is None assert fyh.get_quarter_from_period("a") is None assert fyh.get_quarter_from_period({"hello": "there"}) is None + + +def test_generate_fiscal_date_range(): + # 2-day range that crosses all boundaries + start = date(2020, 9, 30) + end = date(2020, 10, 1) + expected = [ + {"fiscal_year": 2020, "fiscal_quarter": 4, "fiscal_month": 12}, + {"fiscal_year": 2021, "fiscal_quarter": 1, "fiscal_month": 1}, + ] + assert fyh.generate_fiscal_date_range(start, end, "fiscal_year") == expected + assert fyh.generate_fiscal_date_range(start, end, "quarter") == expected + assert fyh.generate_fiscal_date_range(start, end, "anything") == expected + + # check within FY + start = date(2019, 10, 2) + end = date(2020, 9, 30) + expected = [ + {"fiscal_year": 2020, "fiscal_quarter": 1, "fiscal_month": 1}, + ] + assert fyh.generate_fiscal_date_range(start, end, "fiscal_year") == expected + + expected.append({"fiscal_year": 2020, "fiscal_quarter": 2, "fiscal_month": 4}) + expected.append({"fiscal_year": 2020, "fiscal_quarter": 3, "fiscal_month": 7}) + expected.append({"fiscal_year": 2020, "fiscal_quarter": 4, "fiscal_month": 10}) + assert fyh.generate_fiscal_date_range(start, end, "quarter") == expected + + # 1-day 
period + start = end = date(2021, 6, 23) + expected = [{"fiscal_year": 2021, "fiscal_quarter": 3, "fiscal_month": 9}] + assert fyh.generate_fiscal_date_range(start, end, "fiscal_year") == expected + assert fyh.generate_fiscal_date_range(start, end, "quarter") == expected + assert fyh.generate_fiscal_date_range(start, end, "anything") == expected + + +def test_create_full_time_periods(): + # NOTE: not checking aggregations, only the time periods + # 2-day range that crosses all boundaries + start = date(2020, 9, 30) + end = date(2020, 10, 1) + + years = fyh.create_full_time_periods(start, end, "fy", {}) + assert len(years) == 2 + assert years[0]["time_period"] == {"fy": "2020"} + assert years[1]["time_period"] == {"fy": "2021"} + + quarters = fyh.create_full_time_periods(start, end, "quarter", {}) + assert len(quarters) == 2 + assert quarters[0]["time_period"] == {"fy": "2020", "quarter": "4"} + assert quarters[1]["time_period"] == {"fy": "2021", "quarter": "1"} + + months = fyh.create_full_time_periods(start, end, "month", {}) + assert len(months) == 2 + assert months[0]["time_period"] == {"fy": "2020", "month": "12"} + assert months[1]["time_period"] == {"fy": "2021", "month": "1"} diff --git a/usaspending_api/common/tests/unit/test_sqs_work_dispatcher.py b/usaspending_api/common/tests/unit/test_sqs_work_dispatcher.py index 2d972c2593..a6037913f7 100644 --- a/usaspending_api/common/tests/unit/test_sqs_work_dispatcher.py +++ b/usaspending_api/common/tests/unit/test_sqs_work_dispatcher.py @@ -502,9 +502,10 @@ def work_one_or_two(message): def test_faulty_queue_connection_raises_correct_exception(self): """When a queue cannot be connected to, it raises the appropriate exception""" try: + region_name = "us-gov-west-1" # note: connection max retries config not in botocore v1.5.x - client_config = Config(region_name="us-gov-west-1", connect_timeout=1, read_timeout=1) - sqs = boto3.resource("sqs", config=client_config) + client_config = Config(region_name=region_name, 
connect_timeout=1, read_timeout=1) + sqs = boto3.resource("sqs", config=client_config, endpoint_url=f"https://sqs.{region_name}.amazonaws.com") queue = sqs.Queue("75f4f422-3866-4e4f-9dc9-5364e3de3eaf") dispatcher = SQSWorkDispatcher( queue, worker_process_name="Test Worker Process", long_poll_seconds=1, monitor_sleep_time=1 diff --git a/usaspending_api/conftest.py b/usaspending_api/conftest.py index 56aee768d9..58aa8efb27 100644 --- a/usaspending_api/conftest.py +++ b/usaspending_api/conftest.py @@ -117,7 +117,7 @@ def elasticsearch_transaction_index(db): See test_demo_elasticsearch_tests.py for sample usage. """ - elastic_search_index = TestElasticSearchIndex("transactions") + elastic_search_index = TestElasticSearchIndex("transaction") with override_settings(ES_TRANSACTIONS_QUERY_ALIAS_PREFIX=elastic_search_index.alias_prefix): yield elastic_search_index elastic_search_index.delete_index() @@ -132,12 +132,27 @@ def elasticsearch_award_index(db): See test_award_index_elasticsearch_tests.py for sample usage. """ - elastic_search_index = TestElasticSearchIndex("awards") + elastic_search_index = TestElasticSearchIndex("award") with override_settings(ES_AWARDS_QUERY_ALIAS_PREFIX=elastic_search_index.alias_prefix): yield elastic_search_index elastic_search_index.delete_index() +@pytest.fixture +def elasticsearch_account_index(db): + """ + Add this fixture to your test if you intend to use the Elasticsearch + account index. To use, create some mock database data then call + elasticsearch_account_index.update_index to populate Elasticsearch. + + See test_account_index_elasticsearch_tests.py for sample usage. 
+ """ + elastic_search_index = TestElasticSearchIndex("covid19_faba") + with override_settings(ES_COVID19_FABA_QUERY_ALIAS_PREFIX=elastic_search_index.alias_prefix): + yield elastic_search_index + elastic_search_index.delete_index() + + @pytest.fixture(scope="session") def broker_db_setup(django_db_setup, django_db_use_migrations): """Fixture to use during a pytest session if you will run integration tests that requires an actual broker diff --git a/usaspending_api/conftest_helpers.py b/usaspending_api/conftest_helpers.py index 1bdd9f418a..51305c9c27 100644 --- a/usaspending_api/conftest_helpers.py +++ b/usaspending_api/conftest_helpers.py @@ -1,3 +1,5 @@ +from builtins import Exception + import json from datetime import datetime, timezone @@ -17,7 +19,11 @@ ) from usaspending_api.common.helpers.sql_helpers import ordered_dictionary_fetcher from usaspending_api.common.helpers.text_helpers import generate_random_string -from usaspending_api.etl.elasticsearch_loader_helpers import create_award_type_aliases +from usaspending_api.etl.elasticsearch_loader_helpers import ( + create_award_type_aliases, + transform_covid19_faba_data, + TaskSpec, +) from usaspending_api.etl.management.commands.es_configure import retrieve_index_template @@ -32,7 +38,7 @@ def __init__(self, index_type): self.index_name = self._generate_index_name() self.alias_prefix = self.index_name self.client = Elasticsearch([settings.ES_HOSTNAME], timeout=settings.ES_TIMEOUT) - self.template = retrieve_index_template("{}_template".format(self.index_type[:-1])) + self.template = retrieve_index_template(f"{self.index_type}_template") self.mappings = json.loads(self.template)["mappings"] self.etl_config = { "index_name": self.index_name, @@ -60,28 +66,56 @@ def _add_contents(self, **options): Get all of the transactions presented in the view and stuff them into the Elasticsearch index. The view is only needed to load the transactions into Elasticsearch so it is dropped after each use. 
""" - view_sql_file = "award_delta_view.sql" if self.index_type == "awards" else "transaction_delta_view.sql" + if self.index_type == "award": + view_sql_file = f"{settings.ES_AWARDS_ETL_VIEW_NAME}.sql" + view_name = settings.ES_AWARDS_ETL_VIEW_NAME + es_id = f"{self.index_type}_id" + elif self.index_type == "covid19_faba": + view_sql_file = f"{settings.ES_COVID19_FABA_ETL_VIEW_NAME}.sql" + view_name = settings.ES_COVID19_FABA_ETL_VIEW_NAME + es_id = "financial_account_distinct_award_key" + elif self.index_type == "transaction": + view_sql_file = f"{settings.ES_TRANSACTIONS_ETL_VIEW_NAME}.sql" + view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME + es_id = f"{self.index_type}_id" + else: + raise Exception("Invalid index type") + view_sql = open(str(settings.APP_DIR / "database_scripts" / "etl" / view_sql_file), "r").read() with connection.cursor() as cursor: cursor.execute(view_sql) - if self.index_type == "transactions": - view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME - else: - view_name = settings.ES_AWARDS_ETL_VIEW_NAME cursor.execute(f"SELECT * FROM {view_name};") - transactions = ordered_dictionary_fetcher(cursor) + records = ordered_dictionary_fetcher(cursor) cursor.execute(f"DROP VIEW {view_name};") - - for transaction in transactions: + if self.index_type == "covid19_faba": + records = transform_covid19_faba_data( + TaskSpec( + name="worker", + index=self.index_name, + sql=view_sql_file, + view=view_name, + base_table="financial_accounts_by_awards", + base_table_id="financial_accounts_by_awards_id", + field_for_es_id="financial_account_distinct_award_key", + primary_key="award_id", + partition_number=1, + is_incremental=False, + ), + records, + ) + for record in records: # Special cases where we convert array of JSON to an array of strings to avoid nested types routing_key = options.get("routing", settings.ES_ROUTING_FIELD) - routing_value = transaction.get(routing_key) - if self.index_type == "transactions": - transaction["federal_accounts"] = 
self.convert_json_arrays_to_list(transaction["federal_accounts"]) + routing_value = record.get(routing_key) + es_id_value = record.get(es_id) + if self.index_type == "transaction": + record["federal_accounts"] = self.convert_json_arrays_to_list(record["federal_accounts"]) + if self.index_type == "covid19_faba": + es_id_value = record.pop("_id") self.client.index( index=self.index_name, - body=json.dumps(transaction, cls=DjangoJSONEncoder), - id=transaction["{}_id".format(self.index_type[:-1])], + body=json.dumps(record, cls=DjangoJSONEncoder), + id=es_id_value, routing=routing_value, ) # Force newly added documents to become searchable. diff --git a/usaspending_api/database_scripts/etl/award_delta_view.sql b/usaspending_api/database_scripts/etl/award_delta_view.sql index 9224ab06aa..d00ab4f5b5 100644 --- a/usaspending_api/database_scripts/etl/award_delta_view.sql +++ b/usaspending_api/database_scripts/etl/award_delta_view.sql @@ -2,12 +2,8 @@ DROP VIEW IF EXISTS award_delta_view; CREATE VIEW award_delta_view AS SELECT vw_es_award_search.award_id, - a.generated_unique_award_id, - CASE - WHEN vw_es_award_search.type IN ('02', '03', '04', '05', '06', '10', '07', '08', '09', '11') AND vw_es_award_search.fain IS NOT NULL THEN vw_es_award_search.fain - WHEN vw_es_award_search.piid IS NOT NULL THEN vw_es_award_search.piid -- contracts. 
Did it this way to easily handle IDV contracts - ELSE vw_es_award_search.uri - END AS display_award_id, + vw_es_award_search.generated_unique_award_id, + vw_es_award_search.display_award_id, vw_es_award_search.category, vw_es_award_search.type, @@ -20,27 +16,12 @@ SELECT vw_es_award_search.award_amount, vw_es_award_search.total_subsidy_cost, vw_es_award_search.total_loan_value, - a.update_date, + vw_es_award_search.update_date, vw_es_award_search.recipient_name, vw_es_award_search.recipient_unique_id, - recipient_profile.recipient_hash, - CASE - WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL - THEN - CONCAT( - '{"name":"', vw_es_award_search.recipient_name, - '","unique_id":"', vw_es_award_search.recipient_unique_id, - '","hash":"","levels":""}' - ) - ELSE - CONCAT( - '{"name":"', vw_es_award_search.recipient_name, - '","unique_id":"', vw_es_award_search.recipient_unique_id, - '","hash":"', recipient_profile.recipient_hash, - '","levels":"', recipient_profile.recipient_levels, '"}' - ) - END AS recipient_agg_key, + vw_es_award_search.recipient_hash, + vw_es_award_search.recipient_agg_key, vw_es_award_search.parent_recipient_unique_id, vw_es_award_search.business_categories, @@ -66,24 +47,8 @@ SELECT vw_es_award_search.funding_toptier_agency_code, vw_es_award_search.awarding_subtier_agency_code, vw_es_award_search.funding_subtier_agency_code, - CASE - WHEN vw_es_award_search.funding_toptier_agency_name IS NOT NULL - THEN CONCAT( - '{"name":"', vw_es_award_search.funding_toptier_agency_name, - '","code":"', vw_es_award_search.funding_toptier_agency_code, - '","id":"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = vw_es_award_search.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '"}' - ) - ELSE NULL - END AS funding_toptier_agency_agg_key, - CASE - WHEN vw_es_award_search.funding_subtier_agency_name IS NOT NULL - THEN CONCAT( - 
'{"name":"', vw_es_award_search.funding_subtier_agency_name, - '","code":"', vw_es_award_search.funding_subtier_agency_code, - '","id":"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = vw_es_award_search.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '"}' - ) - ELSE NULL - END AS funding_subtier_agency_agg_key, + vw_es_award_search.funding_toptier_agency_agg_key, + vw_es_award_search.funding_subtier_agency_agg_key, vw_es_award_search.recipient_location_country_code, vw_es_award_search.recipient_location_country_name, @@ -105,8 +70,8 @@ SELECT vw_es_award_search.pop_city_code, vw_es_award_search.cfda_number, - fabs.cfda_title, - + vw_es_award_search.cfda_program_title as cfda_title, + vw_es_award_search.sai_number, vw_es_award_search.type_of_contract_pricing, vw_es_award_search.extent_competed, @@ -117,180 +82,18 @@ SELECT vw_es_award_search.naics_code, vw_es_award_search.naics_description, - CASE - WHEN - vw_es_award_search.recipient_location_state_code IS NOT NULL - AND vw_es_award_search.recipient_location_county_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.recipient_location_country_code, - '","state_code":"', vw_es_award_search.recipient_location_state_code, - '","state_fips":"', RL_STATE_LOOKUP.fips, - '","county_code":"', vw_es_award_search.recipient_location_county_code, - '","county_name":"', vw_es_award_search.recipient_location_county_name, - '","population":"', RL_COUNTY_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS recipient_location_county_agg_key, - CASE - WHEN - vw_es_award_search.recipient_location_state_code IS NOT NULL - AND vw_es_award_search.recipient_location_congressional_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.recipient_location_country_code, - '","state_code":"', vw_es_award_search.recipient_location_state_code, - '","state_fips":"', RL_STATE_LOOKUP.fips, - 
'","congressional_code":"', vw_es_award_search.recipient_location_congressional_code, - '","population":"', RL_DISTRICT_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS recipient_location_congressional_agg_key, - CASE - WHEN vw_es_award_search.recipient_location_state_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.recipient_location_country_code, - '","state_code":"', vw_es_award_search.recipient_location_state_code, - '","state_name":"', RL_STATE_LOOKUP.name, - '","population":"', RL_STATE_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS recipient_location_state_agg_key, + vw_es_award_search.recipient_location_county_agg_key, + vw_es_award_search.recipient_location_congressional_agg_key, + vw_es_award_search.recipient_location_state_agg_key, - CASE - WHEN vw_es_award_search.pop_state_code IS NOT NULL AND vw_es_award_search.pop_county_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.pop_country_code, - '","state_code":"', vw_es_award_search.pop_state_code, - '","state_fips":"', POP_STATE_LOOKUP.fips, - '","county_code":"', vw_es_award_search.pop_county_code, - '","county_name":"', vw_es_award_search.pop_county_name, - '","population":"', POP_COUNTY_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS pop_county_agg_key, - CASE - WHEN vw_es_award_search.pop_state_code IS NOT NULL AND vw_es_award_search.pop_congressional_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.pop_country_code, - '","state_code":"', vw_es_award_search.pop_state_code, - '","state_fips":"', POP_STATE_LOOKUP.fips, - '","congressional_code":"', vw_es_award_search.pop_congressional_code, - '","population":"', POP_DISTRICT_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS pop_congressional_agg_key, - CASE - WHEN vw_es_award_search.pop_state_code IS NOT NULL - THEN CONCAT( - '{"country_code":"', vw_es_award_search.pop_country_code, - '","state_code":"', 
vw_es_award_search.pop_state_code, - '","state_name":"', POP_STATE_LOOKUP.name, - '","population":"', POP_STATE_POPULATION.latest_population, '"}' - ) - ELSE NULL - END AS pop_state_agg_key, + vw_es_award_search.pop_county_agg_key, + vw_es_award_search.pop_congressional_agg_key, + vw_es_award_search.pop_state_agg_key, - TREASURY_ACCT.tas_paths, - TREASURY_ACCT.tas_components, - DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes, - DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay, - DEFC.transaction_obligated_amount AS total_covid_obligation + vw_es_award_search.tas_paths, + vw_es_award_search.tas_components, + vw_es_award_search.disaster_emergency_fund_codes, + vw_es_award_search.total_covid_outlay, + vw_es_award_search.total_covid_obligation FROM vw_es_award_search -INNER JOIN awards a ON (a.id = vw_es_award_search.award_id) -LEFT JOIN transaction_fabs fabs ON (fabs.transaction_id = a.latest_transaction_id) -LEFT JOIN LATERAL ( - SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels - FROM recipient_profile - WHERE (recipient_hash = vw_es_award_search.recipient_hash OR recipient_unique_id = vw_es_award_search.recipient_unique_id) and - recipient_name NOT IN ( - 'MULTIPLE RECIPIENTS', - 'REDACTED DUE TO PII', - 'MULTIPLE FOREIGN RECIPIENTS', - 'PRIVATE INDIVIDUAL', - 'INDIVIDUAL RECIPIENT', - 'MISCELLANEOUS FOREIGN AWARDEES' - ) AND recipient_name IS NOT NULL - AND recipient_level != 'P' - GROUP BY recipient_hash, recipient_unique_id - LIMIT 1 -) recipient_profile ON TRUE -LEFT JOIN ( - SELECT code, name, fips, MAX(id) - FROM state_data - GROUP BY code, name, fips -) POP_STATE_LOOKUP ON (POP_STATE_LOOKUP.code = vw_es_award_search.pop_state_code) -LEFT JOIN ref_population_county POP_STATE_POPULATION ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000') -LEFT JOIN ref_population_county POP_COUNTY_POPULATION ON (POP_COUNTY_POPULATION.state_code = 
POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = vw_es_award_search.pop_county_code) -LEFT JOIN ref_population_cong_district POP_DISTRICT_POPULATION ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = vw_es_award_search.pop_congressional_code) -LEFT JOIN ( - SELECT code, name, fips, MAX(id) - FROM state_data - GROUP BY code, name, fips -) RL_STATE_LOOKUP ON (RL_STATE_LOOKUP.code = vw_es_award_search.recipient_location_state_code) -LEFT JOIN ref_population_county RL_STATE_POPULATION ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000') -LEFT JOIN ref_population_county RL_COUNTY_POPULATION ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = vw_es_award_search.recipient_location_county_code) -LEFT JOIN ref_population_cong_district RL_DISTRICT_POPULATION ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = vw_es_award_search.recipient_location_congressional_code) -LEFT JOIN ( - -- Get awards with COVID-related data - -- CONDITIONS: - -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards - -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward - -- 3. 
Only care about outlays for those awards if the period with outlay data is the last closed period in its FY - SELECT - faba.award_id, - ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes, - COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe, - COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount - FROM - financial_accounts_by_awards faba - INNER JOIN disaster_emergency_fund_code defc - ON defc.code = faba.disaster_emergency_fund_code - AND defc.group_name = 'covid_19' - INNER JOIN submission_attributes sa - ON faba.submission_id = sa.submission_id - AND sa.reporting_period_start >= '2020-04-01' - INNER JOIN dabs_submission_window_schedule AS closed_periods - ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now() - AND sa.submission_window_id = closed_periods.id - WHERE faba.award_id IS NOT NULL - GROUP BY - faba.award_id -) DEFC ON (DEFC.award_id = vw_es_award_search.award_id) -LEFT JOIN ( - SELECT - faba.award_id, - ARRAY_AGG( - DISTINCT CONCAT( - 'agency=', agency.toptier_code, - 'faaid=', fa.agency_identifier, - 'famain=', fa.main_account_code, - 'aid=', taa.agency_id, - 'main=', taa.main_account_code, - 'ata=', taa.allocation_transfer_agency_id, - 'sub=', taa.sub_account_code, - 'bpoa=', taa.beginning_period_of_availability, - 'epoa=', taa.ending_period_of_availability, - 'a=', taa.availability_type_code - ) - ) tas_paths, - ARRAY_AGG( - DISTINCT CONCAT( - 'aid=', taa.agency_id, - 'main=', taa.main_account_code, - 'ata=', taa.allocation_transfer_agency_id, - 'sub=', taa.sub_account_code, - 'bpoa=', taa.beginning_period_of_availability, - 'epoa=', taa.ending_period_of_availability, - 'a=', taa.availability_type_code - ) - ) tas_components - FROM - treasury_appropriation_account taa - INNER JOIN financial_accounts_by_awards 
faba ON (taa.treasury_account_identifier = faba.treasury_account_id) - INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id) - INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id) - WHERE - faba.award_id IS NOT NULL - GROUP BY - faba.award_id -) TREASURY_ACCT ON (TREASURY_ACCT.award_id = vw_es_award_search.award_id) ; diff --git a/usaspending_api/database_scripts/etl/covid19_faba_view.sql b/usaspending_api/database_scripts/etl/covid19_faba_view.sql index 598a353a69..66403caa15 100644 --- a/usaspending_api/database_scripts/etl/covid19_faba_view.sql +++ b/usaspending_api/database_scripts/etl/covid19_faba_view.sql @@ -79,7 +79,7 @@ CREATE VIEW covid19_faba_view AS oc.direct_reimbursable, defc.group_name AS disaster_emergency_fund_code_group_name, awd.total_loan_value, - awd.type AS award_type, + awd.type, awd.generated_unique_award_id FROM financial_accounts_by_awards faba JOIN submission_attributes sa ON sa.reporting_period_start >= '2020-04-01'::date AND sa.submission_id = faba.submission_id diff --git a/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json index f1ca047074..84ed4e5f87 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_contract_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " awards.piid AS display_award_id,", + " awards.update_date,", " awards.piid,", " NULL::text AS fain,", " NULL::text AS uri,", @@ -19,12 +22,24 @@ " 0::NUMERIC(23, 2) AS total_subsidy_cost,", " 0::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fpds.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', 
transaction_fpds.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fpds.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fpds.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fpds.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', 
SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,7 +103,9 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fpds.place_of_perform_city_name) AS pop_city_name,", "", + " NULL::text AS cfda_program_title,", " NULL::text AS cfda_number,", + "", " NULL::text AS sai_number,", " transaction_fpds.type_of_contract_pricing,", " transaction_fpds.extent_competed,", @@ -79,7 +114,84 @@ " transaction_fpds.product_or_service_code,", " psc.description AS product_or_service_description,", " transaction_fpds.naics AS naics_code,", - " transaction_fpds.naics_description", + " transaction_fpds.naics_description,", + " CASE", + " WHEN", + " transaction_fpds.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " 
ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fpds.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fpds.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, 
transaction_fpds.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -142,6 +254,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fpds.place_of_performance_state and", " pop_county_lookup.county_numeric = 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fpds.place_of_performance_state)", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fpds.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND 
RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + "SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + "FROM recipient_profile", + "WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(CASE WHEN transaction_fpds.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fpds.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fpds.awardee_or_recipient_legal) END))::uuid)", + "OR recipient_unique_id = transaction_fpds.awardee_or_recipient_uniqu) and", + "recipient_name NOT IN (", + "'MULTIPLE RECIPIENTS',", + "'REDACTED DUE TO PII',", + "'MULTIPLE FOREIGN RECIPIENTS',", + "'PRIVATE INDIVIDUAL',", + "'INDIVIDUAL RECIPIENT',", + "'MISCELLANEOUS FOREIGN AWARDEES'", + ") AND recipient_name IS NOT NULL", + "AND recipient_level != 'P'", + "GROUP BY recipient_hash, recipient_unique_id", + "LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. 
Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', 
taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type IN ('A', 'B', 'C', 'D')", diff --git a/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json index 7a14f8f52d..aeb215e8aa 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_directpayment_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " COALESCE(awards.fain, awards.uri) AS display_award_id,", + " awards.update_date,", " NULL::text AS piid,", " awards.fain,", " awards.uri,", @@ -19,12 +22,24 @@ " 0::NUMERIC(23, 2) AS total_subsidy_cost,", " 0::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " WHEN recipient_profile.recipient_hash 
IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " rl_country_lookup.country_code AS 
recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,6 +103,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", "", + " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", " transaction_fabs.sai_number,", " NULL::text AS type_of_contract_pricing,", @@ -79,7 +113,85 @@ " NULL::text AS product_or_service_code,", " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", - " NULL::text AS naics_description", + " NULL::text AS naics_description,", + "", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " 
'\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + "", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN 
CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -140,6 +252,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fabs.place_of_perfor_state_code and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fabs.place_of_perfor_state_code)", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, 
name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fabs.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + "SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + "FROM recipient_profile", + " WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER( CASE WHEN 
transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END ))::uuid)", + " OR recipient_unique_id = transaction_fabs.awardee_or_recipient_uniqu) AND", + "recipient_name NOT IN (", + "'MULTIPLE RECIPIENTS',", + "'REDACTED DUE TO PII',", + "'MULTIPLE FOREIGN RECIPIENTS',", + "'PRIVATE INDIVIDUAL',", + "'INDIVIDUAL RECIPIENT',", + "'MISCELLANEOUS FOREIGN AWARDEES'", + ") AND recipient_name IS NOT NULL", + "AND recipient_level != 'P'", + "GROUP BY recipient_hash, recipient_unique_id", + "LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date 
< now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type IN ('06','10')", diff --git a/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json index 534335a7be..960a2fc7ee 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json +++ 
b/usaspending_api/database_scripts/matview_generator/mv_grant_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " COALESCE(awards.fain, awards.uri) AS display_award_id,", + " awards.update_date,", " NULL::text AS piid,", " awards.fain,", " awards.uri,", @@ -19,12 +22,24 @@ " 0::NUMERIC(23, 2) AS total_subsidy_cost,", " 0::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS 
awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,6 +103,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", "", + " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", " transaction_fabs.sai_number,", " NULL::text AS type_of_contract_pricing,", @@ -79,7 +113,85 @@ " NULL::text AS product_or_service_code,", " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", - " NULL::text AS naics_description", + " NULL::text AS naics_description,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT 
NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + "", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + "", + " 
TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -140,6 +252,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fabs.place_of_perfor_state_code and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fabs.place_of_perfor_state_code)", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fabs.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + "SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + "FROM recipient_profile", + " WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER( CASE WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END ))::uuid)", + " OR recipient_unique_id = transaction_fabs.awardee_or_recipient_uniqu) AND", + "recipient_name NOT IN (", + "'MULTIPLE RECIPIENTS',", + "'REDACTED DUE TO PII',", + "'MULTIPLE FOREIGN RECIPIENTS',", + "'PRIVATE INDIVIDUAL',", + "'INDIVIDUAL RECIPIENT',", + "'MISCELLANEOUS FOREIGN AWARDEES'", + ") AND recipient_name IS NOT NULL", + "AND recipient_level != 'P'", + "GROUP BY recipient_hash, recipient_unique_id", + "LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. 
Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', 
taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type IN ('02','03','04','05')", diff --git a/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json index 3c8d17bfee..2ec0ad1a58 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_idv_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " awards.piid AS display_award_id,", + " awards.update_date,", " awards.piid,", " NULL::text AS fain,", " NULL::text AS uri,", @@ -19,12 +22,24 @@ " 0::NUMERIC(23, 2) AS total_subsidy_cost,", " 0::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fpds.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fpds.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fpds.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " 
WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fpds.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fpds.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fpds.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " 
rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,6 +103,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fpds.place_of_perform_city_name) AS pop_city_name,", "", + " NULL::text AS cfda_program_title,", " NULL::text AS cfda_number,", " NULL::text AS sai_number,", " transaction_fpds.type_of_contract_pricing,", @@ -79,7 +113,85 @@ " transaction_fpds.product_or_service_code,", " psc.description AS product_or_service_description,", " transaction_fpds.naics AS naics_code,", - " transaction_fpds.naics_description", + " transaction_fpds.naics_description,", + " CASE", + " WHEN", + " transaction_fpds.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fpds.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', 
rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fpds.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + "", + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS 
text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fpds.place_of_performance_state IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fpds.place_of_performance_state,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -142,6 +254,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fpds.place_of_performance_state and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fpds.place_of_performance_state)", + "LEFT JOIN", + " (SELECT code, name, fips, 
MAX(id) FROM state_data GROUP BY code, name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fpds.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fpds.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + "SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + "FROM recipient_profile", + "WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, 
MD5(UPPER(CASE WHEN transaction_fpds.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fpds.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fpds.awardee_or_recipient_legal) END))::uuid)", + "OR recipient_unique_id = transaction_fpds.awardee_or_recipient_uniqu) and", + "recipient_name NOT IN (", + "'MULTIPLE RECIPIENTS',", + "'REDACTED DUE TO PII',", + "'MULTIPLE FOREIGN RECIPIENTS',", + "'PRIVATE INDIVIDUAL',", + "'INDIVIDUAL RECIPIENT',", + "'MISCELLANEOUS FOREIGN AWARDEES'", + ") AND recipient_name IS NOT NULL", + "AND recipient_level != 'P'", + "GROUP BY recipient_hash, recipient_unique_id", + "LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. 
Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', 
taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type LIKE 'IDV%'", @@ -182,3 +393,5 @@ } ] } + + diff --git a/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json index c003585b29..1d1e2017df 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_loan_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " COALESCE(awards.fain, awards.uri) AS display_award_id,", + " awards.update_date,", " NULL::text AS piid,", " awards.fain,", " awards.uri,", @@ -19,12 +22,24 @@ " COALESCE(awards.total_subsidy_cost, 0)::NUMERIC(23, 2) AS total_subsidy_cost,", " COALESCE(awards.total_loan_value, 0)::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", 
+ " CASE", + " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", 
"", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,6 +103,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", "", + " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", " transaction_fabs.sai_number,", " NULL::text AS type_of_contract_pricing,", @@ -79,7 +113,85 @@ " NULL::text AS product_or_service_code,", " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", - " NULL::text AS naics_description", + " NULL::text AS naics_description,", + "", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', 
rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + "", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS 
text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -140,6 +252,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fabs.place_of_perfor_state_code and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fabs.place_of_perfor_state_code)", + "LEFT JOIN", + " (SELECT code, name, fips, 
MAX(id) FROM state_data GROUP BY code, name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fabs.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + " SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + " FROM recipient_profile", + " WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, 
MD5(UPPER( CASE WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END ))::uuid)", + " OR recipient_unique_id = transaction_fabs.awardee_or_recipient_uniqu) AND", + " recipient_name NOT IN (", + " 'MULTIPLE RECIPIENTS',", + " 'REDACTED DUE TO PII',", + " 'MULTIPLE FOREIGN RECIPIENTS',", + " 'PRIVATE INDIVIDUAL',", + " 'INDIVIDUAL RECIPIENT',", + " 'MISCELLANEOUS FOREIGN AWARDEES'", + " ) AND recipient_name IS NOT NULL", + " AND recipient_level != 'P'", + " GROUP BY recipient_hash, recipient_unique_id", + " LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. 
Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', 
taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type IN ('07','08')", diff --git a/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json index 3c5688371c..9535c2d131 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json +++ b/usaspending_api/database_scripts/matview_generator/mv_other_award_search.json @@ -9,6 +9,9 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " COALESCE(awards.fain, awards.uri) AS display_award_id,", + " awards.update_date,", " NULL::text AS piid,", " awards.fain,", " awards.uri,", @@ -19,12 +22,24 @@ " 0::NUMERIC(23, 2) AS total_subsidy_cost,", " 0::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu)", - " ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " WHEN recipient_profile.recipient_hash IS NULL or 
recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', transaction_fabs.awardee_or_recipient_uniqu,", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " transaction_fabs.awardee_or_recipient_uniqu AS recipient_unique_id,", " transaction_fabs.ultimate_parent_unique_ide AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -50,6 +65,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " rl_country_lookup.country_code AS 
recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -70,6 +103,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM transaction_fabs.place_of_performance_city) AS pop_city_name,", "", + " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", " transaction_fabs.sai_number,", " NULL::text AS type_of_contract_pricing,", @@ -79,7 +113,85 @@ " NULL::text AS product_or_service_code,", " NULL::text AS product_or_service_description,", " NULL::text AS naics_code,", - " NULL::text AS naics_description", + " NULL::text AS naics_description,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fabs.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " transaction_fabs.legal_entity_state_code IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', 
transaction_fabs.legal_entity_state_code,", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.legal_entity_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.legal_entity_state_code,", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + "", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fabs.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " 
'{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN transaction_fabs.place_of_perfor_state_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', transaction_fabs.place_of_perfor_state_code,", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -140,6 +252,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = transaction_fabs.place_of_perfor_state_code and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fabs.place_of_perfor_state_code)", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) 
AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fabs.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + "SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + "FROM recipient_profile", + " WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER( CASE WHEN 
transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END ))::uuid)", + " OR recipient_unique_id = transaction_fabs.awardee_or_recipient_uniqu) AND", + "recipient_name NOT IN (", + "'MULTIPLE RECIPIENTS',", + "'REDACTED DUE TO PII',", + "'MULTIPLE FOREIGN RECIPIENTS',", + "'PRIVATE INDIVIDUAL',", + "'INDIVIDUAL RECIPIENT',", + "'MISCELLANEOUS FOREIGN AWARDEES'", + ") AND recipient_name IS NOT NULL", + "AND recipient_level != 'P'", + "GROUP BY recipient_hash, recipient_unique_id", + "LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date 
< now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2007-10-01'", " AND awards.type IN ('09','11')", diff --git a/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json b/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json index 2d95b26a6c..0cc420793b 100644 --- a/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json +++ 
b/usaspending_api/database_scripts/matview_generator/mv_pre2008_award_search.json @@ -9,6 +9,13 @@ " awards.category,", " awards.type,", " awards.type_description,", + " awards.generated_unique_award_id,", + " CASE", + " WHEN awards.type IN ('02', '03', '04', '05', '06', '10', '07', '08', '09', '11') AND awards.fain IS NOT NULL THEN awards.fain", + " WHEN awards.piid IS NOT NULL THEN awards.piid -- contracts. Did it this way to easily handle IDV contracts", + " ELSE awards.uri", + " END AS display_award_id,", + " awards.update_date,", " awards.piid,", " awards.fain,", " awards.uri,", @@ -25,12 +32,24 @@ " COALESCE(awards.total_subsidy_cost, 0)::NUMERIC(23, 2) AS total_subsidy_cost,", " COALESCE(awards.total_loan_value, 0)::NUMERIC(23, 2) AS total_loan_value,", "", - " COALESCE(recipient_lookup.recipient_hash, MD5(UPPER(", - " CASE", - " WHEN COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) IS NOT NULL THEN CONCAT('duns-', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu))", - " ELSE CONCAT('name-', COALESCE(transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) END", - " ))::uuid) AS recipient_hash,", + " recipient_profile.recipient_hash,", " UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)) AS recipient_name,", + " CASE", + " WHEN recipient_profile.recipient_hash IS NULL or recipient_profile.recipient_levels IS NULL", + " THEN", + " CONCAT(", + " '{\"name\":\"', UPPER(COALESCE(recipient_lookup.recipient_name, transaction_fpds.awardee_or_recipient_legal, transaction_fabs.awardee_or_recipient_legal)),", + " '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu),", + " '\",\"hash\":\"\",\"levels\":\"\"}'", + " )", + " ELSE", + " CONCAT(", + " '{\"name\":\"', 
recipient_lookup.recipient_name,", + " '\",\"unique_id\":\"', COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu),", + " '\",\"hash\":\"', recipient_profile.recipient_hash,", + " '\",\"levels\":\"', recipient_profile.recipient_levels, '\"}'", + " )", + " END AS recipient_agg_key,", " COALESCE(transaction_fpds.awardee_or_recipient_uniqu, transaction_fabs.awardee_or_recipient_uniqu) AS recipient_unique_id,", " COALESCE(transaction_fpds.ultimate_parent_unique_ide, transaction_fabs.ultimate_parent_unique_ide) AS parent_recipient_unique_id,", " latest_transaction.business_categories,", @@ -56,6 +75,24 @@ " TFA.toptier_code AS funding_toptier_agency_code,", " SAA.subtier_code AS awarding_subtier_agency_code,", " SFA.subtier_code AS funding_subtier_agency_code,", + " CASE", + " WHEN TFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', TFA.name,", + " '\",\"code\":\"', TFA.toptier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_toptier_agency_agg_key,", + " CASE", + " WHEN SFA.name IS NOT NULL", + " THEN CONCAT(", + " '{\"name\":\"', SFA.name,", + " '\",\"code\":\"', SFA.subtier_code,", + " '\",\"id\":\"', (SELECT a1.id FROM agency a1 WHERE a1.toptier_agency_id = (SELECT a2.toptier_agency_id FROM agency a2 WHERE a2.id = latest_transaction.funding_agency_id) ORDER BY a1.toptier_flag DESC, a1.id LIMIT 1), '\"}'", + " )", + " ELSE NULL", + " END AS funding_subtier_agency_agg_key,", "", " rl_country_lookup.country_code AS recipient_location_country_code,", " rl_country_lookup.country_name AS recipient_location_country_name,", @@ -76,6 +113,7 @@ " LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), 
'^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') AS pop_congressional_code,", " TRIM(TRAILING FROM COALESCE(transaction_fpds.place_of_perform_city_name, transaction_fabs.place_of_performance_city)) AS pop_city_name,", "", + " transaction_fabs.cfda_title AS cfda_program_title,", " transaction_fabs.cfda_number,", " transaction_fabs.sai_number,", " transaction_fpds.type_of_contract_pricing,", @@ -85,7 +123,85 @@ " transaction_fpds.product_or_service_code,", " psc.description AS product_or_service_description,", " transaction_fpds.naics AS naics_code,", - " transaction_fpds.naics_description", + " transaction_fpds.naics_description,", + "", + " CASE", + " WHEN", + " COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", + " AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_county_code, transaction_fabs.legal_entity_county_code), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(rl_county_lookup.county_name, transaction_fpds.legal_entity_county_name, transaction_fabs.legal_entity_county_name),", + " '\",\"population\":\"', RL_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_county_agg_key,", + " CASE", + " WHEN", + " COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", + " AND 
LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", + " '\",\"state_fips\":\"', RL_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.legal_entity_congressional, transaction_fabs.legal_entity_congressional), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', RL_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_congressional_agg_key,", + " CASE", + " WHEN COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code) IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', rl_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.legal_entity_state_code, transaction_fabs.legal_entity_state_code),", + " '\",\"state_name\":\"', RL_STATE_LOOKUP.name,", + " '\",\"population\":\"', RL_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS recipient_location_state_agg_key,", + " ", + " CASE", + " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND pop_country_lookup.country_code IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"county_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), 
'^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'),", + " '\",\"county_name\":\"', COALESCE(pop_county_lookup.county_name, transaction_fpds.place_of_perform_county_na, transaction_fabs.place_of_perform_county_na),", + " '\",\"population\":\"', POP_COUNTY_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_county_agg_key,", + " CASE", + " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL AND LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0') IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", + " '\",\"state_fips\":\"', POP_STATE_LOOKUP.fips,", + " '\",\"congressional_code\":\"', LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_performance_congr, transaction_fabs.place_of_performance_congr), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'),", + " '\",\"population\":\"', POP_DISTRICT_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_congressional_agg_key,", + " CASE", + " WHEN COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) IS NOT NULL", + " THEN CONCAT(", + " '{\"country_code\":\"', pop_country_lookup.country_code,", + " '\",\"state_code\":\"', COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code),", + " '\",\"state_name\":\"', POP_STATE_LOOKUP.name,", + " '\",\"population\":\"', POP_STATE_POPULATION.latest_population, '\"}'", + " )", + " ELSE NULL", + " END AS pop_state_agg_key,", + "", + " TREASURY_ACCT.tas_paths,", + " TREASURY_ACCT.tas_components,", + " DEFC.disaster_emergency_fund_codes AS 
disaster_emergency_fund_codes,", + " DEFC.gross_outlay_amount_by_award_cpe AS total_covid_outlay,", + " DEFC.transaction_obligated_amount AS total_covid_obligation", "FROM", " awards", "INNER JOIN", @@ -151,6 +267,105 @@ " (SELECT DISTINCT ON (state_alpha, county_numeric) state_alpha, county_numeric, UPPER(county_name) AS county_name FROM ref_city_county_state_code) AS pop_county_lookup on", " pop_county_lookup.state_alpha = COALESCE(transaction_fpds.place_of_performance_state, transaction_fabs.place_of_perfor_state_code) and", " pop_county_lookup.county_numeric = LPAD(CAST(CAST((REGEXP_MATCH(COALESCE(transaction_fpds.place_of_perform_county_co, transaction_fabs.place_of_perform_county_co), '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0')", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS POP_STATE_LOOKUP", + " ON (POP_STATE_LOOKUP.code = transaction_fabs.place_of_perfor_state_code)", + "LEFT JOIN", + " (SELECT code, name, fips, MAX(id) FROM state_data GROUP BY code, name, fips) AS RL_STATE_LOOKUP", + " ON (RL_STATE_LOOKUP.code = transaction_fabs.legal_entity_state_code)", + "LEFT JOIN ref_population_county AS POP_STATE_POPULATION ", + " ON (POP_STATE_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS POP_COUNTY_POPULATION", + " ON (POP_COUNTY_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_COUNTY_POPULATION.county_number = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_perform_county_co, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_county AS RL_STATE_POPULATION", + " ON (RL_STATE_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_STATE_POPULATION.county_number = '000')", + "LEFT JOIN ref_population_county AS RL_COUNTY_POPULATION", + " ON (RL_COUNTY_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_COUNTY_POPULATION.county_number = 
LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_county_code, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 3, '0'))", + "LEFT JOIN ref_population_cong_district AS POP_DISTRICT_POPULATION", + " ON (POP_DISTRICT_POPULATION.state_code = POP_STATE_LOOKUP.fips AND POP_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.place_of_performance_congr, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN ref_population_cong_district AS RL_DISTRICT_POPULATION", + " ON (RL_DISTRICT_POPULATION.state_code = RL_STATE_LOOKUP.fips AND RL_DISTRICT_POPULATION.congressional_district = LPAD(CAST(CAST((REGEXP_MATCH(transaction_fabs.legal_entity_congressional, '^[A-Z]*(\\d+)(?:\\.\\d+)?$'))[1] AS smallint) AS text), 2, '0'))", + "LEFT JOIN LATERAL (", + " SELECT recipient_hash, recipient_unique_id, ARRAY_AGG(recipient_level) AS recipient_levels", + " FROM recipient_profile", + " WHERE (recipient_hash = COALESCE(recipient_lookup.recipient_hash, MD5(UPPER( CASE WHEN transaction_fabs.awardee_or_recipient_uniqu IS NOT NULL THEN CONCAT('duns-', transaction_fabs.awardee_or_recipient_uniqu) ELSE CONCAT('name-', transaction_fabs.awardee_or_recipient_legal) END ))::uuid)", + " OR recipient_unique_id = transaction_fabs.awardee_or_recipient_uniqu) AND", + " recipient_name NOT IN (", + " 'MULTIPLE RECIPIENTS',", + " 'REDACTED DUE TO PII',", + " 'MULTIPLE FOREIGN RECIPIENTS',", + " 'PRIVATE INDIVIDUAL',", + " 'INDIVIDUAL RECIPIENT',", + " 'MISCELLANEOUS FOREIGN AWARDEES'", + " ) AND recipient_name IS NOT NULL", + " AND recipient_level != 'P'", + " GROUP BY recipient_hash, recipient_unique_id", + " LIMIT 1", + ") recipient_profile ON TRUE", + "LEFT JOIN (", + " -- Get awards with COVID-related data", + " -- CONDITIONS:", + " -- 1. Only care about data that references an (D1/D2) award, since this is used to update those referenced awards", + " -- 2. 
Only care about those awards if they are in a closed submission period, from FY2020 P07 onward", + " -- 3. Only care about outlays for those awards if the period with outlay data is the last closed period in its FY", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(DISTINCT disaster_emergency_fund_code ORDER BY disaster_emergency_fund_code) AS disaster_emergency_fund_codes,", + " COALESCE(SUM(CASE WHEN sa.is_final_balances_for_fy = TRUE THEN faba.gross_outlay_amount_by_award_cpe END), 0) AS gross_outlay_amount_by_award_cpe,", + " COALESCE(SUM(faba.transaction_obligated_amount), 0) AS transaction_obligated_amount", + " FROM", + " financial_accounts_by_awards faba", + " INNER JOIN disaster_emergency_fund_code defc", + " ON defc.code = faba.disaster_emergency_fund_code", + " AND defc.group_name = 'covid_19'", + " INNER JOIN submission_attributes sa", + " ON faba.submission_id = sa.submission_id", + " AND sa.reporting_period_start >= '2020-04-01'", + " INNER JOIN dabs_submission_window_schedule AS closed_periods", + " ON closed_periods.period_start_date >= '2020-04-01' AND closed_periods.submission_reveal_date < now()", + " AND sa.submission_window_id = closed_periods.id", + " WHERE faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") DEFC ON (DEFC.award_id = awards.id)", + "LEFT JOIN (", + " SELECT", + " faba.award_id,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'agency=', agency.toptier_code,", + " 'faaid=', fa.agency_identifier,", + " 'famain=', fa.main_account_code,", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_paths,", + " ARRAY_AGG(", + " DISTINCT CONCAT(", + " 'aid=', taa.agency_id,", + " 'main=', taa.main_account_code,", + " 'ata=', taa.allocation_transfer_agency_id,", + " 'sub=', 
taa.sub_account_code,", + " 'bpoa=', taa.beginning_period_of_availability,", + " 'epoa=', taa.ending_period_of_availability,", + " 'a=', taa.availability_type_code", + " )", + " ) tas_components", + " FROM", + " treasury_appropriation_account taa", + " INNER JOIN financial_accounts_by_awards faba ON (taa.treasury_account_identifier = faba.treasury_account_id)", + " INNER JOIN federal_account fa ON (taa.federal_account_id = fa.id)", + " INNER JOIN toptier_agency agency ON (fa.parent_toptier_agency_id = agency.toptier_agency_id)", + " WHERE", + " faba.award_id IS NOT NULL", + " GROUP BY", + " faba.award_id", + ") TREASURY_ACCT ON (TREASURY_ACCT.award_id = awards.id)", "WHERE", " latest_transaction.action_date >= '2000-10-01'", " AND latest_transaction.action_date < '2007-10-01'" diff --git a/usaspending_api/database_scripts/matviews/drop_old_matviews.sql b/usaspending_api/database_scripts/matviews/drop_old_matviews.sql index 6866f49c89..303d7d91b4 100644 --- a/usaspending_api/database_scripts/matviews/drop_old_matviews.sql +++ b/usaspending_api/database_scripts/matviews/drop_old_matviews.sql @@ -1,5 +1,6 @@ DROP MATERIALIZED VIEW IF EXISTS mv_agency_autocomplete_old; DROP MATERIALIZED VIEW IF EXISTS mv_contract_award_search_old; +DROP MATERIALIZED VIEW IF EXISTS mv_covid_financial_account_old; DROP MATERIALIZED VIEW IF EXISTS mv_directpayment_award_search_old; DROP MATERIALIZED VIEW IF EXISTS mv_grant_award_search_old; DROP MATERIALIZED VIEW IF EXISTS mv_idv_award_search_old; diff --git a/usaspending_api/database_scripts/matviews/refresh_matviews.sql b/usaspending_api/database_scripts/matviews/refresh_matviews.sql index f1c08f9ae5..d466cbbe7c 100644 --- a/usaspending_api/database_scripts/matviews/refresh_matviews.sql +++ b/usaspending_api/database_scripts/matviews/refresh_matviews.sql @@ -1,5 +1,6 @@ REFRESH MATERIALIZED VIEW CONCURRENTLY mv_agency_autocomplete; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_contract_award_search; +REFRESH MATERIALIZED VIEW CONCURRENTLY 
mv_covid_financial_account; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_directpayment_award_search; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_grant_award_search; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_idv_award_search; diff --git a/usaspending_api/database_scripts/matviews/vacuum_matviews.sql b/usaspending_api/database_scripts/matviews/vacuum_matviews.sql index f096b7e032..142ec6a26e 100644 --- a/usaspending_api/database_scripts/matviews/vacuum_matviews.sql +++ b/usaspending_api/database_scripts/matviews/vacuum_matviews.sql @@ -1,5 +1,6 @@ VACUUM (ANALYZE, VERBOSE) mv_agency_autocomplete; VACUUM (ANALYZE, VERBOSE) mv_contract_award_search; +VACUUM (ANALYZE, VERBOSE) mv_covid_financial_account; VACUUM (ANALYZE, VERBOSE) mv_directpayment_award_search; VACUUM (ANALYZE, VERBOSE) mv_grant_award_search; VACUUM (ANALYZE, VERBOSE) mv_idv_award_search; diff --git a/usaspending_api/disaster/tests/fixtures/disaster_account_data.py b/usaspending_api/disaster/tests/fixtures/disaster_account_data.py index 1780444258..d9bb518456 100644 --- a/usaspending_api/disaster/tests/fixtures/disaster_account_data.py +++ b/usaspending_api/disaster/tests/fixtures/disaster_account_data.py @@ -430,6 +430,7 @@ def disaster_account_data(): disaster_emergency_fund=defc_l, transaction_obligated_amount=2, gross_outlay_amount_by_award_cpe=20000000, + distinct_award_key=0, ) mommy.make( faba, @@ -439,6 +440,7 @@ def disaster_account_data(): transaction_obligated_amount=20, gross_outlay_amount_by_award_cpe=2000000, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -448,6 +450,7 @@ def disaster_account_data(): transaction_obligated_amount=200, gross_outlay_amount_by_award_cpe=200000, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -457,6 +460,7 @@ def disaster_account_data(): transaction_obligated_amount=1000, gross_outlay_amount_by_award_cpe=10000, award=a1, + distinct_award_key=1, ) mommy.make( faba, @@ -466,6 +470,7 @@ def disaster_account_data(): transaction_obligated_amount=1000, 
gross_outlay_amount_by_award_cpe=10000, award=a1, + distinct_award_key=1, ) mommy.make( faba, @@ -475,6 +480,7 @@ def disaster_account_data(): transaction_obligated_amount=20000, gross_outlay_amount_by_award_cpe=2000, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -484,6 +490,7 @@ def disaster_account_data(): transaction_obligated_amount=200000, gross_outlay_amount_by_award_cpe=200, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -493,6 +500,7 @@ def disaster_account_data(): transaction_obligated_amount=2000000, gross_outlay_amount_by_award_cpe=20, award=a2, + distinct_award_key=2, ) mommy.make( faba, @@ -502,6 +510,7 @@ def disaster_account_data(): transaction_obligated_amount=20000000, gross_outlay_amount_by_award_cpe=2, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -511,6 +520,7 @@ def disaster_account_data(): transaction_obligated_amount=0, gross_outlay_amount_by_award_cpe=0, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -520,6 +530,7 @@ def disaster_account_data(): transaction_obligated_amount=20, gross_outlay_amount_by_award_cpe=2000000, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -529,6 +540,7 @@ def disaster_account_data(): transaction_obligated_amount=200, gross_outlay_amount_by_award_cpe=200000, award=a3, + distinct_award_key=3, ) mommy.make( faba, @@ -538,6 +550,7 @@ def disaster_account_data(): transaction_obligated_amount=-2, gross_outlay_amount_by_award_cpe=200000000, award=a4, + distinct_award_key=4, ) mommy.make( faba, @@ -547,6 +560,7 @@ def disaster_account_data(): transaction_obligated_amount=80, gross_outlay_amount_by_award_cpe=20, award=a1, + distinct_award_key=1, ) mommy.make( diff --git a/usaspending_api/disaster/tests/fixtures/object_class_data.py b/usaspending_api/disaster/tests/fixtures/object_class_data.py index 4788a00f5a..35cbc64a4f 100644 --- a/usaspending_api/disaster/tests/fixtures/object_class_data.py +++ b/usaspending_api/disaster/tests/fixtures/object_class_data.py @@ -20,6 
+20,7 @@ def basic_faba_with_object_class(award_count_sub_schedule, award_count_submissio disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=basic_object_class[0], + transaction_obligated_amount=1, ) @@ -142,6 +143,7 @@ def faba_with_object_class_and_two_awards(award_count_sub_schedule, award_count_ disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=basic_object_class[0], + transaction_obligated_amount=1, ) mommy.make( @@ -151,6 +153,7 @@ def faba_with_object_class_and_two_awards(award_count_sub_schedule, award_count_ disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=basic_object_class[0], + transaction_obligated_amount=1, ) @@ -169,6 +172,7 @@ def faba_with_two_object_classes_and_two_awards(award_count_sub_schedule, award_ disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=object_class1[0], + transaction_obligated_amount=1, ) mommy.make( @@ -178,6 +182,7 @@ def faba_with_two_object_classes_and_two_awards(award_count_sub_schedule, award_ disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=object_class2[0], + transaction_obligated_amount=1, ) diff --git a/usaspending_api/disaster/tests/fixtures/object_class_loan_data.py b/usaspending_api/disaster/tests/fixtures/object_class_loan_data.py index b45fda3fcc..cf334609c8 100644 --- a/usaspending_api/disaster/tests/fixtures/object_class_loan_data.py +++ b/usaspending_api/disaster/tests/fixtures/object_class_loan_data.py @@ -22,6 +22,7 @@ def basic_object_class_faba_with_loan_value(award_count_sub_schedule, award_coun 
submission=SubmissionAttributes.objects.all().first(), object_class=basic_object_class[0], gross_outlays_delivered_orders_paid_total_cpe=8, + transaction_obligated_amount=1, ) mommy.make( @@ -32,6 +33,7 @@ def basic_object_class_faba_with_loan_value(award_count_sub_schedule, award_coun submission=SubmissionAttributes.objects.all().first(), object_class=basic_object_class[0], gross_outlays_delivered_orders_paid_total_cpe=9, + transaction_obligated_amount=1, ) @@ -52,16 +54,18 @@ def basic_object_class_multiple_faba_with_loan_value_with_single_object_class( submission=SubmissionAttributes.objects.all().first(), object_class=object_class1[0], gross_outlays_delivered_orders_paid_total_cpe=8, + transaction_obligated_amount=1, ) mommy.make( "awards.FinancialAccountsByAwards", award=award2, - parent_award_id="basic award", + parent_award_id="basic award 2", disaster_emergency_fund=DisasterEmergencyFundCode.objects.filter(code="M").first(), submission=SubmissionAttributes.objects.all().first(), object_class=object_class1[0], gross_outlays_delivered_orders_paid_total_cpe=8, + transaction_obligated_amount=1, ) @@ -83,6 +87,7 @@ def basic_object_class_multiple_faba_with_loan_value_with_two_object_classes( submission=SubmissionAttributes.objects.all().first(), object_class=object_class1[0], gross_outlays_delivered_orders_paid_total_cpe=8, + transaction_obligated_amount=1, ) mommy.make( @@ -93,6 +98,7 @@ def basic_object_class_multiple_faba_with_loan_value_with_two_object_classes( submission=SubmissionAttributes.objects.all().first(), object_class=object_class2[0], gross_outlays_delivered_orders_paid_total_cpe=8, + transaction_obligated_amount=1, ) diff --git a/usaspending_api/disaster/tests/integration/test_disaster_agency_loans.py b/usaspending_api/disaster/tests/integration/test_disaster_agency_loans.py index 4a80752480..8ca7f46aa8 100644 --- a/usaspending_api/disaster/tests/integration/test_disaster_agency_loans.py +++ 
b/usaspending_api/disaster/tests/integration/test_disaster_agency_loans.py @@ -8,8 +8,8 @@ @pytest.mark.django_db -def test_basic_success(client, disaster_account_data, elasticsearch_award_index, monkeypatch, helpers): - setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) +def test_basic_success(client, disaster_account_data, elasticsearch_account_index, monkeypatch, helpers): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["L", "M", "N", "O", "P"]) expected_results = [ @@ -24,7 +24,6 @@ def test_basic_success(client, disaster_account_data, elasticsearch_award_index, "face_value_of_loan": 333.0, }, ] - assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results @@ -40,7 +39,10 @@ def test_basic_success(client, disaster_account_data, elasticsearch_award_index, @pytest.mark.django_db -def test_award_type_codes(client, disaster_account_data, elasticsearch_award_index, monkeypatch, helpers): +def test_award_type_codes( + client, disaster_account_data, elasticsearch_award_index, monkeypatch, helpers, elasticsearch_account_index +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) @@ -69,7 +71,6 @@ def test_award_type_codes(client, disaster_account_data, elasticsearch_award_ind ], } ] - assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results diff --git a/usaspending_api/disaster/tests/integration/test_disaster_agency_spending.py b/usaspending_api/disaster/tests/integration/test_disaster_agency_spending.py index 8ec5dc2a37..1fe1d9be38 100644 --- a/usaspending_api/disaster/tests/integration/test_disaster_agency_spending.py +++ b/usaspending_api/disaster/tests/integration/test_disaster_agency_spending.py @@ 
-1,16 +1,22 @@ +import datetime + import pytest from rest_framework import status from usaspending_api.search.tests.data.utilities import setup_elasticsearch_test - +from usaspending_api.submissions.models import DABSSubmissionWindowSchedule url = "/api/v2/disaster/agency/spending/" @pytest.mark.django_db -def test_basic_success(client, disaster_account_data, elasticsearch_award_index, monkeypatch, helpers): - setup_elasticsearch_test(monkeypatch, elasticsearch_award_index) +def test_basic_success(client, disaster_account_data, elasticsearch_account_index, monkeypatch, helpers): + helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) + bad_date_window = DABSSubmissionWindowSchedule.objects.get(id=2022071) + bad_date_window.submission_reveal_date = datetime.date(2020, 4, 15) + bad_date_window.save() + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) resp = helpers.post_for_spending_endpoint( client, url, def_codes=["L", "M", "N", "O", "P"], spending_type="total", sort="description" ) @@ -97,17 +103,16 @@ def test_basic_success(client, disaster_account_data, elasticsearch_award_index, "code": "007", "description": "Agency 007", "children": [], - "award_count": 1, + "award_count": 2, "obligation": 222.0, "outlay": 0.0, "total_budgetary_resources": None, }, ] - assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results - expected_totals = {"award_count": 6, "obligation": 22222220.0, "outlay": 200020022.0} + expected_totals = {"award_count": 7, "obligation": 22222220.0, "outlay": 200020022.0} assert resp.json()["totals"] == expected_totals @@ -166,7 +171,6 @@ def test_award_type_codes(client, disaster_account_data, elasticsearch_award_ind ], }, ] - assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results @@ -292,3 +296,28 @@ def test_missing_spending_type(client, monkeypatch, generic_account_data, helper resp = helpers.post_for_spending_endpoint(client, url, def_codes=["A"]) 
assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY assert resp.data["detail"] == "Missing value: 'spending_type' is a required field" + + +@pytest.mark.django_db +def test_query_search(client, disaster_account_data, elasticsearch_account_index, monkeypatch, helpers): + helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) + + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) + resp = helpers.post_for_spending_endpoint( + client, url, query="Agency 008", def_codes=["L", "M", "N", "O", "P"], spending_type="award", + ) + expected_results = [ + { + "id": 2, + "code": "008", + "description": "Agency 008", + "children": [], + "award_count": 1, + "obligation": 2000.0, + "outlay": 20000.0, + "total_budgetary_resources": None, + } + ] + + assert resp.status_code == status.HTTP_200_OK + assert resp.json()["results"] == expected_results diff --git a/usaspending_api/disaster/tests/integration/test_federal_account_award.py b/usaspending_api/disaster/tests/integration/test_federal_account_award.py index 95370680d2..27dce2e079 100644 --- a/usaspending_api/disaster/tests/integration/test_federal_account_award.py +++ b/usaspending_api/disaster/tests/integration/test_federal_account_award.py @@ -2,11 +2,14 @@ from rest_framework import status +from usaspending_api.search.tests.data.utilities import setup_elasticsearch_test + url = "/api/v2/disaster/federal_account/spending/" @pytest.mark.django_db -def test_federal_account_award_success(client, generic_account_data, monkeypatch, helpers): +def test_federal_account_award_success(client, generic_account_data, monkeypatch, helpers, elasticsearch_account_index): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() @@ -39,6 +42,13 @@ def test_federal_account_award_success(client, generic_account_data, monkeypatch resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M", "L", "N", "O"], 
spending_type="award") expected_results = [ { + "id": 21, + "code": "000-0000", + "description": "gifts", + "award_count": 4, + "obligation": 304.0, + "outlay": 667.0, + "total_budgetary_resources": None, "children": [ { "code": "2020/52", @@ -68,13 +78,6 @@ def test_federal_account_award_success(client, generic_account_data, monkeypatch "total_budgetary_resources": None, }, ], - "code": "000-0000", - "award_count": 4, - "description": "gifts", - "id": 21, - "obligation": 304.0, - "outlay": 667.0, - "total_budgetary_resources": None, } ] assert resp.status_code == status.HTTP_200_OK @@ -90,3 +93,38 @@ def test_federal_account_award_empty(client, monkeypatch, helpers, generic_accou resp = helpers.post_for_spending_endpoint(client, url, def_codes=["A"], spending_type="award") assert resp.status_code == status.HTTP_200_OK assert len(resp.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_federal_account_award_query(client, generic_account_data, monkeypatch, helpers, elasticsearch_account_index): + helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) + helpers.reset_dabs_cache() + + resp = helpers.post_for_spending_endpoint( + client, url, query="flowers", def_codes=["M", "L", "N", "O"], spending_type="award" + ) + expected_results = [ + { + "children": [ + { + "code": "2020/99", + "award_count": 1, + "description": "flowers", + "id": 22, + "obligation": 100.0, + "outlay": 111.0, + "total_budgetary_resources": None, + } + ], + "code": "000-0000", + "award_count": 1, + "description": "gifts", + "id": 21, + "obligation": 100.0, + "outlay": 111.0, + "total_budgetary_resources": None, + } + ] + assert resp.status_code == status.HTTP_200_OK + assert resp.json()["results"] == expected_results diff --git a/usaspending_api/disaster/tests/integration/test_federal_account_loans.py b/usaspending_api/disaster/tests/integration/test_federal_account_loans.py index f2638902e0..6fe7f01201 100644 --- 
a/usaspending_api/disaster/tests/integration/test_federal_account_loans.py +++ b/usaspending_api/disaster/tests/integration/test_federal_account_loans.py @@ -1,11 +1,14 @@ import pytest from rest_framework import status +from usaspending_api.search.tests.data.utilities import setup_elasticsearch_test + url = "/api/v2/disaster/federal_account/loans/" @pytest.mark.django_db -def test_federal_account_loans_success(client, generic_account_data, monkeypatch, helpers): +def test_federal_account_loans_success(client, generic_account_data, elasticsearch_account_index, monkeypatch, helpers): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"]) expected_results = [] @@ -52,7 +55,8 @@ def test_federal_account_loans_success(client, generic_account_data, monkeypatch @pytest.mark.django_db -def test_federal_account_loans_empty(client, monkeypatch, helpers, generic_account_data): +def test_federal_account_loans_empty(client, monkeypatch, helpers, generic_account_data, elasticsearch_account_index): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["A"]) assert resp.status_code == status.HTTP_200_OK @@ -60,21 +64,30 @@ def test_federal_account_loans_empty(client, monkeypatch, helpers, generic_accou @pytest.mark.django_db -def test_federal_account_loans_invalid_defc(client, generic_account_data, helpers): +def test_federal_account_loans_invalid_defc( + client, generic_account_data, helpers, elasticsearch_account_index, monkeypatch +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["ZZ"]) assert resp.status_code == status.HTTP_400_BAD_REQUEST assert resp.data["detail"] == "Field 'filter|def_codes' is outside valid values 
['9', 'A', 'L', 'M', 'N', 'O', 'P']" @pytest.mark.django_db -def test_federal_account_loans_invalid_defc_type(client, generic_account_data, helpers): +def test_federal_account_loans_invalid_defc_type( + client, generic_account_data, helpers, elasticsearch_account_index, monkeypatch +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) resp = helpers.post_for_spending_endpoint(client, url, def_codes="100") assert resp.status_code == status.HTTP_400_BAD_REQUEST assert resp.data["detail"] == "Invalid value in 'filter|def_codes'. '100' is not a valid type (array)" @pytest.mark.django_db -def test_federal_account_loans_missing_defc(client, generic_account_data, helpers): +def test_federal_account_loans_missing_defc( + client, generic_account_data, helpers, elasticsearch_account_index, monkeypatch +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) resp = helpers.post_for_spending_endpoint(client, url) assert resp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY assert resp.data["detail"] == "Missing value: 'filter|def_codes' is a required field" diff --git a/usaspending_api/disaster/tests/integration/test_object_class_spending_award.py b/usaspending_api/disaster/tests/integration/test_object_class_spending_award.py index 34a33106d8..5a15cb32fc 100644 --- a/usaspending_api/disaster/tests/integration/test_object_class_spending_award.py +++ b/usaspending_api/disaster/tests/integration/test_object_class_spending_award.py @@ -2,11 +2,16 @@ from rest_framework import status +from usaspending_api.search.tests.data.utilities import setup_elasticsearch_test + url = "/api/v2/disaster/object_class/spending/" @pytest.mark.django_db -def test_basic_object_class_award_success(client, basic_faba_with_object_class, monkeypatch, helpers): +def test_basic_object_class_award_success( + client, elasticsearch_account_index, basic_faba_with_object_class, monkeypatch, helpers +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) 
helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() @@ -17,7 +22,7 @@ def test_basic_object_class_award_success(client, basic_faba_with_object_class, "code": "001", "description": "001 name", "award_count": 1, - "obligation": 0.0, + "obligation": 1.0, "outlay": 0.0, "children": [ { @@ -25,7 +30,7 @@ def test_basic_object_class_award_success(client, basic_faba_with_object_class, "code": "0001", "description": "0001 name", "award_count": 1, - "obligation": 0.0, + "obligation": 1.0, "outlay": 0.0, } ], @@ -34,12 +39,15 @@ def test_basic_object_class_award_success(client, basic_faba_with_object_class, assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results - expected_totals = {"award_count": 1, "obligation": 0, "outlay": 0} + expected_totals = {"award_count": 1, "obligation": 1.0, "outlay": 0} assert resp.json()["totals"] == expected_totals @pytest.mark.django_db -def test_object_class_counts_awards(client, faba_with_object_class_and_two_awards, monkeypatch, helpers): +def test_object_class_counts_awards( + client, elasticsearch_account_index, faba_with_object_class_and_two_awards, monkeypatch, helpers +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() @@ -52,8 +60,9 @@ def test_object_class_counts_awards(client, faba_with_object_class_and_two_award @pytest.mark.django_db def test_object_class_groups_by_object_classes( - client, faba_with_two_object_classes_and_two_awards, monkeypatch, helpers + client, elasticsearch_account_index, faba_with_two_object_classes_and_two_awards, monkeypatch, helpers ): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() @@ -63,7 +72,10 @@ def test_object_class_groups_by_object_classes( @pytest.mark.django_db -def test_object_class_spending_filters_on_defc(client, 
basic_faba_with_object_class, monkeypatch, helpers): +def test_object_class_spending_filters_on_defc( + client, elasticsearch_account_index, basic_faba_with_object_class, monkeypatch, helpers +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() @@ -76,10 +88,47 @@ def test_object_class_spending_filters_on_defc(client, basic_faba_with_object_cl @pytest.mark.django_db def test_object_class_spending_filters_on_object_class_existance( - client, award_count_sub_schedule, basic_faba, monkeypatch, helpers + client, elasticsearch_account_index, award_count_sub_schedule, basic_faba, monkeypatch, helpers ): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) helpers.reset_dabs_cache() resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"], spending_type="award") assert len(resp.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_object_class_query(client, elasticsearch_account_index, basic_faba_with_object_class, monkeypatch, helpers): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) + helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) + helpers.reset_dabs_cache() + + resp = helpers.post_for_spending_endpoint( + client, url, query="001 name", def_codes=["A", "M", "N"], spending_type="award" + ) + expected_results = [ + { + "id": "001", + "code": "001", + "description": "001 name", + "award_count": 1, + "obligation": 1.0, + "outlay": 0.0, + "children": [ + { + "id": "1", + "code": "0001", + "description": "0001 name", + "award_count": 1, + "obligation": 1.0, + "outlay": 0.0, + } + ], + } + ] + assert resp.status_code == status.HTTP_200_OK + assert resp.json()["results"] == expected_results + + expected_totals = {"award_count": 1, "obligation": 1.0, "outlay": 0} + assert resp.json()["totals"] == expected_totals diff --git 
a/usaspending_api/disaster/tests/integration/test_object_class_spending_loans.py b/usaspending_api/disaster/tests/integration/test_object_class_spending_loans.py index 2898abe5c8..25e5d2f6dc 100644 --- a/usaspending_api/disaster/tests/integration/test_object_class_spending_loans.py +++ b/usaspending_api/disaster/tests/integration/test_object_class_spending_loans.py @@ -1,12 +1,16 @@ import pytest from rest_framework import status +from usaspending_api.search.tests.data.utilities import setup_elasticsearch_test url = "/api/v2/disaster/object_class/loans/" @pytest.mark.django_db -def test_basic_object_class_award_success(client, basic_object_class_faba_with_loan_value, monkeypatch, helpers): +def test_basic_object_class_award_success( + client, basic_object_class_faba_with_loan_value, elasticsearch_account_index, monkeypatch, helpers +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"]) @@ -16,31 +20,34 @@ def test_basic_object_class_award_success(client, basic_object_class_faba_with_l "code": "001", "description": "001 name", "award_count": 1, - "obligation": 0.0, + "obligation": 1.0, "outlay": 0.0, "children": [ { - "id": "1", + "id": 1, "code": "0001", "description": "0001 name", "award_count": 1, - "obligation": 0.0, + "obligation": 1.0, "outlay": 0.0, - "face_value_of_loan": 5, + "face_value_of_loan": 5.0, } ], - "face_value_of_loan": 5, + "face_value_of_loan": 5.0, } ] assert resp.status_code == status.HTTP_200_OK assert resp.json()["results"] == expected_results - expected_totals = {"award_count": 1, "face_value_of_loan": 5.0, "obligation": 0, "outlay": 0} + expected_totals = {"award_count": 1, "face_value_of_loan": 5.0, "obligation": 1.0, "outlay": 0} assert resp.json()["totals"] == expected_totals @pytest.mark.django_db -def test_object_class_spending_filters_on_defc(client, basic_object_class_faba_with_loan_value, 
monkeypatch, helpers): +def test_object_class_spending_filters_on_defc( + client, basic_object_class_faba_with_loan_value, elasticsearch_account_index, monkeypatch, helpers +): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["A"]) @@ -52,18 +59,27 @@ def test_object_class_spending_filters_on_defc(client, basic_object_class_faba_w @pytest.mark.django_db def test_object_class_adds_value_across_awards( - client, basic_object_class_multiple_faba_with_loan_value_with_single_object_class, monkeypatch, helpers + client, + elasticsearch_account_index, + basic_object_class_multiple_faba_with_loan_value_with_single_object_class, + monkeypatch, + helpers, ): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) - resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"]) assert resp.json()["results"][0]["face_value_of_loan"] == 10 @pytest.mark.django_db def test_object_class_doesnt_add_across_object_classes( - client, basic_object_class_multiple_faba_with_loan_value_with_two_object_classes, monkeypatch, helpers + client, + elasticsearch_account_index, + basic_object_class_multiple_faba_with_loan_value_with_two_object_classes, + monkeypatch, + helpers, ): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"]) @@ -73,8 +89,9 @@ def test_object_class_doesnt_add_across_object_classes( @pytest.mark.django_db def test_object_class_spending_filters_on_object_class_existance( - client, award_count_sub_schedule, basic_faba, monkeypatch, helpers + client, elasticsearch_account_index, award_count_sub_schedule, basic_faba, monkeypatch, helpers ): + setup_elasticsearch_test(monkeypatch, elasticsearch_account_index) 
helpers.patch_datetime_now(monkeypatch, 2022, 12, 31) resp = helpers.post_for_spending_endpoint(client, url, def_codes=["M"]) diff --git a/usaspending_api/disaster/v2/views/agency/loans.py b/usaspending_api/disaster/v2/views/agency/loans.py index d1c07d6662..20786c7900 100644 --- a/usaspending_api/disaster/v2/views/agency/loans.py +++ b/usaspending_api/disaster/v2/views/agency/loans.py @@ -1,19 +1,19 @@ import logging +from decimal import Decimal from django.contrib.postgres.fields import ArrayField from django.db.models import F, Value, IntegerField, Subquery, OuterRef from django.views.decorators.csrf import csrf_exempt -from rest_framework.response import Response from typing import List from usaspending_api.common.cache_decorator import cache_response from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict -from usaspending_api.common.helpers.generic_helper import get_pagination_metadata from usaspending_api.disaster.v2.views.disaster_base import ( DisasterBase, LoansPaginationMixin, LoansMixin, FabaOutlayMixin, ) +from usaspending_api.disaster.v2.views.elasticsearch_account_base import ElasticsearchAccountDisasterBase from usaspending_api.disaster.v2.views.elasticsearch_base import ( ElasticsearchDisasterBase, ElasticsearchLoansPaginationMixin, @@ -44,25 +44,47 @@ def route_agency_loans_backend(request, *args, **kwargs): return route_agency_loans_backend -class LoansByAgencyViewSet(LoansPaginationMixin, LoansMixin, FabaOutlayMixin, DisasterBase): +class LoansByAgencyViewSet(LoansPaginationMixin, ElasticsearchAccountDisasterBase, LoansMixin, FabaOutlayMixin): """ This endpoint provides insights on the Agencies awarding loans from disaster/emergency funding per the requested filters. 
""" endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/agency/loans.md" + required_filters = ["def_codes", "query"] + query_fields = ["funding_toptier_agency_name.contains"] + agg_key = "financial_accounts_by_award.funding_toptier_agency_id" # primary (tier-1) aggregation key + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + top_hits_fields = [ + "financial_accounts_by_award.funding_toptier_agency_code", + "financial_accounts_by_award.funding_toptier_agency_name", + ] @cache_response() def post(self, request): + self.filters.update({"award_type_codes": ["07", "08"]}) + return self.perform_elasticsearch_search(loans=True) - results = list(self.queryset.order_by(*self.pagination.robust_order_by_fields)) - return Response( - { - "totals": self.accumulate_total_values(results, ["award_count", "face_value_of_loan"]), - "results": results[self.pagination.lower_limit : self.pagination.upper_limit], - "page_metadata": get_pagination_metadata(len(results), self.pagination.limit, self.pagination.page), - } - ) + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + results = [] + for bucket in info_buckets: + results.append(self._build_json_result(bucket)) + return results + + def _build_json_result(self, bucket: dict): + return { + "id": int(bucket["key"]), + "code": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["funding_toptier_agency_code"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["funding_toptier_agency_name"], + "children": [], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "face_value_of_loan": bucket["count_awards_by_dim"]["sum_loan_value"]["value"], + } @property def queryset(self): diff --git 
a/usaspending_api/disaster/v2/views/agency/spending.py b/usaspending_api/disaster/v2/views/agency/spending.py index 7f138396a5..0ea2b04b20 100644 --- a/usaspending_api/disaster/v2/views/agency/spending.py +++ b/usaspending_api/disaster/v2/views/agency/spending.py @@ -1,4 +1,5 @@ import logging +from decimal import Decimal from django.contrib.postgres.fields import ArrayField from django.db.models import Case, DecimalField, F, IntegerField, Q, Sum, Value, When, Subquery, OuterRef, Func, Exists @@ -8,7 +9,6 @@ from rest_framework.response import Response from typing import List -from usaspending_api.awards.models import FinancialAccountsByAwards from usaspending_api.common.cache_decorator import cache_response from usaspending_api.common.elasticsearch.json_helpers import json_str_to_dict from usaspending_api.common.helpers.generic_helper import get_pagination_metadata @@ -22,12 +22,12 @@ ElasticsearchDisasterBase, ElasticsearchSpendingPaginationMixin, ) +from usaspending_api.disaster.v2.views.elasticsearch_account_base import ElasticsearchAccountDisasterBase from usaspending_api.financial_activities.models import FinancialAccountsByProgramActivityObjectClass from usaspending_api.references.models import GTASSF133Balances, Agency, ToptierAgency from usaspending_api.submissions.models import SubmissionAttributes from usaspending_api.search.v2.elasticsearch_helper import get_summed_value_as_float - logger = logging.getLogger(__name__) @@ -57,16 +57,28 @@ def route_agency_spending_backend(request, *args, **kwargs): return route_agency_spending_backend -class SpendingByAgencyViewSet(PaginationMixin, SpendingMixin, FabaOutlayMixin, DisasterBase): +class SpendingByAgencyViewSet( + PaginationMixin, SpendingMixin, FabaOutlayMixin, ElasticsearchAccountDisasterBase, +): """ Returns disaster spending by agency. 
""" endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/agency/spending.md" + required_filters = ["def_codes", "award_type_codes", "query"] + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + query_fields = [ + "funding_toptier_agency_name", + "funding_toptier_agency_name.contains", + ] + agg_key = "financial_accounts_by_award.funding_toptier_agency_id" # primary (tier-1) aggregation key + top_hits_fields = [ + "financial_accounts_by_award.funding_toptier_agency_code", + "financial_accounts_by_award.funding_toptier_agency_name", + ] @cache_response() def post(self, request): if self.spending_type == "award": - results = self.award_queryset - extra_columns = ["award_count"] + return self.perform_elasticsearch_search() else: results = self.total_queryset extra_columns = ["total_budgetary_resources"] @@ -85,6 +97,27 @@ def post(self, request): } ) + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + results = [] + for bucket in info_buckets: + results.append(self._build_json_result(bucket)) + return results + + def _build_json_result(self, bucket: dict): + return { + "id": int(bucket["key"]), + "code": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["funding_toptier_agency_code"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["funding_toptier_agency_name"], + "children": [], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "total_budgetary_resources": None, + } + @property def total_queryset(self): @@ -167,54 +200,6 @@ def total_queryset(self): .values(*annotations) ) - @property - def award_queryset(self): - cte = With( - ToptierAgency.objects.distinct("toptier_agency_id") - .filter(agency__isnull=False) - 
.annotate(agency_id=F("agency__id"), toptier_name=F("name")) - .values("toptier_agency_id", "agency_id", "toptier_code", "toptier_name") - .order_by("toptier_agency_id", "-agency__toptier_flag", "agency_id") - ) - - filters = [ - self.is_in_provided_def_codes, - self.all_closed_defc_submissions, - Q(treasury_account__isnull=False), - ] - - annotations = { - "id": cte.col.agency_id, - "code": cte.col.toptier_code, - "description": cte.col.toptier_name, - # Currently, this endpoint can never have children. - "children": Value([], output_field=ArrayField(IntegerField())), - "award_count": self.unique_file_d_award_count(), - "obligation": Coalesce(Sum("transaction_obligated_amount"), 0), - "outlay": Coalesce( - Sum( - Case( - When(self.final_period_submission_query_filters, then=F("gross_outlay_amount_by_award_cpe")), - default=Value(0), - ) - ), - 0, - ), - "total_budgetary_resources": Value(None, DecimalField()), # NULL for award spending - } - - # Assuming it is more performant to fetch all rows once rather than - # run a count query and fetch only a page's worth of results - return ( - cte.join(FinancialAccountsByAwards, treasury_account__funding_toptier_agency_id=cte.col.toptier_agency_id) - .with_cte(cte) - .filter(*filters) - .annotate(agency_id=cte.col.agency_id, toptier_code=cte.col.toptier_code, toptier_name=cte.col.toptier_name) - .values("agency_id", "toptier_code", "toptier_name") - .annotate(**annotations) - .values(*annotations) - ) - class SpendingBySubtierAgencyViewSet(ElasticsearchSpendingPaginationMixin, ElasticsearchDisasterBase): """ @@ -225,7 +210,12 @@ class SpendingBySubtierAgencyViewSet(ElasticsearchSpendingPaginationMixin, Elast endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/agency/spending.md" required_filters = ["def_codes", "award_type_codes", "query"] - query_fields = ["funding_toptier_agency_name.contains"] + query_fields = [ + "funding_toptier_agency_name.contains", + "funding_toptier_agency_name", + 
"funding_subtier_agency_name.contains", + "funding_subtier_agency_name", + ] agg_key = "funding_toptier_agency_agg_key" # primary (tier-1) aggregation key sub_agg_key = "funding_subtier_agency_agg_key" # secondary (tier-2) sub-aggregation key diff --git a/usaspending_api/disaster/v2/views/elasticsearch_account_base.py b/usaspending_api/disaster/v2/views/elasticsearch_account_base.py new file mode 100644 index 0000000000..b769289ac6 --- /dev/null +++ b/usaspending_api/disaster/v2/views/elasticsearch_account_base.py @@ -0,0 +1,241 @@ +from abc import abstractmethod +from typing import List, Optional, Dict + +from elasticsearch_dsl import Q as ES_Q, A +from rest_framework.response import Response + +from usaspending_api.common.cache_decorator import cache_response +from usaspending_api.common.data_classes import Pagination +from usaspending_api.common.elasticsearch.search_wrappers import AccountSearch +from usaspending_api.common.exceptions import ForbiddenException +from usaspending_api.common.helpers.generic_helper import get_pagination_metadata +from usaspending_api.common.query_with_filters import QueryWithFilters +from usaspending_api.disaster.v2.views.disaster_base import DisasterBase + + +class ElasticsearchAccountDisasterBase(DisasterBase): + agg_group_name: str = "group_by_agg_key" # name used for the tier-1 aggregation group + agg_key: str + bucket_count: int + filter_query: ES_Q + has_children: bool = False + nested_nonzero_fields: Dict[str, str] = [] + nonzero_fields: Dict[str, str] = [] + query_fields: List[str] + sub_agg_group_name: str = "sub_group_by_sub_agg_key" # name used for the tier-2 aggregation group + sub_agg_key: str = None # will drive including of a sub-bucket-aggregation if overridden by subclasses + sub_top_hits_fields: List[str] # list used for top_hits sub aggregation + top_hits_fields: List[str] # list used for the top_hits aggregation + + pagination: Pagination # Overwritten by a pagination mixin + + @cache_response() + def post(self, 
request): + return Response(self.perform_elasticsearch_search()) + + def perform_elasticsearch_search(self, loans=False) -> Response: + filters = {f"nested_{key}": val for key, val in self.filters.items() if key != "award_type_codes"} + if self.filters.get("award_type_codes") is not None: + filters["award_type_codes"] = self.filters["award_type_codes"] + # Need to update the value of "query" to have the fields to search on + query = filters.pop("nested_query", None) + if query: + filters["nested_query"] = {"text": query, "fields": self.query_fields} + + # Ensure that only non-zero values are taken into consideration + filters["nested_nonzero_fields"] = list(self.nested_nonzero_fields.values()) + self.filter_query = QueryWithFilters.generate_accounts_elasticsearch_query(filters) + # using a set value here as doing an extra ES query is detrimental to performance + # And the dimensions on which group-by aggregations are performed so far + # (agency, TAS, object_class) all have cardinality less than this number + # If the data increases to a point where there are more results than this, it should be changed + self.bucket_count = 1000 + messages = [] + if self.pagination.sort_key in ("id", "code"): + messages.append( + ( + f"Notice! API Request to sort on '{self.pagination.sort_key}' field isn't fully implemented." + " Results were actually sorted using 'description' field." 
+ ) + ) + + response = self.query_elasticsearch(loans) + response["page_metadata"] = get_pagination_metadata( + len(response["results"]), self.pagination.limit, self.pagination.page + ) + response["results"] = response["results"][self.pagination.lower_limit : self.pagination.upper_limit] + if messages: + response["messages"] = messages + + return Response(response) + + @abstractmethod + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + pass + + def build_elasticsearch_search_with_aggregations(self) -> Optional[AccountSearch]: + """ + Using the provided ES_Q object creates an AccountSearch object with the necessary applied aggregations. + """ + # No need to continue if there is no result + if self.bucket_count == 0: + return None + + # Create the initial search using filters + search = AccountSearch().filter(self.filter_query) + # Create the aggregations + financial_accounts_agg = A("nested", path="financial_accounts_by_award") + if "query" in self.filters: + terms = ES_Q( + "terms", **{"financial_accounts_by_award.disaster_emergency_fund_code": self.filters.get("def_codes")} + ) + query = ES_Q( + "multi_match", + query=self.filters["query"], + type="phrase_prefix", + fields=[f"financial_accounts_by_award.{query}" for query in self.query_fields], + ) + filter_agg_query = ES_Q("bool", should=[terms, query], minimum_should_match=2) + else: + filter_agg_query = ES_Q( + "terms", **{"financial_accounts_by_award.disaster_emergency_fund_code": self.filters.get("def_codes")} + ) + filtered_aggs = A("filter", filter_agg_query) + group_by_dim_agg = A("terms", field=self.agg_key, size=self.bucket_count) + dim_metadata = A( + "top_hits", + size=1, + sort=[{"financial_accounts_by_award.update_date": {"order": "desc"}}], + _source={"includes": self.top_hits_fields}, + ) + sum_covid_outlay = A( + "sum", + field="financial_accounts_by_award.gross_outlay_amount_by_award_cpe", + script={"source": 
"doc['financial_accounts_by_award.is_final_balances_for_fy'].value ? _value : 0"}, + ) + sum_covid_obligation = A("sum", field="financial_accounts_by_award.transaction_obligated_amount") + count_awards_by_dim = A("reverse_nested", **{}) + award_count = A("value_count", field="financial_account_distinct_award_key") + loan_value = A("sum", field="total_loan_value") + + # Apply the aggregations + search.aggs.bucket(self.agg_group_name, financial_accounts_agg).bucket("filtered_aggs", filtered_aggs).bucket( + "group_by_dim_agg", group_by_dim_agg + ).metric("dim_metadata", dim_metadata).metric("sum_transaction_obligated_amount", sum_covid_obligation).metric( + "sum_gross_outlay_amount_by_award_cpe", sum_covid_outlay + ).bucket( + "count_awards_by_dim", count_awards_by_dim + ).metric( + "award_count", award_count + ).metric( + "sum_loan_value", loan_value + ) + + # Apply sub-aggregation for children if applicable + if self.sub_agg_key: + self.extend_elasticsearch_search_with_sub_aggregation(search) + + search.update_from_dict({"size": 0}) + + return search + + def extend_elasticsearch_search_with_sub_aggregation(self, search: AccountSearch): + """ + This template method is called if the `self.sub_agg_key` is supplied, in order to post-process the query and + inject a sub-aggregation on a secondary dimension (that is subordinate to the first agg_key's dimension). + + Example: Subtier Agency spending rolled up to Toptier Agency spending + """ + sub_bucket_count = 1000 # get_number_of_unique_terms_for_accounts(self.filter_query, f"{self.sub_agg_key}") + size = sub_bucket_count + shard_size = sub_bucket_count + 100 + + if shard_size > 10000: + raise ForbiddenException( + "Current filters return too many unique items. Narrow filters to return results or use downloads." 
+ ) + + # Sub-aggregation to append to primary agg + sub_group_by_sub_agg_key_values = {"field": self.sub_agg_key, "size": size, "shard_size": shard_size} + + sub_group_by_sub_agg_key = A("terms", **sub_group_by_sub_agg_key_values) + sub_dim_metadata = A( + "top_hits", + size=1, + sort=[{"financial_accounts_by_award.update_date": {"order": "desc"}}], + _source={"includes": self.sub_top_hits_fields}, + ) + sub_sum_covid_outlay = A( + "sum", + field="financial_accounts_by_award.gross_outlay_amount_by_award_cpe", + script={"source": "doc['financial_accounts_by_award.is_final_balances_for_fy'].value ? _value : 0"}, + ) + sub_sum_covid_obligation = A("sum", field="financial_accounts_by_award.transaction_obligated_amount") + sub_count_awards_by_dim = A("reverse_nested", **{}) + sub_award_count = A("value_count", field="financial_account_distinct_award_key") + loan_value = A("sum", field="total_loan_value") + + sub_group_by_sub_agg_key.metric("dim_metadata", sub_dim_metadata).metric( + "sum_transaction_obligated_amount", sub_sum_covid_obligation + ).metric("sum_gross_outlay_amount_by_award_cpe", sub_sum_covid_outlay).bucket( + "count_awards_by_dim", sub_count_awards_by_dim + ).metric( + "award_count", sub_award_count + ).metric( + "sum_loan_value", loan_value + ) + + # Append sub-agg to primary agg, and include the sub-agg's sum metric aggs too + search.aggs[self.agg_group_name]["group_by_dim_agg"].bucket(self.sub_agg_group_name, sub_group_by_sub_agg_key) + + def build_totals(self, response: List[dict], loans: bool = False) -> dict: + obligations = 0 + outlays = 0 + award_count = 0 + loan_sum = 0 + for item in response: + obligations += item["obligation"] + outlays += item["outlay"] + award_count += item["award_count"] + if loans: + loan_sum += item["face_value_of_loan"] + + retval = {"obligation": round(obligations, 2), "outlay": round(outlays, 2), "award_count": award_count} + if loans: + retval["face_value_of_loan"] = loan_sum + return retval + + def 
query_elasticsearch(self, loans) -> dict: + search = self.build_elasticsearch_search_with_aggregations() + if search is None: + totals = self.build_totals(response=[], loans=loans) + return {"totals": totals, "results": []} + response = search.handle_execute() + response = response.aggs.to_dict() + buckets = ( + response.get(self.agg_group_name, {}) + .get("filtered_aggs", {}) + .get("group_by_dim_agg", {}) + .get("buckets", []) + ) + results = self.build_elasticsearch_result(buckets) + totals = self.build_totals(results, loans) + sorted_results = self.sort_results(results) + return {"totals": totals, "results": sorted_results} + + def sort_results(self, results: List[dict]) -> List[dict]: + sorted_parents = sorted( + results, + key=lambda val: val.get(self.pagination.sort_key, "id"), + reverse=self.pagination.sort_order == "desc", + ) + + if self.has_children: + for parent in sorted_parents: + parent["children"] = sorted( + parent.get("children", []), + key=lambda val: val.get(self.pagination.sort_key, "id"), + reverse=self.pagination.sort_order == "desc", + ) + + return sorted_parents diff --git a/usaspending_api/disaster/v2/views/federal_account/loans.py b/usaspending_api/disaster/v2/views/federal_account/loans.py index 40b31a546e..09c965ba36 100644 --- a/usaspending_api/disaster/v2/views/federal_account/loans.py +++ b/usaspending_api/disaster/v2/views/federal_account/loans.py @@ -1,38 +1,105 @@ +from typing import List +from decimal import Decimal + from django.db.models import F -from rest_framework.response import Response from usaspending_api.accounts.models import TreasuryAppropriationAccount from usaspending_api.common.cache_decorator import cache_response from usaspending_api.disaster.v2.views.disaster_base import ( - DisasterBase, LoansPaginationMixin, LoansMixin, FabaOutlayMixin, ) -from usaspending_api.disaster.v2.views.federal_account.spending import construct_response +from usaspending_api.disaster.v2.views.elasticsearch_account_base import 
ElasticsearchAccountDisasterBase -class LoansViewSet(LoansMixin, LoansPaginationMixin, FabaOutlayMixin, DisasterBase): +class LoansViewSet(LoansMixin, LoansPaginationMixin, FabaOutlayMixin, ElasticsearchAccountDisasterBase): """ Returns loan disaster spending by federal account. """ endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/federal_account/loans.md" + agg_key = "financial_accounts_by_award.treasury_account_id" # primary (tier-1) aggregation key + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + query_fields = [ + "federal_account_symbol", + "federal_account_symbol.contains", + "federal_account_title", + "federal_account_title.contains", + "treasury_account_symbol", + "treasury_account_symbol.contains", + "treasury_account_title", + "treasury_account_title.contains", + ] + top_hits_fields = [ + "financial_accounts_by_award.federal_account_symbol", + "financial_accounts_by_award.federal_account_title", + "financial_accounts_by_award.treasury_account_symbol", + "financial_accounts_by_award.treasury_account_title", + "financial_accounts_by_award.federal_account_id", + ] @cache_response() def post(self, request): - # rename hack to use the Dataclasses, setting to Dataclass attribute name - if self.pagination.sort_key == "face_value_of_loan": - self.pagination.sort_key = "total_budgetary_resources" - - results = construct_response(list(self.queryset), self.pagination) + self.filters.update({"award_type_codes": ["07", "08"]}) + self.has_children = True + return self.perform_elasticsearch_search(loans=True) - # rename hack to use the Dataclasses, swapping back in desired loan field name - for result in results["results"]: - for child in result["children"]: - child["face_value_of_loan"] = child.pop("total_budgetary_resources") - result["face_value_of_loan"] = result.pop("total_budgetary_resources") + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + 
temp_results = {} + child_results = [] + for bucket in info_buckets: + child = self._build_child_json_result(bucket) + child_results.append(child) + for child in child_results: + result = self._build_json_result(child) + child.pop("parent_data") + if result["id"] in temp_results.keys(): + temp_results[result["id"]] = { + "id": int(result["id"]), + "code": result["code"], + "description": result["description"], + "award_count": temp_results[result["id"]]["award_count"] + result["award_count"], + # the count of distinct awards contributing to the totals + "obligation": temp_results[result["id"]]["obligation"] + result["obligation"], + "outlay": temp_results[result["id"]]["outlay"] + result["outlay"], + "children": temp_results[result["id"]]["children"] + result["children"], + "face_value_of_loan": temp_results[result["id"]]["face_value_of_loan"] + + result["face_value_of_loan"], + } + else: + temp_results[result["id"]] = result + results = [x for x in temp_results.values()] + return results - results["totals"] = self.accumulate_total_values(results["results"], ["award_count", "face_value_of_loan"]) + def _build_json_result(self, child): + return { + "id": child["parent_data"][2], + "code": child["parent_data"][1], + "description": child["parent_data"][0], + "award_count": child["award_count"], + # the count of distinct awards contributing to the totals + "obligation": child["obligation"], + "outlay": child["outlay"], + "children": [child], + "face_value_of_loan": child["face_value_of_loan"], + } - return Response(results) + def _build_child_json_result(self, bucket: dict): + return { + "id": int(bucket["key"]), + "code": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["treasury_account_symbol"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["treasury_account_title"], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: 
Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "face_value_of_loan": bucket["count_awards_by_dim"]["sum_loan_value"]["value"], + "parent_data": [ + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_title"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_symbol"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_id"], + ], + } @property def queryset(self): diff --git a/usaspending_api/disaster/v2/views/federal_account/spending.py b/usaspending_api/disaster/v2/views/federal_account/spending.py index 39e8bd1d49..01977c98c1 100644 --- a/usaspending_api/disaster/v2/views/federal_account/spending.py +++ b/usaspending_api/disaster/v2/views/federal_account/spending.py @@ -1,20 +1,22 @@ +from typing import List +from decimal import Decimal + from django.db.models import Q, Sum, F, Value, DecimalField, Case, When, OuterRef, Subquery, Func, IntegerField from django.db.models.functions import Coalesce from rest_framework.response import Response -from usaspending_api.awards.models import FinancialAccountsByAwards from usaspending_api.common.cache_decorator import cache_response from usaspending_api.common.data_classes import Pagination from usaspending_api.common.helpers.generic_helper import get_pagination_metadata from usaspending_api.disaster.v2.views.federal_account.federal_account_result import FedAcctResults, FedAccount, TAS from usaspending_api.disaster.v2.views.disaster_base import ( - DisasterBase, PaginationMixin, SpendingMixin, FabaOutlayMixin, ) from usaspending_api.financial_activities.models import FinancialAccountsByProgramActivityObjectClass from usaspending_api.references.models.gtas_sf133_balances import GTASSF133Balances +from usaspending_api.disaster.v2.views.elasticsearch_account_base import ElasticsearchAccountDisasterBase def construct_response(results: list, pagination: Pagination): @@ -31,16 +33,37 @@ def 
construct_response(results: list, pagination: Pagination): } -class SpendingViewSet(PaginationMixin, SpendingMixin, FabaOutlayMixin, DisasterBase): +class SpendingViewSet( + SpendingMixin, FabaOutlayMixin, ElasticsearchAccountDisasterBase, PaginationMixin, +): """ Returns disaster spending by federal account. """ endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/federal_account/spending.md" + agg_key = "financial_accounts_by_award.treasury_account_id" # primary (tier-1) aggregation key + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + query_fields = [ + "federal_account_symbol", + "federal_account_symbol.contains", + "federal_account_title", + "federal_account_title.contains", + "treasury_account_symbol", + "treasury_account_symbol.contains", + "treasury_account_title", + "treasury_account_title.contains", + ] + top_hits_fields = [ + "financial_accounts_by_award.federal_account_symbol", + "financial_accounts_by_award.federal_account_title", + "financial_accounts_by_award.treasury_account_symbol", + "financial_accounts_by_award.treasury_account_title", + "financial_accounts_by_award.federal_account_id", + ] @cache_response() def post(self, request): if self.spending_type == "award": - results = list(self.award_queryset) - extra_columns = ["award_count"] + self.has_children = True + return self.perform_elasticsearch_search() else: results = list(self.total_queryset) extra_columns = ["total_budgetary_resources"] @@ -50,6 +73,64 @@ def post(self, request): return Response(response) + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + temp_results = {} + child_results = [] + for bucket in info_buckets: + child = self._build_child_json_result(bucket) + child_results.append(child) + for child in child_results: + result = self._build_json_result(child) + child.pop("parent_data") + if result["id"] in temp_results.keys(): + temp_results[result["id"]] = { + 
"id": int(result["id"]), + "code": result["code"], + "description": result["description"], + "award_count": temp_results[result["id"]]["award_count"] + result["award_count"], + # the count of distinct awards contributing to the totals + "obligation": temp_results[result["id"]]["obligation"] + result["obligation"], + "outlay": temp_results[result["id"]]["outlay"] + result["outlay"], + "total_budgetary_resources": None, + "children": temp_results[result["id"]]["children"] + result["children"], + } + else: + temp_results[result["id"]] = result + results = [x for x in temp_results.values()] + return results + + def _build_json_result(self, child): + return { + "id": child["parent_data"][2], + "code": child["parent_data"][1], + "description": child["parent_data"][0], + "award_count": child["award_count"], + # the count of distinct awards contributing to the totals + "obligation": child["obligation"], + "outlay": child["outlay"], + "total_budgetary_resources": None, + "children": [child], + } + + def _build_child_json_result(self, bucket: dict): + return { + "id": int(bucket["key"]), + "code": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["treasury_account_symbol"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["treasury_account_title"], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "total_budgetary_resources": None, + "parent_data": [ + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_title"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_symbol"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["federal_account_id"], + ], + } + @property def total_queryset(self): filters = [ @@ -120,46 +201,3 @@ def total_queryset(self): .annotate(**annotations) 
.values(*annotations.keys()) ) - - @property - def award_queryset(self): - filters = [ - self.is_in_provided_def_codes, - Q(treasury_account__isnull=False), - Q(treasury_account__federal_account__isnull=False), - self.all_closed_defc_submissions, - ] - - annotations = { - "fa_code": F("treasury_account__federal_account__federal_account_code"), - "award_count": self.unique_file_d_award_count(), - "description": F("treasury_account__account_title"), - "code": F("treasury_account__tas_rendering_label"), - "id": F("treasury_account__treasury_account_identifier"), - "fa_description": F("treasury_account__federal_account__account_title"), - "fa_id": F("treasury_account__federal_account_id"), - "obligation": Coalesce(Sum("transaction_obligated_amount"), 0), - "outlay": Coalesce( - Sum( - Case( - When(self.final_period_submission_query_filters, then=F("gross_outlay_amount_by_award_cpe")), - default=Value(0), - ) - ), - 0, - ), - "total_budgetary_resources": Value(None, DecimalField()), # NULL for award spending - } - - # Assuming it is more performant to fetch all rows once rather than - # run a count query and fetch only a page's worth of results - return ( - FinancialAccountsByAwards.objects.filter(*filters) - .values( - "treasury_account__federal_account__id", - "treasury_account__federal_account__federal_account_code", - "treasury_account__federal_account__account_title", - ) - .annotate(**annotations) - .values(*annotations.keys()) - ) diff --git a/usaspending_api/disaster/v2/views/object_class/loans.py b/usaspending_api/disaster/v2/views/object_class/loans.py index 47385d9fa4..db22242b6a 100644 --- a/usaspending_api/disaster/v2/views/object_class/loans.py +++ b/usaspending_api/disaster/v2/views/object_class/loans.py @@ -1,42 +1,44 @@ +from typing import List +from decimal import Decimal + from django.db.models import F, Value, TextField, Min from django.db.models.functions import Cast -from rest_framework.response import Response from 
usaspending_api.common.cache_decorator import cache_response from usaspending_api.common.helpers.orm_helpers import ConcatAll from usaspending_api.disaster.v2.views.disaster_base import ( - DisasterBase, LoansPaginationMixin, LoansMixin, FabaOutlayMixin, ) -from usaspending_api.disaster.v2.views.object_class.spending import construct_response +from usaspending_api.disaster.v2.views.elasticsearch_account_base import ElasticsearchAccountDisasterBase from usaspending_api.references.models import ObjectClass -class ObjectClassLoansViewSet(LoansMixin, LoansPaginationMixin, FabaOutlayMixin, DisasterBase): +class ObjectClassLoansViewSet(LoansMixin, FabaOutlayMixin, LoansPaginationMixin, ElasticsearchAccountDisasterBase): """Provides insights on the Object Classes' loans from disaster/emergency funding per the requested filters""" endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/object_class/loans.md" + agg_key = "financial_accounts_by_award.object_class" # primary (tier-1) aggregation key + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + query_fields = [ + "major_object_class_name", + "major_object_class_name.contains", + "object_class_name", + "object_class_name.contains", + ] + top_hits_fields = [ + "financial_accounts_by_award.object_class_id", + "financial_accounts_by_award.major_object_class_name", + "financial_accounts_by_award.major_object_class", + "financial_accounts_by_award.object_class_name", + "financial_accounts_by_award.object_class", + ] @cache_response() def post(self, request): - # rename hack to use the Dataclasses, setting to Dataclass attribute name - if self.pagination.sort_key == "face_value_of_loan": - self.pagination.sort_key = "total_budgetary_resources" - - results = list(self.queryset) - results = [{("id" if k == "id_" else k): v for k, v in r.items()} for r in results] - results = construct_response(results, self.pagination, False) - - # rename hack to 
use the Dataclasses, swapping back in desired loan field name - for result in results["results"]: - for child in result["children"]: - child["face_value_of_loan"] = child.pop("total_budgetary_resources") - result["face_value_of_loan"] = result.pop("total_budgetary_resources") - - results["totals"] = self.accumulate_total_values(results["results"], ["award_count", "face_value_of_loan"]) - - return Response(results) + self.filters.update({"award_type_codes": ["07", "08"]}) + self.has_children = True + return self.perform_elasticsearch_search(loans=True) @property def queryset(self): @@ -59,3 +61,60 @@ def queryset(self): } return query.queryset.values("major_object_class", "object_class").annotate(**annotations).values(*annotations) + + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + temp_results = {} + child_results = [] + for bucket in info_buckets: + child = self._build_child_json_result(bucket) + child_results.append(child) + for child in child_results: + result = self._build_json_result(child) + child.pop("parent_data") + if result["id"] in temp_results.keys(): + temp_results[result["id"]] = { + "id": int(result["id"]), + "code": result["code"], + "description": result["description"], + "award_count": temp_results[result["id"]]["award_count"] + result["award_count"], + # the count of distinct awards contributing to the totals + "obligation": temp_results[result["id"]]["obligation"] + result["obligation"], + "outlay": temp_results[result["id"]]["outlay"] + result["outlay"], + "face_value_of_loan": bucket["count_awards_by_dim"]["sum_loan_value"]["value"], + "children": temp_results[result["id"]]["children"] + result["children"], + } + else: + temp_results[result["id"]] = result + results = [x for x in temp_results.values()] + return results + + def _build_json_result(self, child): + return { + "id": child["parent_data"][1], + "code": child["parent_data"][1], + "description": child["parent_data"][0], + "award_count": 
child["award_count"], + # the count of distinct awards contributing to the totals + "obligation": child["obligation"], + "outlay": child["outlay"], + "face_value_of_loan": child["face_value_of_loan"], + "children": [child], + } + + def _build_child_json_result(self, bucket: dict): + return { + "id": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["object_class_id"], + "code": bucket["key"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["object_class_name"], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "face_value_of_loan": bucket["count_awards_by_dim"]["sum_loan_value"]["value"], + "parent_data": [ + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["major_object_class_name"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["major_object_class"], + ], + } diff --git a/usaspending_api/disaster/v2/views/object_class/spending.py b/usaspending_api/disaster/v2/views/object_class/spending.py index 8d4d1bd76f..e31faf452e 100644 --- a/usaspending_api/disaster/v2/views/object_class/spending.py +++ b/usaspending_api/disaster/v2/views/object_class/spending.py @@ -1,18 +1,20 @@ +from typing import List +from decimal import Decimal + from django.db.models import Q, Sum, F, Value, Case, When, Min, TextField, IntegerField from django.db.models.functions import Coalesce, Cast from rest_framework.response import Response -from usaspending_api.awards.models import FinancialAccountsByAwards from usaspending_api.common.cache_decorator import cache_response from usaspending_api.common.data_classes import Pagination from usaspending_api.common.helpers.generic_helper import get_pagination_metadata +from usaspending_api.disaster.v2.views.elasticsearch_account_base import ElasticsearchAccountDisasterBase from 
usaspending_api.disaster.v2.views.object_class.object_class_result import ( ObjectClassResults, MajorClass, ObjectClass, ) from usaspending_api.disaster.v2.views.disaster_base import ( - DisasterBase, PaginationMixin, SpendingMixin, FabaOutlayMixin, @@ -35,22 +37,37 @@ def construct_response(results: list, pagination: Pagination, strip_total_budget } -class ObjectClassSpendingViewSet(PaginationMixin, SpendingMixin, FabaOutlayMixin, DisasterBase): +class ObjectClassSpendingViewSet(SpendingMixin, FabaOutlayMixin, PaginationMixin, ElasticsearchAccountDisasterBase): """View to implement the API""" endpoint_doc = "usaspending_api/api_contracts/contracts/v2/disaster/object_class/spending.md" + # Defined for the Elasticsearch implementation of Spending by Award + agg_key = "financial_accounts_by_award.object_class" # primary (tier-1) aggregation key + nested_nonzero_fields = {"obligation": "transaction_obligated_amount", "outlay": "gross_outlay_amount_by_award_cpe"} + query_fields = [ + "major_object_class_name", + "major_object_class_name.contains", + "object_class_name", + "object_class_name.contains", + ] + top_hits_fields = [ + "financial_accounts_by_award.object_class_id", + "financial_accounts_by_award.major_object_class_name", + "financial_accounts_by_award.object_class_name", + "financial_accounts_by_award.major_object_class", + ] + @cache_response() def post(self, request): if self.spending_type == "award": - results = list(self.award_queryset) - extra_columns = ["award_count"] + self.has_children = True + return self.perform_elasticsearch_search() else: results = list(self.total_queryset) extra_columns = [] - - response = construct_response(results, self.pagination) - response["totals"] = self.accumulate_total_values(results, extra_columns) + response = construct_response(results, self.pagination) + response["totals"] = self.accumulate_total_values(results, extra_columns) return Response(response) @@ -63,8 +80,16 @@ def total_queryset(self): 
Q(object_class__isnull=False), ] + object_class_annotations = { + "major_code": F("object_class__major_object_class"), + "description": F("object_class__object_class_name"), + "code": F("object_class__object_class"), + "id": Cast(Min("object_class_id"), TextField()), + "major_description": F("object_class__major_object_class_name"), + } + annotations = { - **shared_object_class_annotations(), + **object_class_annotations, "obligation": Coalesce( Sum( Case( @@ -101,44 +126,56 @@ def total_queryset(self): .values(*annotations.keys()) ) - @property - def award_queryset(self): - filters = [ - self.is_in_provided_def_codes, - Q(object_class__isnull=False), - self.all_closed_defc_submissions, - ] - - annotations = { - **shared_object_class_annotations(), - "obligation": Coalesce(Sum("transaction_obligated_amount"), 0), - "outlay": Coalesce( - Sum( - Case( - When(self.final_period_submission_query_filters, then=F("gross_outlay_amount_by_award_cpe")), - default=Value(0), - ) - ), - 0, - ), - "award_count": self.unique_file_d_award_count(), + def build_elasticsearch_result(self, info_buckets: List[dict]) -> List[dict]: + temp_results = {} + child_results = [] + for bucket in info_buckets: + child = self._build_child_json_result(bucket) + child_results.append(child) + for child in child_results: + result = self._build_json_result(child) + child.pop("parent_data") + if result["code"] in temp_results.keys(): + temp_results[result["code"]] = { + "id": result["id"], + "code": result["code"], + "description": result["description"], + "award_count": temp_results[result["code"]]["award_count"] + result["award_count"], + # the count of distinct awards contributing to the totals + "obligation": temp_results[result["code"]]["obligation"] + result["obligation"], + "outlay": temp_results[result["code"]]["outlay"] + result["outlay"], + "children": temp_results[result["code"]]["children"] + result["children"], + } + else: + temp_results[result["code"]] = result + results = [x for x in 
temp_results.values()] + return results + + def _build_json_result(self, child): + return { + "id": str(child["parent_data"][1]), + "code": child["parent_data"][1], + "description": child["parent_data"][0], + "award_count": child["award_count"], + # the count of distinct awards contributing to the totals + "obligation": child["obligation"], + "outlay": child["outlay"], + "children": [child], } - # Assuming it is more performant to fetch all rows once rather than - # run a count query and fetch only a page's worth of results - return ( - FinancialAccountsByAwards.objects.filter(*filters) - .values("object_class__major_object_class", "object_class__major_object_class_name") - .annotate(**annotations) - .values(*annotations.keys()) - ) - - -def shared_object_class_annotations(): - return { - "major_code": F("object_class__major_object_class"), - "description": F("object_class__object_class_name"), - "code": F("object_class__object_class"), - "id": Cast(Min("object_class_id"), TextField()), - "major_description": F("object_class__major_object_class_name"), - } + def _build_child_json_result(self, bucket: dict): + return { + "id": str(bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["object_class_id"]), + "code": bucket["key"], + "description": bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["object_class_name"], + # the count of distinct awards contributing to the totals + "award_count": int(bucket["count_awards_by_dim"]["award_count"]["value"]), + **{ + key: Decimal(bucket.get(f"sum_{val}", {"value": 0})["value"]) + for key, val in self.nested_nonzero_fields.items() + }, + "parent_data": [ + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["major_object_class_name"], + bucket["dim_metadata"]["hits"]["hits"][0]["_source"]["major_object_class"], + ], + } diff --git a/usaspending_api/download/helpers/download_annotation_functions.py b/usaspending_api/download/helpers/download_annotation_functions.py index 92889d4c20..014cca58e7 100644 --- 
a/usaspending_api/download/helpers/download_annotation_functions.py +++ b/usaspending_api/download/helpers/download_annotation_functions.py @@ -23,6 +23,7 @@ filter_by_latest_closed_periods, final_submissions_for_all_fy, ) +from usaspending_api.download.filestreaming import NAMING_CONFLICT_DISCRIMINATOR from usaspending_api.settings import HOST @@ -190,7 +191,8 @@ def universal_award_matview_annotations(): "usaspending_permalink": ConcatAll( Value(AWARD_URL), Func(F("award__generated_unique_award_id"), function="urlencode"), Value("/") ), - "disaster_emergency_fund_codes": Subquery( + "disaster_emergency_fund_codes" + + NAMING_CONFLICT_DISCRIMINATOR: Subquery( FinancialAccountsByAwards.objects.filter(filter_limit_to_closed_periods(), award_id=OuterRef("award_id")) .annotate( value=ExpressionWrapper( @@ -299,7 +301,8 @@ def idv_order_annotations(): "usaspending_permalink": ConcatAll( Value(AWARD_URL), Func(F("generated_unique_award_id"), function="urlencode"), Value("/") ), - "disaster_emergency_fund_codes": Subquery( + "disaster_emergency_fund_codes" + + NAMING_CONFLICT_DISCRIMINATOR: Subquery( FinancialAccountsByAwards.objects.filter(filter_limit_to_closed_periods(), award_id=OuterRef("id")) .annotate( value=ExpressionWrapper( diff --git a/usaspending_api/download/v2/download_column_historical_lookups.py b/usaspending_api/download/v2/download_column_historical_lookups.py index 3a30a1bf57..2cf95487be 100644 --- a/usaspending_api/download/v2/download_column_historical_lookups.py +++ b/usaspending_api/download/v2/download_column_historical_lookups.py @@ -25,7 +25,10 @@ ("parent_award_agency_id", "award__latest_transaction__contract_data__referenced_idv_agency_iden"), ("parent_award_agency_name", "award__latest_transaction__contract_data__referenced_idv_agency_desc"), ("parent_award_id_piid", "award__parent_award_piid"), - ("disaster_emergency_fund_codes", None), # Annotation is used to create this column + ( + "disaster_emergency_fund_codes" + 
NAMING_CONFLICT_DISCRIMINATOR, + None, + ), # Annotation is used to create this column ("outlayed_amount_funded_by_COVID-19_supplementals", None), # Annotation is used to create this column ("obligated_amount_funded_by_COVID-19_supplementals", None), # Annotation is used to create this column ( @@ -601,7 +604,10 @@ ("award_id_fain", "award__fain"), ("award_id_uri", "award__uri"), ("sai_number", "award__latest_transaction__assistance_data__sai_number"), - ("disaster_emergency_fund_codes", None), # Annotation is used to create this column + ( + "disaster_emergency_fund_codes" + NAMING_CONFLICT_DISCRIMINATOR, + None, + ), # Annotation is used to create this column ("outlayed_amount_funded_by_COVID-19_supplementals", None), # Annotation is used to create this column ("obligated_amount_funded_by_COVID-19_supplementals", None), # Annotation is used to create this column ( diff --git a/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py b/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py index 99ecf978a1..c72fdc23e2 100644 --- a/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py +++ b/usaspending_api/etl/elasticsearch_loader_helpers/transform_data.py @@ -35,21 +35,27 @@ def transform_covid19_faba_data(worker: TaskSpec, records: List[dict]) -> List[d es_id_field = record[worker.field_for_es_id] disinct_award_key = record.pop("financial_account_distinct_award_key") award_id = record.pop("award_id") - award_type = record.pop("award_type") + award_type = record.pop("type") generated_unique_award_id = record.pop("generated_unique_award_id") total_loan_value = record.pop("total_loan_value") - temp_key = f"{disinct_award_key}|{award_id}|{award_type}|{generated_unique_award_id}|{total_loan_value}" + obligated_sum = record.get("transaction_obligated_amount") or 0 # record value for key may be None + outlay_sum = record.get("gross_outlay_amount_by_award_cpe") or 0 # record value for key may be None + temp_key = disinct_award_key if 
temp_key not in results: results[temp_key] = { "financial_account_distinct_award_key": disinct_award_key, "award_id": award_id, - "award_type": award_type, + "type": award_type, "generated_unique_award_id": generated_unique_award_id, "total_loan_value": total_loan_value, "financial_accounts_by_award": list(), + "obligated_sum": 0, + "outlay_sum": 0, "_id": es_id_field, } - + results[temp_key]["obligated_sum"] += obligated_sum + if record.get("is_final_balances_for_fy"): + results[temp_key]["outlay_sum"] += outlay_sum results[temp_key]["financial_accounts_by_award"].append(record) if len(results) != len(records): diff --git a/usaspending_api/etl/es_covid19_faba_template.json b/usaspending_api/etl/es_covid19_faba_template.json index e8b2b3619d..b583e71118 100644 --- a/usaspending_api/etl/es_covid19_faba_template.json +++ b/usaspending_api/etl/es_covid19_faba_template.json @@ -14,7 +14,7 @@ "null_value": "NULL", "type": "keyword" }, - "award_type": { + "type": { "null_value": "NULL", "type": "keyword" }, @@ -343,6 +343,14 @@ "total_loan_value": { "scaling_factor": 100, "type": "scaled_float" + }, + "obligated_sum": { + "scaling_factor": 100, + "type": "scaled_float" + }, + "outlay_sum": { + "scaling_factor": 100, + "type": "scaled_float" } } } diff --git a/usaspending_api/reporting/__init__.py b/usaspending_api/reporting/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/usaspending_api/reporting/apps.py b/usaspending_api/reporting/apps.py new file mode 100644 index 0000000000..73a4e42e42 --- /dev/null +++ b/usaspending_api/reporting/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class ReportingConfig(AppConfig): + name = "reporting" diff --git a/usaspending_api/reporting/management/commands/reporting_placeholder.py b/usaspending_api/reporting/management/commands/reporting_placeholder.py new file mode 100644 index 0000000000..fa1542671e --- /dev/null +++ b/usaspending_api/reporting/management/commands/reporting_placeholder.py 
@@ -0,0 +1,8 @@ +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + """ Dummy command. Remove after creating a command in this folder""" + + def handle(self, *args, **options): + print("Success") diff --git a/usaspending_api/reporting/migrations/__init__.py b/usaspending_api/reporting/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/usaspending_api/reporting/models.py b/usaspending_api/reporting/models.py new file mode 100644 index 0000000000..6b20219993 --- /dev/null +++ b/usaspending_api/reporting/models.py @@ -0,0 +1 @@ +# Create your models here. diff --git a/usaspending_api/reporting/tests/integration/__init__.py b/usaspending_api/reporting/tests/integration/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/usaspending_api/reporting/tests/unit/__init__.py b/usaspending_api/reporting/tests/unit/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/usaspending_api/reporting/v2/urls.py b/usaspending_api/reporting/v2/urls.py new file mode 100644 index 0000000000..183687a38f --- /dev/null +++ b/usaspending_api/reporting/v2/urls.py @@ -0,0 +1,6 @@ +from django.conf.urls import url +from usaspending_api.reporting.v2.views.placeholder import Placeholder + +urlpatterns = [ + url(r"^placeholder/$", Placeholder.as_view()), +] diff --git a/usaspending_api/reporting/v2/views/__init__.py b/usaspending_api/reporting/v2/views/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/usaspending_api/reporting/v2/views/placeholder.py b/usaspending_api/reporting/v2/views/placeholder.py new file mode 100644 index 0000000000..9c4c3101f0 --- /dev/null +++ b/usaspending_api/reporting/v2/views/placeholder.py @@ -0,0 +1,11 @@ +from rest_framework.response import Response +from rest_framework.views import APIView + + +class Placeholder(APIView): + """Placeholder""" + + endpoint_doc = "usaspending_api/api_contracts/contracts/v2/reporting/placeholder.md" 
+ + def get(self, request): + return Response({"status": "success"}) diff --git a/usaspending_api/search/filters/elasticsearch/filter.py b/usaspending_api/search/filters/elasticsearch/filter.py index 20483c2cf7..44fa448b7b 100644 --- a/usaspending_api/search/filters/elasticsearch/filter.py +++ b/usaspending_api/search/filters/elasticsearch/filter.py @@ -8,6 +8,7 @@ class _QueryType(Enum): TRANSACTIONS = "transactions" AWARDS = "awards" + ACCOUNTS = "accounts" class _Filter(metaclass=ABCMeta): @@ -18,17 +19,19 @@ class _Filter(metaclass=ABCMeta): underscore_name = None @classmethod - def generate_query(cls, filter_values: Union[str, list, dict], query_type: _QueryType) -> dict: + def generate_query( + cls, filter_values: Union[str, list, dict], query_type: _QueryType, nested_path: str = "" + ) -> dict: if filter_values is None: raise InvalidParameterException(f"Invalid filter: {cls.underscore_name} has null as its value.") - return cls.generate_elasticsearch_query(filter_values, query_type) + return cls.generate_elasticsearch_query(filter_values, query_type, nested_path) @classmethod @abstractmethod def generate_elasticsearch_query( - cls, filter_values: Union[str, list, dict], query_type: _QueryType + cls, filter_values: Union[str, list, dict], query_type: _QueryType, nested_path: str = "" ) -> Union[ES_Q, List[ES_Q]]: """ Returns a Q object used to query Elasticsearch. 
""" pass diff --git a/usaspending_api/search/filters/elasticsearch/naics.py b/usaspending_api/search/filters/elasticsearch/naics.py index f74eb2936a..c1fb1f0719 100644 --- a/usaspending_api/search/filters/elasticsearch/naics.py +++ b/usaspending_api/search/filters/elasticsearch/naics.py @@ -8,7 +8,7 @@ class NaicsCodes(_Filter, HierarchicalFilter): underscore_name = "naics_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values, query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query(cls, filter_values, query_type: _QueryType, nested_path: str = "") -> ES_Q: # legacy functionality permits sending a single list of naics codes, which is treated as the required list if isinstance(filter_values, list): require = [cls.naics_code_to_naics_code_path(str(code)) for code in filter_values] diff --git a/usaspending_api/search/filters/elasticsearch/psc.py b/usaspending_api/search/filters/elasticsearch/psc.py index bc9bc50c40..4b789594c8 100644 --- a/usaspending_api/search/filters/elasticsearch/psc.py +++ b/usaspending_api/search/filters/elasticsearch/psc.py @@ -6,7 +6,7 @@ class PSCCodes(PSCCodesMixin, _Filter, HierarchicalFilter): @classmethod - def generate_elasticsearch_query(cls, filter_values, query_type: _QueryType) -> ES_Q: + def generate_elasticsearch_query(cls, filter_values, query_type: _QueryType, nested_path: str = "") -> ES_Q: cls.validate_filter_values(filter_values) require, exclude = cls.split_filter_values(filter_values) require = cls.handle_tier1_names(require) diff --git a/usaspending_api/search/filters/elasticsearch/tas.py b/usaspending_api/search/filters/elasticsearch/tas.py index dd56f5829c..26086ffd9d 100644 --- a/usaspending_api/search/filters/elasticsearch/tas.py +++ b/usaspending_api/search/filters/elasticsearch/tas.py @@ -11,7 +11,7 @@ class TasCodes(_Filter, HierarchicalFilter): underscore_name = "tas_codes" @classmethod - def generate_elasticsearch_query(cls, filter_values, query_type: _QueryType) -> ES_Q: + def 
generate_elasticsearch_query(cls, filter_values, query_type: _QueryType, nested_path: str = "") -> ES_Q: if isinstance(filter_values, list): # This is a legacy usage, and will be dealt with by the other filter return TreasuryAccounts.generate_elasticsearch_query(filter_values, query_type) diff --git a/usaspending_api/search/models/base_award_search.py b/usaspending_api/search/models/base_award_search.py index 3f635ef1df..c49927794c 100644 --- a/usaspending_api/search/models/base_award_search.py +++ b/usaspending_api/search/models/base_award_search.py @@ -19,6 +19,9 @@ class BaseAwardSearchModel(models.Model): category = models.TextField() type = models.TextField() type_description = models.TextField() + generated_unique_award_id = models.TextField() + display_award_id = models.TextField() + update_date = models.DateField() piid = models.TextField() fain = models.TextField() uri = models.TextField() @@ -31,6 +34,7 @@ class BaseAwardSearchModel(models.Model): recipient_hash = models.UUIDField() recipient_name = models.TextField() + recipient_agg_key = models.TextField() recipient_unique_id = models.TextField() parent_recipient_unique_id = models.TextField() business_categories = ArrayField(models.TextField(), default=list) @@ -58,6 +62,8 @@ class BaseAwardSearchModel(models.Model): funding_toptier_agency_code = models.TextField() awarding_subtier_agency_code = models.TextField() funding_subtier_agency_code = models.TextField() + funding_toptier_agency_agg_key = models.TextField() + funding_subtier_agency_agg_key = models.TextField() recipient_location_country_code = models.TextField() recipient_location_country_name = models.TextField() @@ -78,6 +84,7 @@ class BaseAwardSearchModel(models.Model): pop_congressional_code = models.TextField() pop_city_name = models.TextField() + cfda_program_title = models.TextField() cfda_number = models.TextField() sai_number = models.TextField() type_of_contract_pricing = models.TextField() @@ -89,5 +96,19 @@ class 
BaseAwardSearchModel(models.Model): naics_code = models.TextField() naics_description = models.TextField() + recipient_location_county_agg_key = models.TextField() + recipient_location_congressional_agg_key = models.TextField() + recipient_location_state_agg_key = models.TextField() + + pop_county_agg_key = models.TextField() + pop_congressional_agg_key = models.TextField() + pop_state_agg_key = models.TextField() + + tas_paths = ArrayField(models.TextField(), default=list) + tas_components = ArrayField(models.TextField(), default=list) + disaster_emergency_fund_codes = ArrayField(models.TextField(), default=list) + total_covid_outlay = models.DecimalField(max_digits=23, decimal_places=2) + total_covid_obligation = models.DecimalField(max_digits=23, decimal_places=2) + class Meta: abstract = True diff --git a/usaspending_api/search/tests/data/utilities.py b/usaspending_api/search/tests/data/utilities.py index 51e9a276b2..daee0fd68e 100644 --- a/usaspending_api/search/tests/data/utilities.py +++ b/usaspending_api/search/tests/data/utilities.py @@ -1,13 +1,20 @@ +from builtins import Exception + from django.conf import settings def setup_elasticsearch_test(monkeypatch, index_fixture, **options): - if index_fixture.index_type == "awards": + if index_fixture.index_type == "award": search_wrapper = "AwardSearch" query_alias = settings.ES_AWARDS_QUERY_ALIAS_PREFIX - else: + elif index_fixture.index_type == "covid19_faba": + search_wrapper = "AccountSearch" + query_alias = settings.ES_COVID19_FABA_QUERY_ALIAS_PREFIX + elif index_fixture.index_type == "transaction": search_wrapper = "TransactionSearch" query_alias = settings.ES_TRANSACTIONS_QUERY_ALIAS_PREFIX + else: + raise Exception("Invalid index type") monkeypatch.setattr( f"usaspending_api.common.elasticsearch.search_wrappers.{search_wrapper}._index_name", query_alias, diff --git a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_psc.py 
b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_psc.py index c2d5962fec..163cd5de5c 100644 --- a/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_psc.py +++ b/usaspending_api/search/tests/integration/spending_by_category/test_spending_by_psc.py @@ -41,7 +41,6 @@ def test_correct_response(client, monkeypatch, elasticsearch_transaction_index, ], "messages": [get_time_period_message()], } - print(resp.json()) assert resp.status_code == status.HTTP_200_OK, "Failed to return 200 Response" assert resp.json() == expected_response diff --git a/usaspending_api/search/tests/integration/test_spending_over_time.py b/usaspending_api/search/tests/integration/test_spending_over_time.py index 87d4332785..65f5413f87 100644 --- a/usaspending_api/search/tests/integration/test_spending_over_time.py +++ b/usaspending_api/search/tests/integration/test_spending_over_time.py @@ -1007,5 +1007,4 @@ def test_defc_date_filter(client, monkeypatch, elasticsearch_transaction_index): data=json.dumps({"group": "fiscal_year", "filters": {"def_codes": ["L"]}}), ) assert resp.status_code == status.HTTP_200_OK - print(resp.json().get("results")) assert {"aggregated_amount": 10, "time_period": {"fiscal_year": "2020"}} in resp.json().get("results") diff --git a/usaspending_api/search/v2/elasticsearch_helper.py b/usaspending_api/search/v2/elasticsearch_helper.py index e40049c91e..8fe7f2f829 100644 --- a/usaspending_api/search/v2/elasticsearch_helper.py +++ b/usaspending_api/search/v2/elasticsearch_helper.py @@ -10,7 +10,7 @@ INDEX_ALIASES_TO_AWARD_TYPES, ) from usaspending_api.common.data_classes import Pagination -from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch, AwardSearch +from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch, AwardSearch, AccountSearch from usaspending_api.common.query_with_filters import QueryWithFilters from usaspending_api.search.v2.es_sanitization import 
es_minimal_sanitize @@ -177,6 +177,27 @@ def get_number_of_unique_terms_for_awards(filter_query: ES_Q, field: str) -> int return _get_number_of_unique_terms(AwardSearch().filter(filter_query), field) +def get_number_of_unique_terms_for_accounts(filter_query: ES_Q, field: str, is_nested: bool = True) -> int: + """ + Returns the count for a specific filter_query. + NOTE: Counts below the precision_threshold are expected to be close to accurate (per the Elasticsearch + documentation). Since aggregations do not support more than 10k buckets this value is hard coded to + 11k to ensure that endpoints using Elasticsearch do not cross the 10k threshold. Elasticsearch endpoints + should be implemented with a safeguard in case this count is above 10k. + """ + search = AccountSearch().filter(filter_query) + cardinality_aggregation = A("cardinality", field=field, precision_threshold=11000) + if is_nested: + nested_agg = A("nested", path="financial_accounts_by_award") + nested_agg.metric("field_count", cardinality_aggregation) + search.aggs.metric("financial_account_agg", nested_agg) + else: + search.aggs.metric("financial_account_agg", cardinality_aggregation) + response = search.handle_execute() + response_dict = response.aggs.to_dict() + return response_dict.get("financial_account_agg", {}).get("field_count", {"value": 0})["value"] + + def _get_number_of_unique_terms(search, field: str) -> int: """ Returns the count for a specific filter_query. 
diff --git a/usaspending_api/settings.py b/usaspending_api/settings.py index 54680dfa83..edd03b6ec2 100644 --- a/usaspending_api/settings.py +++ b/usaspending_api/settings.py @@ -169,6 +169,7 @@ "usaspending_api.financial_activities", "usaspending_api.recipient", "usaspending_api.references", + "usaspending_api.reporting", "usaspending_api.search", "usaspending_api.submissions", "usaspending_api.transactions", diff --git a/usaspending_api/urls.py b/usaspending_api/urls.py index a79a7a579d..071bc312e7 100644 --- a/usaspending_api/urls.py +++ b/usaspending_api/urls.py @@ -47,6 +47,7 @@ url(r"^api/v2/idvs/", include("usaspending_api.idvs.v2.urls_idvs")), url(r"^api/v2/recipient/", include("usaspending_api.recipient.v2.urls")), url(r"^api/v2/references/", include("usaspending_api.references.v2.urls")), + url(r"^api/v2/reporting/", include("usaspending_api.reporting.v2.urls")), url(r"^api/v2/search/", include("usaspending_api.search.v2.urls")), url(r"^api/v2/spending/", include("usaspending_api.spending_explorer.v2.urls")), url(r"^api/v2/subawards/", include("usaspending_api.awards.v2.urls_subawards")),