From b1205b889cab2feeccbd9d4f8b5ec8fe7a28cf04 Mon Sep 17 00:00:00 2001 From: Takehiro Suzuki Date: Thu, 4 Jul 2024 03:30:09 +0900 Subject: [PATCH] feat(opensearch serverless): analyzer (#537) feat(oss): analyzer --------- Co-authored-by: Takehiro Suzuki Co-authored-by: Alain Krok --- ...pensearchserverless.CharacterFilterType.md | 28 ++ .../opensearchserverless.TokenFilterType.md | 59 +++ .../opensearchserverless.TokenizerType.md | 24 ++ .../opensearch_vectorindex.Analyzer.md | 39 ++ ...opensearch_vectorindex.VectorIndexProps.md | 15 + apidocs/modules/opensearch_vectorindex.md | 1 + apidocs/modules/opensearchserverless.md | 3 + .../custom_resources/opensearch_index.py | 76 +++- src/cdk-lib/bedrock/knowledge-base.ts | 335 ++++++++++-------- src/cdk-lib/opensearch-vectorindex/README.md | 78 ++-- .../opensearch-vectorindex/vector-index.ts | 148 +++++--- .../opensearchserverless/analysis-plugins.ts | 34 ++ src/cdk-lib/opensearchserverless/index.ts | 3 +- .../vector-index.test.ts | 114 +++++- 14 files changed, 712 insertions(+), 245 deletions(-) create mode 100644 apidocs/enums/opensearchserverless.CharacterFilterType.md create mode 100644 apidocs/enums/opensearchserverless.TokenFilterType.md create mode 100644 apidocs/enums/opensearchserverless.TokenizerType.md create mode 100644 apidocs/interfaces/opensearch_vectorindex.Analyzer.md create mode 100644 src/cdk-lib/opensearchserverless/analysis-plugins.ts diff --git a/apidocs/enums/opensearchserverless.CharacterFilterType.md b/apidocs/enums/opensearchserverless.CharacterFilterType.md new file mode 100644 index 00000000..c738ae13 --- /dev/null +++ b/apidocs/enums/opensearchserverless.CharacterFilterType.md @@ -0,0 +1,28 @@ +[@cdklabs/generative-ai-cdk-constructs](../README.md) / [opensearchserverless](../modules/opensearchserverless.md) / CharacterFilterType + +# Enumeration: CharacterFilterType + +[opensearchserverless](../modules/opensearchserverless.md).CharacterFilterType + +Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + with the License. A copy of the License is located at + + http://www.apache.org/licenses/LICENSE-2.0 + + or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + and limitations under the License. + +## Table of contents + +### Enumeration Members + +- [ICU\_NORMALIZER](opensearchserverless.CharacterFilterType.md#icu_normalizer) + +## Enumeration Members + +### ICU\_NORMALIZER + +• **ICU\_NORMALIZER** = ``"icu_normalizer"`` diff --git a/apidocs/enums/opensearchserverless.TokenFilterType.md b/apidocs/enums/opensearchserverless.TokenFilterType.md new file mode 100644 index 00000000..0e6279dd --- /dev/null +++ b/apidocs/enums/opensearchserverless.TokenFilterType.md @@ -0,0 +1,59 @@ +[@cdklabs/generative-ai-cdk-constructs](../README.md) / [opensearchserverless](../modules/opensearchserverless.md) / TokenFilterType + +# Enumeration: TokenFilterType + +[opensearchserverless](../modules/opensearchserverless.md).TokenFilterType + +## Table of contents + +### Enumeration Members + +- [CJK\_WIDTH](opensearchserverless.TokenFilterType.md#cjk_width) +- [ICU\_FOLDING](opensearchserverless.TokenFilterType.md#icu_folding) +- [JA\_STOP](opensearchserverless.TokenFilterType.md#ja_stop) +- [KUROMOJI\_BASEFORM](opensearchserverless.TokenFilterType.md#kuromoji_baseform) +- [KUROMOJI\_PART\_OF\_SPEECH](opensearchserverless.TokenFilterType.md#kuromoji_part_of_speech) +- [KUROMOJI\_STEMMER](opensearchserverless.TokenFilterType.md#kuromoji_stemmer) +- [LOWERCASE](opensearchserverless.TokenFilterType.md#lowercase) + +## Enumeration Members + +### CJK\_WIDTH + +• **CJK\_WIDTH** = ``"cjk_width"`` + +___ + +### ICU\_FOLDING + +• **ICU\_FOLDING** = ``"icu_folding"`` + 
+___ + +### JA\_STOP + +• **JA\_STOP** = ``"ja_stop"`` + +___ + +### KUROMOJI\_BASEFORM + +• **KUROMOJI\_BASEFORM** = ``"kuromoji_baseform"`` + +___ + +### KUROMOJI\_PART\_OF\_SPEECH + +• **KUROMOJI\_PART\_OF\_SPEECH** = ``"kuromoji_part_of_speech"`` + +___ + +### KUROMOJI\_STEMMER + +• **KUROMOJI\_STEMMER** = ``"kuromoji_stemmer"`` + +___ + +### LOWERCASE + +• **LOWERCASE** = ``"lowercase"`` diff --git a/apidocs/enums/opensearchserverless.TokenizerType.md b/apidocs/enums/opensearchserverless.TokenizerType.md new file mode 100644 index 00000000..32327e64 --- /dev/null +++ b/apidocs/enums/opensearchserverless.TokenizerType.md @@ -0,0 +1,24 @@ +[@cdklabs/generative-ai-cdk-constructs](../README.md) / [opensearchserverless](../modules/opensearchserverless.md) / TokenizerType + +# Enumeration: TokenizerType + +[opensearchserverless](../modules/opensearchserverless.md).TokenizerType + +## Table of contents + +### Enumeration Members + +- [ICU\_TOKENIZER](opensearchserverless.TokenizerType.md#icu_tokenizer) +- [KUROMOJI\_TOKENIZER](opensearchserverless.TokenizerType.md#kuromoji_tokenizer) + +## Enumeration Members + +### ICU\_TOKENIZER + +• **ICU\_TOKENIZER** = ``"icu_tokenizer"`` + +___ + +### KUROMOJI\_TOKENIZER + +• **KUROMOJI\_TOKENIZER** = ``"kuromoji_tokenizer"`` diff --git a/apidocs/interfaces/opensearch_vectorindex.Analyzer.md b/apidocs/interfaces/opensearch_vectorindex.Analyzer.md new file mode 100644 index 00000000..531f1ee4 --- /dev/null +++ b/apidocs/interfaces/opensearch_vectorindex.Analyzer.md @@ -0,0 +1,39 @@ +[@cdklabs/generative-ai-cdk-constructs](../README.md) / [opensearch\_vectorindex](../modules/opensearch_vectorindex.md) / Analyzer + +# Interface: Analyzer + +[opensearch\_vectorindex](../modules/opensearch_vectorindex.md).Analyzer + +Properties for the Analyzer. 
+ +## Table of contents + +### Properties + +- [characterFilters](opensearch_vectorindex.Analyzer.md#characterfilters) +- [tokenFilters](opensearch_vectorindex.Analyzer.md#tokenfilters) +- [tokenizer](opensearch_vectorindex.Analyzer.md#tokenizer) + +## Properties + +### characterFilters + +• `Readonly` **characterFilters**: [`ICU_NORMALIZER`](../enums/opensearchserverless.CharacterFilterType.md#icu_normalizer)[] + +The character filters to use. + +___ + +### tokenFilters + +• `Readonly` **tokenFilters**: [`TokenFilterType`](../enums/opensearchserverless.TokenFilterType.md)[] + +The token filters to use. + +___ + +### tokenizer + +• `Readonly` **tokenizer**: [`TokenizerType`](../enums/opensearchserverless.TokenizerType.md) + +The tokenizer to use. diff --git a/apidocs/interfaces/opensearch_vectorindex.VectorIndexProps.md b/apidocs/interfaces/opensearch_vectorindex.VectorIndexProps.md index 68bac7dc..dd1fc6c5 100644 --- a/apidocs/interfaces/opensearch_vectorindex.VectorIndexProps.md +++ b/apidocs/interfaces/opensearch_vectorindex.VectorIndexProps.md @@ -10,6 +10,7 @@ Properties for the VectorIndex. ### Properties +- [analyzer](opensearch_vectorindex.VectorIndexProps.md#analyzer) - [collection](opensearch_vectorindex.VectorIndexProps.md#collection) - [indexName](opensearch_vectorindex.VectorIndexProps.md#indexname) - [mappings](opensearch_vectorindex.VectorIndexProps.md#mappings) @@ -18,6 +19,20 @@ Properties for the VectorIndex. ## Properties +### analyzer + +• `Optional` `Readonly` **analyzer**: [`Analyzer`](opensearch_vectorindex.Analyzer.md) + +The analyzer to use. + +**`Default`** + +```ts +- No analyzer. 
+``` + +___ + ### collection • `Readonly` **collection**: [`VectorCollection`](../classes/opensearchserverless.VectorCollection.md) diff --git a/apidocs/modules/opensearch_vectorindex.md b/apidocs/modules/opensearch_vectorindex.md index b1096e84..6cbdb1ee 100644 --- a/apidocs/modules/opensearch_vectorindex.md +++ b/apidocs/modules/opensearch_vectorindex.md @@ -10,6 +10,7 @@ ### Interfaces +- [Analyzer](../interfaces/opensearch_vectorindex.Analyzer.md) - [MetadataManagementFieldProps](../interfaces/opensearch_vectorindex.MetadataManagementFieldProps.md) - [VectorIndexProps](../interfaces/opensearch_vectorindex.VectorIndexProps.md) diff --git a/apidocs/modules/opensearchserverless.md b/apidocs/modules/opensearchserverless.md index 694f650a..3a92a08a 100644 --- a/apidocs/modules/opensearchserverless.md +++ b/apidocs/modules/opensearchserverless.md @@ -6,6 +6,9 @@ ### Enumerations +- [CharacterFilterType](../enums/opensearchserverless.CharacterFilterType.md) +- [TokenFilterType](../enums/opensearchserverless.TokenFilterType.md) +- [TokenizerType](../enums/opensearchserverless.TokenizerType.md) - [VectorCollectionStandbyReplicas](../enums/opensearchserverless.VectorCollectionStandbyReplicas.md) ### Classes diff --git a/lambda/opensearch-serverless-custom-resources/custom_resources/opensearch_index.py b/lambda/opensearch-serverless-custom-resources/custom_resources/opensearch_index.py index 7775a90f..b06aebd4 100644 --- a/lambda/opensearch-serverless-custom-resources/custom_resources/opensearch_index.py +++ b/lambda/opensearch-serverless-custom-resources/custom_resources/opensearch_index.py @@ -11,16 +11,19 @@ # and limitations under the License. 
# +import logging +import os +import time +from typing import Sequence, TypedDict + +import boto3 +from custom_resources.cr_types import CustomResourceRequest, CustomResourceResponse from opensearchpy import ( + AuthorizationException, + AWSV4SignerAuth, OpenSearch, RequestsHttpConnection, - AWSV4SignerAuth, - AuthorizationException, ) -import boto3 -import logging -import os -import time from tenacity import ( retry, retry_if_exception_type, @@ -28,10 +31,6 @@ wait_exponential_jitter, ) -from typing import TypedDict, Sequence - -from custom_resources.cr_types import CustomResourceRequest, CustomResourceResponse - LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") logger = logging.getLogger(__name__) @@ -44,12 +43,19 @@ class MetadataManagementField(TypedDict): Filterable: bool +class AnalyzerProperties(TypedDict): + CharacterFilters: Sequence[str] + Tokenizer: str + TokenFilters: Sequence[str] + + class VectorIndexProperties(TypedDict): Endpoint: str IndexName: str VectorField: str Dimensions: int | str MetadataManagement: Sequence[MetadataManagementField] + Analyzer: AnalyzerProperties | None def validate_event(event: CustomResourceRequest[VectorIndexProperties]) -> bool: @@ -70,6 +76,14 @@ def validate_event(event: CustomResourceRequest[VectorIndexProperties]) -> bool: raise ValueError("MetadataManagement is required") if event["RequestType"] == "Update" and event["PhysicalResourceId"] is None: raise ValueError("PhysicalResourceId is required") + if event["ResourceProperties"].get("Analyzer") is not None: + analyzer = event["ResourceProperties"]["Analyzer"] + if analyzer["CharacterFilters"] is None: + raise ValueError("CharacterFilters is required") + if analyzer["Tokenizer"] is None: + raise ValueError("Tokenizer is required") + if analyzer["TokenFilters"] is None: + raise ValueError("TokenFilters is required") elif event["RequestType"] == "Delete": if event["PhysicalResourceId"] is None: raise ValueError("PhysicalResourceId is required") @@ -139,18 +153,39 @@ def 
create_mapping( return mapping -def create_index(client: OpenSearch, index_name: str, mapping: dict[str, str]) -> None: +def create_setting(analyzer: AnalyzerProperties | None) -> dict: + setting = { + "index": { + "number_of_shards": "2", + "knn.algo_param": {"ef_search": "512"}, + "knn": "true", + }, + } + if analyzer: + setting["analysis"] = { + "analyzer": { + "custom_analyzer": { + "type": "custom", + "tokenizer": analyzer["Tokenizer"], + "char_filter": analyzer["CharacterFilters"], + "filter": analyzer["TokenFilters"], + } + } + } + + return setting + + +def create_index( + client: OpenSearch, index_name: str, mapping: dict[str, str], setting: dict[str, str] +) -> None: logger.debug(f"creating index {index_name}") + logger.debug(f"setting: {setting}") + logger.debug(f"mapping: {mapping}") client.indices.create( index_name, body={ - "settings": { - "index": { - "number_of_shards": "2", - "knn.algo_param": {"ef_search": "512"}, - "knn": "true", - } - }, + "settings": setting, "mappings": mapping, }, params={"wait_for_active_shards": "all"}, @@ -171,13 +206,15 @@ def handle_create( vector_field: str, dimensions: int, metadata_management: Sequence[MetadataManagementField], + analyzer: AnalyzerProperties | None, ): if client.indices.exists(index_name): raise ValueError(f"Index {index_name} already exists") try: mapping = create_mapping(vector_field, dimensions, metadata_management) - create_index(client, index_name, mapping) + setting = create_setting(analyzer) + create_index(client, index_name, mapping, setting) except Exception as e: logger.error(f"Error creating index {index_name}") logger.exception(e) @@ -211,6 +248,7 @@ def on_create( event["ResourceProperties"]["VectorField"], int(event["ResourceProperties"]["Dimensions"]), event["ResourceProperties"]["MetadataManagement"], + event["ResourceProperties"].get("Analyzer", None), ) return {"PhysicalResourceId": physical_id} diff --git a/src/cdk-lib/bedrock/knowledge-base.ts 
b/src/cdk-lib/bedrock/knowledge-base.ts index 4ad4792e..92898100 100644 --- a/src/cdk-lib/bedrock/knowledge-base.ts +++ b/src/cdk-lib/bedrock/knowledge-base.ts @@ -61,8 +61,11 @@ interface StorageConfiguration { * `AmazonAuroraVectorStore` or `AmazonAuroraDefaultVectorStore` * types. */ - vectorStore: VectorCollection | - PineconeVectorStore | AmazonAuroraDefaultVectorStore | AmazonAuroraVectorStore; + vectorStore: + | VectorCollection + | PineconeVectorStore + | AmazonAuroraDefaultVectorStore + | AmazonAuroraVectorStore; /** * The type of the vector store. @@ -156,8 +159,11 @@ export interface KnowledgeBaseProps { * * @default - A new OpenSearch Serverless vector collection is created. */ - readonly vectorStore?: VectorCollection | - PineconeVectorStore | AmazonAuroraVectorStore | AmazonAuroraDefaultVectorStore; + readonly vectorStore?: + | VectorCollection + | PineconeVectorStore + | AmazonAuroraVectorStore + | AmazonAuroraDefaultVectorStore; /** * The vector index for the OpenSearch Serverless backed knowledge base. @@ -206,8 +212,11 @@ export class KnowledgeBase extends Construct { /** * The vector store for the knowledge base. */ - public readonly vectorStore: VectorCollection | - PineconeVectorStore | AmazonAuroraVectorStore | AmazonAuroraDefaultVectorStore; + public readonly vectorStore: + | VectorCollection + | PineconeVectorStore + | AmazonAuroraVectorStore + | AmazonAuroraDefaultVectorStore; /** * A narrative instruction of the knowledge base. @@ -239,7 +248,6 @@ export class KnowledgeBase extends Construct { */ public readonly knowledgeBaseState: string; - /** * The type of the knowledge base. * @private @@ -251,25 +259,27 @@ export class KnowledgeBase extends Construct { this.instruction = props.instruction; const embeddingsModel = props.embeddingsModel; const indexName = props.indexName ?? 'bedrock-knowledge-base-default-index'; - const vectorField = props.vectorField ?? 
'bedrock-knowledge-base-default-vector'; + const vectorField = + props.vectorField ?? 'bedrock-knowledge-base-default-vector'; const textField = 'AMAZON_BEDROCK_TEXT_CHUNK'; const metadataField = 'AMAZON_BEDROCK_METADATA'; this.description = props.description ?? 'CDK deployed Knowledge base'; // even though this prop is optional, if no value is provided it will fail to deploy this.knowledgeBaseState = props.knowledgeBaseState ?? 'ENABLED'; - validateModel(embeddingsModel); - validateVectorIndex(props.vectorStore, props.vectorIndex, props.vectorField, props.indexName); + validateVectorIndex( + props.vectorStore, + props.vectorIndex, + props.vectorField, + props.indexName, + ); if (props.vectorIndex) { validateIndexParameters(props.vectorIndex, indexName, vectorField); } - this.name = props.name ?? generatePhysicalNameV2( - this, - 'KB', - { maxLength: 32 }); - + this.name = + props.name ?? generatePhysicalNameV2(this, 'KB', { maxLength: 32 }); if (props.existingRole) { this.role = props.existingRole; @@ -277,7 +287,8 @@ export class KnowledgeBase extends Construct { const roleName = generatePhysicalNameV2( this, 'AmazonBedrockExecutionRoleForKnowledgeBase', - { maxLength: 64 }); + { maxLength: 64 }, + ); this.role = new iam.Role(this, 'Role', { roleName: roleName, assumedBy: new iam.ServicePrincipal('bedrock.amazonaws.com'), @@ -302,10 +313,12 @@ export class KnowledgeBase extends Construct { }), ); - this.role.addToPolicy(new iam.PolicyStatement({ - actions: ['bedrock:InvokeModel'], - resources: [embeddingsModel.asArn(this)], - })); + this.role.addToPolicy( + new iam.PolicyStatement({ + actions: ['bedrock:InvokeModel'], + resources: [embeddingsModel.asArn(this)], + }), + ); } /** * Create the vector store if the vector store was provided by the user. 
@@ -317,25 +330,21 @@ export class KnowledgeBase extends Construct { vectorStore: this.vectorStore, vectorStoreType: this.vectorStoreType, } = this.handleOpenSearchCollection(props)); - } else if (props.vectorStore instanceof PineconeVectorStore) { ({ vectorStore: this.vectorStore, vectorStoreType: this.vectorStoreType, } = this.handlePineconeVectorStore(props)); - } else if (props.vectorStore instanceof AmazonAuroraVectorStore) { ({ vectorStore: this.vectorStore, vectorStoreType: this.vectorStoreType, } = this.handleAmazonAuroraVectorStore(props)); - } else if (props.vectorStore instanceof AmazonAuroraDefaultVectorStore) { ({ vectorStore: this.vectorStore, vectorStoreType: this.vectorStoreType, } = this.handleAmazonAuroraDefaultVectorStore(props)); - } else { ({ vectorStore: this.vectorStore, @@ -349,12 +358,12 @@ export class KnowledgeBase extends Construct { * other than OpenSearch Serverless. */ if (!(this.vectorStore instanceof VectorCollection)) { - this.role.addToPolicy(new iam.PolicyStatement({ - actions: ['secretsmanager:GetSecretValue'], - resources: [ - this.vectorStore.credentialsSecretArn, - ], - })); + this.role.addToPolicy( + new iam.PolicyStatement({ + actions: ['secretsmanager:GetSecretValue'], + resources: [this.vectorStore.credentialsSecretArn], + }), + ); } /** @@ -364,18 +373,20 @@ export class KnowledgeBase extends Construct { * of the knowledge base if we use Amazon Aurora as * a data source. 
*/ - if (this.vectorStore instanceof AmazonAuroraDefaultVectorStore || - this.vectorStore instanceof AmazonAuroraVectorStore) { - this.role.addToPolicy(new iam.PolicyStatement({ - actions: [ - 'rds-data:ExecuteStatement', - 'rds-data:BatchExecuteStatement', - 'rds:DescribeDBClusters', - ], - resources: [ - this.vectorStore.resourceArn, - ], - })); + if ( + this.vectorStore instanceof AmazonAuroraDefaultVectorStore || + this.vectorStore instanceof AmazonAuroraVectorStore + ) { + this.role.addToPolicy( + new iam.PolicyStatement({ + actions: [ + 'rds-data:ExecuteStatement', + 'rds-data:BatchExecuteStatement', + 'rds:DescribeDBClusters', + ], + resources: [this.vectorStore.resourceArn], + }), + ); } /** @@ -418,28 +429,39 @@ export class KnowledgeBase extends Construct { indexName: indexName, vectorStore: this.vectorStore, vectorStoreType: this.vectorStoreType, - vectorField: (this.vectorStore instanceof AmazonAuroraVectorStore) ? - this.vectorStore.vectorField : vectorField, - textField: (this.vectorStore instanceof AmazonAuroraVectorStore || this.vectorStore instanceof PineconeVectorStore) ? - this.vectorStore.textField : textField, - metadataField: (this.vectorStore instanceof AmazonAuroraVectorStore || this.vectorStore instanceof PineconeVectorStore) ? - this.vectorStore.metadataField : metadataField, + vectorField: + this.vectorStore instanceof AmazonAuroraVectorStore + ? this.vectorStore.vectorField + : vectorField, + textField: + this.vectorStore instanceof AmazonAuroraVectorStore || + this.vectorStore instanceof PineconeVectorStore + ? this.vectorStore.textField + : textField, + metadataField: + this.vectorStore instanceof AmazonAuroraVectorStore || + this.vectorStore instanceof PineconeVectorStore + ? 
this.vectorStore.metadataField + : metadataField, }; - - const knowledgeBase = new bedrock.CfnKnowledgeBase(this, 'MyCfnKnowledgeBase', { - knowledgeBaseConfiguration: { - type: 'VECTOR', - vectorKnowledgeBaseConfiguration: { - embeddingModelArn: embeddingsModel.asArn(this), + const knowledgeBase = new bedrock.CfnKnowledgeBase( + this, + 'MyCfnKnowledgeBase', + { + knowledgeBaseConfiguration: { + type: 'VECTOR', + vectorKnowledgeBaseConfiguration: { + embeddingModelArn: embeddingsModel.asArn(this), + }, }, + name: this.name, + roleArn: this.role.roleArn, + storageConfiguration: getStorageConfiguration(storageConfiguration), + description: props.description, + tags: props.tags, }, - name: this.name, - roleArn: this.role.roleArn, - storageConfiguration: getStorageConfiguration(storageConfiguration), - description: props.description, - tags: props.tags, - }); + ); this.knowledgeBaseInstance = knowledgeBase; @@ -455,45 +477,46 @@ export class KnowledgeBase extends Construct { * we are deploying Redis or Pinecone data sources */ //...(this.vectorStoreType === VectorStoreType.REDIS_ENTERPRISE_CLOUD || - ...(this.vectorStoreType === VectorStoreType.PINECONE ? - ['bedrock:AssociateThirdPartyKnowledgeBase'] : []), + ...(this.vectorStoreType === VectorStoreType.PINECONE + ? 
['bedrock:AssociateThirdPartyKnowledgeBase'] + : []), ], resources: ['*'], }), - new iam.PolicyStatement( - { - actions: [ - 'bedrock:UpdateKnowledgeBase', - 'bedrock:DeleteKnowledgeBase', - 'bedrock:TagResource', - ], - resources: [ - cdk.Stack.of(this).formatArn({ - service: 'bedrock', - resource: 'knowledge-base', - resourceName: '*', - arnFormat: cdk.ArnFormat.SLASH_RESOURCE_NAME, - }), - ], - }, - ), - new iam.PolicyStatement( - { - actions: ['iam:PassRole'], - resources: [this.role.roleArn], - }, - ), + new iam.PolicyStatement({ + actions: [ + 'bedrock:UpdateKnowledgeBase', + 'bedrock:DeleteKnowledgeBase', + 'bedrock:TagResource', + ], + resources: [ + cdk.Stack.of(this).formatArn({ + service: 'bedrock', + resource: 'knowledge-base', + resourceName: '*', + arnFormat: cdk.ArnFormat.SLASH_RESOURCE_NAME, + }), + ], + }), + new iam.PolicyStatement({ + actions: ['iam:PassRole'], + resources: [this.role.roleArn], + }), ], }); knowledgeBase.node.addDependency(this.role); knowledgeBase.node.addDependency(kbCRPolicy); - if (this.vectorStoreType === VectorStoreType.OPENSEARCH_SERVERLESS && - this.vectorIndex) { + if ( + this.vectorStoreType === VectorStoreType.OPENSEARCH_SERVERLESS && + this.vectorIndex + ) { knowledgeBase.node.addDependency(this.vectorIndex); } - if (this.vectorStoreType === VectorStoreType.AMAZON_AURORA && - this.vectorStore instanceof AmazonAuroraDefaultVectorStore) { + if ( + this.vectorStoreType === VectorStoreType.AMAZON_AURORA && + this.vectorStore instanceof AmazonAuroraDefaultVectorStore + ) { knowledgeBase.node.addDependency(this.vectorStore); } @@ -502,7 +525,8 @@ export class KnowledgeBase extends Construct { [ { id: 'AwsSolutions-IAM5', - reason: "Bedrock CreateKnowledgeBase can't be restricted by resource.", + reason: + "Bedrock CreateKnowledgeBase can't be restricted by resource.", }, ], true, @@ -510,7 +534,6 @@ export class KnowledgeBase extends Construct { this.knowledgeBaseArn = knowledgeBase.attrKnowledgeBaseArn; 
this.knowledgeBaseId = knowledgeBase.attrKnowledgeBaseId; - } /** @@ -520,12 +543,10 @@ export class KnowledgeBase extends Construct { * @returns The instance of VectorCollection, VectorStoreType. * @internal This is an internal core function and should not be called directly. */ - private handleOpenSearchCollection( - props: KnowledgeBaseProps, - ): { - vectorStore: VectorCollection; - vectorStoreType: VectorStoreType; - } { + private handleOpenSearchCollection(props: KnowledgeBaseProps): { + vectorStore: VectorCollection; + vectorStoreType: VectorStoreType; + } { const vectorStore = props.vectorStore as VectorCollection; vectorStore.grantDataAccess(this.role); return { @@ -534,20 +555,17 @@ export class KnowledgeBase extends Construct { }; } - /** - * Handle PineconeVectorStore type of VectorStore. - * - * @param props - The properties of the KnowledgeBase. - * @returns The instance of PineconeVectorStore, VectorStoreType. - * @internal This is an internal core function and should not be called directly. - */ - private handlePineconeVectorStore( - props: KnowledgeBaseProps, - ): { - vectorStore: PineconeVectorStore; - vectorStoreType: VectorStoreType; - } { + * Handle PineconeVectorStore type of VectorStore. + * + * @param props - The properties of the KnowledgeBase. + * @returns The instance of PineconeVectorStore, VectorStoreType. + * @internal This is an internal core function and should not be called directly. + */ + private handlePineconeVectorStore(props: KnowledgeBaseProps): { + vectorStore: PineconeVectorStore; + vectorStoreType: VectorStoreType; + } { const vectorStore = props.vectorStore as PineconeVectorStore; return { vectorStore: vectorStore, @@ -562,12 +580,10 @@ export class KnowledgeBase extends Construct { * @returns The instance of AmazonAuroraVectorStore, VectorStoreType. * @internal This is an internal core function and should not be called directly. 
*/ - private handleAmazonAuroraVectorStore( - props: KnowledgeBaseProps, - ): { - vectorStore: AmazonAuroraVectorStore; - vectorStoreType: VectorStoreType; - } { + private handleAmazonAuroraVectorStore(props: KnowledgeBaseProps): { + vectorStore: AmazonAuroraVectorStore; + vectorStoreType: VectorStoreType; + } { const vectorStore = props.vectorStore as AmazonAuroraVectorStore; return { vectorStore: vectorStore, @@ -582,12 +598,10 @@ export class KnowledgeBase extends Construct { * @returns The instance of AmazonAuroraDefaultVectorStore, VectorStoreType. * @internal This is an internal core function and should not be called directly. */ - private handleAmazonAuroraDefaultVectorStore( - props: KnowledgeBaseProps, - ): { - vectorStore: AmazonAuroraDefaultVectorStore; - vectorStoreType: VectorStoreType; - } { + private handleAmazonAuroraDefaultVectorStore(props: KnowledgeBaseProps): { + vectorStore: AmazonAuroraDefaultVectorStore; + vectorStoreType: VectorStoreType; + } { const vectorStore = props.vectorStore as AmazonAuroraDefaultVectorStore; return { vectorStore: vectorStore, @@ -595,7 +609,6 @@ export class KnowledgeBase extends Construct { }; } - /** * Handle the default VectorStore type. * @@ -614,21 +627,20 @@ export class KnowledgeBase extends Construct { }; } - /** - * Associate knowledge base with an agent - */ + * Associate knowledge base with an agent + */ public associateToAgent(agent: Agent) { - const agentKnowledgeBaseProperty: bedrock.CfnAgent.AgentKnowledgeBaseProperty = { - description: this.description, - knowledgeBaseId: this.knowledgeBaseId, - knowledgeBaseState: this.knowledgeBaseState, - }; + const agentKnowledgeBaseProperty: bedrock.CfnAgent.AgentKnowledgeBaseProperty = + { + description: this.description, + knowledgeBaseId: this.knowledgeBaseId, + knowledgeBaseState: this.knowledgeBaseState, + }; agent.knowledgeBases = [agentKnowledgeBaseProperty]; } } - /** * Validate that Bedrock Knowledge Base can use the selected model. 
* @@ -636,7 +648,9 @@ export class KnowledgeBase extends Construct { */ function validateModel(foundationModel: BedrockFoundationModel) { if (!foundationModel.supportsKnowledgeBase) { - throw new Error(`The model ${foundationModel} is not supported by Bedrock Knowledge Base.`); + throw new Error( + `The model ${foundationModel} is not supported by Bedrock Knowledge Base.`, + ); } } @@ -653,16 +667,22 @@ function validateVectorIndex( indexName: any, ) { if (!(vectorStore instanceof VectorCollection) && vectorIndex) { - throw new Error('If vectorStore is not of type VectorCollection, vectorIndex should not be provided ' + - 'in KnowledgeBase construct.'); + throw new Error( + 'If vectorStore is not of type VectorCollection, vectorIndex should not be provided ' + + 'in KnowledgeBase construct.', + ); } if (!(vectorStore instanceof VectorCollection) && indexName) { - throw new Error('If vectorStore is not of type VectorCollection, indexName should not be provided ' + - 'in KnowledgeBase construct.'); + throw new Error( + 'If vectorStore is not of type VectorCollection, indexName should not be provided ' + + 'in KnowledgeBase construct.', + ); } if (!(vectorStore instanceof VectorCollection) && vectorField) { - throw new Error('If vectorStore is not of type VectorCollection, vectorField should not be provided ' + - 'in KnowledgeBase construct.'); + throw new Error( + 'If vectorStore is not of type VectorCollection, vectorField should not be provided ' + + 'in KnowledgeBase construct.', + ); } } @@ -684,20 +704,24 @@ function validateIndexParameters( ) { if (vectorIndex.indexName !== 'bedrock-knowledge-base-default-index') { if (vectorIndex.indexName !== indexName) { - throw new Error('Default value of indexName is `bedrock-knowledge-base-default-index`.' + - ' If you create VectorIndex manually and assign vectorIndex to value other than' + - ' `bedrock-knowledge-base-default-index` then you must provide the same value in KnowledgeBase construct.' 
+ - ' If you created VectorIndex manually and set it to `bedrock-knowledge-base-default-index`' + - ' then do not assign indexName in KnowledgeBase construct.'); + throw new Error( + 'Default value of indexName is `bedrock-knowledge-base-default-index`.' + + ' If you create VectorIndex manually and assign vectorIndex to value other than' + + ' `bedrock-knowledge-base-default-index` then you must provide the same value in KnowledgeBase construct.' + + ' If you created VectorIndex manually and set it to `bedrock-knowledge-base-default-index`' + + ' then do not assign indexName in KnowledgeBase construct.', + ); } } if (vectorIndex.vectorField !== 'bedrock-knowledge-base-default-vector') { if (vectorIndex.vectorField !== vectorField) { - throw new Error('Default value of vectorField is `bedrock-knowledge-base-default-vector`.' + - ' If you create VectorIndex manually and assign vectorField to value other than' + - ' `bedrock-knowledge-base-default-field` then you must provide the same value in KnowledgeBase construct.' + - ' If you created VectorIndex manually and set it to `bedrock-knowledge-base-default-vector`' + - ' then do not assign vectorField in KnowledgeBase construct.'); + throw new Error( + 'Default value of vectorField is `bedrock-knowledge-base-default-vector`.' + + ' If you create VectorIndex manually and assign vectorField to value other than' + + ' `bedrock-knowledge-base-default-field` then you must provide the same value in KnowledgeBase construct.' + + ' If you created VectorIndex manually and set it to `bedrock-knowledge-base-default-vector`' + + ' then do not assign vectorField in KnowledgeBase construct.', + ); } } } @@ -738,9 +762,10 @@ function getStorageConfiguration(params: StorageConfiguration): any { }, }; case VectorStoreType.AMAZON_AURORA: - params.vectorStore = params.vectorStore instanceof AmazonAuroraVectorStore ? 
- params.vectorStore as AmazonAuroraVectorStore : - params.vectorStore as AmazonAuroraDefaultVectorStore; + params.vectorStore = + params.vectorStore instanceof AmazonAuroraVectorStore + ? (params.vectorStore as AmazonAuroraVectorStore) + : (params.vectorStore as AmazonAuroraDefaultVectorStore); return { type: VectorStoreType.AMAZON_AURORA, rdsConfiguration: { @@ -757,6 +782,8 @@ function getStorageConfiguration(params: StorageConfiguration): any { }, }; default: - throw new Error(`Unsupported vector store type: ${params.vectorStoreType}`); + throw new Error( + `Unsupported vector store type: ${params.vectorStoreType}`, + ); } -} \ No newline at end of file +} diff --git a/src/cdk-lib/opensearch-vectorindex/README.md b/src/cdk-lib/opensearch-vectorindex/README.md index b7ce0c7b..83996c67 100644 --- a/src/cdk-lib/opensearch-vectorindex/README.md +++ b/src/cdk-lib/opensearch-vectorindex/README.md @@ -1,4 +1,5 @@ # Amazon OpenSearch Vector Index Construct Library + --- @@ -10,55 +11,72 @@ > This means that while you may use them, you may need to update your source code when upgrading to a newer version of this package. 
--- - + -| **Language** | **Package** | -|:-------------|-----------------| -|![Typescript Logo](https://docs.aws.amazon.com/cdk/api/latest/img/typescript32.png) TypeScript|`@cdklabs/generative-ai-cdk-constructs`| -|![Python Logo](https://docs.aws.amazon.com/cdk/api/latest/img/python32.png) Python|`cdklabs.generative_ai_cdk_constructs`| +| **Language** | **Package** | +| :--------------------------------------------------------------------------------------------- | --------------------------------------- | +| ![Typescript Logo](https://docs.aws.amazon.com/cdk/api/latest/img/typescript32.png) TypeScript | `@cdklabs/generative-ai-cdk-constructs` | +| ![Python Logo](https://docs.aws.amazon.com/cdk/api/latest/img/python32.png) Python | `cdklabs.generative_ai_cdk_constructs` | This construct library provides a resource that creates a vector index on an Amazon OpenSearch Domain. It currently only supports Amazon OpenSearch Serverless. ## Table of contents + - [API](#api) - [Vector Index](#vector-index) - ## API + See the [API documentation](../../../apidocs/modules/opensearchserverless.md). ## Vector Index + The `VectorIndex` resource connects to OpenSearch and creates an index suitable for use with Amazon Bedrock Knowledge Bases. 
TypeScript ```ts -import { opensearchserverless, opensearch_vectorindex } from '@cdklabs/generative-ai-cdk-constructs'; +import { + opensearchserverless, + opensearch_vectorindex, +} from '@cdklabs/generative-ai-cdk-constructs'; -const vectorStore = new opensearchserverless.VectorCollection(this, 'VectorCollection'); +const vectorStore = new opensearchserverless.VectorCollection( + this, + 'VectorCollection' +); new opensearch_vectorindex.VectorIndex(this, 'VectorIndex', { -collection: vectorStore, -indexName, -vectorField, -vectorDimensions: 1536, -mappings: [ - { - mappingField: 'AMAZON_BEDROCK_TEXT_CHUNK', - dataType: 'text', - filterable: true, - }, - { - mappingField: 'AMAZON_BEDROCK_METADATA', - dataType: 'text', - filterable: false, + collection: vectorStore, + indexName: 'bedrock-knowledge-base-default-index', + vectorField: 'bedrock-knowledge-base-default-vector', + vectorDimensions: 1536, + mappings: [ + { + mappingField: 'AMAZON_BEDROCK_TEXT_CHUNK', + dataType: 'text', + filterable: true, + }, + { + mappingField: 'AMAZON_BEDROCK_METADATA', + dataType: 'text', + filterable: false, + }, + ], + analyzer: { + characterFilters: [opensearchserverless.CharacterFilterType.ICU_NORMALIZER], + tokenizer: opensearchserverless.TokenizerType.KUROMOJI_TOKENIZER, + tokenFilters: [ + opensearchserverless.TokenFilterType.KUROMOJI_BASEFORM, + opensearchserverless.TokenFilterType.JA_STOP, + ], }, -], }); ``` Python + ```python from cdklabs.generative_ai_cdk_constructs import ( opensearchserverless, @@ -70,8 +88,8 @@ vectorCollection = opensearchserverless.VectorCollection(self, "VectorCollection vectorIndex = opensearch_vectorindex.VectorIndex(self, "VectorIndex", vector_dimensions= 1536, collection=vectorCollection, - index_name='myindex', - vector_field='vectorfieldname', + index_name='bedrock-knowledge-base-default-index', + vector_field='bedrock-knowledge-base-default-vector', mappings= [ opensearch_vectorindex.MetadataManagementFieldProps( 
mapping_field='AMAZON_BEDROCK_TEXT_CHUNK', @@ -84,5 +102,13 @@ vectorIndex = opensearch_vectorindex.VectorIndex(self, "VectorIndex", filterable=False ) ], + analyzer=opensearchserverless.AnalyzerProps( + character_filters=[opensearchserverless.CharacterFilterType.ICU_NORMALIZER], + tokenizer=opensearchserverless.TokenizerType.KUROMOJI_TOKENIZER, + token_filters=[ + opensearchserverless.TokenFilterType.KUROMOJI_BASEFORM, + opensearchserverless.TokenFilterType.JA_STOP, + ], + ) ) -``` \ No newline at end of file +``` diff --git a/src/cdk-lib/opensearch-vectorindex/vector-index.ts b/src/cdk-lib/opensearch-vectorindex/vector-index.ts index fedd760e..1da64e71 100644 --- a/src/cdk-lib/opensearch-vectorindex/vector-index.ts +++ b/src/cdk-lib/opensearch-vectorindex/vector-index.ts @@ -18,6 +18,11 @@ import { Construct } from 'constructs'; import { buildCustomResourceProvider } from '../../common/helpers/custom-resource-provider-helper'; import { generatePhysicalNameV2 } from '../../common/helpers/utils'; import { VectorCollection } from '../opensearchserverless'; +import { + CharacterFilterType, + TokenFilterType, + TokenizerType, +} from '../opensearchserverless/analysis-plugins'; /** * Metadata field definitions. @@ -56,7 +61,7 @@ type MetadataManagementField = { * Whether the field is filterable. */ readonly Filterable: boolean; -} +}; /** * Properties for the Custom::OpenSearchIndex custom resource. @@ -84,6 +89,48 @@ interface VectorIndexResourceProps { * The metadata management fields. */ readonly MetadataManagement: MetadataManagementField[]; + /** + * The analyzer to use. + */ + readonly Analyzer?: AnalyzerProps; +} + +/** + * Properties for the Analyzer used in Custom::OpenSearchIndex custom resource. + * + * @internal - JSII requires the exported interface to have camel camelCase properties + */ +interface AnalyzerProps { + /** + * The analyzers to use. + */ + readonly CharacterFilters: CharacterFilterType[]; + /** + * The tokenizer to use. 
+ */ + readonly Tokenizer: TokenizerType; + /** + * The token filters to use. + */ + readonly TokenFilters: TokenFilterType[]; +} + +/** + * Properties for the Analyzer. + */ +export interface Analyzer { + /** + * The analyzers to use. + */ + readonly characterFilters: CharacterFilterType[]; + /** + * The tokenizer to use. + */ + readonly tokenizer: TokenizerType; + /** + * The token filters to use. + */ + readonly tokenFilters: TokenFilterType[]; } /** @@ -110,6 +157,11 @@ export interface VectorIndexProps { * The metadata management fields. */ readonly mappings: MetadataManagementFieldProps[]; + /** + * The analyzer to use. + * @default - No analyzer. + */ + readonly analyzer?: Analyzer; } /** @@ -129,11 +181,7 @@ export class VectorIndex extends cdk.Resource { */ public readonly vectorDimensions: number; - constructor( - scope: Construct, - id: string, - props: VectorIndexProps, - ) { + constructor(scope: Construct, id: string, props: VectorIndexProps) { super(scope, id); this.indexName = props.indexName; @@ -142,46 +190,56 @@ export class VectorIndex extends cdk.Resource { const crProvider = OpenSearchIndexCRProvider.getProvider(this); crProvider.role.addManagedPolicy(props.collection.aossPolicy); - const manageIndexPolicyName = generatePhysicalNameV2(this, + const manageIndexPolicyName = generatePhysicalNameV2( + this, 'ManageIndexPolicy', - { maxLength: 32, lower: true }); - const manageIndexPolicy = new oss.CfnAccessPolicy(this, 'ManageIndexPolicy', { - name: manageIndexPolicyName, - type: 'data', - policy: JSON.stringify([ - { - Rules: [ - { - Resource: [`index/${props.collection.collectionName}/*`], - Permission: [ - 'aoss:DescribeIndex', - 'aoss:CreateIndex', - 'aoss:DeleteIndex', - 'aoss:UpdateIndex', - ], - ResourceType: 'index', - }, - { - Resource: [`collection/${props.collection.collectionName}`], - Permission: [ - 'aoss:DescribeCollectionItems', - ], - ResourceType: 'collection', - }, - ], - Principal: [ - crProvider.role.roleArn, - ], - 
Description: '', - }, - ]), - }); - + { maxLength: 32, lower: true }, + ); + const manageIndexPolicy = new oss.CfnAccessPolicy( + this, + 'ManageIndexPolicy', + { + name: manageIndexPolicyName, + type: 'data', + policy: JSON.stringify([ + { + Rules: [ + { + Resource: [`index/${props.collection.collectionName}/*`], + Permission: [ + 'aoss:DescribeIndex', + 'aoss:CreateIndex', + 'aoss:DeleteIndex', + 'aoss:UpdateIndex', + ], + ResourceType: 'index', + }, + { + Resource: [`collection/${props.collection.collectionName}`], + Permission: ['aoss:DescribeCollectionItems'], + ResourceType: 'collection', + }, + ], + Principal: [crProvider.role.roleArn], + Description: '', + }, + ]), + }, + ); + const analyzerProps = props.analyzer + ? { + CharacterFilters: props.analyzer.characterFilters, + Tokenizer: props.analyzer.tokenizer, + TokenFilters: props.analyzer.tokenFilters, + } + : undefined; const vectorIndex = new cdk.CustomResource(this, 'VectorIndex', { serviceToken: crProvider.serviceToken, properties: { - Endpoint: `${props.collection.collectionId}.${cdk.Stack.of(this).region}.aoss.amazonaws.com`, + Endpoint: `${props.collection.collectionId}.${ + cdk.Stack.of(this).region + }.aoss.amazonaws.com`, IndexName: props.indexName, VectorField: props.vectorField, Dimensions: props.vectorDimensions, @@ -192,6 +250,7 @@ export class VectorIndex extends cdk.Resource { Filterable: m.filterable, }; }), + Analyzer: analyzerProps, } as VectorIndexResourceProps, resourceType: 'Custom::OpenSearchIndex', }); @@ -200,7 +259,6 @@ export class VectorIndex extends cdk.Resource { vectorIndex.node.addDependency(props.collection); vectorIndex.node.addDependency(props.collection.dataAccessPolicy); } - } /** @@ -211,7 +269,9 @@ export class VectorIndex extends cdk.Resource { export const OpenSearchIndexCRProvider = buildCustomResourceProvider({ providerName: 'OpenSearchIndexCRProvider', codePath: path.join( - __dirname, '../../../lambda/opensearch-serverless-custom-resources'), + __dirname, + 
'../../../lambda/opensearch-serverless-custom-resources', + ), handler: 'custom_resources.on_event', runtime: lambda.Runtime.PYTHON_3_12, -}); \ No newline at end of file +}); diff --git a/src/cdk-lib/opensearchserverless/analysis-plugins.ts b/src/cdk-lib/opensearchserverless/analysis-plugins.ts new file mode 100644 index 00000000..5fb54236 --- /dev/null +++ b/src/cdk-lib/opensearchserverless/analysis-plugins.ts @@ -0,0 +1,34 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +export enum CharacterFilterType { + ICU_NORMALIZER = 'icu_normalizer', +} + +// Currently we only support Kuromoji and ICU tokenizers. 
+// Also see the following link for more information regarding supported plugins: +// https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-genref.html#serverless-plugins +export enum TokenizerType { + KUROMOJI_TOKENIZER = 'kuromoji_tokenizer', + ICU_TOKENIZER = 'icu_tokenizer', +} + +export enum TokenFilterType { + KUROMOJI_BASEFORM = 'kuromoji_baseform', + KUROMOJI_PART_OF_SPEECH = 'kuromoji_part_of_speech', + KUROMOJI_STEMMER = 'kuromoji_stemmer', + CJK_WIDTH = 'cjk_width', + JA_STOP = 'ja_stop', + LOWERCASE = 'lowercase', + ICU_FOLDING = 'icu_folding', +} diff --git a/src/cdk-lib/opensearchserverless/index.ts b/src/cdk-lib/opensearchserverless/index.ts index 69646a60..1c8b161d 100644 --- a/src/cdk-lib/opensearchserverless/index.ts +++ b/src/cdk-lib/opensearchserverless/index.ts @@ -10,4 +10,5 @@ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions * and limitations under the License. */ -export * from './vector-collection'; \ No newline at end of file +export * from './vector-collection'; +export * from './analysis-plugins'; diff --git a/test/cdk-lib/opensearch-vectorindex/vector-index.test.ts b/test/cdk-lib/opensearch-vectorindex/vector-index.test.ts index 00575208..621a63d1 100644 --- a/test/cdk-lib/opensearch-vectorindex/vector-index.test.ts +++ b/test/cdk-lib/opensearch-vectorindex/vector-index.test.ts @@ -16,7 +16,7 @@ import { Annotations, Match, Template } from 'aws-cdk-lib/assertions'; import * as iam from 'aws-cdk-lib/aws-iam'; import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; import { OpenSearchIndexCRProvider, VectorIndex } from '../../../src/cdk-lib/opensearch-vectorindex'; -import { VectorCollection } from '../../../src/cdk-lib/opensearchserverless'; +import { VectorCollection, CharacterFilterType, TokenizerType, TokenFilterType } from '../../../src/cdk-lib/opensearchserverless'; // mock lambda.Code.fromDockerBuild() 
jest.mock('aws-cdk-lib/aws-lambda', () => { @@ -133,3 +133,115 @@ describe('OpenSearch Serverless Vector Index', () => { expect(errors).toHaveLength(0); }); }); + +describe('OpenSearch Serverless Vector Index with analyzer', () => { + let template: Template; + let app: cdk.App; + let stack: cdk.Stack; + let aossVector: VectorCollection; + let aossVectorIndex: VectorIndex; + let testRole: iam.Role; + + beforeAll(() => { + app = new cdk.App(); + cdk.Aspects.of(app).add(new AwsSolutionsChecks()); + stack = new cdk.Stack(app, 'test-stack', { + env: { + account: '123456789012', + region: 'us-east-1', + }, + }); + + aossVector = new VectorCollection(stack, 'test-aoss-vector'); + + aossVectorIndex = new VectorIndex(stack, 'test-aoss-vector-index', { + collection: aossVector, + indexName: 'test-index', + vectorField: 'vector', + vectorDimensions: 1536, + mappings: [ + { + mappingField: 'AMAZON_BEDROCK_TEXT_CHUNK', + dataType: 'text', + filterable: true, + }, + ], + analyzer: { + characterFilters: [CharacterFilterType.ICU_NORMALIZER], + tokenizer: TokenizerType.KUROMOJI_TOKENIZER, + tokenFilters: [ + TokenFilterType.KUROMOJI_BASEFORM, + TokenFilterType.JA_STOP, + ], + }, + }); + + NagSuppressions.addResourceSuppressionsByPath( + stack, + '/test-stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a/ServiceRole', + [ + { + id: 'AwsSolutions-IAM4', + reason: 'CDK CustomResource LogRetention Lambda uses the AWSLambdaBasicExecutionRole AWS Managed Policy. Managed by CDK.', + }, + { + id: 'AwsSolutions-IAM5', + reason: 'CDK CustomResource LogRetention Lambda uses a wildcard to manage log streams created at runtime. 
Managed by CDK.', + }, + ], + true, + ); + + + aossVectorIndex.node.addDependency(aossVector.dataAccessPolicy); + + testRole = new iam.Role(stack, 'TestRole', { + assumedBy: new iam.AccountRootPrincipal(), + }); + + aossVector.grantDataAccess(testRole); + + app.synth(); + template = Template.fromStack(stack); + }); + + + test('Should have the correct resources', () => { + console.log(template.toJSON()); + template.resourceCountIs('AWS::OpenSearchServerless::Collection', 1); + template.resourceCountIs('Custom::OpenSearchIndex', 1); + template.resourceCountIs('AWS::OpenSearchServerless::AccessPolicy', 2); + template.resourceCountIs('AWS::Lambda::Function', 3); + }); + + test('ManageIndexPolicy should allow CRProvider', () => { + const crProvider = OpenSearchIndexCRProvider.getProvider(stack); + const crRoleLogicalId = stack.getLogicalId(crProvider.role.node.defaultChild as iam.CfnRole); + + template.hasResource('AWS::OpenSearchServerless::AccessPolicy', { + Properties: { + Name: Match.stringLikeRegexp('^manageindexpolicy[a-z0-9]+'), + Policy: { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + Match.objectEquals({ + 'Fn::GetAtt': Match.arrayWith([ + crRoleLogicalId, + ]), + }), + ]), + ]), + }, + Type: 'data', + }, + }); + }); + + test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError( + '*', + Match.stringLikeRegexp('AwsSolutions-.*'), + ); + expect(errors).toHaveLength(0); + }); +});