From 7bd78a4d336cbb2db5de399cc4426d61c15e9bd1 Mon Sep 17 00:00:00 2001
From: Fen Qin
Date: Fri, 14 Nov 2025 17:33:45 +0000
Subject: [PATCH] add asymmetric embedding blueprint

Signed-off-by: Fen Qin
---
 ...asymmetric_embedding_standard_blueprint.md | 224 ++++++++++++++++++
 1 file changed, 224 insertions(+)
 create mode 100644 docs/remote_inference_blueprints/standard_blueprints/sagemaker_asymmetric_embedding_standard_blueprint.md

diff --git a/docs/remote_inference_blueprints/standard_blueprints/sagemaker_asymmetric_embedding_standard_blueprint.md b/docs/remote_inference_blueprints/standard_blueprints/sagemaker_asymmetric_embedding_standard_blueprint.md
new file mode 100644
index 0000000000..22dea5854f
--- /dev/null
+++ b/docs/remote_inference_blueprints/standard_blueprints/sagemaker_asymmetric_embedding_standard_blueprint.md
@@ -0,0 +1,224 @@
# AWS SageMaker Asymmetric Embedding Model Standard Blueprint

This blueprint demonstrates how to deploy an asymmetric embedding model (multilingual-e5-small) on AWS SageMaker and integrate it with OpenSearch for semantic search. An asymmetric model applies different prefixes to queries and passages to improve search relevance.

## Overview

An asymmetric embedding model encodes queries and passages differently, which improves semantic search accuracy. This blueprint shows how to:

1. Create a SageMaker connector
2. Register a model group
3. Register and deploy the model
4. Test model inference with query, passage, and bulk embeddings

## Prerequisites

1. AWS account with SageMaker access
2. A SageMaker endpoint deployed with the multilingual-e5-small model
3. AWS credentials with appropriate permissions

## Steps

### 1. Create SageMaker Connector

```json
POST /_plugins/_ml/connectors/_create
{
  "name": "sagemaker-e5-asymmetric-connector",
  "description": "Connector for multilingual-e5-small asymmetric model",
  "version": "1",
  "protocol": "aws_sigv4",
  "parameters": {
    "region": "<YOUR_AWS_REGION>",
    "service_name": "sagemaker"
  },
  "credential": {
    "access_key": "<YOUR_AWS_ACCESS_KEY>",
    "secret_key": "<YOUR_AWS_SECRET_KEY>",
    "session_token": "<YOUR_AWS_SESSION_TOKEN>"
  },
  "actions": [
    {
      "action_type": "predict",
      "method": "POST",
      "url": "https://runtime.sagemaker.<YOUR_AWS_REGION>.amazonaws.com/endpoints/<YOUR_SAGEMAKER_ENDPOINT>/invocations",
      "headers": {
        "content-type": "application/json"
      },
      "request_body": "{ \"texts\": ${parameters.texts}, \"content_type\": \"${parameters.content_type}\" }"
    }
  ]
}
```

Replace the placeholders:
- `<YOUR_AWS_REGION>`: Your AWS region (e.g., us-east-1)
- `<YOUR_AWS_ACCESS_KEY>`: Your AWS access key
- `<YOUR_AWS_SECRET_KEY>`: Your AWS secret key
- `<YOUR_AWS_SESSION_TOKEN>`: Your AWS session token (if using temporary credentials)
- `<YOUR_SAGEMAKER_ENDPOINT>`: Your SageMaker endpoint name

### 2. Create Model Group

```json
POST /_plugins/_ml/model_groups/_register
{
  "name": "e5_asymmetric_group",
  "description": "Model group for asymmetric E5 embedding model"
}
```

### 3. Register and Deploy Model

```json
POST /_plugins/_ml/models/_register?deploy=true
{
  "name": "e5-asymmetric-remote",
  "function_name": "remote",
  "model_group_id": "<MODEL_GROUP_ID>",
  "description": "Asymmetric E5 embedding model for semantic search",
  "connector_id": "<CONNECTOR_ID>",
  "model_config": {
    "model_type": "text_embedding",
    "embedding_dimension": 384,
    "framework_type": "SENTENCE_TRANSFORMERS",
    "additional_config": {
      "space_type": "l2",
      "is_asymmetric": true,
      "model_family": "e5",
      "query_prefix": "query: ",
      "passage_prefix": "passage: "
    }
  }
}
```

Replace:
- `<MODEL_GROUP_ID>`: The model group ID returned in step 2
- `<CONNECTOR_ID>`: The connector ID returned in step 1
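Because `?deploy=true` registers and deploys the model asynchronously, the response may initially contain only a `task_id`. As an optional check before moving on, you can poll the ML Commons task API to confirm the deployment completed and to retrieve the model ID used in the next steps (`<TASK_ID>` is the task ID returned by the register call; the exact response fields can vary by OpenSearch version):

```json
GET /_plugins/_ml/tasks/<TASK_ID>
```

Once the task state is `COMPLETED`, the response includes the `model_id`. You can also inspect the deployed model directly with `GET /_plugins/_ml/models/<MODEL_ID>`.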
### 4. Test Query Embedding

```json
POST /_plugins/_ml/models/<MODEL_ID>/_predict
{
  "parameters": {
    "texts": ["What is machine learning?"],
    "content_type": "query"
  }
}
```

### 5. Test Passage Embedding

```json
POST /_plugins/_ml/models/<MODEL_ID>/_predict
{
  "parameters": {
    "texts": ["Machine learning is a subset of artificial intelligence that focuses on algorithms and statistical models."],
    "content_type": "passage"
  }
}
```

### 6. Test Bulk Embedding

```json
POST /_plugins/_ml/models/<MODEL_ID>/_predict
{
  "parameters": {
    "texts": [
      "What is machine learning?",
      "How does neural search work?",
      "What are embedding models?"
    ],
    "content_type": "query"
  }
}
```

Replace `<MODEL_ID>` with your deployed model ID.

## Example Response

### Query Embedding Response

```json
{
  "inference_results": [
    {
      "output": [
        {
          "dataAsMap": {
            "response": [
              [-0.123, 0.456, -0.789, ...]
            ]
          }
        }
      ],
      "status_code": 200
    }
  ]
}
```

### Passage Embedding Response

```json
{
  "inference_results": [
    {
      "output": [
        {
          "dataAsMap": {
            "response": [
              [0.321, -0.654, 0.987, ...]
            ]
          }
        }
      ],
      "status_code": 200
    }
  ]
}
```

### Bulk Embedding Response

```json
{
  "inference_results": [
    {
      "output": [
        {
          "dataAsMap": {
            "response": [
              [0.123, -0.456, 0.789, ...],
              [0.321, -0.654, 0.987, ...],
              [0.111, -0.222, 0.333, ...]
            ]
          }
        }
      ],
      "status_code": 200
    }
  ]
}
```

## Model Configuration Details

The asymmetric model configuration includes:

- **embedding_dimension**: 384 (output dimension of multilingual-e5-small)
- **is_asymmetric**: true (enables different processing for queries and passages)
- **query_prefix**: "query: " (prefix added to search queries)
- **passage_prefix**: "passage: " (prefix added to document passages)
- **space_type**: "l2" (distance metric used for similarity calculation)

For an example of how the deployed model can be wired into a neural search flow, see the sketch after the references below.

## References

- [Multilingual E5 Model Documentation](https://huggingface.co/intfloat/multilingual-e5-small)
- [OpenSearch ML Commons Documentation](https://opensearch.org/docs/latest/ml-commons-plugin/remote-models/index/)
- [SageMaker Endpoints Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html)
- [Asymmetric Embedding Models Guide](https://opensearch.org/docs/latest/search-plugins/neural-search/)
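## Optional: Using the Model for Neural Search

The steps above stop at verifying raw embeddings. The sketch below shows one possible way to use the deployed model for semantic search: an ingest pipeline that embeds a passage field at index time, and a `neural` query at search time. This is a sketch, not part of the blueprint steps; it assumes the Neural Search plugin is installed and that your OpenSearch version supports asymmetric remote models in these components. The index name `my-index` and the fields `passage_text`/`passage_embedding` are placeholder assumptions, and the target index needs a `knn_vector` field with dimension 384.

```json
PUT /_ingest/pipeline/e5-embedding-pipeline
{
  "description": "Generate passage embeddings with the asymmetric E5 model at ingest time",
  "processors": [
    {
      "text_embedding": {
        "model_id": "<MODEL_ID>",
        "field_map": {
          "passage_text": "passage_embedding"
        }
      }
    }
  ]
}
```

```json
GET /my-index/_search
{
  "query": {
    "neural": {
      "passage_embedding": {
        "query_text": "What is machine learning?",
        "model_id": "<MODEL_ID>",
        "k": 5
      }
    }
  }
}
```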