Allow context management hooks to be registered during agent execution

mingshl · mingshl · commit d97fd6a323b7 · 2025-11-02T12:21:48.000-08:00
Signed-off-by: Mingshi Liu &lt;mingshl@amazon.com&gt;
diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/agent/MLAgentExecutor.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/agent/MLAgentExecutor.java
@@ -209,9 +209,12 @@ public void execute(Input input, ActionListener<Output> listener, TransportChann
                                     ) {
                                         ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
                                         MLAgent mlAgent = MLAgent.parse(parser);
-                                        // Always create a fresh HookRegistry for agent execution
-                                        // This prevents callback accumulation from previous executions
-                                        HookRegistry hookRegistry = new HookRegistry();
+                                        // Use existing HookRegistry from AgentMLInput if available (set by MLExecuteTaskRunner for template
+                                        // references)
+                                        // Otherwise create a fresh HookRegistry for agent execution
+                                        final HookRegistry hookRegistry = agentMLInput.getHookRegistry() != null
+                                            ? agentMLInput.getHookRegistry()
+                                            : new HookRegistry();
                                         if (isMultiTenancyEnabled && !Objects.equals(tenantId, mlAgent.getTenantId())) {
                                             listener
                                                 .onFailure(
diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/contextmanager/SummarizationManager.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/contextmanager/SummarizationManager.java
@@ -6,6 +6,7 @@
 package org.opensearch.ml.engine.algorithms.contextmanager;
 
 import static org.opensearch.ml.common.FunctionName.REMOTE;
+import static org.opensearch.ml.engine.algorithms.agent.AgentUtils.LLM_RESPONSE_FILTER;
 import static org.opensearch.ml.engine.algorithms.agent.MLChatAgentRunner.INTERACTIONS;
 
 import java.util.ArrayList;
@@ -31,11 +32,15 @@
 import org.opensearch.ml.common.utils.StringUtils;
 import org.opensearch.transport.client.Client;
 
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.PathNotFoundException;
+
 import lombok.extern.log4j.Log4j2;
 
 /**
  * Context manager that implements summarization approach for tool interactions.
- * Summarizes older interactions while preserving recent ones to manage context window.
+ * Summarizes older interactions while preserving recent ones to manage context
+ * window.
  */
 @Log4j2
 public class SummarizationManager implements ContextManager {
@@ -191,7 +196,7 @@ protected void executeSummarization(
             // Execute prediction
             ActionListener<MLTaskResponse> listener = ActionListener.wrap(response -> {
                 try {
-                    String summary = extractSummaryFromResponse(response);
+                    String summary = extractSummaryFromResponse(response, context);
                     processSummarizationResult(context, summary, messagesToSummarizeCount, remainingMessages, originalToolInteractions);
                 } catch (Exception e) {
                     // Fallback to default behavior
@@ -279,7 +284,7 @@ protected void processSummarizationResult(
         }
     }
 
-    private String extractSummaryFromResponse(MLTaskResponse response) {
+    private String extractSummaryFromResponse(MLTaskResponse response, ContextManagerContext context) {
         try {
             MLOutput output = response.getOutput();
             if (output instanceof ModelTensorOutput) {
@@ -290,7 +295,38 @@ private String extractSummaryFromResponse(MLTaskResponse response) {
                     List<ModelTensor> tensors = mlModelOutputs.get(0).getMlModelTensors();
                     if (tensors != null && !tensors.isEmpty()) {
                         Map<String, ?> dataAsMap = tensors.get(0).getDataAsMap();
-                        // TODO need to parse LLM response output, maybe reused how filtered output from chatAgentRunner
+
+                        // Use LLM_RESPONSE_FILTER from agent configuration if available
+                        Map<String, String> parameters = context.getParameters();
+                        if (parameters != null
+                            && parameters.containsKey(LLM_RESPONSE_FILTER)
+                            && !parameters.get(LLM_RESPONSE_FILTER).isEmpty()) {
+                            try {
+                                String responseFilter = parameters.get(LLM_RESPONSE_FILTER);
+                                Object filteredResponse = JsonPath.read(dataAsMap, responseFilter);
+                                if (filteredResponse instanceof String) {
+                                    String result = ((String) filteredResponse).trim();
+                                    return result;
+                                } else {
+                                    String result = StringUtils.toJson(filteredResponse);
+                                    return result;
+                                }
+                            } catch (PathNotFoundException e) {
+                                // Fall back to default parsing
+                            } catch (Exception e) {
+                                // Fall back to default parsing
+                            }
+                        }
+
+                        // Fallback to default parsing if no filter or filter fails
+                        if (dataAsMap.size() == 1 && dataAsMap.containsKey("response")) {
+                            Object responseObj = dataAsMap.get("response");
+                            if (responseObj instanceof String) {
+                                return ((String) responseObj).trim();
+                            }
+                        }
+
+                        // Last resort: return JSON representation
                         return StringUtils.toJson(dataAsMap);
                     }
                 }
diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/contextmanager/SummarizationManagerTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/contextmanager/SummarizationManagerTest.java
@@ -5,6 +5,8 @@
 
 package org.opensearch.ml.engine.algorithms.contextmanager;
 
+import static org.opensearch.ml.engine.algorithms.agent.AgentUtils.LLM_RESPONSE_FILTER;
+
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -16,6 +18,10 @@
 import org.mockito.Mock;
 import org.mockito.MockitoAnnotations;
 import org.opensearch.ml.common.contextmanager.ContextManagerContext;
+import org.opensearch.ml.common.output.model.ModelTensor;
+import org.opensearch.ml.common.output.model.ModelTensorOutput;
+import org.opensearch.ml.common.output.model.ModelTensors;
+import org.opensearch.ml.common.transport.MLTaskResponse;
 import org.opensearch.transport.client.Client;
 
 /**
@@ -161,6 +167,159 @@ public void testProcessSummarizationResult() {
         Assert.assertTrue(firstOutput.contains("Test summary"));
     }
 
+    @Test
+    public void testExtractSummaryFromResponseWithLLMResponseFilter() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        manager.initialize(config);
+
+        // Put a JSONPath filter in the context parameters that targets choices[0].message.content
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put(LLM_RESPONSE_FILTER, "$.choices[0].message.content");
+        context.setParameters(parameters);
+
+        // Build tensor data nested as choices -> [0] -> message -> content (OpenAI-style layout)
+        Map<String, Object> responseData = new HashMap<>();
+        Map<String, Object> choice = new HashMap<>();
+        Map<String, Object> message = new HashMap<>();
+        message.put("content", "This is the extracted summary content");
+        choice.put("message", message);
+        responseData.put("choices", List.of(choice));
+
+        MLTaskResponse mockResponse = createMockMLTaskResponse(responseData);
+
+        // extractSummaryFromResponse is private, so look it up and invoke it reflectively
+        java.lang.reflect.Method extractMethod = SummarizationManager.class
+            .getDeclaredMethod("extractSummaryFromResponse", MLTaskResponse.class, ContextManagerContext.class);
+        extractMethod.setAccessible(true);
+
+        String result = (String) extractMethod.invoke(manager, mockResponse, context);
+
+        Assert.assertEquals("This is the extracted summary content", result);
+    }
+
+    @Test
+    public void testExtractSummaryFromResponseWithBedrockResponseFilter() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        manager.initialize(config);
+
+        // Put a JSONPath filter in the context parameters that targets output.message.content[0].text
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put(LLM_RESPONSE_FILTER, "$.output.message.content[0].text");
+        context.setParameters(parameters);
+
+        // Build tensor data nested as output -> message -> content[0] -> text (Bedrock-style layout)
+        Map<String, Object> responseData = new HashMap<>();
+        Map<String, Object> output = new HashMap<>();
+        Map<String, Object> message = new HashMap<>();
+        Map<String, Object> content = new HashMap<>();
+        content.put("text", "Bedrock extracted summary");
+        message.put("content", List.of(content));
+        output.put("message", message);
+        responseData.put("output", output);
+
+        MLTaskResponse mockResponse = createMockMLTaskResponse(responseData);
+
+        // extractSummaryFromResponse is private, so look it up and invoke it reflectively
+        java.lang.reflect.Method extractMethod = SummarizationManager.class
+            .getDeclaredMethod("extractSummaryFromResponse", MLTaskResponse.class, ContextManagerContext.class);
+        extractMethod.setAccessible(true);
+
+        String result = (String) extractMethod.invoke(manager, mockResponse, context);
+
+        Assert.assertEquals("Bedrock extracted summary", result);
+    }
+
+    @Test
+    public void testExtractSummaryFromResponseWithInvalidFilter() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        manager.initialize(config);
+
+        // Use a well-formed filter whose path does not exist in the response data built below
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put(LLM_RESPONSE_FILTER, "$.invalid.path");
+        context.setParameters(parameters);
+
+        // Response data holds only a "response" key, so the filter above cannot match anything
+        Map<String, Object> responseData = new HashMap<>();
+        responseData.put("response", "Fallback summary content");
+
+        MLTaskResponse mockResponse = createMockMLTaskResponse(responseData);
+
+        // extractSummaryFromResponse is private, so look it up and invoke it reflectively
+        java.lang.reflect.Method extractMethod = SummarizationManager.class
+            .getDeclaredMethod("extractSummaryFromResponse", MLTaskResponse.class, ContextManagerContext.class);
+        extractMethod.setAccessible(true);
+
+        String result = (String) extractMethod.invoke(manager, mockResponse, context);
+
+        // With no filter match, the value stored under "response" is expected as the summary
+        Assert.assertEquals("Fallback summary content", result);
+    }
+
+    @Test
+    public void testExtractSummaryFromResponseWithoutFilter() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        manager.initialize(config);
+
+        // Empty parameter map: no LLM_RESPONSE_FILTER key is present at all
+        Map<String, String> parameters = new HashMap<>();
+        context.setParameters(parameters);
+
+        // Response data holds a single "response" key carrying the expected summary text
+        Map<String, Object> responseData = new HashMap<>();
+        responseData.put("response", "Default parsed summary");
+
+        MLTaskResponse mockResponse = createMockMLTaskResponse(responseData);
+
+        // extractSummaryFromResponse is private, so look it up and invoke it reflectively
+        java.lang.reflect.Method extractMethod = SummarizationManager.class
+            .getDeclaredMethod("extractSummaryFromResponse", MLTaskResponse.class, ContextManagerContext.class);
+        extractMethod.setAccessible(true);
+
+        String result = (String) extractMethod.invoke(manager, mockResponse, context);
+
+        Assert.assertEquals("Default parsed summary", result);
+    }
+
+    @Test
+    public void testExtractSummaryFromResponseWithEmptyFilter() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        manager.initialize(config);
+
+        // The LLM_RESPONSE_FILTER key is present but maps to an empty string
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put(LLM_RESPONSE_FILTER, "");
+        context.setParameters(parameters);
+
+        // Response data holds a single "response" key carrying the expected summary text
+        Map<String, Object> responseData = new HashMap<>();
+        responseData.put("response", "Empty filter fallback");
+
+        MLTaskResponse mockResponse = createMockMLTaskResponse(responseData);
+
+        // extractSummaryFromResponse is private, so look it up and invoke it reflectively
+        java.lang.reflect.Method extractMethod = SummarizationManager.class
+            .getDeclaredMethod("extractSummaryFromResponse", MLTaskResponse.class, ContextManagerContext.class);
+        extractMethod.setAccessible(true);
+
+        String result = (String) extractMethod.invoke(manager, mockResponse, context);
+
+        Assert.assertEquals("Empty filter fallback", result);
+    }
+
+    /**
+     * Builds an MLTaskResponse (via builders, not a Mockito mock) whose single tensor carries responseData as its data map.
+     */
+    private MLTaskResponse createMockMLTaskResponse(Map<String, Object> responseData) {
+        ModelTensor tensor = ModelTensor.builder().dataAsMap(responseData).build();
+
+        ModelTensors tensors = ModelTensors.builder().mlModelTensors(List.of(tensor)).build();
+
+        ModelTensorOutput output = ModelTensorOutput.builder().mlModelOutputs(List.of(tensors)).build();
+
+        return MLTaskResponse.builder().output(output).build();
+    }
+
     /**
      * Helper method to add tool interactions to the context.
      */
diff --git a/plugin/src/main/java/org/opensearch/ml/task/MLExecuteTaskRunner.java b/plugin/src/main/java/org/opensearch/ml/task/MLExecuteTaskRunner.java
@@ -274,7 +274,7 @@ private void executeAgentWithContextManagement(
 
     /**
      * Gets the effective context management name for an agent.
-     * Priority: 1) Runtime parameter from execution request, 2) Agent's stored configuration (set by MLAgentExecutor)
+     * Priority: 1) Runtime parameter from execution request, 2) Agent's stored configuration, 3) Runtime parameters set by MLAgentExecutor
      * This follows the same pattern as MCP connectors.
      * 
      * @param agentInput the agent ML input
@@ -288,7 +288,69 @@ private String getEffectiveContextManagementName(AgentMLInput agentInput) {
             return runtimeContextManagementName;
         }
 
-        // Priority 2: Agent's stored configuration (set by MLAgentExecutor in input parameters)
+        // Priority 2: Check agent's stored configuration directly
+        String agentId = agentInput.getAgentId();
+        if (agentId != null) {
+            try {
+                // Use a blocking call to get the agent synchronously
+                // This is acceptable here since we're in the task execution path
+                java.util.concurrent.CompletableFuture<String> future = new java.util.concurrent.CompletableFuture<>();
+
+                try (
+                    org.opensearch.common.util.concurrent.ThreadContext.StoredContext context = client
+                        .threadPool()
+                        .getThreadContext()
+                        .stashContext()
+                ) {
+                    client
+                        .get(
+                            new org.opensearch.action.get.GetRequest(org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX, agentId),
+                            org.opensearch.core.action.ActionListener.runBefore(org.opensearch.core.action.ActionListener.wrap(response -> {
+                                if (response.isExists()) {
+                                    try {
+                                        org.opensearch.core.xcontent.XContentParser parser =
+                                            org.opensearch.common.xcontent.json.JsonXContent.jsonXContent
+                                                .createParser(
+                                                    null,
+                                                    org.opensearch.common.xcontent.LoggingDeprecationHandler.INSTANCE,
+                                                    response.getSourceAsString()
+                                                );
+                                        org.opensearch.core.xcontent.XContentParserUtils
+                                            .ensureExpectedToken(
+                                                org.opensearch.core.xcontent.XContentParser.Token.START_OBJECT,
+                                                parser.nextToken(),
+                                                parser
+                                            );
+                                        org.opensearch.ml.common.agent.MLAgent mlAgent = org.opensearch.ml.common.agent.MLAgent
+                                            .parse(parser);
+
+                                        if (mlAgent.hasContextManagementTemplate()) {
+                                            String templateName = mlAgent.getContextManagementTemplateName();
+                                            future.complete(templateName);
+                                        } else {
+                                            future.complete(null);
+                                        }
+                                    } catch (Exception e) {
+                                        future.completeExceptionally(e);
+                                    }
+                                } else {
+                                    future.complete(null); // Agent not found
+                                }
+                            }, future::completeExceptionally), context::restore)
+                        );
+                }
+
+                // Wait for the result with a timeout
+                String contextManagementName = future.get(5, java.util.concurrent.TimeUnit.SECONDS);
+                if (contextManagementName != null && !contextManagementName.trim().isEmpty()) {
+                    return contextManagementName;
+                }
+            } catch (Exception e) {
+                // Continue to fallback methods
+            }
+        }
+
+        // Priority 3: Agent's runtime parameters (set by MLAgentExecutor in input parameters)
         if (agentInput.getInputDataset() instanceof org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet) {
             org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet dataset =
                 (org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet) agentInput.getInputDataset();
@@ -303,7 +365,6 @@ private String getEffectiveContextManagementName(AgentMLInput agentInput) {
             // Handle template references (not processed by MLAgentExecutor)
             String agentContextManagementName = dataset.getParameters().get("context_management");
             if (agentContextManagementName != null && !agentContextManagementName.trim().isEmpty()) {
-                log.debug("Using agent-level context management template reference: {}", agentContextManagementName);
                 return agentContextManagementName;
             }
         }