Skip to content

Commit 144ae1e

Browse files
ThomasVitale authored and markpollack committed
Support custom template in RelevancyEvaluator
The RelevancyEvaluator is key for validating RAG flows. This pull request improves it by making the PromptTemplate configurable, improving the format of the default one, introducing a Builder, and extending the documentation with more details on how to use it. I added some unit tests. The RelevancyEvaluator is used in lots of integration tests in the project to test the QuestionAnswerAdvisor and RetrievalAugmentationAdvisor, that also help assessing the evaluator itself. Signed-off-by: Thomas Vitale <[email protected]>
1 parent 66d155c commit 144ae1e

File tree

3 files changed

+191
-59
lines changed

3 files changed

+191
-59
lines changed
Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
 * Copyright 2023-2025 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.ai.chat.evaluation;

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.lang.Nullable;
import org.springframework.util.Assert;

import java.util.Collections;
import java.util.Map;

/**
 * Evaluates the relevancy of a response to a query based on the context provided.
 *
 * <p>The evaluation delegates to a chat model: the user question, the model's answer and
 * the supporting (retrieved) context are rendered into a {@link PromptTemplate} asking
 * for a YES/NO verdict. The default template can be replaced via {@link Builder}.
 */
public class RelevancyEvaluator implements Evaluator {

	// Default YES/NO evaluation prompt. A custom template must expose the same
	// placeholders: {query}, {response}, {context}.
	private static final PromptTemplate DEFAULT_PROMPT_TEMPLATE = new PromptTemplate("""
			Your task is to evaluate if the response for the query
			is in line with the context information provided.

			You have two options to answer. Either YES or NO.

			Answer YES, if the response for the query
			is in line with context information otherwise NO.

			Query:
			{query}

			Response:
			{response}

			Context:
			{context}

			Answer:
			""");

	private final ChatClient.Builder chatClientBuilder;

	private final PromptTemplate promptTemplate;

	/**
	 * Creates an evaluator using the default prompt template.
	 * @param chatClientBuilder builder for the chat client used to run the evaluation;
	 * must not be {@code null}
	 */
	public RelevancyEvaluator(ChatClient.Builder chatClientBuilder) {
		this(chatClientBuilder, null);
	}

	/**
	 * Canonical constructor, used by {@link Builder}.
	 * @param chatClientBuilder builder for the chat client; must not be {@code null}
	 * @param promptTemplate custom evaluation template, or {@code null} to use the default
	 * @throws IllegalArgumentException if {@code chatClientBuilder} is {@code null}
	 */
	private RelevancyEvaluator(ChatClient.Builder chatClientBuilder, @Nullable PromptTemplate promptTemplate) {
		Assert.notNull(chatClientBuilder, "chatClientBuilder cannot be null");
		this.chatClientBuilder = chatClientBuilder;
		this.promptTemplate = promptTemplate != null ? promptTemplate : DEFAULT_PROMPT_TEMPLATE;
	}

	/**
	 * Asks the chat model whether the response is relevant to the query given the
	 * supporting context, and maps a "yes" verdict to a passing score of 1.
	 * @param evaluationRequest the question, retrieved context and model response
	 * @return an {@link EvaluationResponse} with {@code pass=true} and score 1 only when
	 * the model's verdict contains "yes" (case-insensitive)
	 */
	@Override
	public EvaluationResponse evaluate(EvaluationRequest evaluationRequest) {
		var response = evaluationRequest.getResponseContent();
		var context = doGetSupportingData(evaluationRequest);

		// NOTE(review): Map.of rejects null values — presumably getResponseContent() and
		// the supporting data are non-null here; confirm upstream contract.
		var userMessage = this.promptTemplate
			.render(Map.of("query", evaluationRequest.getUserText(), "response", response, "context", context));

		String evaluationResponse = this.chatClientBuilder.build().prompt().user(userMessage).call().content();

		boolean passing = false;
		float score = 0;
		// The model may return null content; treat that as a failing evaluation.
		if (evaluationResponse != null && evaluationResponse.toLowerCase().contains("yes")) {
			passing = true;
			score = 1;
		}

		return new EvaluationResponse(passing, score, "", Collections.emptyMap());
	}

	/**
	 * Returns a builder for configuring a {@link RelevancyEvaluator}.
	 */
	public static Builder builder() {
		return new Builder();
	}

	/**
	 * Builder for {@link RelevancyEvaluator}.
	 */
	public static class Builder {

		private ChatClient.Builder chatClientBuilder;

		private PromptTemplate promptTemplate;

		private Builder() {
		}

		/**
		 * Sets the chat client builder used to run the evaluation. Required.
		 */
		public Builder chatClientBuilder(ChatClient.Builder chatClientBuilder) {
			this.chatClientBuilder = chatClientBuilder;
			return this;
		}

		/**
		 * Sets a custom evaluation prompt template; when {@code null}, the default
		 * template is used. The template must declare {query}, {response} and
		 * {context} placeholders.
		 */
		public Builder promptTemplate(PromptTemplate promptTemplate) {
			this.promptTemplate = promptTemplate;
			return this;
		}

		/**
		 * Builds the evaluator.
		 * @throws IllegalArgumentException if no chat client builder was provided
		 */
		public RelevancyEvaluator build() {
			return new RelevancyEvaluator(this.chatClientBuilder, this.promptTemplate);
		}

	}

}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2023-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.chat.evaluation;
18+
19+
import org.junit.jupiter.api.Test;
20+
import org.springframework.ai.chat.client.ChatClient;
21+
import org.springframework.ai.chat.model.ChatModel;
22+
23+
import static org.assertj.core.api.Assertions.assertThat;
24+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
25+
import static org.mockito.Mockito.mock;
26+
27+
/**
28+
* Unit tests for {@link RelevancyEvaluator}.
29+
*
30+
* @author Thomas Vitale
31+
*/
32+
class RelevancyEvaluatorTests {
33+
34+
@Test
35+
void whenChatClientBuilderIsNullThenThrow() {
36+
assertThatThrownBy(() -> new RelevancyEvaluator(null)).isInstanceOf(IllegalArgumentException.class)
37+
.hasMessageContaining("chatClientBuilder cannot be null");
38+
39+
assertThatThrownBy(() -> RelevancyEvaluator.builder().chatClientBuilder(null).build())
40+
.isInstanceOf(IllegalArgumentException.class)
41+
.hasMessageContaining("chatClientBuilder cannot be null");
42+
}
43+
44+
@Test
45+
void whenPromptTemplateIsNullThenUseDefault() {
46+
RelevancyEvaluator evaluator = new RelevancyEvaluator(ChatClient.builder(mock(ChatModel.class)));
47+
assertThat(evaluator).isNotNull();
48+
49+
evaluator = RelevancyEvaluator.builder()
50+
.chatClientBuilder(ChatClient.builder(mock(ChatModel.class)))
51+
.promptTemplate(null)
52+
.build();
53+
assertThat(evaluator).isNotNull();
54+
}
55+
56+
}

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/testing.adoc

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ One method to evaluate the response is to use the AI model itself for evaluation
66

77
The Spring AI interface for evaluating responses is `Evaluator`, defined as:
88

9-
10-
119
[source,java]
1210
----
1311
@FunctionalInterface
@@ -42,58 +40,88 @@ public class EvaluationRequest {
4240
* `dataList`: Contextual data, such as from Retrieval Augmented Generation, appended to the raw input.
4341
* `responseContent`: The AI model's response content as a `String`
4442
45-
== RelevancyEvaluator
43+
== Relevancy Evaluator
4644

47-
One implementation is the `RelevancyEvaluator`, which uses the AI model for evaluation. More implementations will be available in future releases.
45+
The `RelevancyEvaluator` is an implementation of the `Evaluator` interface, designed to assess the relevance of AI-generated responses against provided context. This evaluator helps assess the quality of a RAG flow by determining if the AI model's response is relevant to the user's input with respect to the retrieved context.
4846

49-
The `RelevancyEvaluator` uses the input (`userText`) and the AI model's output (`chatResponse`) to ask the question:
47+
The evaluation is based on the user input, the AI model's response, and the context information. It uses a prompt template to ask the AI model if the response is relevant to the user input and context.
5048

51-
[source, text]
49+
This is the default prompt template used by the `RelevancyEvaluator`:
50+
51+
[source,text]
5252
----
5353
Your task is to evaluate if the response for the query
54-
is in line with the context information provided.\n
55-
You have two options to answer. Either YES/ NO.\n
56-
Answer - YES, if the response for the query
57-
is in line with context information otherwise NO.\n
58-
Query: \n {query}\n
59-
Response: \n {response}\n
60-
Context: \n {context}\n
61-
Answer: "
62-
----
54+
is in line with the context information provided.
6355
64-
Here is an example of a JUnit test that performs a RAG query over a PDF document loaded into a Vector Store and then evaluates if the response is relevant to the user text.
56+
You have two options to answer. Either YES or NO.
6557
66-
[source,java]
67-
----
68-
@Test
69-
void testEvaluation() {
58+
Answer YES, if the response for the query
59+
is in line with context information otherwise NO.
7060
71-
dataController.delete();
72-
dataController.load();
61+
Query:
62+
{query}
7363
74-
String userText = "What is the purpose of Carina?";
64+
Response:
65+
{response}
7566
76-
ChatResponse response = ChatClient.builder(chatModel)
77-
.build().prompt()
78-
.advisors(new QuestionAnswerAdvisor(vectorStore))
79-
.user(userText)
80-
.call()
81-
.chatResponse();
82-
String responseContent = response.getResult().getOutput().getContent();
67+
Context:
68+
{context}
8369
84-
var relevancyEvaluator = new RelevancyEvaluator(ChatClient.builder(chatModel));
70+
Answer:
71+
----
8572

86-
EvaluationRequest evaluationRequest = new EvaluationRequest(userText,
87-
(List<Content>) response.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS), responseContent);
73+
NOTE: You can customize the prompt template by providing your own `PromptTemplate` object via the `.promptTemplate()` builder method. See xref:_custom_template[Custom Template] for details.
8874

89-
EvaluationResponse evaluationResponse = relevancyEvaluator.evaluate(evaluationRequest);
75+
== Usage in Integration Tests
9076

91-
assertTrue(evaluationResponse.isPass(), "Response is not relevant to the question");
77+
Here is an example of usage of the `RelevancyEvaluator` in an integration test, validating the result of a RAG flow using the `RetrievalAugmentationAdvisor`:
9278

79+
[source,java]
80+
----
81+
@Test
82+
void evaluateRelevancy() {
83+
String question = "Where does the adventure of Anacletus and Birba take place?";
84+
85+
RetrievalAugmentationAdvisor ragAdvisor = RetrievalAugmentationAdvisor.builder()
86+
.documentRetriever(VectorStoreDocumentRetriever.builder()
87+
.vectorStore(pgVectorStore)
88+
.build())
89+
.build();
90+
91+
ChatResponse chatResponse = ChatClient.builder(chatModel).build()
92+
.prompt(question)
93+
.advisors(ragAdvisor)
94+
.call()
95+
.chatResponse();
96+
97+
EvaluationRequest evaluationRequest = new EvaluationRequest(
98+
// The original user question
99+
question,
100+
// The retrieved context from the RAG flow
101+
chatResponse.getMetadata().get(RetrievalAugmentationAdvisor.DOCUMENT_CONTEXT),
102+
// The AI model's response
103+
chatResponse.getResult().getOutput().getText()
104+
);
105+
106+
RelevancyEvaluator evaluator = new RelevancyEvaluator(ChatClient.builder(chatModel));
107+
108+
EvaluationResponse evaluationResponse = evaluator.evaluate(evaluationRequest);
109+
110+
assertThat(evaluationResponse.isPass()).isTrue();
93111
}
94112
----
95113

96-
The code above is from the example application located https://github.com/rd-1-2022/ai-azure-rag.git[here].
114+
You can find several integration tests in the Spring AI project that use the `RelevancyEvaluator` to test the functionality of the `QuestionAnswerAdvisor` (see https://github.com/spring-projects/spring-ai/blob/main/spring-ai-integration-tests/src/test/java/org/springframework/ai/integration/tests/client/advisor/QuestionAnswerAdvisorIT.java[tests]) and `RetrievalAugmentationAdvisor` (see https://github.com/spring-projects/spring-ai/blob/main/spring-ai-integration-tests/src/test/java/org/springframework/ai/integration/tests/client/advisor/RetrievalAugmentationAdvisorIT.java[tests]).
115+
116+
=== Custom Template
117+
118+
The `RelevancyEvaluator` uses a default template to prompt the AI model for evaluation. You can customize this behavior by providing your own `PromptTemplate` object via the `.promptTemplate()` builder method.
119+
120+
The custom `PromptTemplate` can use any `TemplateRenderer` implementation (by default, it uses `StPromptTemplate` based on the https://www.stringtemplate.org/[StringTemplate] engine). The important requirement is that the template must contain the following placeholders:
121+
122+
* a `query` placeholder to receive the user question.
123+
* a `response` placeholder to receive the AI model's response.
124+
* a `context` placeholder to receive the context information.
97125

98126
== FactCheckingEvaluator
99127

0 commit comments

Comments
 (0)