add Rubric and Rubric.evaluate (#155)

brettpthomas · web-flow · commit c7e63fc4dc38 · 2025-10-27T20:35:50.000-04:00
diff --git a/setup.py b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-VERSION = "1.5.17"
+VERSION = "1.5.18"
 
 with open("requirements.txt") as f:
     requirements = f.read().splitlines()
diff --git a/surge/__init__.py b/surge/__init__.py
@@ -4,6 +4,7 @@
 from surge.tasks import Task
 from surge.teams import Team
 from surge.reports import Report
+from surge.rubrics import Rubric
 
 api_key = os.environ.get("SURGE_API_KEY", None)
 base_url = os.environ.get("SURGE_BASE_URL", "https://app.surgehq.ai/api")
diff --git a/surge/rubrics.py b/surge/rubrics.py
@@ -0,0 +1,37 @@
+from surge.api_resource import APIResource
+
+
+class Rubric(APIResource):
+    """API resource for grading text against a rubric."""
+
+    @classmethod
+    def evaluate(
+        cls,
+        text_for_grading: str,
+        rubric_text: str,
+        prompt: str = None,
+        api_key: str = None,
+    ):
+        """
+        Grade a piece of text against a rubric through the API.
+
+        Arguments:
+            text_for_grading (str): The text to grade.
+            rubric_text (str): The criteria the text is graded against.
+            prompt (str, optional): Extra grading instructions; sent only when not None.
+            api_key (str, optional): API key passed through to the request.
+
+        Returns:
+            dict: The "evaluate_rubric" response, returned unchanged:
+                - answer (bool): Whether the text meets the rubric criteria.
+                - explanation (str): An explanation of the grading decision.
+        """
+        payload = {
+            "text_for_grading": text_for_grading,
+            "rubric_text": rubric_text,
+        }
+        # Leave the key out of the payload entirely when no prompt was given.
+        if prompt is not None:
+            payload["prompt"] = prompt
+
+        return cls.post("evaluate_rubric", payload, api_key=api_key)
diff --git a/tests/test_rubrics.py b/tests/test_rubrics.py
@@ -0,0 +1,80 @@
+from unittest.mock import patch
+
+from surge.api_resource import APIResource
+from surge.rubrics import Rubric
+
+
+def test_evaluate_with_all_params():
+    """evaluate forwards every argument to post and returns post's response."""
+    with patch.object(Rubric, "post") as mock_post:
+        mock_post.return_value = {
+            "answer": True,
+            "explanation": 'The text explicitly mentions two animals: "fox" and "dog." Therefore, it contains an animal, satisfying the rubric.',
+        }
+        # Call with the optional prompt and api_key both supplied.
+        result = Rubric.evaluate(
+            text_for_grading="The quick brown fox jumps over the lazy dog",
+            rubric_text="Check if the text contains an animal",
+            prompt="Grade this text based on the rubric",
+            api_key="test_key",
+        )
+        # prompt must appear in the payload; api_key must be passed by keyword.
+        mock_post.assert_called_once_with(
+            "evaluate_rubric",
+            {
+                "text_for_grading": "The quick brown fox jumps over the lazy dog",
+                "rubric_text": "Check if the text contains an animal",
+                "prompt": "Grade this text based on the rubric",
+            },
+            api_key="test_key",
+        )
+        # Identity check: `== True` would also accept look-alikes such as 1.
+        assert result["answer"] is True
+        assert "fox" in result["explanation"] or "dog" in result["explanation"]
+
+
+def test_evaluate_without_prompt():
+    """evaluate omits the prompt key and passes api_key=None when both are unset."""
+    with patch.object(Rubric, "post") as mock_post:
+        mock_post.return_value = {
+            "answer": False,
+            "explanation": "The text does not contain any animals.",
+        }
+        # Only the two required arguments are supplied.
+        result = Rubric.evaluate(
+            text_for_grading="The quick brown car drives down the road",
+            rubric_text="Check if the text contains an animal",
+        )
+        # The payload must have no "prompt" key at all, not a None-valued one.
+        mock_post.assert_called_once_with(
+            "evaluate_rubric",
+            {
+                "text_for_grading": "The quick brown car drives down the road",
+                "rubric_text": "Check if the text contains an animal",
+            },
+            api_key=None,
+        )
+        # Identity check: `== False` would also accept look-alikes such as 0.
+        assert result["answer"] is False
+        assert "explanation" in result
+
+
+def test_evaluate_returns_dict():
+    """evaluate returns a dict holding a bool answer and a str explanation."""
+    # Canned response handed back by the patched Rubric.post.
+    canned = {"answer": True, "explanation": "Test explanation"}
+    with patch.object(Rubric, "post", return_value=canned):
+        result = Rubric.evaluate(
+            text_for_grading="Sample text", rubric_text="Sample rubric"
+        )
+
+    # Container type first, then presence and type of each expected key.
+    assert isinstance(result, dict)
+    for key, expected_type in (("answer", bool), ("explanation", str)):
+        assert key in result
+        assert isinstance(result[key], expected_type)
+
+
+def test_rubric_inherits_from_api_resource():
+    """Rubric must subclass APIResource; evaluate() calls the inherited post()."""
+    assert issubclass(Rubric, APIResource)