Commit 515bd41

Handle sync guardrail calls to avoid awaitable error (#21)
1 parent a251298 commit 515bd41

5 files changed: +110 -15 lines

src/guardrails/checks/text/hallucination_detection.py

Lines changed: 4 additions & 6 deletions
```diff
@@ -52,10 +52,7 @@
 from guardrails.spec import GuardrailSpecMetadata
 from guardrails.types import GuardrailLLMContextProto, GuardrailResult

-from .llm_base import (
-    LLMConfig,
-    LLMOutput,
-)
+from .llm_base import LLMConfig, LLMOutput, _invoke_openai_callable

 logger = logging.getLogger(__name__)

@@ -210,9 +207,10 @@ async def hallucination_detection(
     validation_query = f"{VALIDATION_PROMPT}\n\nText to validate:\n{candidate}"

     # Use the Responses API with file search and structured output
-    response = await ctx.guardrail_llm.responses.parse(
-        model=config.model,
+    response = await _invoke_openai_callable(
+        ctx.guardrail_llm.responses.parse,
         input=validation_query,
+        model=config.model,
         text_format=HallucinationDetectionOutput,
         tools=[{"type": "file_search", "vector_store_ids": [config.knowledge_source]}],
     )
```
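The failure this commit addresses: when `ctx.guardrail_llm` is a synchronous client, `responses.parse` returns a plain response object rather than a coroutine, and `await`-ing it raises a `TypeError`. A minimal reproduction of that awaitable error, independent of the SDK (the `parse` stub below is hypothetical):

```python
import asyncio
from types import SimpleNamespace


def parse(**kwargs: object) -> SimpleNamespace:
    """Stand-in for a sync client's responses.parse: returns a plain object."""
    return SimpleNamespace(output_parsed="ok")


async def main() -> None:
    try:
        await parse(input="text")  # what the old code effectively did
    except TypeError as exc:
        # TypeError: object SimpleNamespace can't be used in 'await' expression
        print(exc)


asyncio.run(main())
```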

src/guardrails/checks/text/llm_base.py

Lines changed: 53 additions & 6 deletions
```diff
@@ -31,12 +31,16 @@ class MyLLMOutput(LLMOutput):

 from __future__ import annotations

+import asyncio
+import functools
+import inspect
 import json
 import logging
 import textwrap
-from typing import TYPE_CHECKING, TypeVar
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, TypeVar

-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel, ConfigDict, Field

 from guardrails.registry import default_spec_registry
@@ -45,7 +49,13 @@ class MyLLMOutput(LLMOutput):
 from guardrails.utils.output import OutputSchema

 if TYPE_CHECKING:
-    from openai import AsyncOpenAI
+    from openai import AsyncAzureOpenAI, AzureOpenAI  # type: ignore[unused-import]
+else:
+    try:
+        from openai import AsyncAzureOpenAI, AzureOpenAI  # type: ignore
+    except Exception:  # pragma: no cover - optional dependency
+        AsyncAzureOpenAI = object  # type: ignore[assignment]
+        AzureOpenAI = object  # type: ignore[assignment]

 logger = logging.getLogger(__name__)

@@ -165,10 +175,46 @@ def _strip_json_code_fence(text: str) -> str:
     return candidate


+async def _invoke_openai_callable(
+    method: Callable[..., Any],
+    /,
+    *args: Any,
+    **kwargs: Any,
+) -> Any:
+    """Invoke OpenAI SDK methods that may be sync or async."""
+    if inspect.iscoroutinefunction(method):
+        return await method(*args, **kwargs)
+
+    loop = asyncio.get_running_loop()
+    result = await loop.run_in_executor(
+        None,
+        functools.partial(method, *args, **kwargs),
+    )
+    if inspect.isawaitable(result):
+        return await result
+    return result
+
+
+async def _request_chat_completion(
+    client: AsyncOpenAI | OpenAI | AsyncAzureOpenAI | AzureOpenAI,
+    *,
+    messages: list[dict[str, str]],
+    model: str,
+    response_format: dict[str, Any],
+) -> Any:
+    """Invoke chat.completions.create on sync or async OpenAI clients."""
+    return await _invoke_openai_callable(
+        client.chat.completions.create,
+        messages=messages,
+        model=model,
+        response_format=response_format,
+    )
+
+
 async def run_llm(
     text: str,
     system_prompt: str,
-    client: AsyncOpenAI,
+    client: AsyncOpenAI | OpenAI | AsyncAzureOpenAI | AzureOpenAI,
     model: str,
     output_model: type[LLMOutput],
 ) -> LLMOutput:
@@ -180,7 +226,7 @@ async def run_llm(
     Args:
         text (str): Text to analyze.
         system_prompt (str): Prompt instructions for the LLM.
-        client (AsyncOpenAI): OpenAI client for LLM inference.
+        client (AsyncOpenAI | OpenAI | AsyncAzureOpenAI | AzureOpenAI): OpenAI client used for guardrails.
         model (str): Identifier for which LLM model to use.
         output_model (type[LLMOutput]): Model for parsing and validating the LLM's response.

@@ -190,7 +236,8 @@ async def run_llm(
     full_prompt = _build_full_prompt(system_prompt)

     try:
-        response = await client.chat.completions.create(
+        response = await _request_chat_completion(
+            client=client,
             messages=[
                 {"role": "system", "content": full_prompt},
                 {"role": "user", "content": f"# Text\n\n{text}"},
```

src/guardrails/checks/text/prompt_injection_detection.py

Lines changed: 4 additions & 3 deletions
```diff
@@ -36,7 +36,7 @@
 from guardrails.spec import GuardrailSpecMetadata
 from guardrails.types import GuardrailLLMContextProto, GuardrailResult

-from .llm_base import LLMConfig, LLMOutput
+from .llm_base import LLMConfig, LLMOutput, _invoke_openai_callable

 __all__ = ["prompt_injection_detection", "PromptInjectionDetectionOutput"]

@@ -373,9 +373,10 @@ def _create_skip_result(

 async def _call_prompt_injection_detection_llm(ctx: GuardrailLLMContextProto, prompt: str, config: LLMConfig) -> PromptInjectionDetectionOutput:
     """Call LLM for prompt injection detection analysis."""
-    parsed_response = await ctx.guardrail_llm.responses.parse(
-        model=config.model,
+    parsed_response = await _invoke_openai_callable(
+        ctx.guardrail_llm.responses.parse,
         input=prompt,
+        model=config.model,
         text_format=PromptInjectionDetectionOutput,
     )
     return parsed_response.output_parsed
```

tests/unit/checks/test_llm_base.py

Lines changed: 31 additions & 0 deletions
```diff
@@ -34,6 +34,20 @@ def __init__(self, content: str | None) -> None:
         self.chat = SimpleNamespace(completions=_FakeCompletions(content))


+class _FakeSyncCompletions:
+    def __init__(self, content: str | None) -> None:
+        self._content = content
+
+    def create(self, **kwargs: Any) -> Any:
+        _ = kwargs
+        return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=self._content))])
+
+
+class _FakeSyncClient:
+    def __init__(self, content: str | None) -> None:
+        self.chat = SimpleNamespace(completions=_FakeSyncCompletions(content))
+
+
 def test_strip_json_code_fence_removes_wrapping() -> None:
     """Valid JSON code fences should be removed."""
     fenced = """```json
@@ -64,6 +78,23 @@ async def test_run_llm_returns_valid_output() -> None:
     assert result.flagged is True and result.confidence == 0.9  # noqa: S101


+@pytest.mark.asyncio
+async def test_run_llm_supports_sync_clients() -> None:
+    """run_llm should invoke synchronous clients without awaiting them."""
+    client = _FakeSyncClient('{"flagged": false, "confidence": 0.25}')
+
+    result = await run_llm(
+        text="General text",
+        system_prompt="Assess text.",
+        client=client,  # type: ignore[arg-type]
+        model="gpt-test",
+        output_model=LLMOutput,
+    )
+
+    assert isinstance(result, LLMOutput)  # noqa: S101
+    assert result.flagged is False and result.confidence == 0.25  # noqa: S101
+
+
 @pytest.mark.asyncio
 async def test_run_llm_handles_content_filter_error(monkeypatch: pytest.MonkeyPatch) -> None:
     """Content filter errors should return LLMErrorOutput with flagged=True."""
```

tests/unit/checks/test_prompt_injection_detection.py

Lines changed: 18 additions & 0 deletions
```diff
@@ -147,3 +147,21 @@ async def failing_llm(*_args: Any, **_kwargs: Any) -> PromptInjectionDetectionOutput:

     assert result.tripwire_triggered is False  # noqa: S101
     assert "Error during prompt injection detection check" in result.info["observation"]  # noqa: S101
+
+
+@pytest.mark.asyncio
+async def test_prompt_injection_detection_llm_supports_sync_responses() -> None:
+    """Underlying responses.parse may be synchronous for some clients."""
+    analysis = PromptInjectionDetectionOutput(flagged=True, confidence=0.4, observation="Action summary")
+
+    class _SyncResponses:
+        def parse(self, **kwargs: Any) -> Any:
+            _ = kwargs
+            return SimpleNamespace(output_parsed=analysis)
+
+    context = SimpleNamespace(guardrail_llm=SimpleNamespace(responses=_SyncResponses()))
+    config = LLMConfig(model="gpt-test", confidence_threshold=0.5)
+
+    parsed = await pid_module._call_prompt_injection_detection_llm(context, "prompt", config)
+
+    assert parsed is analysis  # noqa: S101
```
