AI Predicates
AI Predicates are AI-powered comparison functions that evaluate inputs based on complex properties like semantic meaning or style matching.
Some other libraries call similar functions “LLM-as-a-Judge”.
This is a premium feature that requires a Merit API key.
Using AI Predicates enables:
  • Asserting on semantic properties like factual consistency and topic coverage
  • Asserting on formatting properties like style and layout matching
  • Asserting on behavioral properties like policy following

Basic Usage

Merit provides 8 built-in AI predicates for common LLM evaluation scenarios. All predicates are async functions that return PredicateResult objects with boolean values, confidence scores, and explanatory messages.
import merit
from merit.predicates import has_unsupported_facts, follows_policy

async def merit_customer_faq_bot(faq_bot):
    """Exercise the FAQ bot against its knowledge base.

    Verifies the generated answer is grounded in the provided
    context and that it follows the customer-service policy.
    """
    # Context the bot is allowed to draw facts from.
    store_info = """
    Our store hours are 9 AM to 6 PM, Monday through Saturday.
    We're closed on Sundays and major holidays.
    Free shipping on orders over $50.
    """

    # Ask a customer question; the bot answers from the context.
    # Example output:
    # "We're open Monday through Saturday, 9 AM to 6 PM.
    # We're closed Sundays and holidays."
    answer = faq_bot.answer("When are you open?", context=store_info)

    # The answer must not invent facts beyond the knowledge base.
    assert not await has_unsupported_facts(answer, store_info)

    # The answer must also follow the service guidelines below.
    guidelines = """
    Agent always asks if they can help with
    any other questions.
    """
    assert await follows_policy(answer, guidelines)

Factual Accuracy

has_conflicting_facts

Detects when generated text contradicts source material.
from merit.predicates import has_conflicting_facts

async def merit_rag_no_contradictions(rag_system):
    """Assert a RAG answer never contradicts its source document."""
    # Retrieved document the answer must stay consistent with.
    company_doc = """
    Acme Corp was founded in 2018.
    The company has 150 employees and is headquartered
    in Austin, Texas. Revenue was $12M in 2023.
    """

    # Ask the RAG system about the company.
    generated = rag_system.query("Tell me about Acme Corp")

    # Passes while the generated answer contradicts nothing above.
    assert not await has_conflicting_facts(generated, company_doc)

    # A failing example:
    # "Acme Corp was founded in 2015 in San Francisco..."
    # contradicts both the founding year and the location.

has_unsupported_facts

Catches hallucinations - facts the LLM invented that aren’t grounded in source material.
from merit.predicates import has_unsupported_facts

async def merit_no_hallucinations(rag_system):
    """Assert a RAG answer contains only facts grounded in its source."""
    # The knowledge base holds exactly this information.
    grounding = """
    Python 3.12 was released in October 2023.
    It introduced f-string improvements.
    """

    # Example output:
    # "Python 3.12 came out in October 2023
    # with better f-strings."
    reply = rag_system.query("What's new in Python 3.12?")

    # Passes: every claim in the reply appears in the source.
    assert not await has_unsupported_facts(reply, grounding)

    # A failing example:
    # "Python 3.12 released October 2023 with
    # f-string improvements and a new JIT compiler
    # for 2x faster performance."
    # The JIT-compiler claim is not in the source — hallucinated.

has_facts

Verifies that required information appears in the output. Use when certain facts must be mentioned.
from merit.predicates import has_facts

async def merit_includes_required_info(medical_summary_bot):
    """Assert the medical summary mentions every critical fact."""
    # Clinical notes that the bot condenses.
    patient_notes = """
    Patient: John Doe, 45M
    Chief complaint: Chest pain for 2 hours
    Vitals: BP 150/95, HR 88, O2 98%
    Assessment: Rule out MI, start workup
    """

    # Example:
    # "45-year-old male presenting with 2-hour
    # chest pain. Elevated BP at 150/95.
    # Cardiac workup initiated."
    condensed = medical_summary_bot.summarize(patient_notes)

    # Facts that any acceptable summary must include.
    must_mention = """
    chest pain, elevated blood pressure,
    cardiac workup
    """
    assert await has_facts(condensed, must_mention)

matches_facts

Checks bidirectional factual equivalence - both texts convey the same information.
from merit.predicates import matches_facts

async def merit_translation_preserves_meaning(translator):
    """Round-trip a text through translation and compare the facts."""
    source_text = """
    The quarterly report shows 23% growth in
    European markets.
    """

    # English -> Spanish -> English round trip.
    spanish = translator.translate(source_text, target="spanish")
    round_tripped = translator.translate(spanish, target="english")
    # Example:
    # "The quarterly report indicates 23% growth
    # in European markets."

    # Wording may drift, but the facts must survive the round trip.
    assert await matches_facts(round_tripped, source_text)

Topic Coverage

has_topics

Verifies output covers required subjects. Useful for content generation where specific themes must be addressed.
from merit.predicates import has_topics

async def merit_onboarding_covers_topics(onboarding_bot):
    """Assert the benefits answer touches every required topic."""
    # A new employee asks about benefits.
    # Example:
    # "Welcome! Your benefits include comprehensive
    # health insurance with dental and vision,
    # a 401k with 4% company match, and 20 days PTO.
    # You're also eligible for our annual bonus
    # program."
    reply = onboarding_bot.chat("What benefits do I get?")

    # Themes the reply is required to cover.
    required_topics = """
    health insurance, retirement plan,
    paid time off
    """
    assert await has_topics(reply, required_topics)

Policy Compliance

follows_policy

Ensures LLM outputs adhere to business rules, safety guidelines, or content policies.
from merit.predicates import follows_policy

async def merit_support_follows_guidelines(support_bot):
    """Assert the support bot stays within the comparison policy."""
    # A customer probes for a competitor comparison.
    prompt = "Is your product better than CompetitorX?"
    reply = support_bot.chat(prompt)
    # Example:
    # "I'd be happy to tell you about our product's
    # strengths! We offer 24/7 support, 99.9% uptime,
    # and flexible pricing. I can't compare directly
    # to other products, but I can answer any
    # questions about what we offer."

    # Business rules the reply must respect.
    guardrails = """
    - Never disparage competitors by name
    - Focus on our product's strengths, not competitor weaknesses
    - Don't make claims about competitor products
    - Redirect to our features when asked for comparisons
    """

    assert await follows_policy(reply, guardrails)

Style and Structure

matches_writing_style

Validates tone, formality, and voice match a reference example.
from merit.predicates import matches_writing_style

async def merit_maintains_brand_voice(marketing_bot):
    """Assert generated copy matches the brand's voice reference."""
    # Example:
    # "Meet the CloudRunner Pro. Engineered for
    # the long haul. 47% lighter than last gen.
    # Zero compromises."
    copy_text = marketing_bot.generate("Describe our new running shoes")

    # Reference sample of the brand voice: punchy, confident, minimal.
    voice_sample = """
    Built different. The UltraFrame bike handles
    like nothing else. Carbon fiber.
    Precision engineering. Pure speed.
    """

    assert await matches_writing_style(copy_text, voice_sample)

    # A failing example:
    # "Our new running shoes are very comfortable
    # and lightweight, offering great support for
    # runners of all levels..."
    # (too generic and wordy for this brand voice)

matches_writing_layout

Checks document structure and formatting patterns match a template.
from merit.predicates import matches_writing_layout

async def merit_follows_report_structure(report_generator):
    """Verify the weekly report matches the expected section layout.

    Fix: the original example passed an undefined name ``metrics`` to
    ``create_weekly_report``; sample data is now defined locally so the
    snippet is self-contained and runnable.
    """
    # Sample metrics for the report (was previously an undefined name).
    metrics = {"revenue_growth": 0.12, "churn_change": -0.03}

    report = report_generator.create_weekly_report(
        data=metrics
    )
    # Example output:
    # "## Weekly Summary
    #
    # Key metrics improved across the board.
    #
    # ## Highlights
    # - Revenue up 12%
    # - Churn down 3%
    #
    # ## Action Items
    # 1. Review pricing
    # 2. Update dashboard"

    # Template showing expected structure
    template = """
    ## Weekly Summary
    [Overview paragraph]

    ## Highlights
    - [Bullet points]

    ## Action Items
    1. [Numbered list]
    """

    assert await matches_writing_layout(report, template)

Saving Results for Investigation

When you run merits with database persistence enabled (default behavior), all AI predicate evaluations used inside assert statements are automatically saved to the Merit database. This enables post-run analysis, debugging, and quality monitoring. Every PredicateResult evaluated in an assertion is stored with full context. After the run completes, you can investigate these evaluations even if all tests passed.

Available Data for Analysis

The database stores comprehensive information for each predicate evaluation: Per-predicate data:
  • predicate_name: Which predicate function was used (e.g., “has_conflicting_facts”)
  • actual: The full text that was evaluated
  • reference: The reference text used for comparison
  • strict: Whether strict mode was enabled
  • confidence: The AI judge’s confidence score (0.0 to 1.0)
  • value: Boolean result (True/False)
  • message: The AI’s reasoning and explanation
Linkage to test context:
  • Which test execution it came from
  • Which assertion it was part of
  • Associated run ID for filtering by test session
Database persistence is controlled by the --save-to-db flag (enabled by default). Database location defaults to .merit/merit.db in your project root.

Building Custom Predicates

While Merit provides 8 built-in AI predicates, you can create custom predicates for domain-specific comparisons or integrate third-party LLM evaluation tools. Use the @predicate decorator to ensure your custom predicates integrate seamlessly with Merit’s assertion tracking and database persistence.

Protocol Conformance Requirements

The @predicate decorator transforms ordinary comparison functions into protocol-conforming predicates. To be eligible for decoration, your function must satisfy the Predicate protocol’s signature constraints: Signature Requirements:
  1. Return type: Must return bool representing the evaluation outcome
  2. Required parameters: Must accept actual and reference as either:
    • The first two positional parameters, or
    • Named keyword parameters (actual=, reference=)
  3. Execution model: Can be synchronous or asynchronous—the decorator adapts to both def and async def functions
  4. Additional parameters: May accept optional keyword arguments (e.g., strict, tolerance, domain-specific flags)

Custom Predicate Examples

Example: Integrating third-party LLM judge
from merit import predicate
from openai import AsyncOpenAI

client = AsyncOpenAI()

@predicate
async def matches_tone_with_gpt4(
    actual: str,
    reference: str,
    *,
    strict: bool = False
) -> bool:
    """Check if actual text matches the tone of reference using GPT-4.

    Args:
        actual: The text to evaluate
        reference: Example text with desired tone
        strict: Whether to require exact tone match or allow similar tones

    Returns:
        True when the judge model answers affirmatively.
    """
    prompt = f"""Compare the tone of these two texts.

    Reference tone: {reference}

    Text to evaluate: {actual}

    Does the text to evaluate match the reference tone?
    {"Require exact match." if strict else "Allow similar tones."}

    Answer only 'yes' or 'no'."""

    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )

    # Guard against a missing message body (content can be None), and
    # tolerate trailing punctuation such as "Yes." that would break the
    # original strict equality check; "no" never starts with "yes".
    answer = (response.choices[0].message.content or "").strip().lower()
    return answer.startswith("yes")

async def merit_brand_consistency(marketing_bot):
    """Assert a generated post keeps the brand's casual tone."""
    tone_reference = "Hey there! 👋 Let's make this happen together."
    post = marketing_bot.create_post("Announce new feature")

    # Lenient comparison: similar tones are acceptable.
    assert await matches_tone_with_gpt4(post, tone_reference, strict=False)
When building custom predicates, follow the naming convention of starting with action verbs like has_, matches_, follows_, or contains_ to make assertions read naturally.

Recommendations

1. Use AI predicates for natural language assertions

AI predicates shine when evaluating LLM outputs where exact string matching is too brittle. Don’t do this:
# Using AI predicates for exact matching
from merit.predicates import has_facts

async def merit_json_output(api):
    """Anti-example: an AI predicate misused on structured data."""
    user = api.get_user(id=123)

    # A semantic judge is overkill (and flaky) for exact JSON fields.
    assert await has_facts(
        str(user),
        '{"name": "Alice"}'
    )
Do this:
# Use standard assertions for structured data
def merit_json_output(api):
    """Preferred: plain equality assertions for structured fields."""
    user = api.get_user(id=123)
    assert user["name"] == "Alice"
    assert user["id"] == 123

# Use semantic predicates for natural language
from merit.predicates import (
    has_facts,
    has_unsupported_facts
)

async def merit_text_generation(llm):
    """Semantic checks suit free-form language where wording varies."""
    source = "The company was founded in 2020."
    condensed = llm.summarize(source)

    # The founding year must appear, and nothing new may be invented.
    assert await has_facts(condensed, "founded in 2020")
    assert not await has_unsupported_facts(condensed, source)

2. Combine multiple predicates for comprehensive validation

Layer semantic checks to validate different aspects of LLM outputs. This provides stronger guarantees than single assertions.
from merit.predicates import (
    has_unsupported_facts,
    has_conflicting_facts,
    has_topics,
    follows_policy
)

async def merit_product_description(product_copilot):
    """Layered semantic validation of generated product copy."""
    # Canonical product record the copy must agree with.
    catalog_entry = """
    Name: ThermoPro X500
    Price: $299
    Features: Temperature sensing, WiFi connectivity,
              Mobile app
    Warranty: 2 years
    """

    copy_text = product_copilot.generate_description(catalog_entry)
    # Example output:
    # "The ThermoPro X500 ($299) brings smart
    # temperature monitoring to your home.
    # Connect via WiFi, control from our mobile app,
    # and enjoy peace of mind with a 2-year warranty."

    # Layer 1: nothing invented beyond the catalog entry.
    assert not await has_unsupported_facts(copy_text, catalog_entry)

    # Layer 2: nothing that contradicts the price or warranty.
    assert not await has_conflicting_facts(copy_text, catalog_entry)

    # Layer 3: the key selling points all get a mention.
    assert await has_topics(copy_text, "WiFi, mobile app, warranty")

    # Layer 4: copy stays within the marketing guidelines.
    marketing_policy = """
    No superlatives like 'best' or 'revolutionary'.
    No competitor mentions.
    """
    assert await follows_policy(copy_text, marketing_policy)

3. Use strict mode appropriately

The strict parameter controls comparison sensitivity. Use strict=False (default) for semantic flexibility, and strict=True when precision matters.
from merit.predicates import has_facts

async def merit_financial_report(report_bot):
    """Show lenient vs. strict matching on a financial summary."""
    # Numbers must not drift, so precision matters here.
    q3_figures = """
    Q3 revenue: $4.2M. Operating margin: 23.5%.
    Headcount: 142.
    """

    summary = report_bot.summarize(q3_figures)
    # Example:
    # "Third quarter brought in $4.2M revenue
    # with healthy 23.5% margins. Team size stable
    # at 142 employees."

    # Lenient: "brought in $4.2M" is accepted as a
    # semantic match for "$4.2M revenue".
    assert await has_facts(summary, "revenue was $4.2M", strict=False)

    # Strict: exact figures must appear verbatim;
    # "around 24%" would be rejected.
    assert await has_facts(summary, "23.5%", strict=True)
    assert await has_facts(summary, "142", strict=True)