Source code for pydantic_ai_toolsets.toolsets.self_refine.toolset
"""Self-refinement toolset for pydantic-ai agents."""
from __future__ import annotations
import sys
import time
import uuid
from typing import Any
from pydantic_ai import Agent
from pydantic_ai.toolsets import FunctionToolset
from .storage import SelfRefineStorage, SelfRefineStorageProtocol
from .types import (
Feedback,
GenerateOutputItem,
ProvideFeedbackItem,
RefinementOutput,
RefineOutputItem,
)
# =============================================================================
# SYSTEM PROMPT - Contains "when and why" to use the toolset
# =============================================================================
SELF_REFINE_SYSTEM_PROMPT = """
## Self-Refinement
You have access to tools for improving outputs through iterative self-refinement:
- `read_refinement_state`: Read current refinement state
- `generate_output`: Create initial output (iteration 0)
- `provide_feedback`: Provide structured, actionable feedback
- `refine_output`: Generate improved version based on feedback
- `get_best_output`: Find the best refined output
### When to Use Self-Refinement
Use these tools in these scenarios:
1. Tasks requiring high-quality, polished outputs
2. Problems where initial solutions may have flaws
3. Situations where iterative improvement is valuable
4. Tasks where structured feedback helps identify issues
5. Problems where multiple refinement cycles improve results
6. When you need to meet specific quality thresholds
### Self-Refinement Process
1. **Generate**: Create initial output (iteration 0)
- Set quality_threshold if you have a target quality level
- Set iteration_limit if you want to cap refinement cycles (typically 2-3)
2. **Feedback**: Provide detailed, actionable feedback on the output
- Use structured feedback types: additive, subtractive, transformative, corrective
- Evaluate multiple dimensions: factuality, coherence, completeness, style
- Prioritize feedback (higher priority = more important)
- Indicate if refinement should continue
3. **Refine**: Generate improved version incorporating the feedback
- Address all feedback, especially high-priority items
- Provide quality score to track improvement
- Mark as final if quality threshold is met or no further improvement needed
4. **Repeat**: Continue feedback-refine cycle until:
- Quality threshold is met (quality_score >= quality_threshold)
- Iteration limit is reached (iteration >= iteration_limit)
- Feedback indicates no further improvements needed
5. **Select**: Choose the best refined output
### Feedback Types
- **Additive**: Missing information about X, should include Y
- **Subtractive**: Remove redundant section Z
- **Transformative**: Restructure argument to lead with conclusion
- **Corrective**: Fix factual error in paragraph 3
### Feedback Dimensions
- **Factuality**: Accuracy and correctness of information
- **Coherence**: Logical flow and consistency
- **Completeness**: All necessary information included
- **Style**: Writing style and clarity
### Key Principles
- **Structured Feedback**: Use feedback types and dimensions systematically
- **Actionable Suggestions**: Feedback should be specific enough to guide improvement
- **Weighted Feedback**: Prioritize certain aspects (e.g., correctness > style)
- **Iterative Convergence**: Quality typically improves most in first 2-3 iterations
- **Quality Tracking**: Use quality scores to track improvement and compare against thresholds
### Workflow
1. Call `read_refinement_state` to see current state
2. Generate initial output using `generate_output` (if none exists)
3. Provide feedback using `provide_feedback`
4. Refine using `refine_output` based on feedback
5. Repeat steps 3-4 until satisfied or limits reached
6. Use `get_best_output` for final result
**IMPORTANT**: Always call `read_refinement_state` before generating, providing feedback, or refining.
"""
# =============================================================================
# TOOL DESCRIPTIONS - Contains "how" to use each specific tool
# =============================================================================
READ_REFINEMENT_STATE_DESCRIPTION = """Read the current self-refinement state.
Returns all outputs organized by iteration with feedbacks and refinement chains.
Precondition: Call before every generate_output, provide_feedback, or refine_output.
"""
GENERATE_OUTPUT_DESCRIPTION = """Generate an initial output (iteration 0).
Parameters:
- content: Initial output content
- quality_threshold: Optional target quality (0-100)
- iteration_limit: Optional max iterations (typically 2-3)
Returns output ID and iteration info.
Precondition: Call read_refinement_state first.
"""
PROVIDE_FEEDBACK_DESCRIPTION = """Provide structured, actionable feedback on an output.
Parameters:
- output_id: Output to provide feedback on
- feedback_type: additive, subtractive, transformative, or corrective
- dimension: factuality, coherence, completeness, or style
- feedback_text: Specific, actionable feedback
- priority: Priority level (higher = more important)
- should_continue_refining: Whether to continue refinement
Returns feedback ID and summary.
Precondition: Call read_refinement_state first.
"""
REFINE_OUTPUT_DESCRIPTION = """Generate improved version based on feedback.
Parameters:
- output_id: Output to refine (must have feedback)
- refined_content: Improved content addressing feedback
- quality_score: Quality score (0-100) to track improvement
- is_final: True if quality threshold met or no further improvement needed
Returns refined output ID at iteration+1.
Precondition: Call read_refinement_state first.
"""
GET_BEST_OUTPUT_DESCRIPTION = """Find the best refined output.
Returns highest-quality output (prefers final, then highest score/iteration).
Precondition: Call read_refinement_state first.
"""
# Legacy constant for backward compatibility
SELF_REFINE_TOOL_DESCRIPTION = GENERATE_OUTPUT_DESCRIPTION
READ_REFINEMENT_STATE_DESCRIPTION = """
Read the current self-refinement state.
**CRITICAL**: Call this BEFORE every generate_output, provide_feedback, or refine_output call to:
- Review the current refinement state
- See which outputs exist and their refinement iterations
- Understand feedback and identified areas for improvement
- Track quality scores and thresholds
- Know iteration limits and current iteration counts
- Make informed decisions about next steps
Returns:
- All outputs with their content, iterations, and quality scores
- All feedback with types, dimensions, and suggestions
- Output refinement chain (parent-child relationships)
- Summary statistics (total outputs, iterations, final outputs, quality tracking)
"""
GENERATE_OUTPUT_DESCRIPTION = """
Generate an initial output (iteration 0).
Use this tool to create your first attempt at solving the problem or answering the question.
This initial output will be the starting point for feedback and refinement.
**CRITICAL**: Call read_refinement_state first to see existing outputs.
When generating outputs:
- This should be your initial attempt (iteration 0)
- Don't worry about perfection - you'll refine it later
- Focus on generating a complete response
- Optionally set quality_threshold if you have a target quality level
- Optionally set iteration_limit if you want to cap refinement cycles (typically 2-3)
- The output will receive feedback and be refined in subsequent steps
"""
PROVIDE_FEEDBACK_DESCRIPTION = """
Provide structured, actionable feedback on an output.
Use this tool to analyze an output systematically and provide detailed feedback
with specific types and dimensions to guide refinement.
**CRITICAL**: Call read_refinement_state first to see which outputs exist.
When providing feedback:
- Use structured feedback types:
- **Additive**: Missing information about X, should include Y
- **Subtractive**: Remove redundant section Z
- **Transformative**: Restructure argument to lead with conclusion
- **Corrective**: Fix factual error in paragraph 3
- Evaluate multiple dimensions:
- **Factuality**: Accuracy and correctness
- **Coherence**: Logical flow and consistency
- **Completeness**: All necessary information included
- **Style**: Writing style and clarity
- Prioritize feedback (higher priority = more important)
- Typically prioritize: correctness > completeness > coherence > style
- Be specific and actionable in suggestions
- Provide overall assessment
- Indicate if refinement should continue
The feedback will guide the refinement process, with high-priority items addressed first.
"""
REFINE_OUTPUT_DESCRIPTION = """
Generate an improved version of an output based on feedback.
Use this tool to create a refined output that addresses the feedback provided.
The refined output should incorporate all suggestions, especially high-priority ones.
**CRITICAL**: Call read_refinement_state first to see outputs and feedback.
When refining:
- Address ALL feedback, especially high-priority items
- Incorporate improvement suggestions systematically
- The refined output will be at iteration+1 (next refinement iteration)
- Provide quality score to track improvement
- Compare quality_score against quality_threshold if set
- Check if iteration_limit has been reached
- Mark as final if:
- Quality threshold is met (quality_score >= quality_threshold)
- Iteration limit is reached (iteration >= iteration_limit)
- You believe no further refinement is needed
Refinement process:
1. Review the output being refined
2. Review all feedback and identified areas for improvement
3. Prioritize high-priority feedback items
4. Generate improved version addressing all feedback
5. Provide quality score
6. Mark as final if thresholds are met or satisfactory
"""
GET_BEST_OUTPUT_DESCRIPTION = """
Find the best refined output.
Use this tool to identify the highest-quality output, typically the most refined version
or the one marked as final.
**CRITICAL**: Call read_refinement_state first to see all outputs.
Selection criteria:
- Prefer outputs marked as final
- Consider quality scores if available
- Consider refinement iteration (higher iterations may be better)
- Consider quality threshold achievement
- Return the output that best addresses the problem
Returns:
- Best output content
- Output metadata (iteration, quality score, etc.)
- Refinement chain showing how it was improved
"""
[docs]
def create_self_refine_toolset(
storage: SelfRefineStorageProtocol | None = None,
*,
id: str | None = None,
track_usage: bool = False,
) -> FunctionToolset[Any]:
"""Create a self-refinement toolset for iterative output improvement.
This toolset provides tools for AI agents to improve outputs through structured feedback
and refinement cycles, with support for quality thresholds and iteration limits.
Args:
storage: Optional storage backend. Defaults to in-memory SelfRefineStorage.
You can provide a custom storage implementing SelfRefineStorageProtocol
for persistence or integration with other systems.
id: Optional unique ID for the toolset.
Returns:
FunctionToolset compatible with any pydantic-ai agent.
Example (standalone):
```python
from pydantic_ai import Agent
from pydantic_ai_toolsets import create_self_refine_toolset
agent = Agent("openai:gpt-4.1", toolsets=[create_self_refine_toolset()])
result = await agent.run("Solve this problem using self-refinement")
```
Example (with custom storage):
```python
from pydantic_ai_toolsets import create_self_refine_toolset, SelfRefineStorage
storage = SelfRefineStorage()
toolset = create_self_refine_toolset(storage=storage)
# After agent runs, access outputs and feedbacks directly
print(storage.outputs)
print(storage.feedbacks)
```
"""
if storage is not None:
_storage = storage
else:
_storage = SelfRefineStorage(track_usage=track_usage)
toolset: FunctionToolset[Any] = FunctionToolset(id=id)
_metrics = getattr(_storage, "metrics", None) if hasattr(_storage, "metrics") else None
def _get_status_summary() -> str:
"""Get one-line status summary."""
if not _storage.outputs:
return "Status: ○ Empty"
max_iter = max((o.iteration for o in _storage.outputs.values()), default=0)
final_outputs = sum(1 for o in _storage.outputs.values() if o.is_final)
# Check for iteration limit
limit = next((o.iteration_limit for o in _storage.outputs.values() if o.iteration_limit), None)
limit_str = f"/{limit}" if limit else ""
# Check for quality threshold
threshold_met = sum(
1 for o in _storage.outputs.values()
if o.quality_threshold and o.quality_score and o.quality_score >= o.quality_threshold
)
if final_outputs > 0 or threshold_met > 0:
return f"Status: ✓ Complete | Iteration {max_iter}{limit_str}"
return f"Status: ● Active | Iteration {max_iter}{limit_str}"
def _get_next_hint() -> str:
"""Get contextual hint for next action."""
if not _storage.outputs:
return "Use generate_output to create your initial output."
final_outputs = sum(1 for o in _storage.outputs.values() if o.is_final)
if final_outputs > 0:
return "Refinement complete. Use get_best_output to retrieve the final result."
# Find outputs without feedback
outputs_with_feedback = {f.output_id for f in _storage.feedbacks.values()}
unfeedback = [o for o in _storage.outputs.values() if o.output_id not in outputs_with_feedback and not o.is_final]
if unfeedback:
return f"Use provide_feedback on [{unfeedback[0].output_id}] to identify improvements."
# Find outputs with feedback that haven't been refined
parent_ids = {o.parent_id for o in _storage.outputs.values() if o.parent_id}
unrefined = [oid for oid in outputs_with_feedback if oid not in parent_ids]
if unrefined:
return f"Use refine_output on [{unrefined[0]}] to address the feedback."
return "Continue refining or mark an output as final."
@toolset.tool(description=READ_REFINEMENT_STATE_DESCRIPTION)
async def read_refinement_state() -> str:
"""Read the current self-refinement state."""
start_time = time.perf_counter()
if not _storage.outputs:
result = f"{_get_status_summary()}\n\nNo outputs in refinement.\n\nNext: {_get_next_hint()}"
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("read_refinement_state", "", result, duration_ms)
return result
lines: list[str] = [_get_status_summary(), "", "Self-Refinement State:"]
lines.append("")
# Display outputs by iteration
outputs_by_iteration: dict[int, list[RefinementOutput]] = {}
for output in _storage.outputs.values():
if output.iteration not in outputs_by_iteration:
outputs_by_iteration[output.iteration] = []
outputs_by_iteration[output.iteration].append(output)
lines.append("Outputs by Refinement Iteration:")
for iteration in sorted(outputs_by_iteration.keys()):
outputs = outputs_by_iteration[iteration]
lines.append(f" Iteration {iteration}:")
for output in outputs:
final_str = " ⭐ FINAL" if output.is_final else ""
score_str = (
f" (quality: {output.quality_score:.1f})"
if output.quality_score is not None
else ""
)
threshold_str = (
f" (threshold: {output.quality_threshold:.1f})"
if output.quality_threshold is not None
else ""
)
limit_str = (
f" (limit: {output.iteration_limit})"
if output.iteration_limit is not None
else ""
)
parent_str = (
f" (refined from: [{output.parent_id}])"
if output.parent_id
else " (initial output)"
)
output_line = (
f" [{output.output_id}]{final_str}{score_str}"
f"{threshold_str}{limit_str}{parent_str}"
)
lines.append(output_line)
lines.append(f" Content: {output.content}")
lines.append("")
# Display feedback grouped by output
if _storage.feedbacks:
lines.append("Feedback:")
feedbacks_by_output: dict[str, list[Feedback]] = {}
for feedback in _storage.feedbacks.values():
if feedback.output_id not in feedbacks_by_output:
feedbacks_by_output[feedback.output_id] = []
feedbacks_by_output[feedback.output_id].append(feedback)
for output_id, feedbacks in feedbacks_by_output.items():
output = _storage.outputs.get(output_id)
output_ref = (
f"[{output_id}]" if output else f"[{output_id}] (missing)"
)
lines.append(f" Feedback for output {output_ref}:")
# Sort by priority (highest first)
sorted_feedbacks = sorted(feedbacks, key=lambda f: f.priority, reverse=True)
for feedback in sorted_feedbacks:
priority_str = (
f" (priority: {feedback.priority:.2f})"
if feedback.priority != 0.5
else ""
)
lines.append(
f" [{feedback.feedback_id}] {feedback.feedback_type.value} / "
f"{feedback.dimension.value}{priority_str}"
)
lines.append(f" Description: {feedback.description}")
lines.append(f" Suggestion: {feedback.suggestion}")
lines.append("")
lines.append("")
# Display refinement chains
root_outputs = [o for o in _storage.outputs.values() if o.parent_id is None]
if root_outputs:
lines.append("Refinement Chains:")
for root in root_outputs:
chain: list[RefinementOutput] = [root]
current = root
while True:
# Find children
children = [
o for o in _storage.outputs.values() if o.parent_id == current.output_id
]
if not children:
break
# Take the first child (could be multiple, but show one chain)
current = children[0]
chain.append(current)
chain_str = " → ".join([f"[{o.output_id}] (iter {o.iteration})" for o in chain])
lines.append(f" {chain_str}")
if chain[-1].is_final:
lines.append(f" Final output: [{chain[-1].output_id}]")
if chain[-1].quality_score is not None:
lines.append(f" Quality score: {chain[-1].quality_score:.1f}")
if chain[-1].quality_threshold is not None:
threshold_met = chain[-1].quality_score >= chain[-1].quality_threshold
lines.append(
f" Threshold {'✓ MET' if threshold_met else '✗ NOT MET'}"
)
lines.append("")
# Summary statistics
total_outputs = len(_storage.outputs)
total_feedbacks = len(_storage.feedbacks)
final_outputs = sum(1 for o in _storage.outputs.values() if o.is_final)
max_iteration = max((o.iteration for o in _storage.outputs.values()), default=0)
scored_outputs = sum(1 for o in _storage.outputs.values() if o.quality_score is not None)
threshold_outputs = sum(
1 for o in _storage.outputs.values() if o.quality_threshold is not None
)
threshold_met_outputs = sum(
1
for o in _storage.outputs.values()
if o.quality_threshold is not None
and o.quality_score is not None
and o.quality_score >= o.quality_threshold
)
lines.append("Summary:")
lines.append(f" Total outputs: {total_outputs}")
lines.append(f" Total feedback items: {total_feedbacks}")
lines.append(f" Final outputs: {final_outputs}")
lines.append(f" Maximum refinement iteration: {max_iteration}")
lines.append(f" Scored outputs: {scored_outputs}")
if threshold_outputs > 0:
lines.append(f" Outputs with quality threshold: {threshold_outputs}")
lines.append(f" Thresholds met: {threshold_met_outputs}")
lines.append("")
lines.append(f"Next: {_get_next_hint()}")
result = "\n".join(lines)
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("read_refinement_state", "", result, duration_ms)
return result
@toolset.tool(description=GENERATE_OUTPUT_DESCRIPTION)
async def generate_output(output: GenerateOutputItem) -> str:
"""Generate an initial output (iteration 0)."""
start_time = time.perf_counter()
input_text = output.model_dump_json() if _metrics else ""
output_id = str(uuid.uuid4())
new_output = RefinementOutput(
output_id=output_id,
content=output.content,
iteration=0,
parent_id=None,
is_final=False,
quality_score=None,
quality_threshold=output.quality_threshold,
iteration_limit=output.iteration_limit,
)
_storage.outputs = new_output
result_parts = [f"Generated initial output [{output_id}] at iteration 0"]
if output.quality_threshold is not None:
result_parts.append(f"Quality threshold: {output.quality_threshold:.1f}")
if output.iteration_limit is not None:
result_parts.append(f"Iteration limit: {output.iteration_limit}")
result = ". ".join(result_parts)
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("generate_output", input_text, result, duration_ms)
return result
@toolset.tool(description=PROVIDE_FEEDBACK_DESCRIPTION)
async def provide_feedback(feedback: ProvideFeedbackItem) -> str:
"""Provide structured, actionable feedback on an output."""
start_time = time.perf_counter()
input_text = feedback.model_dump_json() if _metrics else ""
if feedback.output_id not in _storage.outputs:
available = ", ".join([o.output_id for o in _storage.outputs.values()])
return (
f"Error: Output '{feedback.output_id}' not found. "
f"Available: [{available}]. Call read_refinement_state."
)
output = _storage.outputs[feedback.output_id]
# Check iteration limit
if output.iteration_limit is not None and output.iteration >= output.iteration_limit:
return (
f"Warning: Iteration limit ({output.iteration_limit}) reached for output "
f"'{feedback.output_id}'. Consider marking as final or increasing limit."
)
feedback_ids: list[str] = []
for item in feedback.feedback_items:
feedback_id = str(uuid.uuid4())
feedback_ids.append(feedback_id)
new_feedback = Feedback(
feedback_id=feedback_id,
output_id=feedback.output_id,
feedback_type=item.feedback_type,
dimension=item.dimension,
description=item.description,
suggestion=item.suggestion,
priority=item.priority,
is_actionable=item.priority > 0.0, # Non-zero priority implies actionable
)
_storage.feedbacks = new_feedback
result_parts = [
f"Provided {len(feedback.feedback_items)} feedback item(s) "
f"for output [{feedback.output_id}]",
f"Overall assessment: {feedback.overall_assessment}",
]
if not feedback.should_continue_refining:
result_parts.append("Refinement should STOP - no further improvements needed")
else:
result_parts.append("Refinement should CONTINUE")
result = ". ".join(result_parts)
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("provide_feedback", input_text, result, duration_ms)
return result
@toolset.tool(description=REFINE_OUTPUT_DESCRIPTION)
async def refine_output(refine: RefineOutputItem) -> str:
"""Generate an improved version of an output based on feedback."""
start_time = time.perf_counter()
input_text = refine.model_dump_json() if _metrics else ""
if refine.output_id not in _storage.outputs:
available = ", ".join([o.output_id for o in _storage.outputs.values()])
return (
f"Error: Output '{refine.output_id}' not found. "
f"Available: [{available}]. Call read_refinement_state."
)
parent_output = _storage.outputs[refine.output_id]
# Check iteration limit
if (
parent_output.iteration_limit is not None
and parent_output.iteration >= parent_output.iteration_limit
):
return (
f"Error: Iteration limit ({parent_output.iteration_limit}) reached. "
f"Cannot refine output [{refine.output_id}] further."
)
# Check if there's feedback for this output
feedbacks_for_output = [
f for f in _storage.feedbacks.values() if f.output_id == refine.output_id
]
if not feedbacks_for_output:
return (
f"Warning: No feedback found for output '{refine.output_id}'. "
"Consider providing feedback first to guide refinement."
)
output_id = str(uuid.uuid4())
new_iteration = parent_output.iteration + 1
# Determine if should be final based on quality threshold or explicit flag
is_final = refine.is_final
if (
not is_final
and parent_output.quality_threshold is not None
and refine.quality_score is not None
):
is_final = refine.quality_score >= parent_output.quality_threshold
# Check iteration limit
if (
parent_output.iteration_limit is not None
and new_iteration >= parent_output.iteration_limit
):
is_final = True # Force final if limit reached
new_output = RefinementOutput(
output_id=output_id,
content=refine.refined_content,
iteration=new_iteration,
parent_id=refine.output_id,
is_final=is_final,
quality_score=refine.quality_score,
quality_threshold=parent_output.quality_threshold, # Inherit from parent
iteration_limit=parent_output.iteration_limit, # Inherit from parent
)
_storage.outputs = new_output
result_parts = [
f"Created refined output [{output_id}] at iteration {new_iteration}",
f"Refined from [{refine.output_id}] (iteration {parent_output.iteration})",
]
if is_final:
result_parts.append("⭐ MARKED AS FINAL")
if refine.quality_score is not None:
result_parts.append(f"Quality score: {refine.quality_score:.1f}")
if parent_output.quality_threshold is not None:
threshold_met = refine.quality_score >= parent_output.quality_threshold
result_parts.append(
f"Quality threshold {'✓ MET' if threshold_met else '✗ NOT MET'}"
)
if parent_output.iteration_limit is not None:
result_parts.append(
f"Iteration limit: {new_iteration}/{parent_output.iteration_limit}"
)
result = ". ".join(result_parts)
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("refine_output", input_text, result, duration_ms)
return result
@toolset.tool(description=GET_BEST_OUTPUT_DESCRIPTION)
async def get_best_output() -> str:
"""Find the best refined output."""
start_time = time.perf_counter()
if not _storage.outputs:
return "No outputs found. Use generate_output to start."
# Prefer final outputs
final_outputs = [o for o in _storage.outputs.values() if o.is_final]
if final_outputs:
# Among final outputs, prefer highest quality score or highest iteration
best = max(
final_outputs,
key=lambda o: (
o.quality_score if o.quality_score is not None else 0,
o.iteration,
),
)
else:
# Among all outputs, prefer highest quality score or highest iteration
best = max(
_storage.outputs.values(),
key=lambda o: (
o.quality_score if o.quality_score is not None else 0,
o.iteration,
),
)
lines: list[str] = [f"Best Output: [{best.output_id}]"]
lines.append(f"Iteration: {best.iteration}")
if best.quality_score is not None:
lines.append(f"Quality Score: {best.quality_score:.1f}")
if best.quality_threshold is not None:
lines.append(f"Quality Threshold: {best.quality_threshold:.1f}")
if best.quality_score is not None:
threshold_met = best.quality_score >= best.quality_threshold
lines.append(f"Threshold Status: {'✓ MET' if threshold_met else '✗ NOT MET'}")
if best.is_final:
lines.append("Status: ⭐ FINAL")
lines.append("")
lines.append("Content:")
lines.append(best.content)
lines.append("")
# Show refinement chain
chain: list[RefinementOutput] = []
current: RefinementOutput | None = best
while current:
chain.insert(0, current)
current = (
_storage.outputs.get(current.parent_id) if current.parent_id else None
)
if len(chain) > 1:
lines.append("Refinement Chain:")
for i, output in enumerate(chain):
marker = " → " if i < len(chain) - 1 else " (best)"
lines.append(f" Iteration {output.iteration}: [{output.output_id}]{marker}")
result = "\n".join(lines)
if _metrics is not None:
duration_ms = (time.perf_counter() - start_time) * 1000
_metrics.record_invocation("get_best_output", "", result, duration_ms)
return result
return toolset
[docs]
def get_self_refine_system_prompt() -> str:
"""Get the system prompt for self-refinement-based reasoning.
Returns:
System prompt string that can be used with pydantic-ai agents.
"""
return SELF_REFINE_SYSTEM_PROMPT
def create_self_refine_toolset_agent(model: str = "openrouter:x-ai/grok-4.1-fast") -> Agent:
"""Create a Pydantic-ai agent with the self-refinement toolset.
Args:
model: The model to use for the agent.
Returns:
Pydantic-ai agent with the self-refinement toolset.
"""
storage = SelfRefineStorage()
toolset = create_self_refine_toolset(storage=storage)
agent = Agent(
model,
system_prompt="""
You are a self-refinement agent. You have access to tools for improving outputs through iterative refinement:
- `read_refinement_state`: Read the current refinement state
- `generate_output`: Create initial output
- `provide_feedback`: Provide structured, actionable feedback
- `refine_output`: Generate improved version based on feedback
- `get_best_output`: Find the best refined output
**IMPORTANT**: Use these tools to improve outputs through structured feedback and refinement cycles.
""",
toolsets=[toolset]
)
@agent.instructions
async def add_prompt() -> str:
"""Add the self-refinement system prompt."""
return get_self_refine_system_prompt()
return agent