Master advanced prompt engineering techniques including chain-of-thought, few-shot learning, constitutional AI, and production prompt management systems.
Prompt engineering has evolved from simple text inputs to sophisticated techniques that dramatically improve LLM performance. After designing prompts that power production AI systems processing millions of requests, I've learned that great prompt engineering is both art and science. Here's your comprehensive guide to advanced prompt techniques.
Fundamental Prompt Patterns
Comprehensive Prompt Framework
# prompt_framework.py
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
import json
import re
from enum import Enum
class PromptType(Enum):
ZERO_SHOT = "zero_shot"
FEW_SHOT = "few_shot"
CHAIN_OF_THOUGHT = "chain_of_thought"
TREE_OF_THOUGHT = "tree_of_thought"
CONSTITUTIONAL = "constitutional"
ROLE_BASED = "role_based"
STRUCTURED = "structured"
@dataclass
class PromptComponent:
role: Optional[str] = None
context: Optional[str] = None
examples: Optional[List[Dict]] = None
instructions: Optional[str] = None
constraints: Optional[List[str]] = None
output_format: Optional[str] = None
class AdvancedPromptEngine:
def __init__(self):
self.templates = {}
self.validators = {}
self.metrics = {}
def create_prompt(self,
prompt_type: PromptType,
components: PromptComponent,
task: str) -> str:
"""Create optimized prompt based on type and components"""
if prompt_type == PromptType.ZERO_SHOT:
return self._create_zero_shot(components, task)
elif prompt_type == PromptType.FEW_SHOT:
return self._create_few_shot(components, task)
elif prompt_type == PromptType.CHAIN_OF_THOUGHT:
return self._create_chain_of_thought(components, task)
elif prompt_type == PromptType.TREE_OF_THOUGHT:
return self._create_tree_of_thought(components, task)
elif prompt_type == PromptType.CONSTITUTIONAL:
return self._create_constitutional(components, task)
elif prompt_type == PromptType.ROLE_BASED:
return self._create_role_based(components, task)
elif prompt_type == PromptType.STRUCTURED:
return self._create_structured(components, task)
def _create_chain_of_thought(self,
components: PromptComponent,
task: str) -> str:
"""Create chain-of-thought prompt"""
prompt_parts = []
# Add role if specified
if components.role:
prompt_parts.append(f"You are {components.role}.")
# Add context
if components.context:
prompt_parts.append(f"Context: {components.context}")
# Add task with reasoning steps
prompt_parts.append(f"""
Task: {task}
Let's approach this step by step:
1. First, I'll identify the key components of the problem
2. Then, I'll analyze each component
3. Next, I'll consider the relationships between components
4. Finally, I'll synthesize a comprehensive solution
Step 1: Identifying key components...
""")
# Add constraints
if components.constraints:
prompt_parts.append("\nConstraints to consider:")
for constraint in components.constraints:
prompt_parts.append(f"- {constraint}")
# Add output format
if components.output_format:
prompt_parts.append(f"\nProvide your answer in the following format:\n{components.output_format}")
return "\n".join(prompt_parts)
def _create_few_shot(self,
components: PromptComponent,
task: str) -> str:
"""Create few-shot learning prompt"""
prompt_parts = []
# Add instructions
if components.instructions:
prompt_parts.append(components.instructions)
# Add examples
if components.examples:
prompt_parts.append("\nHere are some examples:")
for i, example in enumerate(components.examples, 1):
prompt_parts.append(f"\nExample {i}:")
prompt_parts.append(f"Input: {example['input']}")
prompt_parts.append(f"Output: {example['output']}")
if 'explanation' in example:
prompt_parts.append(f"Explanation: {example['explanation']}")
# Add the actual task
prompt_parts.append(f"\nNow, please complete this task:\nInput: {task}")
prompt_parts.append("Output:")
return "\n".join(prompt_parts)
def _create_tree_of_thought(self,
components: PromptComponent,
task: str) -> str:
"""Create tree-of-thought prompt for complex reasoning"""
prompt = f"""
{components.role if components.role else "You are an expert problem solver."}
Task: {task}
Please solve this using the Tree of Thoughts method:
1. Generate multiple possible approaches (at least 3)
2. For each approach, evaluate its potential
3. Explore the most promising path in detail
4. If needed, backtrack and try alternative paths
5. Synthesize the best solution
## Approach 1:
Idea: [Describe first approach]
Evaluation: [Score 1-10 and explain]
Details: [If promising, elaborate]
## Approach 2:
Idea: [Describe second approach]
Evaluation: [Score 1-10 and explain]
Details: [If promising, elaborate]
## Approach 3:
Idea: [Describe third approach]
Evaluation: [Score 1-10 and explain]
Details: [If promising, elaborate]
## Final Solution:
Based on exploring these approaches, here's the optimal solution:
"""
return prompt
def _create_constitutional(self,
components: PromptComponent,
task: str) -> str:
"""Create constitutional AI prompt with built-in critique"""
prompt = f"""
Task: {task}
Please complete this task following these principles:
1. Be helpful, harmless, and honest
2. Provide accurate and verifiable information
3. Consider ethical implications
4. Be transparent about limitations
Initial Response:
[Provide your initial response here]
Self-Critique:
Now, critique your response:
- Is it accurate and factual?
- Could it cause any harm?
- Are there biases to address?
- What could be improved?
Revised Response:
[Provide improved response based on critique]
Final Verification:
- Confirm all facts are accurate
- Ensure response is helpful
- Verify no harmful content
- Check for completeness
"""
return prompt
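To make the framework concrete, here is a minimal usage sketch. It assumes the classes above are importable from a `prompt_framework` module and that only the builders shown in this excerpt (chain-of-thought, few-shot, tree-of-thought, constitutional) are defined; the role, context, and task strings are illustrative.
# framework_usage.py - illustrative sketch, not part of the framework itself
from prompt_framework import AdvancedPromptEngine, PromptComponent, PromptType

engine = AdvancedPromptEngine()

# Reusable components: define once, render into different prompt styles
components = PromptComponent(
    role="a senior data analyst",
    context="Quarterly sales figures for a retail chain",
    constraints=["Use only the data provided", "Keep the answer under 200 words"],
    output_format="A short summary followed by three bullet-point recommendations",
)

cot_prompt = engine.create_prompt(
    PromptType.CHAIN_OF_THOUGHT,
    components,
    task="Explain why Q3 revenue dropped despite higher site traffic.",
)
print(cot_prompt)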
Dynamic Prompt Optimization
# prompt_optimization.py
from typing import Dict, List
class PromptOptimizer:
def __init__(self):
self.performance_history = []
self.optimization_rules = {}
def optimize_prompt(self,
base_prompt: str,
performance_data: Dict) -> str:
"""Optimize prompt based on performance metrics"""
        optimized_prompt = base_prompt
        # Apply each optimization cumulatively so the fixes build on one another
        if performance_data.get('accuracy', 1.0) < 0.8:
            optimized_prompt = self._improve_clarity(optimized_prompt)
        if performance_data.get('completeness', 1.0) < 0.9:
            optimized_prompt = self._add_detail_instructions(optimized_prompt)
        if performance_data.get('relevance', 1.0) < 0.85:
            optimized_prompt = self._add_focus_constraints(optimized_prompt)
        return optimized_prompt
def _improve_clarity(self, prompt: str) -> str:
"""Improve prompt clarity"""
improvements = []
# Add explicit structure
if "step" not in prompt.lower():
improvements.append("\nPlease proceed step by step:")
# Add examples if missing
if "example" not in prompt.lower():
improvements.append("\nFor example:")
# Add clarification for ambiguous terms
ambiguous_terms = self._detect_ambiguous_terms(prompt)
if ambiguous_terms:
improvements.append(f"\nDefinitions: {', '.join(ambiguous_terms)}")
        return prompt + "\n".join(improvements)

    def _detect_ambiguous_terms(self, prompt: str) -> List[str]:
        """Placeholder: plug in your own ambiguity detection; returns no terms by default"""
        return []
def _add_detail_instructions(self, prompt: str) -> str:
"""Add instructions for more detailed responses"""
detail_instructions = """
Please ensure your response:
- Covers all aspects of the question
- Provides specific examples
- Includes relevant details
- Explains reasoning thoroughly
"""
return prompt + detail_instructions
def _add_focus_constraints(self, prompt: str) -> str:
"""Add constraints to maintain focus"""
focus_constraints = """
Important:
- Stay directly relevant to the question
- Avoid tangential information
- Prioritize key points
- Be concise yet complete
"""
return prompt + focus_constraints
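A short sketch of how the optimizer might be driven by evaluation scores. It assumes the class above lives in `prompt_optimization` and that the scores come from your own evaluation pipeline; the thresholds are the ones hard-coded in `optimize_prompt`.
# optimizer_usage.py - illustrative sketch
from prompt_optimization import PromptOptimizer

optimizer = PromptOptimizer()

base_prompt = "Summarize the customer complaint and propose a resolution."

# Hypothetical evaluation scores: completeness and relevance fall below the
# thresholds in optimize_prompt, so detail and focus instructions get appended
performance_data = {
    'accuracy': 0.9,
    'completeness': 0.82,
    'relevance': 0.78,
}

improved_prompt = optimizer.optimize_prompt(base_prompt, performance_data)
print(improved_prompt)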
Advanced Prompting Techniques
Meta-Prompting System
# meta_prompting.py
import json
from typing import Dict
class MetaPromptSystem:
def __init__(self):
self.meta_templates = {}
self.reasoning_chains = {}
def create_meta_prompt(self, task_description: str) -> str:
"""Generate optimal prompt for a given task"""
meta_prompt = f"""
I need to create the best possible prompt for this task:
{task_description}
Please design a prompt that:
1. Clearly defines the objective
2. Provides necessary context
3. Includes relevant constraints
4. Specifies output format
5. Incorporates best practices
Consider these prompt engineering principles:
- Use clear, specific language
- Include examples when helpful
- Break complex tasks into steps
- Specify desired output format
- Add quality checks
Generated Prompt:
---
[Your optimized prompt here]
---
Explanation of Design Choices:
[Explain why this prompt structure will be effective]
"""
return meta_prompt
def create_self_improving_prompt(self,
initial_prompt: str,
task: str) -> str:
"""Create self-improving prompt with iteration"""
prompt = f"""
Initial Task: {task}
Initial Prompt: {initial_prompt}
Iteration 1:
Response: [Generate response to initial prompt]
Evaluation: [Evaluate response quality]
Prompt Improvement: [Suggest prompt improvements]
Iteration 2:
Improved Prompt: [Apply improvements]
Response: [Generate new response]
Evaluation: [Evaluate improvement]
Iteration 3:
Final Prompt: [Final optimized version]
Final Response: [Best response]
Summary:
- Original prompt weaknesses: [List issues]
- Improvements made: [List improvements]
- Final prompt strengths: [List strengths]
"""
return prompt
def create_adaptive_prompt(self,
context: Dict,
user_level: str) -> str:
"""Create prompt that adapts to user level"""
if user_level == "beginner":
style = "simple explanations with analogies"
detail = "basic concepts"
elif user_level == "intermediate":
style = "moderate technical detail"
detail = "practical applications"
else: # expert
style = "technical precision"
detail = "advanced nuances"
prompt = f"""
Context: {json.dumps(context, indent=2)}
User Level: {user_level}
Please provide a response using {style} and focusing on {detail}.
Adjust your response to:
- Match the user's expertise level
- Use appropriate terminology
- Provide suitable examples
- Include relevant depth
Response:
"""
return prompt
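The adaptive prompt is easiest to see with the same context rendered for different audiences. A minimal sketch, assuming the class above is importable from `meta_prompting`; the context dictionary and levels are illustrative.
# meta_prompting_usage.py - illustrative sketch
from meta_prompting import MetaPromptSystem

system = MetaPromptSystem()

context = {"topic": "vector databases", "goal": "choose an index type"}

# Same question, three audiences: the style and depth instructions change per level
for level in ("beginner", "intermediate", "expert"):
    prompt = system.create_adaptive_prompt(context, user_level=level)
    print(f"--- {level} ---")
    print(prompt)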
Prompt Chaining and Composition
# prompt_chaining.py
from typing import Dict, List
class PromptChain:
def __init__(self):
self.chain = []
self.intermediate_results = {}
def add_step(self,
step_name: str,
prompt_template: str,
dependencies: List[str] = None):
"""Add step to prompt chain"""
self.chain.append({
'name': step_name,
'template': prompt_template,
'dependencies': dependencies or []
})
async def execute_chain(self,
initial_input: str,
llm_client) -> Dict:
"""Execute prompt chain"""
results = {'initial_input': initial_input}
for step in self.chain:
# Prepare prompt with dependencies
prompt = self._prepare_prompt(
step['template'],
step['dependencies'],
results
)
# Execute step
response = await llm_client.generate(prompt)
# Store result
results[step['name']] = response
# Validate before continuing
if not self._validate_step(response, step['name']):
raise ValueError(f"Step {step['name']} failed validation")
return results
def _prepare_prompt(self,
template: str,
dependencies: List[str],
results: Dict) -> str:
"""Prepare prompt with dependency injection"""
prompt = template
# Inject dependencies
for dep in dependencies:
if dep in results:
placeholder = f"{{{dep}}}"
prompt = prompt.replace(placeholder, str(results[dep]))
        return prompt

    def _validate_step(self, response: str, step_name: str) -> bool:
        """Placeholder validation: require a non-empty response; swap in task-specific checks"""
        return bool(response and response.strip())

    def create_research_chain(self, topic: str) -> List[Dict]:
"""Create research prompt chain"""
# Step 1: Generate research questions
self.add_step(
'questions',
f"""
Generate 5 key research questions about: {topic}
Format each question clearly and ensure they cover different aspects.
"""
)
# Step 2: Research each question
self.add_step(
'research',
"""
For each question from the previous step:
        {questions}
Provide detailed, factual answers with sources when possible.
""",
dependencies=['questions']
)
# Step 3: Synthesize findings
self.add_step(
'synthesis',
"""
Based on the research findings:
        {research}
Create a comprehensive summary that:
1. Identifies key themes
2. Highlights important insights
3. Notes areas needing further research
4. Provides actionable conclusions
""",
dependencies=['research']
)
# Step 4: Generate report
self.add_step(
'report',
"""
Using the synthesis:
        {synthesis}
Create a professional report with:
- Executive summary
- Key findings
- Detailed analysis
- Recommendations
- References
""",
dependencies=['synthesis']
)
return self.chain
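To run a chain end to end you only need a client that exposes an async `generate` method, which is what `execute_chain` awaits. A minimal sketch with a stand-in client (the `StubLLMClient` here is hypothetical; swap in your real API wrapper), assuming the class above is importable from `prompt_chaining`:
# chaining_usage.py - illustrative sketch
import asyncio

from prompt_chaining import PromptChain

class StubLLMClient:
    """Stand-in client; replace generate() with a real model call"""
    async def generate(self, prompt: str) -> str:
        return f"[model output for a {len(prompt)}-character prompt]"

async def main():
    chain = PromptChain()
    chain.create_research_chain("battery recycling economics")

    results = await chain.execute_chain(
        initial_input="battery recycling economics",
        llm_client=StubLLMClient(),
    )
    for step in ("questions", "research", "synthesis", "report"):
        print(f"{step}: {results[step]}")

asyncio.run(main())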
Prompt Testing and Validation
Comprehensive Testing Framework
# prompt_testing.py
from typing import Any, Dict, List
class PromptTestingFramework:
def __init__(self):
self.test_cases = []
self.metrics = {}
self.benchmarks = {}
def add_test_case(self,
name: str,
input_data: str,
expected_output: Any,
evaluation_criteria: Dict):
"""Add test case for prompt evaluation"""
self.test_cases.append({
'name': name,
'input': input_data,
'expected': expected_output,
'criteria': evaluation_criteria
})
async def test_prompt(self,
prompt_template: str,
llm_client,
iterations: int = 3) -> Dict:
"""Test prompt across all test cases"""
results = {
'overall_score': 0,
'test_results': [],
'metrics': {}
}
for test_case in self.test_cases:
test_result = {
'name': test_case['name'],
'iterations': []
}
# Run multiple iterations for consistency
for i in range(iterations):
# Prepare prompt
prompt = prompt_template.replace(
'{input}',
test_case['input']
)
# Get response
response = await llm_client.generate(prompt)
# Evaluate response
scores = self._evaluate_response(
response,
test_case['expected'],
test_case['criteria']
)
test_result['iterations'].append({
'iteration': i + 1,
'response': response,
'scores': scores
})
# Calculate average scores
avg_scores = self._calculate_average_scores(
test_result['iterations']
)
test_result['average_scores'] = avg_scores
results['test_results'].append(test_result)
# Calculate overall metrics
results['overall_score'] = self._calculate_overall_score(
results['test_results']
)
return results
def _evaluate_response(self,
response: str,
expected: Any,
criteria: Dict) -> Dict:
"""Evaluate response against criteria"""
scores = {}
# Accuracy check
if 'accuracy' in criteria:
scores['accuracy'] = self._check_accuracy(
response,
expected
)
# Completeness check
if 'completeness' in criteria:
scores['completeness'] = self._check_completeness(
response,
criteria['completeness']
)
# Format check
if 'format' in criteria:
scores['format'] = self._check_format(
response,
criteria['format']
)
# Relevance check
if 'relevance' in criteria:
scores['relevance'] = self._check_relevance(
response,
criteria['relevance']
)
return scores
def create_robustness_tests(self, base_prompt: str) -> List[Dict]:
"""Create robustness test variations"""
variations = []
# Test with typos
typo_prompt = self._introduce_typos(base_prompt)
variations.append({
'name': 'typo_resistance',
'prompt': typo_prompt,
'expected_behavior': 'Should handle typos gracefully'
})
# Test with different formats
reformatted = self._reformat_prompt(base_prompt)
variations.append({
'name': 'format_flexibility',
'prompt': reformatted,
'expected_behavior': 'Should work with different formats'
})
# Test with edge cases
edge_cases = self._generate_edge_cases(base_prompt)
for i, edge_case in enumerate(edge_cases):
variations.append({
'name': f'edge_case_{i}',
'prompt': edge_case,
'expected_behavior': 'Should handle edge cases'
})
return variations
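Defining the test cases is independent of the scoring helpers (the `_check_*` and robustness-variation methods referenced above are not shown in this excerpt). A minimal sketch of registering cases, assuming the framework is importable from `prompt_testing`; the inputs and criteria are illustrative.
# testing_usage.py - illustrative sketch
from prompt_testing import PromptTestingFramework

framework = PromptTestingFramework()

# Each test case pairs an input with the expected output and the criteria to score
framework.add_test_case(
    name="sentiment_positive",
    input_data="I absolutely love this product!",
    expected_output="positive",
    evaluation_criteria={'accuracy': True, 'format': 'single_word'},
)
framework.add_test_case(
    name="sentiment_mixed",
    input_data="Great battery life, but the screen scratches easily.",
    expected_output="mixed",
    evaluation_criteria={'accuracy': True, 'relevance': 'sentiment_only'},
)

# await framework.test_prompt("Classify the sentiment of: {input}", llm_client)
# would then run each case several times and average the scores
print(f"{len(framework.test_cases)} test cases registered")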
Production Prompt Management
Prompt Version Control
# prompt_versioning.py
import hashlib
from datetime import datetime
from typing import Dict
class PromptVersionManager:
def __init__(self, storage_backend):
self.storage = storage_backend
self.versions = {}
self.active_versions = {}
def save_prompt_version(self,
prompt_id: str,
prompt_content: str,
metadata: Dict) -> str:
"""Save new prompt version"""
# Generate version hash
version_hash = hashlib.sha256(
prompt_content.encode()
).hexdigest()[:8]
version_data = {
'id': prompt_id,
'version': version_hash,
'content': prompt_content,
'metadata': metadata,
'created_at': datetime.now().isoformat(),
'performance_metrics': {},
'status': 'draft'
}
# Store version
self.storage.save(f"{prompt_id}/{version_hash}", version_data)
# Update version tracking
if prompt_id not in self.versions:
self.versions[prompt_id] = []
self.versions[prompt_id].append(version_hash)
return version_hash
def deploy_version(self,
prompt_id: str,
version: str,
environment: str = 'production'):
"""Deploy specific prompt version"""
# Load version
version_data = self.storage.load(f"{prompt_id}/{version}")
# Update status
version_data['status'] = 'deployed'
version_data['environment'] = environment
version_data['deployed_at'] = datetime.now().isoformat()
# Set as active version
self.active_versions[f"{prompt_id}_{environment}"] = version
# Save updated data
self.storage.save(f"{prompt_id}/{version}", version_data)
return version_data
def rollback(self, prompt_id: str, environment: str = 'production'):
"""Rollback to previous version"""
current_version = self.active_versions.get(
f"{prompt_id}_{environment}"
)
if not current_version:
raise ValueError(f"No active version for {prompt_id}")
# Find previous version
versions = self.versions[prompt_id]
current_index = versions.index(current_version)
if current_index == 0:
raise ValueError("No previous version to rollback to")
previous_version = versions[current_index - 1]
# Deploy previous version
return self.deploy_version(prompt_id, previous_version, environment)
def a_b_test(self,
prompt_id: str,
version_a: str,
version_b: str,
traffic_split: float = 0.5) -> Dict:
"""Setup A/B test for prompt versions"""
test_config = {
'prompt_id': prompt_id,
'version_a': version_a,
'version_b': version_b,
'traffic_split': traffic_split,
'start_time': datetime.now().isoformat(),
'metrics': {
'version_a': {'requests': 0, 'success': 0},
'version_b': {'requests': 0, 'success': 0}
}
}
self.storage.save(f"ab_tests/{prompt_id}", test_config)
return test_config
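Version control only needs a storage backend with `save` and `load`. Here is a minimal sketch using an in-memory dictionary (the `InMemoryStorage` class is illustrative; production use would point at a database or object store), assuming the manager above is importable from `prompt_versioning`:
# versioning_usage.py - illustrative sketch
from prompt_versioning import PromptVersionManager

class InMemoryStorage:
    """Toy backend with the save/load interface the manager expects"""
    def __init__(self):
        self._data = {}

    def save(self, key: str, value: dict):
        self._data[key] = value

    def load(self, key: str) -> dict:
        return self._data[key]

manager = PromptVersionManager(InMemoryStorage())

v1 = manager.save_prompt_version(
    "support_summary",
    "Summarize the support ticket in three bullet points.",
    metadata={"author": "ml-team"},
)
v2 = manager.save_prompt_version(
    "support_summary",
    "Summarize the support ticket in three bullet points and flag urgency.",
    metadata={"author": "ml-team"},
)

manager.deploy_version("support_summary", v2)
previous = manager.rollback("support_summary")  # redeploys v1
print(previous['version'] == v1)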
Prompt Analytics
# prompt_analytics.py
from datetime import datetime
from typing import Dict, List
class PromptAnalytics:
def __init__(self):
self.metrics_store = {}
self.performance_data = []
def track_prompt_usage(self,
prompt_id: str,
version: str,
execution_data: Dict):
"""Track prompt execution metrics"""
metrics = {
'prompt_id': prompt_id,
'version': version,
'timestamp': datetime.now().isoformat(),
'latency_ms': execution_data.get('latency'),
'tokens_used': execution_data.get('tokens'),
'success': execution_data.get('success', True),
'error': execution_data.get('error'),
'user_feedback': execution_data.get('feedback')
}
self.performance_data.append(metrics)
# Update aggregated metrics
        self._update_aggregates(prompt_id, version, metrics)

    def _update_aggregates(self, prompt_id: str, version: str, metrics: Dict):
        """Placeholder aggregation: keep simple per-version call and error counts"""
        key = f"{prompt_id}:{version}"
        bucket = self.metrics_store.setdefault(key, {'calls': 0, 'errors': 0})
        bucket['calls'] += 1
        if not metrics.get('success', True):
            bucket['errors'] += 1
def generate_performance_report(self,
prompt_id: str,
version: str = None) -> Dict:
"""Generate performance report for prompt"""
# Filter data
data = [
m for m in self.performance_data
if m['prompt_id'] == prompt_id
and (version is None or m['version'] == version)
]
if not data:
return {'error': 'No data available'}
report = {
'prompt_id': prompt_id,
'version': version,
'total_executions': len(data),
'success_rate': sum(1 for d in data if d['success']) / len(data),
            'average_latency_ms': (
                sum(d['latency_ms'] for d in data if d['latency_ms'])
                / max(1, sum(1 for d in data if d['latency_ms']))
            ),
'total_tokens': sum(d['tokens_used'] for d in data if d['tokens_used']),
'errors': [d['error'] for d in data if d['error']],
'feedback_summary': self._summarize_feedback(data)
}
# Add time-based analysis
report['usage_over_time'] = self._analyze_usage_pattern(data)
# Add performance trends
report['performance_trends'] = self._analyze_trends(data)
return report
def identify_optimization_opportunities(self,
prompt_id: str) -> List[Dict]:
"""Identify opportunities to optimize prompt"""
opportunities = []
# Analyze error patterns
errors = self._analyze_errors(prompt_id)
if errors['error_rate'] > 0.05:
opportunities.append({
'type': 'error_reduction',
'description': f"High error rate: {errors['error_rate']:.2%}",
'recommendation': 'Review error patterns and adjust prompt',
'priority': 'high'
})
# Check token usage
token_stats = self._analyze_token_usage(prompt_id)
if token_stats['average'] > 1000:
opportunities.append({
'type': 'token_optimization',
'description': f"High token usage: {token_stats['average']} average",
'recommendation': 'Consider prompt compression techniques',
'priority': 'medium'
})
# Analyze response quality
quality_metrics = self._analyze_quality(prompt_id)
if quality_metrics['consistency'] < 0.8:
opportunities.append({
'type': 'consistency_improvement',
'description': f"Low consistency: {quality_metrics['consistency']:.2f}",
'recommendation': 'Add more specific instructions or examples',
'priority': 'high'
})
return opportunities
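A short sketch of feeding execution data into the analytics layer. It assumes the class above is importable from `prompt_analytics`; the latency and token numbers are made up, and the report and trend helpers referenced in `generate_performance_report` are not shown in this excerpt.
# analytics_usage.py - illustrative sketch
from prompt_analytics import PromptAnalytics

analytics = PromptAnalytics()

# Record two executions of the same prompt version with hypothetical numbers
analytics.track_prompt_usage(
    prompt_id="support_summary",
    version="a1b2c3d4",
    execution_data={'latency': 420, 'tokens': 310, 'success': True},
)
analytics.track_prompt_usage(
    prompt_id="support_summary",
    version="a1b2c3d4",
    execution_data={'latency': 950, 'tokens': 280, 'success': False,
                    'error': 'timeout'},
)

print(len(analytics.performance_data))  # 2 tracked executions
print(analytics.metrics_store)          # per-version aggregates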
Best Practices Checklist
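- Match the pattern to the task: zero-shot for simple requests, few-shot when examples pin down the format, chain-of-thought or tree-of-thought for multi-step reasoning, constitutional prompts when self-critique matters.
- Spell out role, context, constraints, and output format instead of leaving the model to infer them.
- Test prompts against representative cases, run multiple iterations to check consistency, and probe robustness with typos, reformatting, and edge cases.
- Version every prompt, deploy and roll back through the same controls you use for code, and A/B test changes before full rollout.
- Track latency, token usage, error rates, and user feedback per version, and optimize based on measured accuracy, completeness, and relevance rather than intuition.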
Conclusion
Advanced prompt engineering is the key to unlocking LLM potential in production systems. By mastering techniques from chain-of-thought reasoning to constitutional AI, implementing robust testing frameworks, and maintaining proper version control, you can create prompts that consistently deliver exceptional results. Remember, great prompts evolve through iteration and measurement—treat your prompts as code and continuously optimize based on real-world performance.