Files
2026-02-02 18:19:00 +00:00

373 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Wave 0: Sector Brief Generator
Generates alignment context briefs for each sector.
These briefs inform Wave 1 and Wave 2 primitive config generation.
Usage:
python generate_sector_briefs.py # Generate all sectors
python generate_sector_briefs.py --sector FOOD_DINING # Generate one sector
python generate_sector_briefs.py --dry-run # Show what would be generated
python generate_sector_briefs.py --validate # Validate existing briefs
"""
import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
try:
from openai import OpenAI
except ImportError:
print("ERROR: openai package required. Install with: pip install openai")
sys.exit(1)
# User-message prompt used when generating a sector brief.
# It is filled in with str.format() using the fields sector_code, sector_name,
# description and business_types. Because of that, every literal JSON brace in
# the template is doubled ({{ and }}) so that format() emits a single brace.
PROMPT_TEMPLATE = '''You are an expert in customer experience analysis across industries.
Your task: Generate a **sector brief** for the "{sector_name}" sector.
This brief will be used to align classification agents with industry-specific context.
It describes what customers care about — NOT how to classify, NOT what primitives to use.
## Sector Information
- **Code**: {sector_code}
- **Name**: {sector_name}
- **Description**: {description}
- **Sample Business Types**: {business_types}
## Output Requirements
Generate a JSON object with this exact structure:
```json
{{
"sector_code": "{sector_code}",
"sector_name": "{sector_name}",
"generated_at": "<ISO timestamp>",
"version": "1.0",
"what_customers_judge": {{
"description": "The primary dimensions customers evaluate in this sector",
"items": [
{{
"aspect": "string (2-5 words)",
"importance": "critical | high | moderate",
"why_it_matters": "string (1 sentence)"
}}
]
}},
"critical_pain_points": {{
"description": "What damages reputation most severely",
"items": [
{{
"pain_point": "string (2-5 words)",
"typical_language": ["phrases customers actually use in reviews"],
"reputation_impact": "severe | significant | moderate"
}}
]
}},
"common_praise": {{
"description": "What earns customer loyalty and positive reviews",
"items": [
{{
"praise_area": "string (2-5 words)",
"typical_language": ["phrases customers actually use in reviews"],
"loyalty_impact": "high | moderate"
}}
]
}},
"industry_terminology": {{
"description": "Domain-specific vocabulary",
"staff_terms": ["terms for staff roles in this sector"],
"product_terms": ["terms for products/services"],
"process_terms": ["terms for processes/interactions"],
"quality_terms": ["positive quality descriptors"],
"problem_terms": ["negative quality descriptors"]
}},
"mode_specific_concerns": {{
"description": "Different service modes have different priorities",
"modes": [
{{
"mode": "string (e.g., 'In-person', 'Online', 'Phone')",
"primary_concerns": ["top concerns for this mode"],
"unique_pain_points": ["pain points specific to this mode"]
}}
]
}},
"what_is_actionable": {{
"description": "Feedback businesses can act on",
"actionable_examples": [
{{
"feedback_type": "string",
"example": "string (realistic review excerpt)",
"action_owner": "role/team that can fix it"
}}
],
"not_actionable_examples": [
{{
"feedback_type": "string",
"example": "string (realistic review excerpt)",
"why_not_actionable": "string"
}}
]
}},
"sector_specific_signals": {{
"description": "Signals with sector-specific meaning",
"examples": [
{{
"signal": "string (word or phrase)",
"meaning_in_this_sector": "string",
"contrast_with": "how it differs in other sectors"
}}
]
}}
}}
```
## Critical Rules
1. **Use realistic review language** in `typical_language` arrays - actual phrases customers write
2. **Include 4-8 items** per array (not too few, not excessive)
3. **Be sector-specific** - don't use generic phrases that apply to all businesses
4. **Include appropriate modes** - only modes that actually exist in this sector
5. **NO primitive codes, priorities, weights, or solutions**
6. **Focus on WHAT customers care about**, not HOW to classify it
Return ONLY the JSON object, no markdown formatting or explanation.'''
def load_sectors(data_path: Path) -> list[dict]:
    """Load sector definitions from a JSON file.

    Args:
        data_path: Path to a JSON document whose top-level object has a
            "sectors" key.

    Returns:
        The value stored under the "sectors" key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no "sectors" key.
    """
    # Decode explicitly as UTF-8: without it open() falls back to the
    # platform's locale encoding, which breaks non-ASCII sector text on
    # systems whose default is not UTF-8.
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)
    return data["sectors"]
def generate_sector_brief(client: OpenAI, sector: dict, model: str) -> dict:
    """Generate a sector brief by prompting a chat-completion model.

    Args:
        client: OpenAI client used to issue the chat-completion request.
        sector: Sector definition. Must contain "sector_code", "sector_name",
            "description" and "sample_business_types" (an iterable of
            strings, joined with ", " for the prompt).
        model: Name of the model to request.

    Returns:
        The JSON object parsed from the model's reply, with "sector_code",
        "sector_name", "generated_at" and "version" overwritten locally.

    Raises:
        KeyError: If ``sector`` lacks one of the required keys.
        ValueError: If the reply has no text content, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or
            parses to something other than a JSON object.
    """
    # Imported locally because the module-level import only brings in the
    # ``datetime`` class.
    from datetime import timezone

    prompt = PROMPT_TEMPLATE.format(
        sector_code=sector["sector_code"],
        sector_name=sector["sector_name"],
        description=sector["description"],
        business_types=", ".join(sector["sample_business_types"]),
    )
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert customer experience analyst. Return only valid JSON, no markdown."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
        max_tokens=4000,
        response_format={"type": "json_object"},
    )

    choice = response.choices[0]
    content = choice.message.content
    if content is None:
        # The message content can be absent; fail with a readable error
        # instead of an AttributeError on ``None.strip()``.
        finish_reason = getattr(choice, "finish_reason", None)
        raise ValueError(
            f"Model returned no content (finish_reason={finish_reason!r})"
        )

    # Parse JSON
    brief = json.loads(content.strip())
    if not isinstance(brief, dict):
        raise ValueError(
            f"Expected a JSON object from the model, got {type(brief).__name__}"
        )

    # Ensure required fields: overwrite whatever the model produced for these
    # keys with values taken from the sector definition and the local clock.
    brief["sector_code"] = sector["sector_code"]
    brief["sector_name"] = sector["sector_name"]
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # tzinfo is dropped again so the text keeps the "<naive ISO>Z" format.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    brief["generated_at"] = now_utc.isoformat() + "Z"
    brief["version"] = "1.0"
    return brief
def validate_brief(brief: dict) -> list[str]:
    """Validate a sector brief and return a list of human-readable issues.

    Checks performed, in this order:
      1. every required top-level section is present;
      2. the "items" list of "what_customers_judge", "critical_pain_points"
         and "common_praise" has at least 3 entries ("what_customers_judge"
         additionally at most 10);
      3. the serialized brief does not contain a forbidden term
         (case-insensitive substring match).

    Args:
        brief: Parsed brief, normally a dict.

    Returns:
        A list of issue descriptions; empty when no problem was found. A
        malformed brief is reported as an issue rather than raising.
    """
    if not isinstance(brief, dict):
        return [f"Brief must be a JSON object, got {type(brief).__name__}"]

    required_keys = [
        "what_customers_judge",
        "critical_pain_points",
        "common_praise",
        "industry_terminology",
        "mode_specific_concerns",
        "what_is_actionable",
        "sector_specific_signals",
    ]
    issues = [
        f"Missing required key: {key}" for key in required_keys if key not in brief
    ]

    # Check array lengths: (section, minimum items, maximum items or None).
    length_rules = [
        ("what_customers_judge", 3, 10),
        ("critical_pain_points", 3, None),
        ("common_praise", 3, None),
    ]
    for section, minimum, maximum in length_rules:
        if section not in brief:
            continue
        body = brief[section]
        items = body.get("items", []) if isinstance(body, dict) else None
        if not isinstance(items, list):
            # Model output is not guaranteed to follow the schema; report the
            # malformed section instead of crashing on .get() / len().
            issues.append(f"{section} must be an object with an 'items' list")
            continue
        if len(items) < minimum:
            issues.append(
                f"{section} has only {len(items)} items (need {minimum}+)"
            )
        if maximum is not None and len(items) > maximum:
            issues.append(f"{section} has {len(items)} items (max {maximum})")

    # Check for forbidden content. "solution" is intentionally not listed:
    # it can legitimately appear in context and was never reported.
    text = json.dumps(brief).lower()
    forbidden = ["priority", "weight", "primitive", "enabled", "disabled"]
    for word in forbidden:
        if word in text:
            issues.append(f"Contains potentially forbidden term: {word}")
    return issues
def save_brief(brief: dict, output_dir: Path) -> Path:
    """Write *brief* as indented JSON inside *output_dir*.

    The directory, including any missing parents, is created first. The
    file is named after the lower-cased "sector_code" of the brief with the
    suffix "_brief.json".

    Returns:
        The path of the file that was written.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / (brief["sector_code"].lower() + "_brief.json")
    with destination.open("w") as handle:
        json.dump(brief, handle, indent=2)
    return destination
def validate_existing_briefs(output_dir: Path) -> None:
    """Validate every "*_brief.json" file in *output_dir* and print a report.

    Files are processed in sorted order. For each file a status line is
    printed, followed by its issues, if any. A file that cannot be read or
    parsed is reported as an issue of that file instead of aborting the run.

    Args:
        output_dir: Directory that holds the generated brief files.
    """
    if not output_dir.exists():
        print(f"Output directory does not exist: {output_dir}")
        return
    files = sorted(output_dir.glob("*_brief.json"))
    if not files:
        print("No brief files found")
        return
    print(f"Validating {len(files)} brief files...\n")
    all_valid = True
    for filepath in files:
        try:
            with open(filepath, encoding="utf-8") as f:
                brief = json.load(f)
        except (OSError, ValueError) as err:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; one bad file must not stop the remaining checks.
            issues = [f"Could not read brief: {err}"]
        else:
            if isinstance(brief, dict):
                issues = validate_brief(brief)
            else:
                issues = [
                    f"Brief must be a JSON object, got {type(brief).__name__}"
                ]
        # The marker was an empty string on both branches, so the status line
        # did not distinguish passing from failing files.
        status = "✗" if issues else "✓"
        print(f"{status} {filepath.name}")
        if issues:
            all_valid = False
            for issue in issues:
                print(f" - {issue}")
            print()
    if all_valid:
        print("All briefs valid!")
    else:
        print("Some briefs have issues.")
def main():
    """Command-line entry point: generate or validate sector briefs.

    Modes, selected by flags:
      --validate  validate the brief files already in the output directory
                  and return;
      --dry-run   list the sectors that would be generated and return;
      default     generate a brief per sector (or only ``--sector``), print
                  validation warnings, save each brief and print a summary.

    Exits with status 1 when the sectors file cannot be loaded, the
    requested sector is unknown, OPENAI_API_KEY is not set, or at least one
    sector failed to generate.
    """
    parser = argparse.ArgumentParser(description="Generate sector briefs for Wave 0")
    parser.add_argument("--sector", help="Generate only this sector code")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be generated")
    parser.add_argument("--validate", action="store_true", help="Validate existing briefs")
    parser.add_argument("--output-dir", default="data/sector_briefs", help="Output directory")
    parser.add_argument("--model", default="gpt-4o", help="OpenAI model to use")
    args = parser.parse_args()

    # Paths are resolved relative to the parent of this script's directory.
    script_dir = Path(__file__).parent
    package_dir = script_dir.parent
    data_path = package_dir / "data" / "sectors.json"
    output_dir = package_dir / args.output_dir

    # Validate mode
    if args.validate:
        validate_existing_briefs(output_dir)
        return

    # Load sectors. A missing or malformed file is reported in the same
    # "ERROR: ..." + exit(1) style as the other failures, not as a traceback.
    try:
        sectors = load_sectors(data_path)
    except (OSError, ValueError, KeyError) as err:
        print(f"ERROR: Could not load sectors from {data_path}: {err}")
        sys.exit(1)
    print(f"Loaded {len(sectors)} sectors")

    # Filter to single sector if specified
    if args.sector:
        sectors = [s for s in sectors if s["sector_code"] == args.sector]
        if not sectors:
            print(f"ERROR: Sector '{args.sector}' not found")
            sys.exit(1)

    if args.dry_run:
        print("\n[DRY RUN] Would generate briefs for:")
        for sector in sectors:
            print(f" - {sector['sector_code']}: {sector['sector_name']}")
        print(f"\nOutput directory: {output_dir}")
        return

    # Check API key
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable required")
        sys.exit(1)

    # Initialize client
    client = OpenAI(api_key=api_key)
    print(f"Using model: {args.model}")

    # Generate briefs
    results = {"success": [], "failed": []}
    for i, sector in enumerate(sectors, 1):
        print(f"\n[{i}/{len(sectors)}] Generating brief for: {sector['sector_name']}")
        try:
            brief = generate_sector_brief(client, sector, args.model)
            # Validation problems are warnings only; the brief is still saved.
            issues = validate_brief(brief)
            if issues:
                print(" Warnings:")
                for issue in issues:
                    print(f" - {issue}")
            # Save
            output_path = save_brief(brief, output_dir)
            print(f" ✓ Saved to: {output_path}")
            results["success"].append(sector["sector_code"])
        except Exception as e:
            # Top-level boundary: one failing sector must not stop the batch.
            # It is recorded and reflected in the exit status below.
            print(f" ✗ FAILED: {e}")
            results["failed"].append(sector["sector_code"])

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    print(f"Success: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")
    if results["failed"]:
        print(f"\nFailed sectors: {', '.join(results['failed'])}")
        sys.exit(1)
if __name__ == "__main__":
main()