373 lines
11 KiB
Python
373 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Wave 0: Sector Brief Generator
|
|
|
|
Generates alignment context briefs for each sector.
|
|
These briefs inform Wave 1 and Wave 2 primitive config generation.
|
|
|
|
Usage:
|
|
python generate_sector_briefs.py # Generate all sectors
|
|
python generate_sector_briefs.py --sector FOOD_DINING # Generate one sector
|
|
python generate_sector_briefs.py --dry-run # Show what would be generated
|
|
python generate_sector_briefs.py --validate # Validate existing briefs
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from openai import OpenAI
|
|
except ImportError:
|
|
print("ERROR: openai package required. Install with: pip install openai")
|
|
sys.exit(1)
|
|
|
|
|
|
PROMPT_TEMPLATE = '''You are an expert in customer experience analysis across industries.
|
|
|
|
Your task: Generate a **sector brief** for the "{sector_name}" sector.
|
|
|
|
This brief will be used to align classification agents with industry-specific context.
|
|
It describes what customers care about — NOT how to classify, NOT what primitives to use.
|
|
|
|
## Sector Information
|
|
|
|
- **Code**: {sector_code}
|
|
- **Name**: {sector_name}
|
|
- **Description**: {description}
|
|
- **Sample Business Types**: {business_types}
|
|
|
|
## Output Requirements
|
|
|
|
Generate a JSON object with this exact structure:
|
|
|
|
```json
|
|
{{
|
|
"sector_code": "{sector_code}",
|
|
"sector_name": "{sector_name}",
|
|
"generated_at": "<ISO timestamp>",
|
|
"version": "1.0",
|
|
|
|
"what_customers_judge": {{
|
|
"description": "The primary dimensions customers evaluate in this sector",
|
|
"items": [
|
|
{{
|
|
"aspect": "string (2-5 words)",
|
|
"importance": "critical | high | moderate",
|
|
"why_it_matters": "string (1 sentence)"
|
|
}}
|
|
]
|
|
}},
|
|
|
|
"critical_pain_points": {{
|
|
"description": "What damages reputation most severely",
|
|
"items": [
|
|
{{
|
|
"pain_point": "string (2-5 words)",
|
|
"typical_language": ["phrases customers actually use in reviews"],
|
|
"reputation_impact": "severe | significant | moderate"
|
|
}}
|
|
]
|
|
}},
|
|
|
|
"common_praise": {{
|
|
"description": "What earns customer loyalty and positive reviews",
|
|
"items": [
|
|
{{
|
|
"praise_area": "string (2-5 words)",
|
|
"typical_language": ["phrases customers actually use in reviews"],
|
|
"loyalty_impact": "high | moderate"
|
|
}}
|
|
]
|
|
}},
|
|
|
|
"industry_terminology": {{
|
|
"description": "Domain-specific vocabulary",
|
|
"staff_terms": ["terms for staff roles in this sector"],
|
|
"product_terms": ["terms for products/services"],
|
|
"process_terms": ["terms for processes/interactions"],
|
|
"quality_terms": ["positive quality descriptors"],
|
|
"problem_terms": ["negative quality descriptors"]
|
|
}},
|
|
|
|
"mode_specific_concerns": {{
|
|
"description": "Different service modes have different priorities",
|
|
"modes": [
|
|
{{
|
|
"mode": "string (e.g., 'In-person', 'Online', 'Phone')",
|
|
"primary_concerns": ["top concerns for this mode"],
|
|
"unique_pain_points": ["pain points specific to this mode"]
|
|
}}
|
|
]
|
|
}},
|
|
|
|
"what_is_actionable": {{
|
|
"description": "Feedback businesses can act on",
|
|
"actionable_examples": [
|
|
{{
|
|
"feedback_type": "string",
|
|
"example": "string (realistic review excerpt)",
|
|
"action_owner": "role/team that can fix it"
|
|
}}
|
|
],
|
|
"not_actionable_examples": [
|
|
{{
|
|
"feedback_type": "string",
|
|
"example": "string (realistic review excerpt)",
|
|
"why_not_actionable": "string"
|
|
}}
|
|
]
|
|
}},
|
|
|
|
"sector_specific_signals": {{
|
|
"description": "Signals with sector-specific meaning",
|
|
"examples": [
|
|
{{
|
|
"signal": "string (word or phrase)",
|
|
"meaning_in_this_sector": "string",
|
|
"contrast_with": "how it differs in other sectors"
|
|
}}
|
|
]
|
|
}}
|
|
}}
|
|
```
|
|
|
|
## Critical Rules
|
|
|
|
1. **Use realistic review language** in `typical_language` arrays - actual phrases customers write
|
|
2. **Include 4-8 items** per array (not too few, not excessive)
|
|
3. **Be sector-specific** - don't use generic phrases that apply to all businesses
|
|
4. **Include appropriate modes** - only modes that actually exist in this sector
|
|
5. **NO primitive codes, priorities, weights, or solutions**
|
|
6. **Focus on WHAT customers care about**, not HOW to classify it
|
|
|
|
Return ONLY the JSON object, no markdown formatting or explanation.'''
|
|
|
|
|
|
def load_sectors(data_path: Path) -> list[dict]:
    """Load sector definitions from a JSON file.

    Args:
        data_path: Path to a JSON document whose top-level object has a
            ``"sectors"`` key.

    Returns:
        The value stored under ``"sectors"``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"sectors"`` key.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which would corrupt or reject non-ASCII sector names.
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)
    return data["sectors"]
|
|
|
|
|
|
def generate_sector_brief(client: OpenAI, sector: dict, model: str) -> dict:
    """Generate a sector brief for one sector via the chat completions API.

    Args:
        client: OpenAI client used to issue the request.
        sector: Sector definition providing ``sector_code``, ``sector_name``,
            ``description`` and ``sample_business_types`` (strings to join).
        model: Model name passed through to the API.

    Returns:
        The parsed brief, with ``sector_code``, ``sector_name``,
        ``generated_at`` and ``version`` overwritten locally.

    Raises:
        KeyError: If ``sector`` lacks one of the fields listed above.
        ValueError: If the model returns no text, invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or JSON that is
            not an object.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    prompt = PROMPT_TEMPLATE.format(
        sector_code=sector["sector_code"],
        sector_name=sector["sector_name"],
        description=sector["description"],
        business_types=", ".join(sector["sample_business_types"]),
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert customer experience analyst. Return only valid JSON, no markdown."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
        max_tokens=4000,
        response_format={"type": "json_object"},
    )

    # ``content`` can be None; fail with a readable message instead of an
    # AttributeError on ``None.strip()``.
    content = response.choices[0].message.content
    if content is None or not content.strip():
        raise ValueError(
            f"Model returned no content for sector {sector['sector_code']}"
        )

    brief = json.loads(content.strip())
    if not isinstance(brief, dict):
        raise ValueError(
            f"Expected a JSON object for sector {sector['sector_code']}, "
            f"got {type(brief).__name__}"
        )

    # Overwrite identity/metadata fields locally rather than trusting the
    # values echoed back by the model.
    brief["sector_code"] = sector["sector_code"]
    brief["sector_name"] = sector["sector_name"]
    # Same "<naive ISO timestamp>Z" format as before, without the deprecated
    # datetime.utcnow().
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    brief["generated_at"] = now_utc.isoformat() + "Z"
    brief["version"] = "1.0"

    return brief
|
|
|
|
|
|
def validate_brief(brief: dict) -> list[str]:
    """Validate a sector brief and return a list of human-readable issues.

    Checks, in order:
      1. every required top-level section is present;
      2. item counts for ``what_customers_judge`` (3-10),
         ``critical_pain_points`` (3+) and ``common_praise`` (3+);
      3. the serialized brief does not contain a forbidden term.

    Malformed shapes (a section that is not an object, ``items`` that is not
    a list) are reported as issues rather than raising.

    Args:
        brief: Parsed brief, normally a dict loaded from JSON.

    Returns:
        Issue messages; an empty list means the brief passed every check.
    """
    if not isinstance(brief, dict):
        return [f"Brief must be a JSON object, got {type(brief).__name__}"]

    required_keys = (
        "what_customers_judge",
        "critical_pain_points",
        "common_praise",
        "industry_terminology",
        "mode_specific_concerns",
        "what_is_actionable",
        "sector_specific_signals",
    )
    issues = [
        f"Missing required key: {key}" for key in required_keys if key not in brief
    ]

    # (section, minimum item count, maximum item count or None for unbounded)
    item_limits = (
        ("what_customers_judge", 3, 10),
        ("critical_pain_points", 3, None),
        ("common_praise", 3, None),
    )
    for section, minimum, maximum in item_limits:
        if section not in brief:
            continue  # already reported as missing above
        body = brief[section]
        if not isinstance(body, dict):
            issues.append(f"{section} must be an object, got {type(body).__name__}")
            continue
        items = body.get("items", [])
        if not isinstance(items, list):
            issues.append(
                f"{section}.items must be a list, got {type(items).__name__}"
            )
            continue
        if len(items) < minimum:
            issues.append(f"{section} has only {len(items)} items (need {minimum}+)")
        if maximum is not None and len(items) > maximum:
            issues.append(f"{section} has {len(items)} items (max {maximum})")

    # Plain substring scan over keys and values of the whole brief, so hits are
    # only "potentially" forbidden (e.g. "lightweight" matches "weight").
    # "solution" is deliberately not scanned: it may appear in context.
    text = json.dumps(brief).lower()
    for word in ("priority", "weight", "primitive", "enabled", "disabled"):
        if word in text:
            issues.append(f"Contains potentially forbidden term: {word}")

    return issues
|
|
|
|
|
|
def save_brief(brief: dict, output_dir: Path) -> Path:
    """Write ``brief`` to ``<output_dir>/<sector_code lowercased>_brief.json``.

    The output directory (and any missing parents) is created first. The
    brief is serialized as JSON with a two-space indent.

    Args:
        brief: Brief to persist; its ``sector_code`` determines the file name.
        output_dir: Directory that receives the file.

    Returns:
        Path of the file that was written.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    target = output_dir / (brief["sector_code"].lower() + "_brief.json")
    with target.open("w") as handle:
        json.dump(brief, handle, indent=2)

    return target
|
|
|
|
|
|
def validate_existing_briefs(output_dir: Path) -> None:
    """Validate every ``*_brief.json`` file in ``output_dir`` and print a report.

    Each file gets one status line followed by its issues, if any. A file that
    cannot be read or parsed is reported as invalid and the run continues
    with the remaining files.

    Args:
        output_dir: Directory containing previously generated briefs.
    """
    if not output_dir.exists():
        print(f"Output directory does not exist: {output_dir}")
        return

    files = sorted(output_dir.glob("*_brief.json"))
    if not files:
        print("No brief files found")
        return

    print(f"Validating {len(files)} brief files...\n")

    all_valid = True
    for filepath in files:
        try:
            with open(filepath, encoding="utf-8") as f:
                brief = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # one bad file must not abort validation of the others.
            issues = [f"Could not read brief: {exc}"]
        else:
            issues = validate_brief(brief)

        status = "✓" if not issues else "✗"
        print(f"{status} {filepath.name}")

        if issues:
            all_valid = False
            for issue in issues:
                print(f" - {issue}")

    print()
    if all_valid:
        print("All briefs valid!")
    else:
        print("Some briefs have issues.")
|
|
|
|
|
|
def main():
    """Command-line entry point: generate, preview, or validate sector briefs.

    Modes, checked in this order:
      * ``--validate``: validate the briefs already in the output directory.
      * ``--dry-run``: list the sectors that would be generated.
      * default: call the API once per selected sector, print validation
        issues as warnings, and save each brief.

    Paths are resolved relative to the parent of this script's directory.

    Exits with status 1 when the sectors file cannot be loaded, the requested
    sector is unknown, ``OPENAI_API_KEY`` is unset, or any sector fails.
    """
    parser = argparse.ArgumentParser(description="Generate sector briefs for Wave 0")
    parser.add_argument("--sector", help="Generate only this sector code")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be generated")
    parser.add_argument("--validate", action="store_true", help="Validate existing briefs")
    parser.add_argument("--output-dir", default="data/sector_briefs", help="Output directory")
    parser.add_argument("--model", default="gpt-4o", help="OpenAI model to use")
    args = parser.parse_args()

    # Paths
    script_dir = Path(__file__).parent
    package_dir = script_dir.parent
    data_path = package_dir / "data" / "sectors.json"
    output_dir = package_dir / args.output_dir

    # Validate mode needs neither the sectors file nor an API key.
    if args.validate:
        validate_existing_briefs(output_dir)
        return

    # Load sectors; report a missing or malformed file like the other fatal
    # errors below instead of surfacing a raw traceback.
    try:
        sectors = load_sectors(data_path)
    except (OSError, ValueError, KeyError, TypeError) as e:
        print(f"ERROR: Could not load sectors from {data_path}: {e}")
        sys.exit(1)
    print(f"Loaded {len(sectors)} sectors")

    # Filter to single sector if specified
    if args.sector:
        sectors = [s for s in sectors if s["sector_code"] == args.sector]
        if not sectors:
            print(f"ERROR: Sector '{args.sector}' not found")
            sys.exit(1)

    if args.dry_run:
        print("\n[DRY RUN] Would generate briefs for:")
        for sector in sectors:
            print(f" - {sector['sector_code']}: {sector['sector_name']}")
        print(f"\nOutput directory: {output_dir}")
        return

    # Check API key
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable required")
        sys.exit(1)

    # Initialize client
    client = OpenAI(api_key=api_key)
    print(f"Using model: {args.model}")

    # Generate briefs
    results = {"success": [], "failed": []}

    for i, sector in enumerate(sectors, 1):
        print(f"\n[{i}/{len(sectors)}] Generating brief for: {sector['sector_name']}")

        # Top-level boundary: one failing sector is recorded and must not stop
        # the remaining sectors from being generated.
        try:
            brief = generate_sector_brief(client, sector, args.model)

            # Validation issues are warnings only; the brief is still saved.
            issues = validate_brief(brief)
            if issues:
                print(" Warnings:")
                for issue in issues:
                    print(f" - {issue}")

            # Save
            output_path = save_brief(brief, output_dir)
            print(f" ✓ Saved to: {output_path}")
            results["success"].append(sector["sector_code"])

        except Exception as e:
            print(f" ✗ FAILED: {e}")
            results["failed"].append(sector["sector_code"])

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    print(f"Success: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")

    if results["failed"]:
        print(f"\nFailed sectors: {', '.join(results['failed'])}")
        sys.exit(1)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|