#!/usr/bin/env python3
"""
Wave 0: Sector Brief Generator

Generates alignment context briefs for each sector.
These briefs inform Wave 1 and Wave 2 primitive config generation.

Usage:
    python generate_sector_briefs.py                         # Generate all sectors
    python generate_sector_briefs.py --sector FOOD_DINING    # Generate one sector
    python generate_sector_briefs.py --dry-run               # Show what would be generated
    python generate_sector_briefs.py --validate              # Validate existing briefs
"""

import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path

try:
    from openai import OpenAI
except ImportError:
    # Only brief generation talks to the API. --validate and --dry-run must keep
    # working without the SDK, so the hard failure is deferred to main().
    OpenAI = None


PROMPT_TEMPLATE = '''You are an expert in customer experience analysis across industries.

Your task: Generate a **sector brief** for the "{sector_name}" sector.

This brief will be used to align classification agents with industry-specific context.
It describes what customers care about — NOT how to classify, NOT what primitives to use.

## Sector Information

- **Code**: {sector_code}
- **Name**: {sector_name}
- **Description**: {description}
- **Sample Business Types**: {business_types}

## Output Requirements

Generate a JSON object with this exact structure:

```json
{{
  "sector_code": "{sector_code}",
  "sector_name": "{sector_name}",
  "generated_at": "",
  "version": "1.0",

  "what_customers_judge": {{
    "description": "The primary dimensions customers evaluate in this sector",
    "items": [
      {{
        "aspect": "string (2-5 words)",
        "importance": "critical | high | moderate",
        "why_it_matters": "string (1 sentence)"
      }}
    ]
  }},

  "critical_pain_points": {{
    "description": "What damages reputation most severely",
    "items": [
      {{
        "pain_point": "string (2-5 words)",
        "typical_language": ["phrases customers actually use in reviews"],
        "reputation_impact": "severe | significant | moderate"
      }}
    ]
  }},

  "common_praise": {{
    "description": "What earns customer loyalty and positive reviews",
    "items": [
      {{
        "praise_area": "string (2-5 words)",
        "typical_language": ["phrases customers actually use in reviews"],
        "loyalty_impact": "high | moderate"
      }}
    ]
  }},

  "industry_terminology": {{
    "description": "Domain-specific vocabulary",
    "staff_terms": ["terms for staff roles in this sector"],
    "product_terms": ["terms for products/services"],
    "process_terms": ["terms for processes/interactions"],
    "quality_terms": ["positive quality descriptors"],
    "problem_terms": ["negative quality descriptors"]
  }},

  "mode_specific_concerns": {{
    "description": "Different service modes have different priorities",
    "modes": [
      {{
        "mode": "string (e.g., 'In-person', 'Online', 'Phone')",
        "primary_concerns": ["top concerns for this mode"],
        "unique_pain_points": ["pain points specific to this mode"]
      }}
    ]
  }},

  "what_is_actionable": {{
    "description": "Feedback businesses can act on",
    "actionable_examples": [
      {{
        "feedback_type": "string",
        "example": "string (realistic review excerpt)",
        "action_owner": "role/team that can fix it"
      }}
    ],
    "not_actionable_examples": [
      {{
        "feedback_type": "string",
        "example": "string (realistic review excerpt)",
        "why_not_actionable": "string"
      }}
    ]
  }},

  "sector_specific_signals": {{
    "description": "Signals with sector-specific meaning",
    "examples": [
      {{
        "signal": "string (word or phrase)",
        "meaning_in_this_sector": "string",
        "contrast_with": "how it differs in other sectors"
      }}
    ]
  }}
}}
```

## Critical Rules

1. **Use realistic review language** in `typical_language` arrays - actual phrases customers write
2. **Include 4-8 items** per array (not too few, not excessive)
3. **Be sector-specific** - don't use generic phrases that apply to all businesses
4. **Include appropriate modes** - only modes that actually exist in this sector
5. **NO primitive codes, priorities, weights, or solutions**
6. **Focus on WHAT customers care about**, not HOW to classify it

Return ONLY the JSON object, no markdown formatting or explanation.'''

# Top-level sections every brief must contain.
_REQUIRED_SECTIONS = (
    "what_customers_judge",
    "critical_pain_points",
    "common_praise",
    "industry_terminology",
    "mode_specific_concerns",
    "what_is_actionable",
    "sector_specific_signals",
)

# (section, minimum item count, maximum item count or None for unbounded).
_ITEM_LIMITS = (
    ("what_customers_judge", 3, 10),
    ("critical_pain_points", 3, None),
    ("common_praise", 3, None),
)

# Substrings hinting that classifier configuration leaked into a brief.
# "solution" is deliberately absent: it appears legitimately in review
# language and was never reported by this check.
_FORBIDDEN_TERMS = ("priority", "weight", "primitive", "enabled", "disabled")


def load_sectors(data_path: Path) -> list[dict]:
    """Load sector definitions from a JSON file.

    Args:
        data_path: Path to a JSON document with a top-level ``"sectors"`` key.

    Returns:
        The value stored under ``"sectors"``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"sectors"`` key.
    """
    # Explicit encoding: the platform default is locale-dependent.
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)
    return data["sectors"]


def generate_sector_brief(client: "OpenAI", sector: dict, model: str) -> dict:
    """Generate one sector brief with a chat-completion call.

    Args:
        client: Object exposing ``chat.completions.create`` (an OpenAI client).
        sector: Sector definition; reads ``sector_code``, ``sector_name``,
            ``description`` and ``sample_business_types``.
        model: Model name passed through to the API.

    Returns:
        The parsed brief, with ``sector_code``, ``sector_name``,
        ``generated_at`` and ``version`` overwritten locally.

    Raises:
        ValueError: If the reply has no content, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or is not a JSON
            object.
        KeyError: If ``sector`` lacks one of the fields listed above.
    """
    prompt = PROMPT_TEMPLATE.format(
        sector_code=sector["sector_code"],
        sector_name=sector["sector_name"],
        description=sector["description"],
        business_types=", ".join(sector["sample_business_types"]),
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert customer experience analyst. Return only valid JSON, no markdown.",
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
        max_tokens=4000,
        response_format={"type": "json_object"},
    )

    content = response.choices[0].message.content
    if content is None:
        # Without this guard the failure surfaces as an opaque AttributeError.
        raise ValueError("Model returned no content")

    brief = json.loads(content.strip())
    if not isinstance(brief, dict):
        raise ValueError(f"Expected a JSON object, got {type(brief).__name__}")

    # Identity and metadata are set locally rather than trusted from the reply.
    brief["sector_code"] = sector["sector_code"]
    brief["sector_name"] = sector["sector_name"]
    # Aware UTC timestamp rendered with the same "...Z" suffix as before.
    brief["generated_at"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    brief["version"] = "1.0"

    return brief


def _check_item_count(brief: dict, key: str, minimum: int, maximum=None) -> list[str]:
    """Return count and shape issues for the ``items`` array of one section."""
    if key not in brief:
        return []  # absence is reported by the required-key check

    section = brief[key]
    if not isinstance(section, dict):
        return [f"{key} must be an object, got {type(section).__name__}"]

    items = section.get("items", [])
    if not isinstance(items, list):
        return [f"{key}.items must be an array, got {type(items).__name__}"]

    issues = []
    if len(items) < minimum:
        issues.append(f"{key} has only {len(items)} items (need {minimum}+)")
    if maximum is not None and len(items) > maximum:
        issues.append(f"{key} has {len(items)} items (max {maximum})")
    return issues


def validate_brief(brief: dict) -> list[str]:
    """Validate a sector brief and return a list of issues.

    Malformed input (a non-dict brief, a non-dict section, a non-list
    ``items``) is reported as an issue instead of raising, so one bad brief
    cannot abort a batch.

    Args:
        brief: Parsed brief as produced by ``generate_sector_brief`` or loaded
            from disk.

    Returns:
        Human-readable issue strings; empty when the brief passes. Order is
        missing keys, then item-count problems, then forbidden terms.
    """
    if not isinstance(brief, dict):
        return [f"Brief must be a JSON object, got {type(brief).__name__}"]

    issues = [
        f"Missing required key: {key}"
        for key in _REQUIRED_SECTIONS
        if key not in brief
    ]

    for key, minimum, maximum in _ITEM_LIMITS:
        issues.extend(_check_item_count(brief, key, minimum, maximum))

    # Case-insensitive substring scan over the serialized brief (keys included).
    text = json.dumps(brief).lower()
    issues.extend(
        f"Contains potentially forbidden term: {word}"
        for word in _FORBIDDEN_TERMS
        if word in text
    )

    return issues


def save_brief(brief: dict, output_dir: Path) -> Path:
    """Write a brief to ``<output_dir>/<sector_code lowercased>_brief.json``.

    Args:
        brief: Brief to serialize; must contain ``sector_code``.
        output_dir: Destination directory, created if missing.

    Returns:
        Path of the written file.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    filename = f"{brief['sector_code'].lower()}_brief.json"
    output_path = output_dir / filename

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(brief, f, indent=2)

    return output_path


def validate_existing_briefs(output_dir: Path) -> bool:
    """Validate every ``*_brief.json`` file in a directory and print a report.

    A file that cannot be read or parsed is reported as an issue for that file
    and the remaining files are still checked.

    Args:
        output_dir: Directory to scan.

    Returns:
        True when at least one brief was found and all of them passed;
        False otherwise.
    """
    if not output_dir.exists():
        print(f"Output directory does not exist: {output_dir}")
        return False

    files = sorted(output_dir.glob("*_brief.json"))
    if not files:
        print("No brief files found")
        return False

    print(f"Validating {len(files)} brief files...\n")

    all_valid = True
    for filepath in files:
        try:
            with open(filepath, encoding="utf-8") as f:
                brief = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both invalid JSON and undecodable bytes.
            issues = [f"Could not read brief: {exc}"]
        else:
            issues = validate_brief(brief)

        status = "✓" if not issues else "✗"
        print(f"{status} {filepath.name}")

        if issues:
            all_valid = False
            for issue in issues:
                print(f" - {issue}")

    print()
    if all_valid:
        print("All briefs valid!")
    else:
        print("Some briefs have issues.")
    return all_valid


def main() -> None:
    """Parse command-line arguments and run validation, a dry run, or generation.

    Exits with status 1 when the requested sector is unknown, the openai
    package or ``OPENAI_API_KEY`` is missing, or any sector fails to generate.
    """
    parser = argparse.ArgumentParser(description="Generate sector briefs for Wave 0")
    parser.add_argument("--sector", help="Generate only this sector code")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be generated")
    parser.add_argument("--validate", action="store_true", help="Validate existing briefs")
    parser.add_argument("--output-dir", default="data/sector_briefs", help="Output directory")
    parser.add_argument("--model", default="gpt-4o", help="OpenAI model to use")
    args = parser.parse_args()

    # Paths are resolved relative to the directory above this script.
    script_dir = Path(__file__).parent
    package_dir = script_dir.parent
    data_path = package_dir / "data" / "sectors.json"
    output_dir = package_dir / args.output_dir

    # Validate mode
    if args.validate:
        validate_existing_briefs(output_dir)
        return

    # Load sectors
    sectors = load_sectors(data_path)
    print(f"Loaded {len(sectors)} sectors")

    # Filter to single sector if specified
    if args.sector:
        sectors = [s for s in sectors if s["sector_code"] == args.sector]
        if not sectors:
            print(f"ERROR: Sector '{args.sector}' not found")
            sys.exit(1)

    if args.dry_run:
        print("\n[DRY RUN] Would generate briefs for:")
        for sector in sectors:
            print(f" - {sector['sector_code']}: {sector['sector_name']}")
        print(f"\nOutput directory: {output_dir}")
        return

    # From here on the API is required.
    if OpenAI is None:
        print("ERROR: openai package required. Install with: pip install openai")
        sys.exit(1)

    # Check API key
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable required")
        sys.exit(1)

    # Initialize client
    client = OpenAI(api_key=api_key)
    print(f"Using model: {args.model}")

    # Generate briefs
    results = {"success": [], "failed": []}

    for i, sector in enumerate(sectors, 1):
        print(f"\n[{i}/{len(sectors)}] Generating brief for: {sector['sector_name']}")

        try:
            brief = generate_sector_brief(client, sector, args.model)

            # Validation issues are warnings only; the brief is still saved.
            issues = validate_brief(brief)
            if issues:
                print(" Warnings:")
                for issue in issues:
                    print(f" - {issue}")

            output_path = save_brief(brief, output_dir)
            print(f" ✓ Saved to: {output_path}")
            results["success"].append(sector["sector_code"])

        except Exception as e:
            # Per-sector boundary: record the failure and continue the batch.
            print(f" ✗ FAILED: {e}")
            results["failed"].append(sector["sector_code"])

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    print(f"Success: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")

    if results["failed"]:
        print(f"\nFailed sectors: {', '.join(results['failed'])}")
        sys.exit(1)


if __name__ == "__main__":
    main()