Initial commit - WhyRating Engine (Google Reviews Scraper)
This commit is contained in:
@@ -5,7 +5,7 @@ This package provides the base abstractions for building pipelines that can be
|
||||
discovered, registered, and rendered with dynamic dashboards.
|
||||
"""
|
||||
|
||||
from pipeline_core.base import BasePipeline, PipelineMetadata, PipelineResult
|
||||
from pipeline_core.base import BasePipeline, PipelineMetadata, PipelineResult, StageResult
|
||||
from pipeline_core.contracts import (
|
||||
DashboardConfig,
|
||||
DashboardSection,
|
||||
@@ -22,6 +22,7 @@ __all__ = [
|
||||
"BasePipeline",
|
||||
"PipelineMetadata",
|
||||
"PipelineResult",
|
||||
"StageResult",
|
||||
# Contracts
|
||||
"DashboardConfig",
|
||||
"DashboardSection",
|
||||
|
||||
311
packages/reviewiq-pipeline/IMPROVEMENTS.md
Normal file
311
packages/reviewiq-pipeline/IMPROVEMENTS.md
Normal file
@@ -0,0 +1,311 @@
|
||||
# ReviewIQ Pipeline Improvement Suggestions
|
||||
|
||||
Based on validation testing and analysis of the classification pipeline.
|
||||
|
||||
---
|
||||
|
||||
## 🔴 High Priority (Quality & Cost Impact)
|
||||
|
||||
### 1. Multi-Aspect Detection Gap
|
||||
**Problem**: LLM misses secondary codes in multi-aspect reviews.
|
||||
- "not too expensive" → V4.01 missed
|
||||
- "easy and fast" → J1.01 missed
|
||||
|
||||
**Solution**: Update classification prompt to:
|
||||
```
|
||||
For reviews with multiple distinct topics:
|
||||
1. Extract ALL aspects, not just the dominant one
|
||||
2. Assign urt_secondary codes for each additional aspect
|
||||
3. Flag reviews with 3+ aspects as "complex"
|
||||
```
|
||||
|
||||
**Impact**: ~15-20% of reviews contain multiple aspects that are currently only partially captured.
|
||||
|
||||
---
|
||||
|
||||
### 2. Enable Smart Router (Cost Savings)
|
||||
**Problem**: All reviews go through expensive Sonnet model.
|
||||
|
||||
**Solution**: Enable the implemented router:
|
||||
```python
|
||||
Config(
|
||||
router_enabled=True,
|
||||
router_conservative=True,
|
||||
router_cheap_model="claude-3-5-haiku-20241022",
|
||||
)
|
||||
```
|
||||
|
||||
**Impact**:
|
||||
- SKIP (1.6%): $0 cost (was ~$0.05)
|
||||
- CHEAP (31.4%): ~10x cheaper with Haiku
|
||||
- **Estimated 25-30% cost reduction**
|
||||
|
||||
---
|
||||
|
||||
### 3. JSON Truncation Recovery
|
||||
**Problem**: ~33% of batches hit JSON truncation, causing partial failures.
|
||||
|
||||
**Current State**: Partial recovery implemented but still loses some reviews.
|
||||
|
||||
**Solution**:
|
||||
1. Reduce batch size when reviews are long
|
||||
2. Add `max_tokens` buffer based on expected output
|
||||
3. Implement streaming JSON parser for real-time recovery
|
||||
|
||||
```python
|
||||
# Dynamic batch sizing based on review length
|
||||
if avg_review_length > 200:
|
||||
batch_size = min(batch_size, 15)
|
||||
if avg_review_length > 500:
|
||||
batch_size = min(batch_size, 8)
|
||||
```
|
||||
|
||||
**Impact**: Reduce fallback processing by ~50%, saving time and cost.
|
||||
|
||||
---
|
||||
|
||||
## 🟡 Medium Priority (Reliability & Accuracy)
|
||||
|
||||
### 4. LLM Response Caching
|
||||
**Problem**: Retries reprocess already-classified reviews.
|
||||
|
||||
**Solution**: Cache successful LLM responses by content hash:
|
||||
```python
|
||||
class ResponseCache:
|
||||
async def get(self, text_hash: str) -> dict | None:
|
||||
return await redis.get(f"llm:classify:{text_hash}")
|
||||
|
||||
async def set(self, text_hash: str, response: dict, ttl: int = 86400):
|
||||
await redis.setex(f"llm:classify:{text_hash}", ttl, json.dumps(response))
|
||||
```
|
||||
|
||||
**Impact**:
|
||||
- Zero cost for re-runs on same reviews
|
||||
- Faster pipeline retries
|
||||
- Useful for A/B testing prompts
|
||||
|
||||
---
|
||||
|
||||
### 5. Confidence-Based Routing
|
||||
**Problem**: LLM assigns codes even when uncertain.
|
||||
|
||||
**Solution**: Add confidence threshold in prompt:
|
||||
```
|
||||
If confidence < 70%:
|
||||
- Set confidence: "low"
|
||||
- Use generic code (V4.03) instead of guessing
|
||||
- Flag for human review
|
||||
```
|
||||
|
||||
**Impact**: Reduces misclassifications, improves data quality.
|
||||
|
||||
---
|
||||
|
||||
### 6. Post-Classification Validation
|
||||
**Problem**: Some classifications don't match review content.
|
||||
|
||||
**Solution**: Add rule-based validation layer:
|
||||
```python
|
||||
def validate_classification(text: str, urt_code: str) -> tuple[bool, str | None]:
|
||||
# Price mentioned but not V4.xx code?
|
||||
if has_price_mention(text) and not urt_code.startswith("V4"):
|
||||
return False, "V4.01" # Suggest correction
|
||||
|
||||
# Staff mentioned but not P1.xx code?
|
||||
if has_staff_mention(text) and not urt_code.startswith("P1"):
|
||||
return False, "P1.01"
|
||||
|
||||
return True, None
|
||||
```
|
||||
|
||||
**Impact**: Catch ~5-10% of obvious misclassifications.
|
||||
|
||||
---
|
||||
|
||||
### 7. Span Coverage Validation
|
||||
**Problem**: Some review text not covered by any span.
|
||||
|
||||
**Solution**: Track span coverage percentage:
|
||||
```python
|
||||
def calculate_coverage(text: str, spans: list) -> float:
|
||||
covered_chars = set()
|
||||
for span in spans:
|
||||
covered_chars.update(range(span['start'], span['end']))
|
||||
return len(covered_chars) / len(text)
|
||||
|
||||
# Flag if coverage < 60%
|
||||
if coverage < 0.6:
|
||||
log.warning(f"Low span coverage: {coverage:.0%}")
|
||||
```
|
||||
|
||||
**Impact**: Identify reviews where LLM skipped important content.
|
||||
|
||||
---
|
||||
|
||||
## 🟢 Lower Priority (Optimization & Monitoring)
|
||||
|
||||
### 8. Taxonomy Alignment Scoring
|
||||
**Problem**: Hard to measure classification quality at scale.
|
||||
|
||||
**Solution**: Build automated taxonomy alignment checker:
|
||||
```python
|
||||
# Check if keywords in text match expected domain
|
||||
DOMAIN_KEYWORDS = {
|
||||
"V4": ["price", "money", "worth", "cost", "expensive", "cheap"],
|
||||
"P1": ["staff", "employee", "service", "friendly", "rude"],
|
||||
"J1": ["wait", "fast", "slow", "quick", "time", "minutes"],
|
||||
"E1": ["clean", "dirty", "comfortable", "space", "room"],
|
||||
}
|
||||
|
||||
def alignment_score(text: str, urt_code: str) -> float:
|
||||
domain = urt_code[0:2]
|
||||
keywords = DOMAIN_KEYWORDS.get(domain, [])
|
||||
matches = sum(1 for kw in keywords if kw in text.lower())
|
||||
return matches / len(keywords) if keywords else 0.5
|
||||
```
|
||||
|
||||
**Impact**: Quality dashboard, regression detection.
|
||||
|
||||
---
|
||||
|
||||
### 9. Batch Size Auto-Tuning
|
||||
**Problem**: Fixed batch size doesn't adapt to review complexity.
|
||||
|
||||
**Solution**: Implement adaptive batch sizing:
|
||||
```python
|
||||
class AdaptiveBatchSizer:
|
||||
def __init__(self):
|
||||
self.history = [] # dicts with keys: size, success_rate, avg_len
|
||||
|
||||
def recommend_size(self, reviews: list) -> int:
|
||||
avg_length = sum(len(r['text']) for r in reviews) / len(reviews)
|
||||
|
||||
# Learn from history
|
||||
if self.history:
|
||||
# Find optimal size for similar review lengths
|
||||
similar = [h for h in self.history if abs(h['avg_len'] - avg_length) < 50]
|
||||
if similar:
|
||||
return max(h['size'] for h in similar if h['success_rate'] > 0.95)
|
||||
|
||||
# Default heuristics
|
||||
if avg_length > 300:
|
||||
return 10
|
||||
elif avg_length > 150:
|
||||
return 20
|
||||
else:
|
||||
return 30
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 10. Cost Tracking Dashboard
|
||||
**Problem**: No visibility into per-job, per-stage costs.
|
||||
|
||||
**Solution**: Add cost tracking to pipeline output:
|
||||
```python
|
||||
@dataclass
|
||||
class CostBreakdown:
|
||||
stage: str
|
||||
model: str
|
||||
input_tokens: int
|
||||
output_tokens: int
|
||||
cached_tokens: int
|
||||
cost_usd: float
|
||||
reviews_processed: int
|
||||
cost_per_review: float
|
||||
|
||||
# Store in database
|
||||
CREATE TABLE pipeline.cost_tracking (
|
||||
id SERIAL PRIMARY KEY,
|
||||
execution_id UUID,
|
||||
job_id UUID,
|
||||
stage VARCHAR(50),
|
||||
model VARCHAR(100),
|
||||
input_tokens INT,
|
||||
output_tokens INT,
|
||||
cached_tokens INT,
|
||||
cost_usd DECIMAL(10, 6),
|
||||
reviews_processed INT,
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 11. Streaming Classification
|
||||
**Problem**: Large batches block until complete.
|
||||
|
||||
**Solution**: Implement streaming for real-time progress:
|
||||
```python
|
||||
async def classify_streaming(reviews: list):
|
||||
async for partial_result in llm_client.stream_batch(reviews):
|
||||
# Yield each review as it completes
|
||||
yield partial_result
|
||||
|
||||
# Persist immediately
|
||||
await persist_classification(partial_result)
|
||||
```
|
||||
|
||||
**Impact**: Better UX, faster partial results, resilience to failures.
|
||||
|
||||
---
|
||||
|
||||
### 12. A/B Testing Framework
|
||||
**Problem**: Hard to compare prompt/model changes.
|
||||
|
||||
**Solution**: Built-in A/B testing:
|
||||
```python
|
||||
class ABTestConfig:
|
||||
test_name: str
|
||||
variant_a: ClassificationConfig # Control
|
||||
variant_b: ClassificationConfig # Treatment
|
||||
split_ratio: float = 0.1 # 10% to treatment
|
||||
metrics: list[str] = ["accuracy", "cost", "latency"]
|
||||
|
||||
# Run control on all reviews and treatment on the first 10% (split_ratio)
|
||||
results_a = await classify(reviews, config_a)
|
||||
results_b = await classify(reviews[:int(len(reviews)*0.1)], config_b)
|
||||
|
||||
# Compare metrics
|
||||
compare_results(results_a, results_b)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Priority Matrix
|
||||
|
||||
| Improvement | Effort | Impact | Priority |
|
||||
|-------------|--------|--------|----------|
|
||||
| 1. Multi-Aspect Detection | Medium | High | 🔴 P1 |
|
||||
| 2. Enable Smart Router | Low | High | 🔴 P1 |
|
||||
| 3. JSON Truncation Fix | Medium | High | 🔴 P1 |
|
||||
| 4. Response Caching | Medium | Medium | 🟡 P2 |
|
||||
| 5. Confidence Routing | Medium | Medium | 🟡 P2 |
|
||||
| 6. Post-Classification Validation | Low | Medium | 🟡 P2 |
|
||||
| 7. Span Coverage Validation | Low | Low | 🟢 P3 |
|
||||
| 8. Taxonomy Alignment | Medium | Low | 🟢 P3 |
|
||||
| 9. Adaptive Batch Sizing | High | Medium | 🟢 P3 |
|
||||
| 10. Cost Dashboard | Medium | Low | 🟢 P3 |
|
||||
| 11. Streaming Classification | High | Medium | 🟢 P3 |
|
||||
| 12. A/B Testing | High | Low | 🟢 P3 |
|
||||
|
||||
---
|
||||
|
||||
## Quick Wins (Can implement today)
|
||||
|
||||
1. **Enable router** - Already implemented, just needs config flag
|
||||
2. **Reduce batch size** - Set `classification_batch_size=15` for long reviews
|
||||
3. **Add span coverage logging** - Simple metric to track quality
|
||||
4. **Post-classification keyword check** - Basic validation rules
|
||||
|
||||
---
|
||||
|
||||
## Estimated Impact Summary
|
||||
|
||||
| Area | Current | After Improvements |
|
||||
|------|---------|-------------------|
|
||||
| Cost per 1000 reviews | ~$3.40 | ~$2.40 (-30%) |
|
||||
| Classification accuracy | ~85% | ~92% |
|
||||
| Multi-aspect capture | ~65% | ~90% |
|
||||
| Batch failure rate | ~33% | ~10% |
|
||||
| Pipeline retry cost | 100% | ~20% (with caching) |
|
||||
466
packages/reviewiq-pipeline/INDUSTRY_TAXONOMY_GAPS.md
Normal file
466
packages/reviewiq-pipeline/INDUSTRY_TAXONOMY_GAPS.md
Normal file
@@ -0,0 +1,466 @@
|
||||
# Industry-Specific Taxonomy Gap Analysis
|
||||
|
||||
## Current URT Coverage
|
||||
- **Spec**: 7 domains, 28 categories, 140 subcodes (universal)
|
||||
- **Database**: 138 subcodes implemented
|
||||
- **Claim**: "Works universally: Any industry, any size, any geography"
|
||||
|
||||
---
|
||||
|
||||
## Business Sector Analysis
|
||||
|
||||
### Tier 1: High-Volume Google Review Industries
|
||||
|
||||
These sectors have the most Google reviews and are most likely to be clients.
|
||||
|
||||
---
|
||||
|
||||
#### 🍽️ 1. RESTAURANTS & FOOD SERVICE
|
||||
**Expected Review Volume**: Very High
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Food quality/taste | Very High | ❌ No | **O2.06 Food Quality** |
|
||||
| Portion size | High | ❌ No | **O2.09 Portion Size** |
|
||||
| Drink quality | High | ❌ No | **O2.07 Drink Quality** |
|
||||
| Menu variety | Medium | ❌ No | **O2.08 Menu Variety** |
|
||||
| Freshness | High | ⚠️ O2.01 (Materials) | Needs specific code |
|
||||
| Chef/Cook skill | Medium | ⚠️ P2.02 (Skill) | Generic |
|
||||
| Wait time for food | High | ✅ J1.01 | Covered |
|
||||
| Reservation system | Medium | ✅ J2.xx | Covered |
|
||||
| Ambiance | High | ✅ E1.04 | Covered |
|
||||
| Cleanliness | High | ✅ E1.01 | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O2.06 - Food Quality (taste, preparation)
|
||||
O2.07 - Drink Quality (beverages, cocktails, coffee)
|
||||
O2.08 - Menu Variety (selection, options)
|
||||
O2.09 - Portion Size (amount served)
|
||||
O2.10 - Freshness (ingredient freshness)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🏨 2. HOTELS & ACCOMMODATION
|
||||
**Expected Review Volume**: Very High
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Room cleanliness | Very High | ✅ E1.01 | Covered |
|
||||
| Bed comfort | High | ⚠️ E1.02 (Comfort) | Needs specific |
|
||||
| Bathroom quality | High | ❌ No | **E1.09 Bathroom Quality** |
|
||||
| Noise level | High | ❌ No | **E1.10 Noise Level** |
|
||||
| WiFi quality | High | ⚠️ E2.xx | Needs specific |
|
||||
| Breakfast quality | High | ❌ No | Links to F&B gap |
|
||||
| Check-in/out speed | High | ✅ J1.01 | Covered |
|
||||
| Pool/Gym facilities | Medium | ❌ No | **E1.11 Amenity Quality** |
|
||||
| View | Medium | ❌ No | **E1.12 Room View** |
|
||||
| Location | High | ✅ A4.01 | Covered |
|
||||
| Value for money | High | ✅ V4.01 | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
E1.09 - Bathroom Quality (fixtures, water pressure, toiletries)
|
||||
E1.10 - Noise Level (soundproofing, street noise, neighbors)
|
||||
E1.11 - Amenity Quality (pool, gym, spa facilities)
|
||||
E1.12 - Room View (scenery, outlook)
|
||||
E2.06 - WiFi/Internet Quality (speed, reliability)
|
||||
O2.11 - Bed/Sleep Quality (mattress, pillows, linens)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🏥 3. HEALTHCARE (Clinics, Dentists, Doctors)
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Treatment effectiveness | Very High | ✅ O1.05 | Covered |
|
||||
| Doctor manner | High | ✅ P1.01-05 | Covered |
|
||||
| Wait time | Very High | ✅ J1.01-03 | Covered |
|
||||
| Pain management | High | ❌ No | **O1.12 Pain/Comfort Management** |
|
||||
| Diagnosis accuracy | High | ⚠️ O1.02 | Needs specific |
|
||||
| Explanation clarity | High | ❌ No | **P2.06 Medical Communication** |
|
||||
| Insurance handling | High | ❌ No | **V3.06 Insurance Processing** |
|
||||
| Appointment availability | High | ✅ A1.xx | Covered |
|
||||
| Follow-up care | Medium | ❌ No | **R3.06 Follow-up Care** |
|
||||
| Hygiene/Sterilization | High | ✅ E3.04 | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O1.12 - Pain/Comfort Management (during procedures)
|
||||
P2.06 - Medical Communication (explaining diagnosis, treatment)
|
||||
V3.06 - Insurance Processing (claims, billing, coverage)
|
||||
R3.06 - Follow-up Care (post-treatment support)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🚗 4. AUTOMOTIVE (Dealerships, Mechanics, Car Wash)
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ✅ Good (based on ClickRent data)
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Vehicle condition | High | ✅ O1.01-02 | Covered |
|
||||
| Hidden fees | Very High | ✅ V1.03 | Covered |
|
||||
| Staff honesty | High | ✅ R1.01 | Covered |
|
||||
| Repair quality | High | ✅ O2.02 | Covered |
|
||||
| Price fairness | High | ✅ V1.02 | Covered |
|
||||
| Wait time | High | ✅ J1.01 | Covered |
|
||||
| Warranty honoring | Medium | ⚠️ V2.04 | Covered |
|
||||
| Test drive experience | Medium | ❌ No | **O1.13 Demo/Trial Experience** |
|
||||
| Trade-in fairness | Medium | ❌ No | **V1.06 Trade-in Value** |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O1.13 - Demo/Trial Experience (test drives, product demos)
|
||||
V1.06 - Trade-in Value (exchange/trade fairness)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 💇 5. BEAUTY & WELLNESS (Salons, Spas, Gyms)
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Service result | Very High | ✅ O1.05 | Covered |
|
||||
| Stylist skill | High | ✅ P2.02 | Covered |
|
||||
| Hygiene | High | ✅ E3.04 | Covered |
|
||||
| Relaxation | High | ❌ No | **O1.14 Relaxation/Wellness Outcome** |
|
||||
| Equipment quality | Medium | ⚠️ E1.xx | Generic |
|
||||
| Class quality (gym) | Medium | ❌ No | **O1.15 Class/Instruction Quality** |
|
||||
| Membership value | Medium | ✅ V4.01 | Covered |
|
||||
| Trainer expertise | Medium | ✅ P2.01 | Covered |
|
||||
| Appointment booking | High | ✅ J2.xx | Covered |
|
||||
| Atmosphere | High | ✅ E1.04 | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O1.14 - Relaxation/Wellness Outcome (stress relief, rejuvenation)
|
||||
O1.15 - Class/Instruction Quality (fitness classes, workshops)
|
||||
E1.13 - Equipment Quality (gym machines, salon tools)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🎢 6. ENTERTAINMENT & RECREATION
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ❌ Poor (confirmed by Go Karts data)
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Fun factor | Very High | ❌ No | **O1.08 Entertainment Value** |
|
||||
| Excitement/Thrill | High | ❌ No | **O1.09 Excitement Level** |
|
||||
| Family suitability | High | ❌ No | **O1.06 Family Suitability** |
|
||||
| Group experience | High | ❌ No | **O1.11 Group Suitability** |
|
||||
| Safety (rides) | High | ✅ E4.01 | Covered |
|
||||
| Queue/Wait | High | ✅ J1.03 | Covered |
|
||||
| Value for money | High | ✅ V4.01 | Covered |
|
||||
| Staff friendliness | High | ✅ P1.01 | Covered |
|
||||
| Would recommend | High | ❌ No | **R1.06 Would Recommend** |
|
||||
| Would return | High | ❌ No | **R1.08 Will Return** |
|
||||
|
||||
**Missing Codes**: (Already documented)
|
||||
```sql
|
||||
O1.06 - Family Suitability
|
||||
O1.08 - Entertainment Value
|
||||
O1.09 - Excitement Level
|
||||
O1.11 - Group Suitability
|
||||
R1.06 - Would Recommend
|
||||
R1.08 - Will Return
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🏬 7. RETAIL (Shops, Stores, E-commerce)
|
||||
**Expected Review Volume**: Very High
|
||||
**Current Coverage**: ✅ Good
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Product quality | High | ✅ O2.01 | Covered |
|
||||
| Stock availability | High | ✅ A1.03 | Covered |
|
||||
| Price competitiveness | High | ✅ V2.05 | Covered |
|
||||
| Return policy | High | ✅ V2.04 | Covered |
|
||||
| Staff helpfulness | High | ✅ P2.xx | Covered |
|
||||
| Store organization | High | ✅ E1.03 | Covered |
|
||||
| Checkout speed | High | ✅ J1.01 | Covered |
|
||||
| Delivery (e-comm) | High | ✅ J1.02 | Covered |
|
||||
| Packaging | Medium | ⚠️ O2.05 | Partial |
|
||||
|
||||
**Minor Gaps**:
|
||||
```sql
|
||||
O2.12 - Packaging Quality (e-commerce specific)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🎓 8. EDUCATION & TRAINING
|
||||
**Expected Review Volume**: Medium
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Learning outcome | Very High | ✅ O1.05 | Covered |
|
||||
| Teacher quality | High | ✅ P2.xx | Covered |
|
||||
| Course content | High | ❌ No | **O2.13 Course/Curriculum Quality** |
|
||||
| Materials quality | Medium | ✅ O2.01 | Covered |
|
||||
| Value for tuition | High | ✅ V4.01 | Covered |
|
||||
| Schedule flexibility | Medium | ⚠️ O4.03 | Generic |
|
||||
| Progress tracking | Medium | ❌ No | **J4.06 Progress Communication** |
|
||||
| Certification value | Medium | ❌ No | **O1.16 Credential Value** |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O2.13 - Course/Curriculum Quality (content, structure, relevance)
|
||||
O1.16 - Credential/Certification Value
|
||||
J4.06 - Progress Communication (tracking, feedback)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🏠 9. HOME SERVICES (Plumbers, Electricians, Cleaners)
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ✅ Good
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Work quality | Very High | ✅ O2.02 | Covered |
|
||||
| Punctuality | Very High | ✅ J1.02 | Covered |
|
||||
| Price transparency | High | ✅ V1.03 | Covered |
|
||||
| Cleanliness after work | High | ✅ E1.01 | Covered |
|
||||
| Professionalism | High | ✅ P1.xx | Covered |
|
||||
| Problem solved | High | ✅ O1.05 | Covered |
|
||||
| Quote accuracy | High | ⚠️ V1.02 | Covered |
|
||||
| Warranty/Guarantee | Medium | ⚠️ V2.04 | Covered |
|
||||
|
||||
**No major gaps** - well covered by existing codes.
|
||||
|
||||
---
|
||||
|
||||
#### 🌍 10. TRAVEL & TOURISM (Tours, Attractions, Museums)
|
||||
**Expected Review Volume**: High
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Experience quality | High | ⚠️ V4.03 | Too generic |
|
||||
| Guide quality | High | ✅ P2.xx | Covered |
|
||||
| Value for money | High | ✅ V4.01 | Covered |
|
||||
| Educational value | Medium | ❌ No | **O1.17 Educational/Informative Value** |
|
||||
| Crowd management | Medium | ✅ J1.03 | Covered |
|
||||
| Photo opportunities | Medium | ❌ No | **E1.14 Photo/Visual Appeal** |
|
||||
| Accessibility | Medium | ✅ A3.xx | Covered |
|
||||
| Authenticity | Medium | ❌ No | **O2.14 Authenticity/Genuineness** |
|
||||
| Memorable experience | High | ❌ No | Links to Entertainment gap |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
O1.17 - Educational/Informative Value (learning experience)
|
||||
O2.14 - Authenticity/Genuineness (cultural accuracy, real experience)
|
||||
E1.14 - Photo/Visual Appeal (Instagram-worthy, scenic)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🐾 11. PET SERVICES (Vets, Groomers, Pet Stores)
|
||||
**Expected Review Volume**: Medium
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Animal care quality | Very High | ⚠️ O1.05 | Needs specific |
|
||||
| Handling gentleness | High | ❌ No | **P1.06 Animal Handling** |
|
||||
| Treatment outcome | High | ✅ O1.05 | Covered |
|
||||
| Pet comfort/stress | High | ❌ No | **O1.18 Pet Comfort/Stress** |
|
||||
| Staff knowledge | High | ✅ P2.01 | Covered |
|
||||
| Emergency availability | Medium | ✅ A1.01 | Covered |
|
||||
| Price transparency | High | ✅ V1.xx | Covered |
|
||||
| Facility cleanliness | High | ✅ E1.01 | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
P1.06 - Animal Handling (gentleness, care with pets)
|
||||
O1.18 - Pet Comfort/Stress (during service)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### 🎵 12. NIGHTLIFE (Bars, Clubs, Live Music)
|
||||
**Expected Review Volume**: Medium
|
||||
**Current Coverage**: ⚠️ Partial
|
||||
|
||||
| Topic | Frequency | Has Code? | Gap |
|
||||
|-------|-----------|-----------|-----|
|
||||
| Music/DJ quality | Very High | ❌ No | **E2.07 Music/Sound Quality** |
|
||||
| Drink quality/variety | High | ❌ No | Links to F&B gap |
|
||||
| Atmosphere/Vibe | High | ✅ E1.04 | Covered |
|
||||
| Crowd quality | Medium | ❌ No | **E2.08 Crowd/Clientele Quality** |
|
||||
| Door policy | Medium | ❌ No | **A1.06 Entry/Door Policy** |
|
||||
| Dance floor | Medium | ❌ No | **E1.15 Dance Floor Quality** |
|
||||
| Security/Safety | High | ✅ E4.01-02 | Covered |
|
||||
| Drink prices | High | ✅ V1.01 | Covered |
|
||||
| Staff attitude | High | ✅ P1.xx | Covered |
|
||||
|
||||
**Missing Codes**:
|
||||
```sql
|
||||
E2.07 - Music/Sound Quality (DJ, live music, sound system)
|
||||
E2.08 - Crowd/Clientele Quality (type of people, vibe)
|
||||
A1.06 - Entry/Door Policy (fairness, accessibility)
|
||||
E1.15 - Dance Floor Quality (space, surface, lighting)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Priority Summary: Missing Codes by Urgency
|
||||
|
||||
### 🔴 Critical (Universal - All Industries)
|
||||
```sql
|
||||
R1.06 - Would Recommend (recommendation intent)
|
||||
R1.07 - Would Not Recommend (anti-recommendation)
|
||||
R1.08 - Will Return (return intent positive)
|
||||
R1.09 - Won't Return (return intent negative)
|
||||
```
|
||||
|
||||
### 🟠 High (Multiple Industries)
|
||||
```sql
|
||||
-- Food & Beverage (Restaurants, Hotels, Nightlife, Cafes)
|
||||
O2.06 - Food Quality
|
||||
O2.07 - Drink Quality
|
||||
O2.08 - Menu Variety
|
||||
O2.09 - Portion Size
|
||||
|
||||
-- Entertainment & Tourism
|
||||
O1.06 - Family Suitability
|
||||
O1.08 - Entertainment Value
|
||||
O1.09 - Excitement Level
|
||||
O1.11 - Group Suitability
|
||||
|
||||
-- Hospitality
|
||||
E1.09 - Bathroom Quality
|
||||
E1.10 - Noise Level
|
||||
E1.11 - Amenity Quality
|
||||
E2.06 - WiFi Quality
|
||||
```
|
||||
|
||||
### 🟡 Medium (Industry-Specific)
|
||||
```sql
|
||||
-- Healthcare
|
||||
O1.12 - Pain/Comfort Management
|
||||
P2.06 - Medical Communication
|
||||
V3.06 - Insurance Processing
|
||||
|
||||
-- Nightlife
|
||||
E2.07 - Music/Sound Quality
|
||||
E2.08 - Crowd/Clientele Quality
|
||||
|
||||
-- Education
|
||||
O2.13 - Course/Curriculum Quality
|
||||
O1.16 - Credential Value
|
||||
|
||||
-- Hotels
|
||||
O2.11 - Bed/Sleep Quality
|
||||
E1.12 - Room View
|
||||
```
|
||||
|
||||
### 🟢 Lower (Niche)
|
||||
```sql
|
||||
-- Automotive
|
||||
O1.13 - Demo/Trial Experience
|
||||
V1.06 - Trade-in Value
|
||||
|
||||
-- Pet Services
|
||||
P1.06 - Animal Handling
|
||||
O1.18 - Pet Comfort
|
||||
|
||||
-- Tourism
|
||||
O1.17 - Educational Value
|
||||
O2.14 - Authenticity
|
||||
E1.14 - Photo Appeal
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recommended Implementation Phases
|
||||
|
||||
### Phase 1: Universal Codes (Add Immediately)
|
||||
4 codes - Covers ALL industries
|
||||
```sql
|
||||
INSERT INTO pipeline.urt_subcodes VALUES
|
||||
('R1.06', 'R1', 'R', 'Would Recommend', 'Intent to recommend to others'),
|
||||
('R1.07', 'R1', 'R', 'Would Not Recommend', 'Explicit anti-recommendation'),
|
||||
('R1.08', 'R1', 'R', 'Will Return', 'Intent to visit again'),
|
||||
('R1.09', 'R1', 'R', 'Won''t Return', 'Explicit no-return statement');
|
||||
```
|
||||
|
||||
### Phase 2: High-Frequency Gaps (Next Sprint)
|
||||
12 codes - Covers Hospitality, F&B, Entertainment
|
||||
```sql
|
||||
INSERT INTO pipeline.urt_subcodes VALUES
-- Food & Beverage
|
||||
('O2.06', 'O2', 'O', 'Food Quality', 'Taste, preparation, cooking quality'),
|
||||
('O2.07', 'O2', 'O', 'Drink Quality', 'Beverage quality and preparation'),
|
||||
('O2.08', 'O2', 'O', 'Menu Variety', 'Range of food/drink options'),
|
||||
('O2.09', 'O2', 'O', 'Portion Size', 'Amount of food served'),
|
||||
|
||||
-- Entertainment
|
||||
('O1.06', 'O1', 'O', 'Family Suitability', 'Appropriate for children and families'),
|
||||
('O1.08', 'O1', 'O', 'Entertainment Value', 'How enjoyable/fun the experience was'),
|
||||
('O1.09', 'O1', 'O', 'Excitement Level', 'Thrill and adrenaline factor'),
|
||||
('O1.11', 'O1', 'O', 'Group Suitability', 'Good for groups/parties'),
|
||||
|
||||
-- Hospitality
|
||||
('E1.09', 'E1', 'E', 'Bathroom Quality', 'Fixtures, water pressure, toiletries'),
|
||||
('E1.10', 'E1', 'E', 'Noise Level', 'Soundproofing, ambient noise'),
|
||||
('E1.11', 'E1', 'E', 'Amenity Quality', 'Pool, gym, spa facilities'),
|
||||
('E2.06', 'E2', 'E', 'WiFi Quality', 'Internet speed and reliability');
|
||||
```
|
||||
|
||||
### Phase 3: Industry-Specific (As Clients Onboard)
|
||||
Add codes when specific industries become clients.
|
||||
|
||||
---
|
||||
|
||||
## Coverage Score by Industry
|
||||
|
||||
| Industry | Current Coverage | After Phase 1 | After Phase 2 |
|
||||
|----------|-----------------|---------------|---------------|
|
||||
| Restaurants | 60% | 65% | **90%** |
|
||||
| Hotels | 65% | 70% | **90%** |
|
||||
| Healthcare | 70% | 75% | 80% |
|
||||
| Automotive | 85% | 90% | 90% |
|
||||
| Beauty/Wellness | 75% | 80% | 85% |
|
||||
| Entertainment | 50% | 60% | **90%** |
|
||||
| Retail | 90% | 95% | 95% |
|
||||
| Education | 70% | 75% | 80% |
|
||||
| Home Services | 95% | 95% | 95% |
|
||||
| Travel/Tourism | 60% | 70% | **85%** |
|
||||
| Pet Services | 75% | 80% | 85% |
|
||||
| Nightlife | 55% | 60% | **85%** |
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Is the taxonomy ready for production?**
|
||||
|
||||
| Scenario | Ready? |
|
||||
|----------|--------|
|
||||
| Service businesses (auto, home services) | ✅ Yes |
|
||||
| Retail | ✅ Yes |
|
||||
| Healthcare | ⚠️ Mostly (add Phase 1) |
|
||||
| Restaurants/F&B | ❌ No (need Phase 1+2) |
|
||||
| Hotels | ❌ No (need Phase 1+2) |
|
||||
| Entertainment | ❌ No (need Phase 1+2) |
|
||||
| Nightlife | ❌ No (need Phase 1+2) |
|
||||
|
||||
**Recommended Action**:
|
||||
1. Add Phase 1 codes immediately (4 universal codes)
|
||||
2. Add Phase 2 codes before onboarding hospitality/entertainment clients
|
||||
3. Add Phase 3 codes as specific industries come online
|
||||
238
packages/reviewiq-pipeline/TAXONOMY_GAPS.md
Normal file
238
packages/reviewiq-pipeline/TAXONOMY_GAPS.md
Normal file
@@ -0,0 +1,238 @@
|
||||
# URT Taxonomy Gap Analysis
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The current taxonomy has **significant gaps** that cause ~30-40% of review content to be classified as generic codes (V4.03, O1.05) when more specific codes would be appropriate.
|
||||
|
||||
**Current State**: 7 domains, 28 categories, 552 subcodes
|
||||
**Gap Impact**: ~653 reviews (58% of dataset) mention topics without specific codes
|
||||
|
||||
---
|
||||
|
||||
## Critical Gaps (High Frequency, No Coverage)
|
||||
|
||||
### 🔴 Gap 1: Family/Kids Experience
|
||||
**Mentions**: 205 reviews (18% of dataset)
|
||||
**Current Mapping**: → V4.03 (Generic) or O1.05 (Outcome)
|
||||
|
||||
**Missing Codes**:
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| O1.06 | Family Suitability | Appropriate for children and families |
|
||||
| O1.07 | Age Appropriateness | Suitable for specific age groups |
|
||||
| E1.06 | Child-Friendly Facilities | Amenities for children |
|
||||
|
||||
**Example Reviews Being Misclassified**:
|
||||
- "Brilliant day for adults and kids" → V4.03 (should be O1.06)
|
||||
- "Great family fun" → O1.05 (should be O1.06)
|
||||
- "Los niños disfrutaron mucho" → V4.03 (should be O1.06)
|
||||
|
||||
---
|
||||
|
||||
### 🔴 Gap 2: Fun/Entertainment Value
|
||||
**Mentions**: 198 reviews (18% of dataset)
|
||||
**Current Mapping**: → V4.03 (Generic) or O1.05 (Outcome)
|
||||
|
||||
**Missing Codes**:
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| O1.08 | Entertainment Value | How enjoyable/fun the experience was |
|
||||
| O1.09 | Excitement Level | Thrill and adrenaline factor |
|
||||
| O1.10 | Engagement | How captivating the experience was |
|
||||
|
||||
**Example Reviews Being Misclassified**:
|
||||
- "Everyone had a blast" → V4.03 (should be O1.08)
|
||||
- "Muy divertido" → V4.03 (should be O1.08)
|
||||
- "Fantastische kartbaan" → V4.03 (should be O1.08)
|
||||
|
||||
---
|
||||
|
||||
### 🔴 Gap 3: Recommendation Intent
|
||||
**Mentions**: 103 reviews (9% of dataset)
|
||||
**Current Mapping**: → V4.03 (Generic)
|
||||
|
||||
**Missing Codes**:
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| R1.06 | Would Recommend | Intent to recommend to others |
|
||||
| R1.07 | Would Not Recommend | Explicit anti-recommendation |
|
||||
| V4.06 | Net Promoter Signal | Explicit NPS-style sentiment |
|
||||
|
||||
**Example Reviews Being Misclassified**:
|
||||
- "100% recomendable" → V4.03 (should be R1.06)
|
||||
- "Highly recommend" → V4.03 (should be R1.06)
|
||||
- "Don't come here" → V4.03 V- (should be R1.07)
|
||||
|
||||
---
|
||||
|
||||
### 🟡 Gap 4: Return Intent
|
||||
**Mentions**: 65 reviews (6% of dataset)
|
||||
**Current Mapping**: → V4.03 or R4.03
|
||||
|
||||
**Missing Codes**:
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| R1.08 | Will Return | Intent to visit again |
|
||||
| R1.09 | Won't Return | Explicit no-return statement |
|
||||
|
||||
**Example Reviews**:
|
||||
- "We'll definitely be back" → R4.03 (should be R1.08)
|
||||
- "No volveré" → V4.03 (should be R1.09)
|
||||
|
||||
---
|
||||
|
||||
### 🟡 Gap 5: Food & Beverage
|
||||
**Mentions**: 59 reviews (5% of dataset)
|
||||
**Current Mapping**: → O1.01 or V4.03
|
||||
|
||||
**Missing Codes**:
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| O2.06 | Food Quality | Taste, freshness, presentation |
|
||||
| O2.07 | Drink Quality | Beverage quality |
|
||||
| O2.08 | Menu Variety | Range of food/drink options |
|
||||
| O2.09 | Portion Size | Amount of food served |
|
||||
|
||||
**Example Reviews**:
|
||||
- "Great food at the cafe" → O1.01 (should be O2.06)
|
||||
- "Drinks were overpriced" → V1.01 (should be O2.07 + V1.01)
|
||||
|
||||
---
|
||||
|
||||
### 🟡 Gap 6: Excitement/Thrill
|
||||
**Mentions**: 23 reviews (2% of dataset)
|
||||
**Current Mapping**: → V4.03 or O1.05
|
||||
|
||||
**Missing Code** (the same code already proposed under Gap 2):
|
||||
| Proposed Code | Name | Definition |
|
||||
|---------------|------|------------|
|
||||
| O1.09 | Excitement Level | Thrill and adrenaline factor |
|
||||
|
||||
---
|
||||
|
||||
## Medium Gaps (Moderate Frequency)
|
||||
|
||||
### Gap 7: Booking/Reservation Process
|
||||
**Current**: J2.xx codes exist, but their coverage is limited
|
||||
|
||||
**Missing**:
|
||||
| Code | Name | Definition |
|
||||
|------|------|------------|
|
||||
| J2.06 | Online Booking | Digital reservation experience |
|
||||
| J2.07 | Booking Confirmation | Clear confirmation process |
|
||||
|
||||
---
|
||||
|
||||
### Gap 8: Group Experience
|
||||
**Missing**:
|
||||
| Code | Name | Definition |
|
||||
|------|------|------------|
|
||||
| O1.11 | Group Suitability | Good for groups/parties |
|
||||
| O1.12 | Team Building | Corporate/team activities |
|
||||
|
||||
---
|
||||
|
||||
### Gap 9: Seasonal/Weather Factors
|
||||
**Missing**:
|
||||
| Code | Name | Definition |
|
||||
|------|------|------------|
|
||||
| E1.07 | Weather Protection | Shelter from elements |
|
||||
| E1.08 | Seasonal Suitability | Appropriate for season |
|
||||
|
||||
---
|
||||
|
||||
## Impact Analysis
|
||||
|
||||
### Current Classification Distribution (V4.03 Overuse)
|
||||
|
||||
```
|
||||
Code | Count | % | Issue
|
||||
--------|-------|------|-------
|
||||
P1.01 | 477 | 14% | ✅ Correct usage
|
||||
V4.03 | 319 | 10% | ⚠️ Likely 50%+ misclassified
|
||||
O1.02 | 270 | 8% | ✅ Correct usage
|
||||
V1.01 | 211 | 6% | ✅ Correct usage
|
||||
O1.01 | 174 | 5% | ✅ Correct usage
|
||||
```
|
||||
|
||||
### Estimated Misclassification Rate
|
||||
|
||||
| Gap Topic | Reviews | Est. Misclassified | % of Total |
|
||||
|-----------|---------|-------------------|------------|
|
||||
| Family/Kids | 205 | ~180 | 16% |
|
||||
| Fun/Entertainment | 198 | ~170 | 15% |
|
||||
| Recommendation | 103 | ~95 | 8% |
|
||||
| Return Intent | 65 | ~50 | 4% |
|
||||
| Food/Drinks | 59 | ~40 | 4% |
|
||||
| Excitement | 23 | ~20 | 2% |
|
||||
| **TOTAL** | **653** | **~555** | **~49%** |
|
||||
|
||||
---
|
||||
|
||||
## Recommended Taxonomy Additions
|
||||
|
||||
### Priority 1: Add to O1 (Core Product/Service)
|
||||
```sql
|
||||
INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
|
||||
('O1.06', 'O1', 'O', 'Family Suitability', 'Appropriate for children and families'),
|
||||
('O1.07', 'O1', 'O', 'Age Appropriateness', 'Suitable for specific age groups'),
|
||||
('O1.08', 'O1', 'O', 'Entertainment Value', 'How enjoyable/fun the experience was'),
|
||||
('O1.09', 'O1', 'O', 'Excitement Level', 'Thrill and adrenaline factor'),
|
||||
('O1.10', 'O1', 'O', 'Engagement', 'How captivating the experience was'),
|
||||
('O1.11', 'O1', 'O', 'Group Suitability', 'Good for groups/parties');
|
||||
```
|
||||
|
||||
### Priority 2: Add to R1 (Relationship/Loyalty)
|
||||
```sql
|
||||
INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
|
||||
('R1.06', 'R1', 'R', 'Would Recommend', 'Intent to recommend to others'),
|
||||
('R1.07', 'R1', 'R', 'Would Not Recommend', 'Explicit anti-recommendation'),
|
||||
('R1.08', 'R1', 'R', 'Will Return', 'Intent to visit again'),
|
||||
('R1.09', 'R1', 'R', 'Won''t Return', 'Explicit no-return statement');
|
||||
```
|
||||
|
||||
### Priority 3: Add to O2 (Product Features)
|
||||
```sql
|
||||
INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
|
||||
('O2.06', 'O2', 'O', 'Food Quality', 'Taste, freshness, presentation of food'),
|
||||
('O2.07', 'O2', 'O', 'Drink Quality', 'Quality of beverages'),
|
||||
('O2.08', 'O2', 'O', 'Menu Variety', 'Range of food/drink options'),
|
||||
('O2.09', 'O2', 'O', 'Portion Size', 'Amount of food served');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Validation Query
|
||||
|
||||
After adding codes, verify reduction in V4.03 usage:
|
||||
|
||||
```sql
|
||||
-- Before: V4.03 count
|
||||
SELECT COUNT(*) FROM pipeline.review_spans WHERE urt_primary = 'V4.03';
|
||||
-- Expected: ~319
|
||||
|
||||
-- After reclassification, target:
|
||||
-- V4.03: ~100 (true generic)
|
||||
-- O1.06-O1.11: ~200 (entertainment/family)
|
||||
-- R1.06-R1.09: ~150 (recommendation/return)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Is the taxonomy ready for production?** ❌ **No**
|
||||
|
||||
**Critical Issues**:
|
||||
1. ~58% of reviews mention topics without specific codes, and an estimated ~49% of reviews are misclassified as a result
|
||||
2. V4.03 is a catch-all masking actionable insights
|
||||
3. Industry-specific codes (entertainment, F&B) are missing
|
||||
|
||||
**Recommendation**: Add 14 new subcodes before production to capture:
|
||||
- Family/Kids experience (O1.06, O1.07)
|
||||
- Entertainment value (O1.08, O1.09, O1.10)
- Group suitability (O1.11)
|
||||
- Recommendation intent (R1.06, R1.07)
|
||||
- Return intent (R1.08, R1.09)
|
||||
- Food/Beverage (O2.06-O2.09)
|
||||
|
||||
**Estimated Improvement**: Classification accuracy from ~50% specific to ~85% specific.
|
||||
@@ -308,11 +308,15 @@ You are a review classifier using primitive-based analysis.
|
||||
"spans": [
|
||||
{
|
||||
"text": "exact text from review",
|
||||
"start": 0,
|
||||
"end": 25,
|
||||
"primitive": "MANNER",
|
||||
"valence": "+",
|
||||
"intensity": 2,
|
||||
"detail": 2,
|
||||
"confidence": 0.85
|
||||
"confidence": 0.85,
|
||||
"entity": null,
|
||||
"entity_type": null
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -427,13 +431,16 @@ ORDER BY span_count DESC;
|
||||
python run_classification_v2.py [OPTIONS]
|
||||
|
||||
Options:
|
||||
--business TEXT Business name or pattern (required for classify/evaluate)
|
||||
--limit INT Max reviews to process (default: 100)
|
||||
--dry-run Don't store results to database
|
||||
--evaluate BUSINESS Evaluate existing classification quality
|
||||
--language-analysis Analyze UNMAPPED rates by language across all data
|
||||
--use-llm Use real LLM classification (default: mock)
|
||||
--model TEXT Model for LLM (default: gpt-4o-mini)
|
||||
--business TEXT Business name or pattern (required for classify/evaluate)
|
||||
--limit INT Max reviews to process (default: 100)
|
||||
--dry-run Don't store results to database
|
||||
--evaluate BUSINESS Evaluate existing classification quality
|
||||
--language-analysis Analyze UNMAPPED rates by language across all data
|
||||
--ignore-legacy-language Exclude rows with language='auto'/'unknown'/NULL
|
||||
--latest-hours INT Only include spans from last N hours
|
||||
--use-existing Use existing spans instead of jobs
|
||||
--use-llm Use real LLM classification (requires OPENAI_API_KEY)
|
||||
--model TEXT Model for LLM (default: gpt-4o-mini)
|
||||
```
|
||||
|
||||
### Models
|
||||
|
||||
401
packages/reviewiq-pipeline/prompts/wave0_sector_brief.md
Normal file
401
packages/reviewiq-pipeline/prompts/wave0_sector_brief.md
Normal file
@@ -0,0 +1,401 @@
|
||||
# Wave 0: Sector Brief Generation Prompt
|
||||
|
||||
## Purpose
|
||||
|
||||
Generate a **sector brief** that provides alignment context for classification agents. This brief describes what customers care about in this sector — NOT how to classify it, NOT what primitives to use, NOT what solutions exist.
|
||||
|
||||
## Critical Guardrails
|
||||
|
||||
**DO:**
|
||||
- Describe customer concerns in plain language
|
||||
- Use real review language patterns
|
||||
- Focus on what customers judge, complain about, praise
|
||||
- Include industry-specific terminology
|
||||
- Identify mode-specific concerns (dine-in vs delivery, etc.)
|
||||
|
||||
**DO NOT:**
|
||||
- Assign primitive codes
|
||||
- Suggest priorities or weights
|
||||
- Propose solutions or playbooks
|
||||
- Define new categories or dimensions
|
||||
- Include KPIs or metrics
|
||||
- Make up statistics
|
||||
|
||||
---
|
||||
|
||||
## Input
|
||||
|
||||
You will receive:
|
||||
|
||||
```json
|
||||
{
|
||||
"sector_code": "FOOD_DINING",
|
||||
"sector_name": "Food & Dining",
|
||||
"description": "Restaurants, cafes, bars, bakeries, food trucks, catering services",
|
||||
"sample_business_types": [
|
||||
"Restaurants",
|
||||
"Cafes & Coffee",
|
||||
"Bars & Nightlife",
|
||||
"Bakeries & Desserts",
|
||||
"Food Services",
|
||||
"Quick Service"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Schema
|
||||
|
||||
Return ONLY valid JSON matching this schema:
|
||||
|
||||
```json
|
||||
{
|
||||
"sector_code": "string",
|
||||
"sector_name": "string",
|
||||
"generated_at": "ISO timestamp",
|
||||
"version": "1.0",
|
||||
|
||||
"what_customers_judge": {
|
||||
"description": "The primary dimensions customers evaluate in this sector",
|
||||
"items": [
|
||||
{
|
||||
"aspect": "string (2-5 words)",
|
||||
"importance": "critical | high | moderate",
|
||||
"why_it_matters": "string (1 sentence)"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"critical_pain_points": {
|
||||
"description": "What damages reputation most severely in this sector",
|
||||
"items": [
|
||||
{
|
||||
"pain_point": "string (2-5 words)",
|
||||
"typical_language": ["array of phrases customers actually use"],
|
||||
"reputation_impact": "severe | significant | moderate"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"common_praise": {
|
||||
"description": "What earns customer loyalty and positive reviews",
|
||||
"items": [
|
||||
{
|
||||
"praise_area": "string (2-5 words)",
|
||||
"typical_language": ["array of phrases customers actually use"],
|
||||
"loyalty_impact": "high | moderate"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"industry_terminology": {
|
||||
"description": "Domain-specific vocabulary used in this sector",
|
||||
"staff_terms": ["waiter", "server", "bartender", "chef"],
|
||||
"product_terms": ["dish", "meal", "appetizer", "entree"],
|
||||
"process_terms": ["reservation", "seating", "check", "tab"],
|
||||
"quality_terms": ["fresh", "authentic", "homemade"],
|
||||
"problem_terms": ["cold", "undercooked", "wrong order"]
|
||||
},
|
||||
|
||||
"mode_specific_concerns": {
|
||||
"description": "Different service modes have different customer priorities",
|
||||
"modes": [
|
||||
{
|
||||
"mode": "string (e.g., 'Dine-in', 'Takeout', 'Delivery')",
|
||||
"primary_concerns": ["array of top concerns for this mode"],
|
||||
"unique_pain_points": ["pain points specific to this mode"]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"what_is_actionable": {
|
||||
"description": "Feedback that businesses can realistically act on",
|
||||
"actionable_examples": [
|
||||
{
|
||||
"feedback_type": "string",
|
||||
"example": "string",
|
||||
"action_owner": "string (role/team that can fix it)"
|
||||
}
|
||||
],
|
||||
"not_actionable_examples": [
|
||||
{
|
||||
"feedback_type": "string",
|
||||
"example": "string",
|
||||
"why_not_actionable": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"sector_specific_signals": {
|
||||
"description": "Signals that have different meaning in this sector vs others",
|
||||
"examples": [
|
||||
{
|
||||
"signal": "string",
|
||||
"meaning_in_this_sector": "string",
|
||||
"contrast_with": "string (how it differs in other sectors)"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example Output (Food & Dining)
|
||||
|
||||
```json
|
||||
{
|
||||
"sector_code": "FOOD_DINING",
|
||||
"sector_name": "Food & Dining",
|
||||
"generated_at": "2026-01-31T10:00:00Z",
|
||||
"version": "1.0",
|
||||
|
||||
"what_customers_judge": {
|
||||
"description": "The primary dimensions customers evaluate in this sector",
|
||||
"items": [
|
||||
{
|
||||
"aspect": "Food taste and quality",
|
||||
"importance": "critical",
|
||||
"why_it_matters": "The core product - customers primarily visit for the food experience"
|
||||
},
|
||||
{
|
||||
"aspect": "Service speed and attentiveness",
|
||||
"importance": "critical",
|
||||
"why_it_matters": "Direct impact on dining experience and whether they feel valued"
|
||||
},
|
||||
{
|
||||
"aspect": "Cleanliness and hygiene",
|
||||
"importance": "critical",
|
||||
"why_it_matters": "Health/safety concern that can override all other positives if failed"
|
||||
},
|
||||
{
|
||||
"aspect": "Value for money",
|
||||
"importance": "high",
|
||||
"why_it_matters": "Portion size and quality relative to price affects return intent"
|
||||
},
|
||||
{
|
||||
"aspect": "Ambiance and atmosphere",
|
||||
"importance": "moderate",
|
||||
"why_it_matters": "Sets expectations and affects overall enjoyment, especially for special occasions"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"critical_pain_points": {
|
||||
"description": "What damages reputation most severely in this sector",
|
||||
"items": [
|
||||
{
|
||||
"pain_point": "Food safety incidents",
|
||||
"typical_language": ["got sick", "food poisoning", "found hair", "bug in food", "raw chicken"],
|
||||
"reputation_impact": "severe"
|
||||
},
|
||||
{
|
||||
"pain_point": "Cold or wrong food",
|
||||
"typical_language": ["food was cold", "wrong order", "not what I ordered", "missing items"],
|
||||
"reputation_impact": "significant"
|
||||
},
|
||||
{
|
||||
"pain_point": "Rude or dismissive staff",
|
||||
"typical_language": ["rude waiter", "ignored us", "attitude", "condescending", "eye roll"],
|
||||
"reputation_impact": "significant"
|
||||
},
|
||||
{
|
||||
"pain_point": "Excessive wait times",
|
||||
"typical_language": ["waited forever", "40 minutes for food", "never came back", "forgotten"],
|
||||
"reputation_impact": "significant"
|
||||
},
|
||||
{
|
||||
"pain_point": "Dirty facilities",
|
||||
"typical_language": ["dirty bathroom", "sticky table", "flies everywhere", "unclean"],
|
||||
"reputation_impact": "severe"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"common_praise": {
|
||||
"description": "What earns customer loyalty and positive reviews",
|
||||
"items": [
|
||||
{
|
||||
"praise_area": "Exceptional food quality",
|
||||
"typical_language": ["best I've ever had", "delicious", "perfectly cooked", "authentic", "fresh ingredients"],
|
||||
"loyalty_impact": "high"
|
||||
},
|
||||
{
|
||||
"praise_area": "Attentive friendly service",
|
||||
"typical_language": ["amazing server", "made us feel welcome", "remembered us", "went above and beyond"],
|
||||
"loyalty_impact": "high"
|
||||
},
|
||||
{
|
||||
"praise_area": "Great value",
|
||||
"typical_language": ["huge portions", "great price", "worth every penny", "can't beat it"],
|
||||
"loyalty_impact": "high"
|
||||
},
|
||||
{
|
||||
"praise_area": "Perfect ambiance",
|
||||
"typical_language": ["beautiful setting", "romantic", "cozy atmosphere", "perfect for date night"],
|
||||
"loyalty_impact": "moderate"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"industry_terminology": {
|
||||
"description": "Domain-specific vocabulary used in this sector",
|
||||
"staff_terms": ["server", "waiter", "waitress", "host", "hostess", "bartender", "chef", "cook", "manager", "busboy"],
|
||||
"product_terms": ["dish", "meal", "appetizer", "entree", "main course", "dessert", "special", "daily special", "sides"],
|
||||
"process_terms": ["reservation", "walk-in", "seated", "table", "check", "bill", "tab", "tip", "takeout", "to-go", "delivery"],
|
||||
"quality_terms": ["fresh", "homemade", "authentic", "crispy", "tender", "juicy", "flavorful", "seasoned", "cooked to perfection"],
|
||||
"problem_terms": ["cold", "lukewarm", "overcooked", "undercooked", "raw", "burnt", "soggy", "bland", "stale", "greasy"]
|
||||
},
|
||||
|
||||
"mode_specific_concerns": {
|
||||
"description": "Different service modes have different customer priorities",
|
||||
"modes": [
|
||||
{
|
||||
"mode": "Dine-in",
|
||||
"primary_concerns": ["ambiance", "service attentiveness", "wait time to be seated", "table cleanliness"],
|
||||
"unique_pain_points": ["loud neighbors", "rushed out", "ignored by server", "wrong seating"]
|
||||
},
|
||||
{
|
||||
"mode": "Takeout",
|
||||
"primary_concerns": ["order accuracy", "ready on time", "packaging quality", "ease of pickup"],
|
||||
"unique_pain_points": ["order not ready", "missing items", "cold by pickup", "wrong order in bag"]
|
||||
},
|
||||
{
|
||||
"mode": "Delivery",
|
||||
"primary_concerns": ["delivery time", "food temperature", "order accuracy", "packaging integrity"],
|
||||
"unique_pain_points": ["arrived cold", "leaked in bag", "missing sauces", "driver got lost", "late delivery"]
|
||||
},
|
||||
{
|
||||
"mode": "Catering",
|
||||
"primary_concerns": ["on-time setup", "quantity accuracy", "presentation", "dietary accommodation"],
|
||||
"unique_pain_points": ["not enough food", "late arrival", "wrong items", "poor presentation"]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"what_is_actionable": {
|
||||
"description": "Feedback that businesses can realistically act on",
|
||||
"actionable_examples": [
|
||||
{
|
||||
"feedback_type": "Specific staff behavior",
|
||||
"example": "Our server John was rude and rolled his eyes when we asked for substitutions",
|
||||
"action_owner": "Front of house manager"
|
||||
},
|
||||
{
|
||||
"feedback_type": "Food quality issue",
|
||||
"example": "The chicken was undercooked - pink in the middle",
|
||||
"action_owner": "Kitchen manager / Chef"
|
||||
},
|
||||
{
|
||||
"feedback_type": "Facility issue",
|
||||
"example": "Men's bathroom was out of soap and paper towels",
|
||||
"action_owner": "Facilities / Shift manager"
|
||||
},
|
||||
{
|
||||
"feedback_type": "Process issue",
|
||||
"example": "Waited 20 minutes to get our check after flagging the server twice",
|
||||
"action_owner": "FOH manager / Training"
|
||||
}
|
||||
],
|
||||
"not_actionable_examples": [
|
||||
{
|
||||
"feedback_type": "Subjective taste preference",
|
||||
"example": "I just don't like spicy food",
|
||||
"why_not_actionable": "Personal preference, not a quality issue"
|
||||
},
|
||||
{
|
||||
"feedback_type": "Location/parking",
|
||||
"example": "Hard to find parking in this area",
|
||||
"why_not_actionable": "External factor beyond restaurant control"
|
||||
},
|
||||
{
|
||||
"feedback_type": "Price objection without context",
|
||||
"example": "Too expensive",
|
||||
"why_not_actionable": "Vague, no specifics on what was mispriced"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"sector_specific_signals": {
|
||||
"description": "Signals that have different meaning in this sector vs others",
|
||||
"examples": [
|
||||
{
|
||||
"signal": "long wait",
|
||||
"meaning_in_this_sector": "Usually negative - food taking too long, being ignored",
|
||||
"contrast_with": "Healthcare: expected and sometimes indicates thoroughness"
|
||||
},
|
||||
{
|
||||
"signal": "portion size",
|
||||
"meaning_in_this_sector": "Critical value indicator - directly affects perceived value",
|
||||
"contrast_with": "Healthcare: not applicable"
|
||||
},
|
||||
{
|
||||
"signal": "noisy",
|
||||
"meaning_in_this_sector": "Context-dependent - negative for fine dining, expected at sports bars",
|
||||
"contrast_with": "Professional services: always negative"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Sectors to Generate
|
||||
|
||||
Generate one brief for each of these L1 sectors:
|
||||
|
||||
| Code | Sector Name | Sample Business Types |
|
||||
|------|-------------|----------------------|
|
||||
| `FOOD_DINING` | Food & Dining | Restaurants, Cafes, Bars, Bakeries, Catering |
|
||||
| `RETAIL_SHOPPING` | Retail & Shopping | Clothing, Electronics, Grocery, Specialty stores |
|
||||
| `AUTOMOTIVE` | Automotive | Dealers, Repair, Car Wash, Parts |
|
||||
| `HEALTHCARE` | Healthcare | Hospitals, Clinics, Dental, Mental Health, Veterinary |
|
||||
| `PROFESSIONAL_SERVICES` | Professional Services | Legal, Accounting, Consulting, Marketing |
|
||||
| `HOME_SERVICES` | Home Services | Plumbing, Electrical, HVAC, Cleaning, Landscaping |
|
||||
| `PERSONAL_SERVICES` | Personal Services | Salons, Spas, Fitness, Tattoo |
|
||||
| `EDUCATION` | Education | Schools, Tutoring, Driving Schools, Language |
|
||||
| `HOSPITALITY_TRAVEL` | Hospitality & Travel | Hotels, Tours, Travel Agencies |
|
||||
| `ENTERTAINMENT` | Entertainment | Movies, Museums, Amusement Parks, Sports |
|
||||
| `FINANCE_INSURANCE` | Finance & Insurance | Banks, Insurance, Investment, Loans |
|
||||
| `REAL_ESTATE` | Real Estate | Agents, Property Management, Appraisers |
|
||||
| `INDUSTRIAL` | Industrial | Manufacturing, Construction, Warehousing |
|
||||
| `TRANSPORTATION` | Transportation | Taxis, Moving, Shipping, Courier |
|
||||
| `GOVERNMENT` | Government | DMV, Courts, Public Services |
|
||||
| `EVENTS_WEDDINGS` | Events & Weddings | Wedding Venues, Planners, DJ, Photography |
|
||||
| `RELIGIOUS` | Religious | Churches, Temples, Mosques, Spiritual |
|
||||
| `NONPROFIT` | Non-Profit | Charities, Community Organizations |
|
||||
| `TECHNOLOGY` | Technology | IT Services, Software, Web Design |
|
||||
| `PETS_ANIMALS` | Pets & Animals | Pet Stores, Grooming, Boarding, Training |
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
This brief will be injected into Wave 1 and Wave 2 prompts as alignment context:
|
||||
|
||||
```
|
||||
You are configuring primitives for: {sector_name}
|
||||
|
||||
## Sector Context (READ-ONLY, do not modify or extend)
|
||||
|
||||
{sector_brief_json}
|
||||
|
||||
## Your Task
|
||||
|
||||
Using the above context to understand what matters in this sector,
|
||||
configure the following primitives...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
Before returning, verify:
|
||||
- [ ] All arrays have 3-10 items (not empty, not excessive)
|
||||
- [ ] `typical_language` arrays contain realistic review phrases
|
||||
- [ ] No primitive codes, priorities, or solutions appear anywhere
|
||||
- [ ] Industry terminology is accurate for this sector
|
||||
- [ ] Modes are appropriate for the sector (not all sectors have delivery)
|
||||
- [ ] Actionable vs not-actionable distinction is clear
|
||||
132
packages/reviewiq-pipeline/run_classification.py
Normal file
132
packages/reviewiq-pipeline/run_classification.py
Normal file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Run classification pipeline for a scraping job.
|
||||
|
||||
Usage:
|
||||
python run_classification.py 22c747a6-b913-4ae4-82bc-14b4195008b6
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger("run_classification")
|
||||
|
||||
|
||||
async def run_pipeline(job_id: str):
    """Run the classification pipeline for a single scraping job.

    Builds a ``Config`` from environment variables, runs the ``normalize``,
    ``classify``, ``route`` and ``aggregate`` stages for ``job_id`` and logs
    a per-stage summary.

    Args:
        job_id: Identifier of the scraping job to process; it is passed to
            the pipeline as ``{"job_id": job_id}``.

    Returns:
        The result object returned by ``Pipeline.process``.

    Raises:
        Exception: Any error raised while initializing or running the
            pipeline is logged with its traceback and re-raised. The
            pipeline is closed in every case.
    """
    # Project imports are kept function-local, as in the original script.
    from reviewiq_pipeline import Pipeline
    from reviewiq_pipeline.config import Config

    # Get database URL from environment or use the local development default.
    database_url = os.environ.get(
        "DATABASE_URL",
        "postgresql://scraper:scraper123@localhost:5437/scraper",
    )

    # SECURITY: API keys must come from the environment and never be committed
    # to source control. A key that was previously hard-coded here has to be
    # treated as compromised and rotated.
    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not anthropic_api_key:
        logger.warning(
            "ANTHROPIC_API_KEY is not set; classification with the "
            "'anthropic' provider is likely to fail"
        )

    logger.info(f"Processing job {job_id}")

    # Initialize pipeline
    config = Config(
        database_url=database_url,
        llm_provider="anthropic",
        llm_model="claude-sonnet-4-5-20250929",
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        anthropic_api_key=anthropic_api_key,
        classification_batch_size=25,
        classification_max_concurrent=5,
        classification_target_utilization=0.70,
    )

    pipeline = Pipeline(config)

    try:
        await pipeline.initialize()
        logger.info("Pipeline initialized")

        # Run all stages (normalize, classify, route, aggregate).
        # Only the job_id is passed; the pipeline fetches the reviews itself.
        logger.info("Starting pipeline execution...")
        start_time = datetime.now()

        result = await pipeline.process(
            {"job_id": job_id},
            stages=["normalize", "classify", "route", "aggregate"],
        )

        elapsed = (datetime.now() - start_time).total_seconds()

        # Overall outcome
        if result.success:
            logger.info(f"Pipeline completed successfully in {elapsed:.1f}s")
        else:
            logger.warning(f"Pipeline completed with errors in {elapsed:.1f}s")
            if result.error:
                logger.error(f"Error: {result.error}")

        # Stage summaries
        for stage_name, stage_result in result.stage_results.items():
            # Stage results may be objects or plain dicts. _stage_field reads
            # either form without tripping over falsy values such as
            # success=False or duration_ms=0.
            success = _stage_field(stage_result, "success", False)
            data = _stage_field(stage_result, "data", {})
            error = _stage_field(stage_result, "error")
            duration_ms = _stage_field(stage_result, "duration_ms", 0)

            if not success:
                logger.error(f"  {stage_name}: FAILED - {error}")
                continue

            # Tolerate a missing or None "stats" entry.
            stats = (data.get("stats") or {}) if data else {}

            if stage_name == "normalize":
                logger.info(f"  Stage 1 (Normalize): {stats.get('output_count', '?')} reviews")
            elif stage_name == "classify":
                logger.info(
                    f"  Stage 2 (Classify): {stats.get('success_count', '?')} reviews, "
                    f"{stats.get('total_spans', '?')} spans, "
                    f"${stats.get('llm_cost_usd') or 0:.4f} LLM cost"
                )
            elif stage_name == "route":
                logger.info(
                    f"  Stage 3 (Route): {stats.get('spans_routed', '?')} spans, "
                    f"{stats.get('issues_created', '?')} issues"
                )
            elif stage_name == "aggregate":
                logger.info(f"  Stage 4 (Aggregate): {stats.get('facts_upserted', '?')} facts")

            logger.info(f"    Duration: {duration_ms}ms")

        return result

    except Exception as e:
        logger.exception(f"Pipeline failed: {e}")
        raise
    finally:
        await pipeline.close()


def _stage_field(stage_result, name, default=None):
    """Read ``name`` from a stage result given as a dict or as an object.

    Returns ``default`` when the field is missing or ``None``; falsy values
    such as ``False`` or ``0`` are returned unchanged.
    """
    if isinstance(stage_result, dict):
        value = stage_result.get(name, default)
    else:
        value = getattr(stage_result, name, default)
    return default if value is None else value
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import uuid

    # A single positional argument is required: the job ID.
    if not sys.argv[1:]:
        print("Usage: python run_classification.py <job_id>")
        sys.exit(1)

    job_id = sys.argv[1]

    # Reject anything that is not a well-formed UUID before starting the run.
    try:
        uuid.UUID(job_id)
    except ValueError:
        print(f"Invalid job ID format: {job_id}")
        sys.exit(1)

    result = asyncio.run(run_pipeline(job_id))

    # Exit with a non-zero status when the pipeline reports failure.
    if result and not result.success:
        sys.exit(1)
|
||||
409
packages/reviewiq-pipeline/scripts/backfill_review_facts.py
Normal file
409
packages/reviewiq-pipeline/scripts/backfill_review_facts.py
Normal file
@@ -0,0 +1,409 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill review_facts_v1 from public.jobs.reviews_data.
|
||||
|
||||
Parses relative timestamps ("17 hours ago", "2 weeks ago") into absolute
|
||||
timestamps anchored to job.created_at.
|
||||
|
||||
Usage:
|
||||
python backfill_review_facts.py
|
||||
python backfill_review_facts.py --dry-run
|
||||
python backfill_review_facts.py --job-id <uuid>
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
# Database URL: the DATABASE_URL environment variable wins when set; the
# fallback is a literal connection string with embedded credentials.
DB_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://scraper:scraper123@localhost:5437/scraper",
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# RELATIVE TIMESTAMP PARSER
|
||||
# =============================================================================
|
||||
|
||||
# Regex patterns for relative timestamps
|
||||
RELATIVE_PATTERNS = [
|
||||
# "17 hours ago", "2 weeks ago", "a month ago"
|
||||
(r"(?:edited\s+)?(\d+|a|an)\s+(second|minute|hour|day|week|month|year)s?\s+ago", "standard"),
|
||||
# "just now"
|
||||
(r"just\s+now", "just_now"),
|
||||
# "yesterday"
|
||||
(r"yesterday", "yesterday"),
|
||||
# "today"
|
||||
(r"today", "today"),
|
||||
]
|
||||
|
||||
# Time unit multipliers (in seconds)
|
||||
TIME_UNITS = {
|
||||
"second": 1,
|
||||
"minute": 60,
|
||||
"hour": 3600,
|
||||
"day": 86400,
|
||||
"week": 604800,
|
||||
"month": 2592000, # 30 days
|
||||
"year": 31536000, # 365 days
|
||||
}
|
||||
|
||||
|
||||
def parse_relative_timestamp(raw: str, reference_time: datetime) -> datetime | None:
|
||||
"""
|
||||
Parse a relative timestamp string into an absolute datetime.
|
||||
|
||||
Args:
|
||||
raw: Relative timestamp like "17 hours ago", "Edited 2 weeks ago"
|
||||
reference_time: The reference point (usually job.created_at)
|
||||
|
||||
Returns:
|
||||
Absolute datetime or None if parsing failed
|
||||
"""
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
text = raw.lower().strip()
|
||||
|
||||
# Handle "just now"
|
||||
if "just now" in text:
|
||||
return reference_time
|
||||
|
||||
# Handle "yesterday"
|
||||
if text == "yesterday":
|
||||
return reference_time - timedelta(days=1)
|
||||
|
||||
# Handle "today"
|
||||
if text == "today":
|
||||
return reference_time
|
||||
|
||||
# Handle standard relative format
|
||||
# Remove "edited " prefix if present
|
||||
text = re.sub(r"^edited\s+", "", text)
|
||||
|
||||
# Match "N unit(s) ago"
|
||||
match = re.match(r"(\d+|a|an)\s+(second|minute|hour|day|week|month|year)s?\s+ago", text)
|
||||
if match:
|
||||
quantity_str = match.group(1)
|
||||
unit = match.group(2)
|
||||
|
||||
# Convert "a"/"an" to 1
|
||||
if quantity_str in ("a", "an"):
|
||||
quantity = 1
|
||||
else:
|
||||
quantity = int(quantity_str)
|
||||
|
||||
seconds = quantity * TIME_UNITS.get(unit, 0)
|
||||
return reference_time - timedelta(seconds=seconds)
|
||||
|
||||
# Unknown format
|
||||
return None
|
||||
|
||||
|
||||
def parse_relative_timestamp_safe(raw: str, reference_time: datetime) -> tuple[datetime | None, bool]:
    """
    Non-raising wrapper around parse_relative_timestamp.

    Returns:
        (parsed_time, success) - success is False both when the parser
        returned None and when it raised.
    """
    try:
        parsed = parse_relative_timestamp(raw, reference_time)
    except Exception:
        # Best-effort by design: any parser error is reported as a failed parse.
        return None, False
    return parsed, parsed is not None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# BACKFILL LOGIC
|
||||
# =============================================================================
|
||||
|
||||
async def get_jobs_with_reviews(pool: asyncpg.Pool, job_id: str | None = None) -> list[dict]:
    """
    Fetch jobs whose reviews_data is a non-null JSONB array.

    With a job_id only that job is looked up; without one every matching
    job is returned, newest first. Each row is converted to a plain dict
    with job_id, created_at, reviews_data and business_id (the metadata
    business_name, falling back to url).
    """
    if not job_id:
        all_jobs_sql = """
            SELECT job_id, created_at, reviews_data,
            COALESCE(metadata->>'business_name', url) as business_id
            FROM public.jobs
            WHERE reviews_data IS NOT NULL
            AND jsonb_typeof(reviews_data) = 'array'
            ORDER BY created_at DESC
        """
        records = await pool.fetch(all_jobs_sql)
        return [dict(record) for record in records]

    single_job_sql = """
        SELECT job_id, created_at, reviews_data,
        COALESCE(metadata->>'business_name', url) as business_id
        FROM public.jobs
        WHERE job_id = $1
        AND reviews_data IS NOT NULL
        AND jsonb_typeof(reviews_data) = 'array'
    """
    records = await pool.fetch(single_job_sql, job_id)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
async def get_run_id_for_job(pool: asyncpg.Pool, job_id: str) -> str | None:
    """
    Look up one non-null run_id recorded for the job in detected_spans_v2.

    Returns the run_id as a string, or None when no row (or only a falsy
    run_id) is found.
    """
    record = await pool.fetchrow("""
        SELECT DISTINCT run_id FROM pipeline.detected_spans_v2
        WHERE job_id = $1 AND run_id IS NOT NULL
        LIMIT 1
    """, job_id)
    if not record:
        return None
    found = record["run_id"]
    return str(found) if found else None
|
||||
|
||||
|
||||
async def get_language_for_review(pool: asyncpg.Pool, review_id: str) -> str | None:
    """
    Return a non-null language stored on any span of the review.

    None is returned when the review has no span with a language.
    """
    record = await pool.fetchrow("""
        SELECT language FROM pipeline.detected_spans_v2
        WHERE review_id = $1 AND language IS NOT NULL
        LIMIT 1
    """, review_id)
    if not record:
        return None
    return record["language"]
|
||||
|
||||
|
||||
async def upsert_review_facts(
    pool: asyncpg.Pool,
    facts: list[dict],
    dry_run: bool = False,
) -> tuple[int, int]:
    """
    Upsert review facts into pipeline.review_facts_v1, keyed on review_id.

    On conflict every column is overwritten, except run_id and language,
    which keep their stored value when the incoming value is NULL.

    Returns:
        (inserted_count, updated_count) - inserted_count is the number of
        records sent (inserts and updates are not told apart) and
        updated_count is always 0. A dry run or an empty list sends
        nothing and returns (0, 0).
    """
    if not facts or dry_run:
        return 0, 0

    upsert_sql = """
        INSERT INTO pipeline.review_facts_v1
        (review_id, business_id, job_id, run_id, rating, review_time_utc, raw_timestamp, author, language)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
        ON CONFLICT (review_id) DO UPDATE SET
        business_id = EXCLUDED.business_id,
        job_id = EXCLUDED.job_id,
        run_id = COALESCE(EXCLUDED.run_id, pipeline.review_facts_v1.run_id),
        rating = EXCLUDED.rating,
        review_time_utc = EXCLUDED.review_time_utc,
        raw_timestamp = EXCLUDED.raw_timestamp,
        author = EXCLUDED.author,
        language = COALESCE(EXCLUDED.language, pipeline.review_facts_v1.language)
    """

    # Column order must line up with $1..$9 above. The first three keys are
    # mandatory (KeyError when missing); the rest default to None.
    mandatory = ("review_id", "business_id", "job_id")
    nullable = ("run_id", "rating", "review_time_utc", "raw_timestamp", "author", "language")
    rows = [
        tuple(fact[key] for key in mandatory) + tuple(fact.get(key) for key in nullable)
        for fact in facts
    ]

    await pool.executemany(upsert_sql, rows)
    return len(rows), 0
|
||||
|
||||
|
||||
async def backfill_job(
    pool: asyncpg.Pool,
    job: dict,
    dry_run: bool = False,
    verbose: bool = False,
) -> dict[str, Any]:
    """
    Backfill review facts for a single job.

    Each entry of job["reviews_data"] is turned into one fact row whose
    review_time_utc is the entry's relative "timestamp" resolved against
    job["created_at"]. Entries that cannot be used (invalid JSON string,
    not a JSON object, or no review_id) are skipped instead of aborting
    the job.

    Args:
        pool: Connection pool used for the lookups and the upsert
        job: Row with job_id, created_at, business_id and reviews_data
        dry_run: Parse and count only; no language lookup and no write
        verbose: Print a per-job summary line

    Returns:
        Stats dict with job_id, total_reviews, parsed_ok, parsed_failed,
        inserted, skipped and sample_failures (up to 5 raw timestamps
        that could not be parsed)
    """
    job_id = job["job_id"]
    job_created = job["created_at"]
    business_id = job["business_id"]
    reviews_data = job["reviews_data"]

    # asyncpg may return JSONB as string
    if isinstance(reviews_data, str):
        reviews_data = json.loads(reviews_data)

    # Make job_created timezone-aware if it isn't
    if job_created.tzinfo is None:
        job_created = job_created.replace(tzinfo=timezone.utc)

    # Get run_id for this job
    run_id = await get_run_id_for_job(pool, str(job_id))

    stats = {
        "job_id": str(job_id),
        "total_reviews": 0,
        "parsed_ok": 0,
        "parsed_failed": 0,
        "inserted": 0,
        "skipped": 0,
        "sample_failures": [],
    }

    facts = []

    for review in reviews_data:
        stats["total_reviews"] += 1

        # Handle both dict and JSON string
        if isinstance(review, str):
            try:
                review = json.loads(review)
            except json.JSONDecodeError:
                stats["skipped"] += 1
                continue

        # A JSON string can decode to a list/number/null and the array can
        # hold non-object items; calling .get() on those would raise and
        # abort the whole backfill run.
        if not isinstance(review, dict):
            stats["skipped"] += 1
            continue

        review_id = review.get("review_id")
        if not review_id:
            stats["skipped"] += 1
            continue

        raw_timestamp = review.get("timestamp", "")
        review_time, success = parse_relative_timestamp_safe(raw_timestamp, job_created)

        if success:
            stats["parsed_ok"] += 1
        else:
            stats["parsed_failed"] += 1
            if len(stats["sample_failures"]) < 5:
                stats["sample_failures"].append(raw_timestamp)

        # Get language from spans if available.
        # NOTE(review): this is one query per review; batching it would need
        # the column type of detected_spans_v2.review_id - confirm first.
        language = await get_language_for_review(pool, review_id) if not dry_run else None

        facts.append({
            "review_id": review_id,
            "business_id": business_id,
            "job_id": job_id,
            "run_id": run_id,
            "rating": review.get("rating"),
            "review_time_utc": review_time,
            "raw_timestamp": raw_timestamp,
            "author": review.get("author"),
            "language": language,
        })

    # Upsert
    inserted, _ = await upsert_review_facts(pool, facts, dry_run=dry_run)
    stats["inserted"] = inserted

    if verbose:
        print(f" Job {job_id}: {stats['total_reviews']} reviews, "
              f"{stats['parsed_ok']} parsed OK, {stats['parsed_failed']} failed")
        if stats["sample_failures"]:
            print(f" Sample failures: {stats['sample_failures'][:3]}")

    return stats
|
||||
|
||||
|
||||
async def backfill_all(
    pool: asyncpg.Pool,
    job_id: str | None = None,
    dry_run: bool = False,
    verbose: bool = False,
) -> dict[str, Any]:
    """
    Run backfill_job over every job with reviews (or one specific job).

    Returns:
        Aggregate stats: jobs_processed, the summed total_reviews /
        parsed_ok / parsed_failed / inserted counters, and
        unique_failure_patterns (at most 20 distinct unparsed timestamps)
    """
    jobs = await get_jobs_with_reviews(pool, job_id)
    job_count = len(jobs)

    mode_prefix = "[DRY RUN] " if dry_run else ""
    print(f"\n{mode_prefix}Backfilling review_facts_v1 from {job_count} jobs...")

    summed_counters = ("total_reviews", "parsed_ok", "parsed_failed", "inserted")
    totals: dict[str, Any] = {"jobs_processed": 0}
    for counter in summed_counters:
        totals[counter] = 0
    failure_patterns = set()

    for position, job in enumerate(jobs, 1):
        if verbose:
            print(f"\n[{position}/{job_count}] Processing job {job['job_id']}...")

        job_stats = await backfill_job(pool, job, dry_run=dry_run, verbose=verbose)

        totals["jobs_processed"] += 1
        for counter in summed_counters:
            totals[counter] += job_stats[counter]
        failure_patterns.update(job_stats["sample_failures"])

    # A set is not JSON serializable, so expose a list capped at 20 entries.
    totals["unique_failure_patterns"] = list(failure_patterns)[:20]

    return totals
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI
|
||||
# =============================================================================
|
||||
|
||||
async def main_async(args):
    """
    Open the pool, run the backfill and print the summary report.

    Reads args.job_id, args.dry_run and args.verbose. The pool is closed
    even when the backfill raises.
    """
    pool = await asyncpg.create_pool(DB_URL)

    try:
        stats = await backfill_all(
            pool,
            job_id=args.job_id,
            dry_run=args.dry_run,
            verbose=args.verbose,
        )

        rule = "=" * 60
        print("\n" + rule)
        print("BACKFILL COMPLETE")
        print(rule)

        # max(..., 1) keeps the percentages defined when no review was seen.
        denominator = max(stats['total_reviews'], 1)
        parsed_pct = stats['parsed_ok'] / denominator * 100
        failed_pct = stats['parsed_failed'] / denominator * 100

        print(f"Jobs processed: {stats['jobs_processed']}")
        print(f"Total reviews: {stats['total_reviews']}")
        print(f"Timestamps parsed: {stats['parsed_ok']} ({parsed_pct:.1f}%)")
        print(f"Timestamps failed: {stats['parsed_failed']} ({failed_pct:.1f}%)")
        if not args.dry_run:
            print(f"Records upserted: {stats['inserted']}")

        unparsed = stats["unique_failure_patterns"]
        if unparsed:
            print(f"\nUnparsed timestamp patterns ({len(unparsed)}):")
            for p in unparsed[:10]:
                print(f" - \"{p}\"")

        # Coverage is the parsed share of all reviews; the target is 90%.
        coverage = parsed_pct
        if coverage >= 90:
            print(f"\n✅ Timestamp coverage: {coverage:.1f}%")
        else:
            print(f"\n⚠️ WARNING: Timestamp coverage is {coverage:.1f}% (target: >90%)")

    finally:
        await pool.close()
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the async backfill entry point."""
    cli = argparse.ArgumentParser(description="Backfill review_facts_v1")

    # (flags, keyword options) for every supported argument.
    argument_specs = (
        (("--job-id",), {"help": "Process a specific job only"}),
        (("--dry-run",), {"action": "store_true", "help": "Don't write to database"}),
        (("--verbose", "-v"), {"action": "store_true", "help": "Verbose output"}),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    args = cli.parse_args()
    asyncio.run(main_async(args))


if __name__ == "__main__":
    main()
|
||||
226
packages/reviewiq-pipeline/scripts/config_resolver_standalone.py
Normal file
226
packages/reviewiq-pipeline/scripts/config_resolver_standalone.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Config Resolver - Standalone version for scripts.
|
||||
|
||||
Resolves L1 config + sector brief for classification.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths (all relative to the package's data directory)
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
CONFIGS_DIR = DATA_DIR.joinpath("primitive_configs", "l1")
L2_CONFIGS_DIR = DATA_DIR.joinpath("primitive_configs", "l2")
BRIEFS_DIR = DATA_DIR.joinpath("sector_briefs")

# Meta primitives - always enabled
META_PRIMITIVES = frozenset({
    "HONESTY",
    "ETHICS",
    "PROMISES",
    "ACKNOWLEDGMENT",
    "RESPONSE_QUALITY",
    "RECOVERY",
    "RETURN_INTENT",
    "RECOMMEND",
    "RECOGNITION",
    "UNMAPPED",
})

# Core primitives as (code, domain, display name, definition) rows.
# Row order is the key order of CORE_PRIMITIVES.
_CORE_PRIMITIVE_ROWS = (
    ("TASTE", "O", "Taste/Flavor", "Sensory quality of food/beverage"),
    ("CRAFT", "O", "Craftsmanship", "Skill of execution/preparation"),
    ("FRESHNESS", "O", "Freshness", "Newness, not stale or old"),
    ("TEMPERATURE", "O", "Temperature", "Hot/cold as expected"),
    ("EFFECTIVENESS", "O", "Effectiveness", "Achieves intended purpose"),
    ("ACCURACY", "O", "Accuracy", "Correct, as ordered/specified"),
    ("CONDITION", "O", "Condition", "Physical state, wear, damage"),
    ("CONSISTENCY", "O", "Consistency", "Same quality each time"),
    ("MANNER", "P", "Manner/Attitude", "Friendliness, respect, warmth"),
    ("COMPETENCE", "P", "Competence", "Knowledge and skill of staff"),
    ("ATTENTIVENESS", "P", "Attentiveness", "Being present, responsive"),
    ("COMMUNICATION", "P", "Communication", "Clarity, listening, updates"),
    ("SPEED", "J", "Speed/Wait", "Time to service, waiting"),
    ("FRICTION", "J", "Friction", "Obstacles, hassles, complexity"),
    ("RELIABILITY", "J", "Reliability", "Dependable, keeps promises"),
    ("AVAILABILITY", "J", "Availability", "Open when needed, bookable"),
    ("CLEANLINESS", "E", "Cleanliness", "Hygiene, tidiness"),
    ("COMFORT", "E", "Comfort", "Physical ease, seating"),
    ("SAFETY", "E", "Safety", "Free from harm/danger"),
    ("AMBIANCE", "E", "Ambiance", "Atmosphere, mood, vibe"),
    ("ACCESSIBILITY", "E", "Accessibility", "Easy to reach, navigate"),
    ("DIGITAL_UX", "E", "Digital Experience", "Website, app, online"),
    ("PRICE_LEVEL", "V", "Price Level", "Absolute cost (cheap/expensive)"),
    ("PRICE_FAIRNESS", "V", "Price Fairness", "Reasonable for what you get"),
    ("PRICE_TRANSPARENCY", "V", "Price Transparency", "No hidden fees, clear pricing"),
    ("VALUE_FOR_MONEY", "V", "Value for Money", "Worth what you paid"),
)

# Core primitives dictionary
CORE_PRIMITIVES = {
    code: {"domain": domain, "name": label, "def": definition}
    for code, domain, label, definition in _CORE_PRIMITIVE_ROWS
}
|
||||
|
||||
|
||||
class ConfigResolver:
|
||||
"""Resolves classification config for a business."""
|
||||
|
||||
def __init__(self):
|
||||
self._l1_cache: dict[str, dict] = {}
|
||||
self._l2_cache: dict[str, dict] = {}
|
||||
self._brief_cache: dict[str, dict] = {}
|
||||
|
||||
def _load_l2_configs(self) -> list[dict[str, Any]]:
|
||||
"""Load all L2 config files."""
|
||||
if not L2_CONFIGS_DIR.exists():
|
||||
return []
|
||||
|
||||
configs = []
|
||||
for config_path in L2_CONFIGS_DIR.glob("*_config.json"):
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
configs.append(config)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load L2 config {config_path}: {e}")
|
||||
return configs
|
||||
|
||||
def _find_matching_l2(self, gbp_path: str) -> dict[str, Any] | None:
|
||||
"""Find L2 config that matches the GBP path (most specific wins)."""
|
||||
l2_configs = self._load_l2_configs()
|
||||
|
||||
# Find all matching configs (path starts with L2 gbp_path)
|
||||
matches = []
|
||||
for config in l2_configs:
|
||||
l2_path = config.get("gbp_path", "")
|
||||
if gbp_path.startswith(l2_path) or gbp_path == l2_path:
|
||||
matches.append((len(l2_path), config))
|
||||
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# Return most specific match (longest path)
|
||||
matches.sort(key=lambda x: x[0], reverse=True)
|
||||
return matches[0][1]
|
||||
|
||||
def _apply_l2_delta(self, l1_config: dict, l2_config: dict) -> dict:
|
||||
"""Apply L2 delta to L1 config."""
|
||||
result = l1_config.copy()
|
||||
delta = l2_config.get("delta", {})
|
||||
|
||||
# Enable additional primitives
|
||||
if "enable" in delta:
|
||||
enabled = set(result.get("enabled", []))
|
||||
enabled.update(delta["enable"])
|
||||
result["enabled"] = list(enabled)
|
||||
|
||||
# Merge weights
|
||||
if "weights" in delta:
|
||||
weights = dict(result.get("weights", {}))
|
||||
weights.update(delta["weights"])
|
||||
result["weights"] = weights
|
||||
|
||||
# Update config version to indicate L2
|
||||
result["config_version"] = l2_config.get("config_version", result.get("config_version", "1.0"))
|
||||
result["l2_applied"] = l2_config.get("gbp_path")
|
||||
|
||||
return result
|
||||
|
||||
def _load_l1_config(self, sector_code: str) -> dict[str, Any] | None:
|
||||
if sector_code in self._l1_cache:
|
||||
return self._l1_cache[sector_code]
|
||||
|
||||
config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
|
||||
if not config_path.exists():
|
||||
return None
|
||||
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
|
||||
self._l1_cache[sector_code] = config
|
||||
return config
|
||||
|
||||
def _load_sector_brief(self, sector_code: str) -> dict[str, Any] | None:
|
||||
if sector_code in self._brief_cache:
|
||||
return self._brief_cache[sector_code]
|
||||
|
||||
brief_path = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
|
||||
if not brief_path.exists():
|
||||
return None
|
||||
|
||||
with open(brief_path) as f:
|
||||
brief = json.load(f)
|
||||
|
||||
self._brief_cache[sector_code] = brief
|
||||
return brief
|
||||
|
||||
async def get_business_mapping(self, pool, business_id: str) -> dict[str, Any] | None:
|
||||
query = """
|
||||
SELECT business_id, gbp_path::text, sector_code
|
||||
FROM pipeline.business_taxonomy_map
|
||||
WHERE business_id = $1
|
||||
"""
|
||||
row = await pool.fetchrow(query, business_id)
|
||||
return dict(row) if row else None
|
||||
|
||||
def resolve_enabled_set(self, l1_config: dict) -> set[str]:
|
||||
enabled = set(l1_config.get("enabled", []))
|
||||
enabled.update(META_PRIMITIVES)
|
||||
return enabled
|
||||
|
||||
def build_primitives_for_prompt(self, enabled: set[str], weights: dict[str, float]) -> dict[str, dict]:
|
||||
result = {}
|
||||
for prim in enabled:
|
||||
if prim in CORE_PRIMITIVES:
|
||||
entry = CORE_PRIMITIVES[prim].copy()
|
||||
if prim in weights:
|
||||
entry["weight"] = weights[prim]
|
||||
result[prim] = entry
|
||||
elif prim in META_PRIMITIVES:
|
||||
result[prim] = {"domain": "M", "name": prim.replace("_", " ").title(), "meta": True}
|
||||
return result
|
||||
|
||||
def extract_brief_signals(self, brief: dict) -> dict[str, Any]:
|
||||
if not brief:
|
||||
return {}
|
||||
return {
|
||||
"sector": brief.get("sector_code"),
|
||||
"what_customers_judge": brief.get("what_customers_judge"),
|
||||
"critical_pain_points": brief.get("critical_pain_points"),
|
||||
"industry_terminology": brief.get("industry_terminology"),
|
||||
}
|
||||
|
||||
async def resolve(self, business_id: str, pool, mode: str | None = None) -> dict[str, Any] | None:
|
||||
mapping = await self.get_business_mapping(pool, business_id)
|
||||
if not mapping:
|
||||
return None
|
||||
|
||||
sector_code = mapping["sector_code"]
|
||||
gbp_path = mapping["gbp_path"]
|
||||
|
||||
# Load L1 config (sector-level)
|
||||
l1_config = self._load_l1_config(sector_code)
|
||||
if not l1_config:
|
||||
l1_config = {"enabled": list(CORE_PRIMITIVES.keys()), "weights": {}}
|
||||
|
||||
# Check for L2 config (category-level delta)
|
||||
l2_config = self._find_matching_l2(gbp_path)
|
||||
if l2_config:
|
||||
logger.info(f"Applying L2 delta for {gbp_path}: {l2_config.get('gbp_path')}")
|
||||
l1_config = self._apply_l2_delta(l1_config, l2_config)
|
||||
|
||||
brief = self._load_sector_brief(sector_code)
|
||||
|
||||
enabled = self.resolve_enabled_set(l1_config)
|
||||
weights = dict(l1_config.get("weights", {}))
|
||||
primitives = self.build_primitives_for_prompt(enabled, weights)
|
||||
brief_signals = self.extract_brief_signals(brief)
|
||||
|
||||
return {
|
||||
"business_id": business_id,
|
||||
"gbp_path": gbp_path,
|
||||
"sector_code": sector_code,
|
||||
"config_version": l1_config.get("config_version", "1.0"),
|
||||
"l2_applied": l1_config.get("l2_applied"),
|
||||
"modes": [mode] if mode else ["in_person"],
|
||||
"default_mode": mode or "in_person",
|
||||
"enabled_primitives": sorted(enabled),
|
||||
"disabled_primitives": sorted(l1_config.get("disabled", [])),
|
||||
"weights": weights,
|
||||
"brief": brief_signals,
|
||||
"primitives": primitives,
|
||||
}
|
||||
148
packages/reviewiq-pipeline/scripts/fix_l1_configs.py
Normal file
148
packages/reviewiq-pipeline/scripts/fix_l1_configs.py
Normal file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix L1 configs based on validation results.
|
||||
|
||||
Applies fixes discovered during validation:
|
||||
1. Enable primitives that were disabled but appearing frequently
|
||||
2. Remove weights for primitives with zero appearances
|
||||
3. Add weights for high-frequency unweighted primitives
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
CONFIGS_DIR = Path(__file__).parent.parent / "data" / "primitive_configs" / "l1"
|
||||
|
||||
def _sector_fix(enable, remove_weight=()):
    """Build one FIXES entry; every entry gets its own fresh containers."""
    return {
        "enable": list(enable),
        "disable": [],
        "add_weight": {},
        "remove_weight": list(remove_weight),
    }


# Fixes based on validation results
# Format: { sector: { "enable": [primitives], "disable": [primitives], "add_weight": {prim: weight}, "remove_weight": [prims] } }
FIXES = {
    "ENTERTAINMENT": _sector_fix(
        ["CRAFT", "CONSISTENCY", "COMMUNICATION", "FRICTION"],
        remove_weight=["CONDITION"],  # 0 appearances despite 1.4x weight
    ),
    "FOOD_DINING": _sector_fix(
        ["PRICE_LEVEL", "ACCESSIBILITY", "PRICE_TRANSPARENCY", "FRICTION", "EFFECTIVENESS"],
    ),
    "AUTOMOTIVE": _sector_fix(["CRAFT", "CONSISTENCY", "PRICE_LEVEL", "AMBIANCE"]),
    "HEALTHCARE": _sector_fix(["CRAFT", "PRICE_LEVEL", "AMBIANCE"]),
    "RETAIL_SHOPPING": _sector_fix(["CRAFT", "PRICE_LEVEL", "AMBIANCE"]),
    "HOSPITALITY_TRAVEL": _sector_fix(["CRAFT", "CONSISTENCY", "PRICE_LEVEL"]),
    "PERSONAL_SERVICES": _sector_fix(["PRICE_LEVEL", "SPEED", "FRICTION"]),
}
|
||||
|
||||
|
||||
def fix_config(sector_code: str, fixes: dict) -> dict:
|
||||
"""Apply fixes to a sector config."""
|
||||
config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
|
||||
|
||||
if not config_path.exists():
|
||||
print(f" ⚠️ Config not found: {config_path}")
|
||||
return None
|
||||
|
||||
with open(config_path) as f:
|
||||
config = json.load(f)
|
||||
|
||||
enabled = set(config.get("enabled", []))
|
||||
disabled = set(config.get("disabled", []))
|
||||
weights = config.get("weights", {})
|
||||
|
||||
changes = []
|
||||
|
||||
# Apply enables (move from disabled to enabled)
|
||||
for prim in fixes.get("enable", []):
|
||||
if prim in disabled:
|
||||
disabled.remove(prim)
|
||||
enabled.add(prim)
|
||||
changes.append(f"✓ Enabled {prim}")
|
||||
elif prim not in enabled:
|
||||
enabled.add(prim)
|
||||
changes.append(f"✓ Added {prim} to enabled")
|
||||
|
||||
# Apply disables (move from enabled to disabled)
|
||||
for prim in fixes.get("disable", []):
|
||||
if prim in enabled:
|
||||
enabled.remove(prim)
|
||||
disabled.add(prim)
|
||||
changes.append(f"✗ Disabled {prim}")
|
||||
|
||||
# Add weights
|
||||
for prim, weight in fixes.get("add_weight", {}).items():
|
||||
if prim not in weights:
|
||||
weights[prim] = weight
|
||||
changes.append(f"⚖️ Added weight {prim}: {weight}x")
|
||||
|
||||
# Remove weights
|
||||
for prim in fixes.get("remove_weight", []):
|
||||
if prim in weights:
|
||||
del weights[prim]
|
||||
changes.append(f"⚖️ Removed weight for {prim}")
|
||||
|
||||
# Update config
|
||||
config["enabled"] = sorted(enabled)
|
||||
config["disabled"] = sorted(disabled)
|
||||
config["weights"] = dict(sorted(weights.items()))
|
||||
config["config_version"] = "1.1" # Bump version
|
||||
|
||||
# Save
|
||||
with open(config_path, "w") as f:
|
||||
json.dump(config, f, indent=2)
|
||||
f.write("\n")
|
||||
|
||||
return changes
|
||||
|
||||
|
||||
def main():
    """Run every sector fix in FIXES and print a per-sector change report."""
    rule = "=" * 60
    print(rule)
    print("L1 CONFIG FIXER - Applying validation-based fixes")
    print(rule)

    total_changes = 0

    for sector, fixes in FIXES.items():
        print(f"\n📁 {sector}")
        applied = fix_config(sector, fixes)
        # None (config missing) and an empty list are reported the same way.
        if not applied:
            print(" No changes applied")
            continue
        for description in applied:
            print(f" {description}")
        total_changes += len(applied)

    print(f"\n{'=' * 60}")
    print(f"Total changes applied: {total_changes}")
    print("Config version bumped to 1.1")
    print(rule)


if __name__ == "__main__":
    main()
|
||||
238
packages/reviewiq-pipeline/scripts/fix_l1_configs_v2.py
Normal file
238
packages/reviewiq-pipeline/scripts/fix_l1_configs_v2.py
Normal file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Guarded L1 Config Fixer - V2 (Threshold-based, Sector-scoped)
|
||||
|
||||
Only applies fixes when:
|
||||
1. Evidence is from sector-scoped validation
|
||||
2. Frequency exceeds threshold (default 3%)
|
||||
3. Changes are logged with version bump
|
||||
|
||||
Usage:
|
||||
python fix_l1_configs_v2.py --apply # Apply fixes from validation
|
||||
python fix_l1_configs_v2.py --dry-run # Show what would change
|
||||
python fix_l1_configs_v2.py --revert SECTOR # Revert to previous version
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
CONFIGS_DIR = Path(__file__).parent.parent.joinpath("data", "primitive_configs", "l1")
CHANGELOG_FILE = CONFIGS_DIR.joinpath("CHANGELOG.json")

# Minimum threshold for auto-enabling (% of sector spans)
ENABLE_THRESHOLD_PCT = 3.0

# Fixes derived from sector-scoped validation (validate_l1_configs_v2.py output)
# These are the ONLY fixes that should be applied
# "enable" rows are (primitive, pct, reason); "add_weight" rows are
# (primitive, weight, reason).
SECTOR_SCOPED_FIXES = {
    "ENTERTAINMENT": dict(
        evidence="2,320 spans from Go Karts + Soho Club",
        enable=[
            ("TASTE", 4.3, "Entertainment venues have concessions/food service"),
        ],
        add_weight=[
            ("CRAFT", 1.3, "13.4% frequency but unweighted"),
        ],
        remove_weight=[],
    ),
    "FOOD_DINING": dict(
        evidence="61 spans from Fika cafe",
        enable=[
            ("COMFORT", 9.8, "Seating/atmosphere comfort matters for cafes"),
        ],
        add_weight=[
            ("AVAILABILITY", 1.2, "16.4% frequency but unweighted"),
        ],
        # Note: Small sample size (61 spans) - these may be false negatives
        # Keep weights but flag for review with more data
        remove_weight=[],
    ),
    "AUTOMOTIVE": dict(
        evidence="1,201 spans from ClickRent car rental",
        enable=[],  # Nothing exceeds 3% threshold
        add_weight=[],
        # CONDITION, HONESTY, PROMISES, RECOVERY all have 0 appearances
        # However, may be specific to rental vs repair - keep for now
        remove_weight=[],
    ),
}
|
||||
|
||||
|
||||
def load_changelog() -> list[dict]:
    """Return the parsed contents of CHANGELOG_FILE, or [] when it does not exist."""
    if not CHANGELOG_FILE.exists():
        return []
    with open(CHANGELOG_FILE) as handle:
        entries = json.load(handle)
    return entries
|
||||
|
||||
|
||||
def save_changelog(entries: list[dict]) -> None:
    """Write entries to CHANGELOG_FILE as indented JSON with a trailing newline."""
    target_dir = CHANGELOG_FILE.parent
    # Create the config directory on first use.
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(CHANGELOG_FILE, "w") as handle:
        json.dump(entries, handle, indent=2)
        handle.write("\n")
|
||||
|
||||
|
||||
def load_config(sector_code: str) -> dict[str, Any] | None:
    """
    Read "<sector>_config.json" (lowercased sector code) from CONFIGS_DIR.

    Returns the parsed JSON, or None when the file does not exist.
    """
    file_name = f"{sector_code.lower()}_config.json"
    config_path = CONFIGS_DIR / file_name
    if config_path.exists():
        with open(config_path) as handle:
            return json.load(handle)
    return None
|
||||
|
||||
|
||||
def save_config(sector_code: str, config: dict[str, Any]) -> None:
    """Write config to "<sector>_config.json" in CONFIGS_DIR as indented JSON plus newline."""
    file_name = f"{sector_code.lower()}_config.json"
    with open(CONFIGS_DIR / file_name, "w") as handle:
        json.dump(config, handle, indent=2)
        handle.write("\n")
|
||||
|
||||
|
||||
def apply_fixes(sector_code: str, fixes: dict, dry_run: bool = False) -> list[str]:
    """Apply evidence-based fixes to one sector's config.

    Args:
        sector_code: Sector whose config is loaded through ``load_config``.
        fixes: Mapping with the optional keys ``evidence`` (free text),
            ``enable`` (``(primitive, pct, reason)`` tuples), ``add_weight``
            (``(primitive, weight, reason)`` tuples) and ``remove_weight``
            (``(primitive, reason)`` tuples).
        dry_run: When true, only report what would change; neither the
            config nor the changelog is written.

    Returns:
        Human-readable report lines. A single "not found" line when the
        config is missing or empty, a single "no changes" line when there is
        nothing to report, otherwise one line per change or skipped item,
        plus a version line when the config was actually rewritten.
    """
    config = load_config(sector_code)
    if not config:
        return [f"❌ Config not found for {sector_code}"]

    enabled = set(config.get("enabled", []))
    disabled = set(config.get("disabled", []))
    weights = config.get("weights", {})

    changes = []
    # Tracks real modifications separately from informational SKIP notices,
    # so a run that only skips items never rewrites the config.
    modified = False
    evidence = fixes.get("evidence", "unknown")

    # Enable primitives
    for prim, pct, reason in fixes.get("enable", []):
        if pct < ENABLE_THRESHOLD_PCT:
            changes.append(f"⚠️ SKIP {prim}: {pct:.1f}% below {ENABLE_THRESHOLD_PCT}% threshold")
            continue

        if prim in disabled:
            disabled.remove(prim)
            enabled.add(prim)
            changes.append(f"✓ ENABLE {prim}: {pct:.1f}% in sector data ({reason})")
            modified = True
        elif prim not in enabled:
            enabled.add(prim)
            changes.append(f"✓ ADD {prim}: {pct:.1f}% in sector data ({reason})")
            modified = True

    # Add weights (existing weights are never overwritten)
    for prim, weight, reason in fixes.get("add_weight", []):
        if prim not in weights:
            weights[prim] = weight
            changes.append(f"⚖️ WEIGHT {prim}: {weight}x ({reason})")
            modified = True

    # Remove weights
    for prim, reason in fixes.get("remove_weight", []):
        if prim in weights:
            del weights[prim]
            changes.append(f"⚖️ UNWEIGHT {prim}: ({reason})")
            modified = True

    if not changes:
        return ["✓ No changes needed"]

    if modified and not dry_run:
        old_version = config.get("config_version", "1.0")
        new_version = _bump_minor_version(old_version)
        # One timestamp so the config and its changelog entry agree exactly.
        timestamp = datetime.now(timezone.utc).isoformat()

        config["enabled"] = sorted(enabled)
        config["disabled"] = sorted(disabled)
        config["weights"] = dict(sorted(weights.items()))
        config["config_version"] = new_version
        config["config_updated_at"] = timestamp

        save_config(sector_code, config)

        # Log to changelog (written before the version line is appended, so
        # the stored entry lists only the config changes themselves).
        changelog = load_changelog()
        changelog.append({
            "sector": sector_code,
            "version": new_version,
            "previous_version": old_version,
            "timestamp": timestamp,
            "evidence": evidence,
            "changes": changes,
        })
        save_changelog(changelog)

        changes.append(f"📝 Version: {old_version} → {new_version}")

    return changes


def _bump_minor_version(version: object) -> str:
    """Return ``version`` with its minor component incremented ("1.4" -> "1.5")."""
    major, _, remainder = str(version).partition(".")
    # Only the first component after the major counts as the minor; a patch
    # component such as the ".3" in "1.2.3" is dropped.
    minor = remainder.split(".", 1)[0]
    try:
        next_minor = int(minor) + 1
    except ValueError:
        # Missing or non-numeric minor (e.g. "2"): start the minor series at 1.
        next_minor = 1
    return f"{major}.{next_minor}"
|
||||
|
||||
|
||||
def revert_config(sector_code: str, to_version: str | None = None) -> list[str]:
    """Report on a requested revert of a sector config.

    Reverting is not implemented: the function only checks whether the
    changelog holds entries for the sector. ``to_version`` is accepted but
    currently unused.

    Returns:
        A one-element list with a status message.
    """
    history = []
    for entry in load_changelog():
        if entry["sector"] == sector_code:
            history.append(entry)

    if history:
        # TODO: Implement actual revert by storing full config snapshots
        return ["⚠️ Revert not yet implemented - manual restore required"]

    return [f"❌ No changelog entries for {sector_code}"]
|
||||
|
||||
|
||||
def main():
    """Command-line entry point of the config fixer.

    Handles, in this order of precedence: ``--show-changelog``, ``--revert``,
    then ``--apply`` / ``--dry-run``. Prints the help text when none is given.
    """
    parser = argparse.ArgumentParser(description="Guarded L1 config fixer")
    parser.add_argument("--apply", action="store_true", help="Apply sector-scoped fixes")
    parser.add_argument("--dry-run", action="store_true", help="Show what would change")
    parser.add_argument("--revert", metavar="SECTOR", help="Revert sector to previous version")
    parser.add_argument("--sector", help="Apply to specific sector only")
    parser.add_argument("--show-changelog", action="store_true", help="Show changelog")

    args = parser.parse_args()

    if args.show_changelog:
        print(json.dumps(load_changelog(), indent=2))
        return

    if args.revert:
        for line in revert_config(args.revert.upper()):
            print(line)
        return

    if not (args.apply or args.dry_run):
        parser.print_help()
        return

    rule = "=" * 60
    mode = "DRY RUN" if args.dry_run else "APPLYING FIXES"
    print(rule)
    print(f"L1 CONFIG FIXER V2 - {mode}")
    print(f"Threshold: {ENABLE_THRESHOLD_PCT}%")
    print(rule)

    if args.sector:
        sectors = [args.sector.upper()]
    else:
        sectors = SECTOR_SCOPED_FIXES.keys()

    for sector in sectors:
        sector_fixes = SECTOR_SCOPED_FIXES.get(sector) if sector in SECTOR_SCOPED_FIXES else None
        if sector not in SECTOR_SCOPED_FIXES:
            print(f"\n⚠️ {sector}: No sector-scoped fixes defined")
            continue

        print(f"\n📁 {sector}")
        print(f" Evidence: {sector_fixes['evidence']}")

        for line in apply_fixes(sector, sector_fixes, dry_run=args.dry_run):
            print(f" {line}")

    print("\n" + rule)
    if args.dry_run:
        print("DRY RUN - No changes applied")
    else:
        print("Fixes applied - see CHANGELOG.json for history")
    print(rule)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
372
packages/reviewiq-pipeline/scripts/generate_sector_briefs.py
Normal file
372
packages/reviewiq-pipeline/scripts/generate_sector_briefs.py
Normal file
@@ -0,0 +1,372 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Wave 0: Sector Brief Generator
|
||||
|
||||
Generates alignment context briefs for each sector.
|
||||
These briefs inform Wave 1 and Wave 2 primitive config generation.
|
||||
|
||||
Usage:
|
||||
python generate_sector_briefs.py # Generate all sectors
|
||||
python generate_sector_briefs.py --sector FOOD_DINING # Generate one sector
|
||||
python generate_sector_briefs.py --dry-run # Show what would be generated
|
||||
python generate_sector_briefs.py --validate # Validate existing briefs
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from openai import OpenAI
|
||||
except ImportError:
|
||||
print("ERROR: openai package required. Install with: pip install openai")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
PROMPT_TEMPLATE = '''You are an expert in customer experience analysis across industries.
|
||||
|
||||
Your task: Generate a **sector brief** for the "{sector_name}" sector.
|
||||
|
||||
This brief will be used to align classification agents with industry-specific context.
|
||||
It describes what customers care about — NOT how to classify, NOT what primitives to use.
|
||||
|
||||
## Sector Information
|
||||
|
||||
- **Code**: {sector_code}
|
||||
- **Name**: {sector_name}
|
||||
- **Description**: {description}
|
||||
- **Sample Business Types**: {business_types}
|
||||
|
||||
## Output Requirements
|
||||
|
||||
Generate a JSON object with this exact structure:
|
||||
|
||||
```json
|
||||
{{
|
||||
"sector_code": "{sector_code}",
|
||||
"sector_name": "{sector_name}",
|
||||
"generated_at": "<ISO timestamp>",
|
||||
"version": "1.0",
|
||||
|
||||
"what_customers_judge": {{
|
||||
"description": "The primary dimensions customers evaluate in this sector",
|
||||
"items": [
|
||||
{{
|
||||
"aspect": "string (2-5 words)",
|
||||
"importance": "critical | high | moderate",
|
||||
"why_it_matters": "string (1 sentence)"
|
||||
}}
|
||||
]
|
||||
}},
|
||||
|
||||
"critical_pain_points": {{
|
||||
"description": "What damages reputation most severely",
|
||||
"items": [
|
||||
{{
|
||||
"pain_point": "string (2-5 words)",
|
||||
"typical_language": ["phrases customers actually use in reviews"],
|
||||
"reputation_impact": "severe | significant | moderate"
|
||||
}}
|
||||
]
|
||||
}},
|
||||
|
||||
"common_praise": {{
|
||||
"description": "What earns customer loyalty and positive reviews",
|
||||
"items": [
|
||||
{{
|
||||
"praise_area": "string (2-5 words)",
|
||||
"typical_language": ["phrases customers actually use in reviews"],
|
||||
"loyalty_impact": "high | moderate"
|
||||
}}
|
||||
]
|
||||
}},
|
||||
|
||||
"industry_terminology": {{
|
||||
"description": "Domain-specific vocabulary",
|
||||
"staff_terms": ["terms for staff roles in this sector"],
|
||||
"product_terms": ["terms for products/services"],
|
||||
"process_terms": ["terms for processes/interactions"],
|
||||
"quality_terms": ["positive quality descriptors"],
|
||||
"problem_terms": ["negative quality descriptors"]
|
||||
}},
|
||||
|
||||
"mode_specific_concerns": {{
|
||||
"description": "Different service modes have different priorities",
|
||||
"modes": [
|
||||
{{
|
||||
"mode": "string (e.g., 'In-person', 'Online', 'Phone')",
|
||||
"primary_concerns": ["top concerns for this mode"],
|
||||
"unique_pain_points": ["pain points specific to this mode"]
|
||||
}}
|
||||
]
|
||||
}},
|
||||
|
||||
"what_is_actionable": {{
|
||||
"description": "Feedback businesses can act on",
|
||||
"actionable_examples": [
|
||||
{{
|
||||
"feedback_type": "string",
|
||||
"example": "string (realistic review excerpt)",
|
||||
"action_owner": "role/team that can fix it"
|
||||
}}
|
||||
],
|
||||
"not_actionable_examples": [
|
||||
{{
|
||||
"feedback_type": "string",
|
||||
"example": "string (realistic review excerpt)",
|
||||
"why_not_actionable": "string"
|
||||
}}
|
||||
]
|
||||
}},
|
||||
|
||||
"sector_specific_signals": {{
|
||||
"description": "Signals with sector-specific meaning",
|
||||
"examples": [
|
||||
{{
|
||||
"signal": "string (word or phrase)",
|
||||
"meaning_in_this_sector": "string",
|
||||
"contrast_with": "how it differs in other sectors"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
}}
|
||||
```
|
||||
|
||||
## Critical Rules
|
||||
|
||||
1. **Use realistic review language** in `typical_language` arrays - actual phrases customers write
|
||||
2. **Include 4-8 items** per array (not too few, not excessive)
|
||||
3. **Be sector-specific** - don't use generic phrases that apply to all businesses
|
||||
4. **Include appropriate modes** - only modes that actually exist in this sector
|
||||
5. **NO primitive codes, priorities, weights, or solutions**
|
||||
6. **Focus on WHAT customers care about**, not HOW to classify it
|
||||
|
||||
Return ONLY the JSON object, no markdown formatting or explanation.'''
|
||||
|
||||
|
||||
def load_sectors(data_path: Path) -> list[dict]:
    """Read the sector definitions file and return its ``"sectors"`` list.

    Raises:
        KeyError: If the JSON document has no ``"sectors"`` key.
    """
    with open(data_path) as handle:
        document = json.load(handle)
    sectors = document["sectors"]
    return sectors
|
||||
|
||||
|
||||
def generate_sector_brief(client: OpenAI, sector: dict, model: str) -> dict:
    """Generate a sector brief using LLM.

    Args:
        client: OpenAI client used for the chat-completions call.
        sector: Sector definition; must provide ``sector_code``,
            ``sector_name``, ``description`` and ``sample_business_types``.
        model: Name of the model to query.

    Returns:
        The parsed brief. ``sector_code``, ``sector_name``, ``generated_at``
        and ``version`` are always overwritten locally, so they never depend
        on what the model echoed back.

    Raises:
        ValueError: If the model returns no content, invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError``) or a JSON value
            that is not an object.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    prompt = PROMPT_TEMPLATE.format(
        sector_code=sector["sector_code"],
        sector_name=sector["sector_name"],
        description=sector["description"],
        business_types=", ".join(sector["sample_business_types"])
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert customer experience analyst. Return only valid JSON, no markdown."
            },
            {"role": "user", "content": prompt}
        ],
        temperature=0.3,
        max_tokens=4000,
        response_format={"type": "json_object"}
    )

    # ``content`` can be None (e.g. on a refusal); fail with a clear message
    # instead of an AttributeError on ``None.strip()``.
    content = response.choices[0].message.content
    if not content or not content.strip():
        raise ValueError(
            f"Model returned no content for sector {sector['sector_code']}"
        )

    # Parse JSON
    brief = json.loads(content)
    if not isinstance(brief, dict):
        raise ValueError(
            f"Model returned {type(brief).__name__} instead of a JSON object "
            f"for sector {sector['sector_code']}"
        )

    # Ensure required fields
    brief["sector_code"] = sector["sector_code"]
    brief["sector_name"] = sector["sector_name"]
    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
    # the tzinfo is dropped again to keep the "...Z" output format.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    brief["generated_at"] = now_utc.isoformat() + "Z"
    brief["version"] = "1.0"

    return brief
|
||||
|
||||
|
||||
def validate_brief(brief: dict) -> list[str]:
    """Validate a sector brief, return list of issues.

    Checks that all required top-level sections exist, that the three
    item-list sections hold a plausible number of items, and that the brief
    does not mention classification-config vocabulary.

    Args:
        brief: Parsed brief (typically loaded from JSON).

    Returns:
        Human-readable issue descriptions; empty when the brief is valid.
        Malformed sections are reported as issues rather than raising.
    """
    issues = []

    required_keys = [
        "what_customers_judge",
        "critical_pain_points",
        "common_praise",
        "industry_terminology",
        "mode_specific_concerns",
        "what_is_actionable",
        "sector_specific_signals"
    ]

    for key in required_keys:
        if key not in brief:
            issues.append(f"Missing required key: {key}")

    # Check array lengths: (section, minimum items, maximum items or None).
    length_limits = [
        ("what_customers_judge", 3, 10),
        ("critical_pain_points", 3, None),
        ("common_praise", 3, None),
    ]
    for key, minimum, maximum in length_limits:
        if key not in brief:
            continue  # already reported as missing above
        section = brief[key]
        items = section.get("items", []) if isinstance(section, dict) else None
        if not isinstance(items, list):
            # LLM output may have the wrong shape; report it instead of crashing.
            issues.append(f"{key} must be an object containing an 'items' list")
            continue
        if len(items) < minimum:
            issues.append(f"{key} has only {len(items)} items (need {minimum}+)")
        if maximum is not None and len(items) > maximum:
            issues.append(f"{key} has {len(items)} items (max {maximum})")

    # Check for forbidden content (substring match over the serialized brief).
    # "solution" is deliberately not flagged: it can appear in normal context.
    text = json.dumps(brief).lower()
    for word in ("priority", "weight", "primitive", "enabled", "disabled"):
        if word in text:
            issues.append(f"Contains potentially forbidden term: {word}")

    return issues
|
||||
|
||||
|
||||
def save_brief(brief: dict, output_dir: Path) -> Path:
    """Write a brief to ``<sector_code>_brief.json`` inside ``output_dir``.

    The directory is created when missing and the sector code is lower-cased
    for the file name.

    Returns:
        Path of the written file.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / (brief["sector_code"].lower() + "_brief.json")

    with open(destination, "w") as handle:
        json.dump(brief, handle, indent=2)

    return destination
|
||||
|
||||
|
||||
def validate_existing_briefs(output_dir: Path) -> None:
    """Validate every ``*_brief.json`` file in ``output_dir`` and print a report.

    Prints one status line per file (in sorted order), followed by its issues
    if any, then an overall verdict. Returns early with a message when the
    directory is missing or holds no brief files.
    """
    if not output_dir.exists():
        print(f"Output directory does not exist: {output_dir}")
        return

    brief_paths = sorted(output_dir.glob("*_brief.json"))
    if not brief_paths:
        print("No brief files found")
        return

    print(f"Validating {len(brief_paths)} brief files...\n")

    failures = 0
    for brief_path in brief_paths:
        with open(brief_path) as handle:
            loaded = json.load(handle)

        problems = validate_brief(loaded)
        marker = "✗" if problems else "✓"
        print(f"{marker} {brief_path.name}")

        if problems:
            failures += 1
        for problem in problems:
            print(f" - {problem}")

    print()
    print("Some briefs have issues." if failures else "All briefs valid!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: generate (or validate) sector briefs.

    Modes:
        --validate  validate existing brief files and return.
        --dry-run   list the sectors that would be generated and return.
        default     generate, validate and save one brief per sector.

    Exits with status 1 when the requested sector is unknown, when
    ``OPENAI_API_KEY`` is not set, or when any sector fails to generate.
    """
    parser = argparse.ArgumentParser(description="Generate sector briefs for Wave 0")
    parser.add_argument("--sector", help="Generate only this sector code")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be generated")
    parser.add_argument("--validate", action="store_true", help="Validate existing briefs")
    parser.add_argument("--output-dir", default="data/sector_briefs", help="Output directory")
    parser.add_argument("--model", default="gpt-4o", help="OpenAI model to use")
    args = parser.parse_args()

    # Paths (resolved relative to the package directory, one level above the script)
    script_dir = Path(__file__).parent
    package_dir = script_dir.parent
    data_path = package_dir / "data" / "sectors.json"
    output_dir = package_dir / args.output_dir

    # Validate mode
    if args.validate:
        validate_existing_briefs(output_dir)
        return

    # Load sectors
    sectors = load_sectors(data_path)
    print(f"Loaded {len(sectors)} sectors")

    # Filter to single sector if specified. The comparison is
    # case-insensitive so that "--sector food_dining" works as well.
    if args.sector:
        wanted = args.sector.upper()
        sectors = [s for s in sectors if s["sector_code"].upper() == wanted]
        if not sectors:
            print(f"ERROR: Sector '{args.sector}' not found")
            sys.exit(1)

    if args.dry_run:
        print("\n[DRY RUN] Would generate briefs for:")
        for sector in sectors:
            print(f" - {sector['sector_code']}: {sector['sector_name']}")
        print(f"\nOutput directory: {output_dir}")
        return

    # Check API key
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable required")
        sys.exit(1)

    # Initialize client
    client = OpenAI(api_key=api_key)
    print(f"Using model: {args.model}")

    # Generate briefs
    results = {"success": [], "failed": []}

    for i, sector in enumerate(sectors, 1):
        print(f"\n[{i}/{len(sectors)}] Generating brief for: {sector['sector_name']}")

        # Top-level boundary: one failing sector must not abort the batch,
        # so any error is reported and the loop moves on.
        try:
            brief = generate_sector_brief(client, sector, args.model)

            # Validation issues are warnings only; the brief is still saved.
            issues = validate_brief(brief)
            if issues:
                print(" Warnings:")
                for issue in issues:
                    print(f" - {issue}")

            # Save
            output_path = save_brief(brief, output_dir)
            print(f" ✓ Saved to: {output_path}")
            results["success"].append(sector["sector_code"])

        except Exception as e:
            print(f" ✗ FAILED: {e}")
            results["failed"].append(sector["sector_code"])

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    print(f"Success: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")

    if results["failed"]:
        print(f"\nFailed sectors: {', '.join(results['failed'])}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
523
packages/reviewiq-pipeline/scripts/llm_classifier.py
Normal file
523
packages/reviewiq-pipeline/scripts/llm_classifier.py
Normal file
@@ -0,0 +1,523 @@
|
||||
"""
|
||||
LLM Classifier - Real classification using OpenAI Responses API.
|
||||
|
||||
Uses JSON Schema to enforce strict output format.
|
||||
Validates primitives against enabled set.
|
||||
Stores raw response for audit.
|
||||
Supports multilingual reviews with language detection.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
# Language detection - try langdetect, fall back to heuristics
|
||||
try:
    from langdetect import detect as langdetect_detect, LangDetectException
    LANGDETECT_AVAILABLE = True
except ImportError:
    LANGDETECT_AVAILABLE = False
    LangDetectException = Exception  # Placeholder so the except clause stays valid


# Common words used to tell Latin-script languages apart. The insertion
# order matters: ties between languages are resolved in favour of the first.
_LATIN_MARKER_WORDS = {
    'es': ['el', 'la', 'los', 'las', 'de', 'que', 'es', 'en', 'un', 'una',
           'muy', 'pero', 'con', 'está', 'están', 'para', 'por', 'como',
           'excelente', 'recomendado', 'servicio', 'bueno', 'malo', 'bien',
           'todo', 'nada', 'más', 'sin', 'nunca', 'siempre', 'también'],
    'en': ['the', 'and', 'is', 'are', 'was', 'were', 'this', 'that',
           'with', 'for', 'but', 'not', 'very', 'great', 'good',
           'service', 'place', 'food', 'staff', 'friendly', 'amazing',
           'would', 'recommend', 'will', 'definitely', 'really'],
    'de': ['der', 'die', 'das', 'und', 'ist', 'sind', 'war', 'sehr',
           'mit', 'für', 'aber', 'nicht', 'ein', 'eine', 'wir', 'ich',
           'auch', 'gut', 'schlecht', 'toll', 'super'],
    'fr': ['le', 'la', 'les', 'est', 'sont', 'très', 'mais', 'avec',
           'pour', 'pas', 'un', 'une', 'et', 'nous', 'vous', 'bien',
           'bon', 'mauvais', 'excellent', 'super', "c'est", "j'ai"],
}


def _count_in_range(text: str, low: str, high: str) -> int:
    """Count the characters of ``text`` whose code point lies in [low, high]."""
    return sum(1 for c in text if low <= c <= high)


def _count_marker_words(text_lower: str, words: list[str]) -> int:
    """Count how many of ``words`` occur in ``text_lower`` as whole words."""
    return sum(1 for w in words if re.search(rf'\b{w}\b', text_lower))


def detect_language(text: str) -> tuple[str, float]:
    """
    Detect the language of a text.

    Uses langdetect when it is installed; otherwise (or when langdetect
    raises) falls back to a heuristic based on Unicode script ranges and,
    for Latin script, on common words of Spanish, English, German and French.

    Args:
        text: Text to analyse.

    Returns:
        (language_code, confidence). ``("unknown", 0.0)`` for empty input or
        input shorter than 3 characters after stripping. The confidence is an
        estimate: langdetect's result is scored by text length, heuristic
        results get fixed or score-based values.
    """
    if not text or len(text.strip()) < 3:
        return "unknown", 0.0

    text = text.strip()

    # Try langdetect first (most accurate)
    if LANGDETECT_AVAILABLE:
        try:
            lang = langdetect_detect(text)
            # langdetect doesn't provide confidence directly, estimate based on text length
            confidence = min(0.95, 0.5 + len(text) / 200)
            return lang, confidence
        except LangDetectException:
            pass  # fall through to the heuristic below

    # Fallback: simple heuristic detection based on character ranges.
    # Less accurate, but works without dependencies.
    total = len(text)  # at least 3 here, so the ratios below never divide by zero

    latin = _count_in_range(text, '\u0041', '\u024F')     # Latin extended
    cyrillic = _count_in_range(text, '\u0400', '\u04FF')  # Cyrillic
    cjk = _count_in_range(text, '\u4E00', '\u9FFF')       # CJK Unified
    japanese = _count_in_range(text, '\u3040', '\u30FF')  # Hiragana + Katakana
    korean = _count_in_range(text, '\uAC00', '\uD7AF')    # Hangul
    arabic = _count_in_range(text, '\u0600', '\u06FF')    # Arabic

    # Determine primary script. Kana is tested before CJK ideographs:
    # Japanese mixes kana with kanji from the CJK block, so testing CJK
    # first would label kanji-heavy Japanese text as Chinese.
    if japanese / total > 0.2:
        return "ja", 0.6  # Japanese
    if cjk / total > 0.3:
        return "zh", 0.6  # Chinese
    if korean / total > 0.3:
        return "ko", 0.6  # Korean
    if cyrillic / total > 0.3:
        return "ru", 0.5  # Russian (could be other Cyrillic)
    if arabic / total > 0.3:
        return "ar", 0.5  # Arabic

    if latin / total > 0.5:
        # Latin script - try to distinguish languages by common words
        text_lower = text.lower()
        scores = {
            lang: _count_marker_words(text_lower, words)
            for lang, words in _LATIN_MARKER_WORDS.items()
        }

        # Spanish-specific patterns (accents, ñ, inverted punctuation)
        if 'ñ' in text_lower or '¿' in text or '¡' in text:
            scores['es'] += 3
        if any(c in text_lower for c in 'áéíóúü'):
            scores['es'] += 1

        # German umlauts
        if any(c in text_lower for c in 'äöüß'):
            scores['de'] += 2

        # French accents and patterns
        if any(c in text_lower for c in 'àâçèêëîïôùûÿœæ'):
            scores['fr'] += 2

        best_lang = max(scores, key=scores.get)
        best_score = scores[best_lang]

        if best_score >= 1:
            confidence = min(0.75, 0.3 + best_score * 0.08)
            return best_lang, confidence

        # Default to English for Latin script
        return "en", 0.3

    return "unknown", 0.1
|
||||
|
||||
# Lazy client initialization
|
||||
_client = None
|
||||
|
||||
|
||||
def get_client() -> OpenAI:
    """Return the shared OpenAI client, creating it on the first call.

    The client is cached in the module-level ``_client`` variable.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty when the client
            has to be created.
    """
    global _client
    if _client is not None:
        return _client

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY environment variable not set. "
            "Set it or use --dry-run / mock classifier."
        )

    _client = OpenAI(api_key=api_key)
    return _client
|
||||
|
||||
# Default model
|
||||
DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
|
||||
|
||||
# Meta primitives - always available
|
||||
META_PRIMITIVES = frozenset({
    # Trust
    "HONESTY",
    "ETHICS",
    "PROMISES",
    # Handling of the customer
    "ACKNOWLEDGMENT",
    "RESPONSE_QUALITY",
    "RECOVERY",
    # Outcome
    "RETURN_INTENT",
    "RECOMMEND",
    "RECOGNITION",
    # Catch-all for content that fits no primitive
    "UNMAPPED",
})
|
||||
|
||||
# JSON Schema for structured output
|
||||
# Schema of one classified span (an element of "spans").
_SPAN_ITEM_SCHEMA = {
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "primitive": {"type": "string"},
        "valence": {"type": "string", "enum": ["positive", "negative", "mixed", "neutral"]},
        "intensity": {"type": "integer", "minimum": 1, "maximum": 5},
        "evidence": {"type": "string"},
        "start_char": {"type": ["integer", "null"]},
        "end_char": {"type": ["integer", "null"]},
        "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0},
        "details": {"type": "null"},
    },
    "required": [
        "primitive", "valence", "intensity", "evidence",
        "confidence", "start_char", "end_char", "details",
    ],
}

# Schema of one entry of "unmapped" (content that fits no primitive).
_UNMAPPED_ITEM_SCHEMA = {
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "label": {"type": "string"},
        "evidence": {"type": "string"},
        "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0},
    },
    "required": ["label", "evidence", "confidence"],
}

# Named, strict JSON Schema describing the classifier's structured output.
SPAN_SCHEMA = {
    "name": "review_classification",
    "strict": True,
    "schema": {
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "spans": {"type": "array", "items": _SPAN_ITEM_SCHEMA},
            "unmapped": {"type": "array", "items": _UNMAPPED_ITEM_SCHEMA},
        },
        "required": ["spans", "unmapped"],
    },
}
|
||||
|
||||
# System prompt
|
||||
SYSTEM_PROMPT = """You are a review classification system that extracts semantic spans and maps them to primitives.
|
||||
|
||||
## RULES (MUST FOLLOW)
|
||||
|
||||
1. Use ONLY primitives from the enabled_primitives list provided. Do NOT invent new primitives.
|
||||
|
||||
2. Meta primitives are always available: HONESTY, ETHICS, PROMISES, ACKNOWLEDGMENT, RESPONSE_QUALITY, RECOVERY, RETURN_INTENT, RECOMMEND, RECOGNITION, UNMAPPED
|
||||
|
||||
3. If content doesn't fit any enabled primitive, use UNMAPPED or put it in the unmapped array with a descriptive label.
|
||||
|
||||
4. Output MUST match the JSON schema exactly. No extra keys.
|
||||
|
||||
5. Evidence must be a SHORT EXACT QUOTE from the review text (in original language).
|
||||
|
||||
6. Extract 1-5 spans per review. Prefer fewer, larger spans over many small ones.
|
||||
|
||||
7. If unsure about classification, lower the confidence score.
|
||||
|
||||
## VALENCE
|
||||
- positive: praise, satisfaction, recommendation
|
||||
- negative: complaint, dissatisfaction, warning
|
||||
- mixed: both positive and negative in same span
|
||||
- neutral: factual observation, no sentiment
|
||||
|
||||
## INTENSITY (1-5)
|
||||
- 1: mild ("okay", "fine")
|
||||
- 2: moderate ("good", "bad")
|
||||
- 3: strong ("great", "terrible")
|
||||
- 4: very strong ("amazing", "awful")
|
||||
- 5: extreme ("best ever", "worst nightmare")
|
||||
|
||||
## CONFIDENCE
|
||||
- 0.9+: Very certain the primitive fits
|
||||
- 0.7-0.9: Confident
|
||||
- 0.5-0.7: Moderate confidence
|
||||
- <0.5: Low confidence (consider UNMAPPED)
|
||||
|
||||
Output valid JSON only. No markdown, no explanations."""
|
||||
|
||||
|
||||
def compute_review_hash(text: str, config_version: str) -> str:
    """Return a short cache key for a review under a given config version.

    The key is the first 16 hex characters of the SHA-256 digest of
    ``"<config_version>:<text>"``.
    """
    payload = "{}:{}".format(config_version, text).encode()
    digest = hashlib.sha256(payload).hexdigest()
    return digest[:16]
|
||||
|
||||
|
||||
def build_user_payload(
    review_text: str,
    rating: int | None,
    config: dict[str, Any],
    language: str = "auto",
) -> dict[str, Any]:
    """Build the user message payload for the LLM.

    Args:
        review_text: Review text to classify.
        rating: Star rating, if available.
        config: Resolved config. Keys read here: ``enabled_primitives``,
            ``primitives``, ``brief``, ``weights``, ``business_id``,
            ``sector_code`` and ``config_version``.
        language: Language code or hint passed through to the model.

    Returns:
        A JSON-serializable dict with the business context, the enabled
        primitives (meta primitives always included) and their definitions,
        the weights, a shortened sector brief, and the review itself.
    """
    # Extract only what the model needs
    enabled = set(config.get("enabled_primitives", []))
    enabled.update(META_PRIMITIVES)

    # Build primitive definitions (minimal). Iterating in sorted order keeps
    # the serialized payload identical from run to run (set order is not).
    primitives_dict = config.get("primitives") or {}
    primitive_defs = {}
    for prim in sorted(enabled):
        if prim in primitives_dict:
            info = primitives_dict[prim]
            primitive_defs[prim] = info.get("def", info.get("name", prim))
        elif prim in META_PRIMITIVES:
            primitive_defs[prim] = f"Meta primitive: {prim.replace('_', ' ').lower()}"

    # Extract brief signals (keep it short)
    brief = config.get("brief") or {}
    brief_summary = {}
    if brief.get("what_customers_judge"):
        brief_summary["key_judgment_areas"] = _brief_labels(
            brief["what_customers_judge"], ("aspect", "area"), 5
        )
    if brief.get("critical_pain_points"):
        # Generated briefs name this field "pain_point"; "pain" is still
        # accepted for briefs that use the short key.
        brief_summary["critical_pains"] = _brief_labels(
            brief["critical_pain_points"], ("pain_point", "pain"), 3
        )

    return {
        "business": {
            "name": config.get("business_id"),
            "sector": config.get("sector_code"),
            "config_version": config.get("config_version"),
        },
        "enabled_primitives": sorted(enabled),
        "primitive_definitions": primitive_defs,
        "weights": config.get("weights", {}),
        "sector_brief": brief_summary,
        "review": {
            "text": review_text,
            "rating": rating,
            "language": language,
        },
    }


def _brief_labels(section: Any, keys: tuple[str, ...], limit: int) -> list[Any]:
    """Return up to ``limit`` short labels taken from a brief section.

    ``section`` is either a list of items or a dict holding them under
    ``"items"``. For dict items the first key of ``keys`` that is present
    supplies the label; other items, and dicts with none of the keys, are
    stringified.
    """
    items = section.get("items", []) if isinstance(section, dict) else section
    labels = []
    for item in items[:limit]:
        if isinstance(item, dict):
            labels.append(next((item[k] for k in keys if k in item), str(item)))
        else:
            labels.append(str(item))
    return labels
|
||||
|
||||
|
||||
def validate_response(
    response: dict[str, Any],
    enabled_primitives: set[str],
) -> tuple[dict[str, Any], list[str]]:
    """
    Validate LLM response and fix invalid primitives.

    Every span whose primitive is neither enabled nor a meta primitive is
    re-labelled ``UNMAPPED`` and a warning is recorded. The input
    ``response`` is left unmodified: re-labelled spans are copies.

    Args:
        response: Parsed model output with ``spans`` and ``unmapped``.
        enabled_primitives: Primitives enabled for the business (any
            iterable of names is accepted).

    Returns (validated_response, warnings).
    """
    warnings = []
    all_valid = set(enabled_primitives) | META_PRIMITIVES

    validated_spans = []
    # ``or []`` also covers an explicit null in the model output.
    for span in response.get("spans") or []:
        prim = span.get("primitive")
        if prim not in all_valid:
            warnings.append(f"Invalid primitive '{prim}' → UNMAPPED (original: {prim})")
            # Copy instead of mutating the caller's span.
            span = {**span, "primitive": "UNMAPPED"}
        validated_spans.append(span)

    return {
        "spans": validated_spans,
        "unmapped": response.get("unmapped") or [],
    }, warnings
|
||||
|
||||
|
||||
def classify_review(
    review_text: str,
    rating: int | None,
    config: dict[str, Any],
    language: str = "auto",
    model: str | None = None,
    max_retries: int = 3,
) -> dict[str, Any]:
    """
    Classify a single review using OpenAI.

    Never raises for API or parsing failures: when no attempt succeeds, a
    low-confidence fallback result with a single ``UNMAPPED`` span is returned
    and the error is reported under ``warnings``.

    Args:
        review_text: The review text to classify
        rating: Star rating (1-5) if available
        config: Resolved config from ConfigResolver
        language: Language hint; "auto" runs detect_language() on the text
        model: Model to use (falls back to DEFAULT_MODEL when empty)
        max_retries: Maximum number of attempts on transient errors. Values
            below 1 are treated as 1 so the API is always called once.

    Returns:
        {
            "spans": [...],
            "unmapped": [...],
            "model": str,
            "raw_response": str,
            "review_hash": str,
            "warnings": [...],
            "tokens": {"prompt": int, "completion": int},
            "detected_language": str,
            "language_confidence": float,
        }
    """
    model = model or DEFAULT_MODEL

    # Resolve the language: detect it, or trust the caller's explicit hint.
    if language == "auto":
        detected_lang, lang_confidence = detect_language(review_text)
    else:
        detected_lang = language
        lang_confidence = 1.0  # User-specified

    # Build payload with detected language
    payload = build_user_payload(review_text, rating, config, detected_lang)
    user_content = json.dumps(payload, ensure_ascii=False, indent=None)

    # Compute hash for caching
    review_hash = compute_review_hash(review_text, config.get("config_version", "1.0"))

    # Loop-invariant: the set of primitives the response is validated against.
    enabled = set(config.get("enabled_primitives") or [])

    # Call OpenAI with retries
    last_error = None
    client = get_client()
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_content},
                ],
                response_format={
                    "type": "json_schema",
                    "json_schema": SPAN_SCHEMA,
                },
                temperature=0.1,  # Low temperature for consistency
                max_tokens=2000,
            )

            # Parse response
            raw_text = response.choices[0].message.content
            parsed = json.loads(raw_text)

            # Validate primitives
            validated, warnings = validate_response(parsed, enabled)

            usage = response.usage
            return {
                "spans": validated["spans"],
                "unmapped": validated["unmapped"],
                "model": model,
                "raw_response": raw_text,
                "review_hash": review_hash,
                "warnings": warnings,
                "tokens": {
                    "prompt": usage.prompt_tokens if usage else 0,
                    "completion": usage.completion_tokens if usage else 0,
                },
                "detected_language": detected_lang,
                "language_confidence": lang_confidence,
            }

        except json.JSONDecodeError as e:
            last_error = f"JSON parse error: {e}"
            # Don't retry parse errors - return the fallback
            break

        except Exception as e:
            # Boundary handler: any failure is folded into the fallback result.
            last_error = str(e)
            if "rate_limit" in last_error.lower() or "429" in last_error:
                # Exponential backoff for rate limits
                wait = 2 ** attempt
            elif "500" in last_error or "502" in last_error or "503" in last_error:
                # Short fixed pause before retrying server errors
                wait = 1
            else:
                # Don't retry other errors
                break

            if attempt == attempts - 1:
                # No attempt left: sleeping here would only delay the fallback.
                break
            time.sleep(wait)

    # Fallback response on error
    return {
        "spans": [{
            "primitive": "UNMAPPED",
            "valence": "neutral",
            "intensity": 1,
            "evidence": review_text[:100] if review_text else "",
            "start_char": 0,
            "end_char": min(100, len(review_text)) if review_text else 0,
            "confidence": 0.1,
            "details": {"error": last_error},
        }],
        "unmapped": [],
        "model": model,
        "raw_response": json.dumps({"error": last_error}),
        "review_hash": review_hash,
        "warnings": [f"Classification failed: {last_error}"],
        "tokens": {"prompt": 0, "completion": 0},
        "detected_language": detected_lang,
        "language_confidence": lang_confidence,
    }
|
||||
|
||||
|
||||
async def classify_review_async(
    review_text: str,
    rating: int | None,
    config: dict[str, Any],
    language: str = "auto",
    model: str | None = None,
    max_retries: int = 3,
) -> dict[str, Any]:
    """
    Async wrapper for classify_review.

    Runs the blocking classify_review() call in the event loop's default
    executor so the loop stays responsive while the request is in flight.

    Args:
        review_text: The review text to classify
        rating: Star rating if available
        config: Resolved config passed through to classify_review
        language: Language hint, "auto" by default
        model: Model to use; None lets classify_review pick its default
        max_retries: Forwarded to classify_review

    Returns:
        The result dict produced by classify_review.
    """
    import asyncio
    from functools import partial

    # Inside a coroutine a loop is always running; get_running_loop() is the
    # supported way to obtain it (get_event_loop() is deprecated for this use).
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        partial(classify_review, review_text, rating, config, language, model, max_retries),
    )
|
||||
|
||||
|
||||
# Batch classification (for later optimization)
|
||||
async def classify_batch(
|
||||
reviews: list[dict[str, Any]],
|
||||
config: dict[str, Any],
|
||||
model: str | None = None,
|
||||
max_concurrent: int = 5,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Classify multiple reviews concurrently.
|
||||
|
||||
Args:
|
||||
reviews: List of {"text": str, "rating": int, "language": str}
|
||||
config: Resolved config
|
||||
model: Model to use
|
||||
max_concurrent: Max concurrent requests
|
||||
|
||||
Returns:
|
||||
List of classification results
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
semaphore = asyncio.Semaphore(max_concurrent)
|
||||
|
||||
async def classify_one(review: dict) -> dict:
|
||||
async with semaphore:
|
||||
return await classify_review_async(
|
||||
review.get("text", ""),
|
||||
review.get("rating"),
|
||||
config,
|
||||
review.get("language", "auto"),
|
||||
model,
|
||||
)
|
||||
|
||||
tasks = [classify_one(r) for r in reviews]
|
||||
return await asyncio.gather(*tasks)
|
||||
1102
packages/reviewiq-pipeline/scripts/run_classification_v2.py
Normal file
1102
packages/reviewiq-pipeline/scripts/run_classification_v2.py
Normal file
File diff suppressed because it is too large
Load Diff
457
packages/reviewiq-pipeline/scripts/validate_l1_configs.py
Normal file
457
packages/reviewiq-pipeline/scripts/validate_l1_configs.py
Normal file
@@ -0,0 +1,457 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Wave 1 L1 Config Validation Script
|
||||
|
||||
Validates L1 primitive configs against real review data by analyzing:
|
||||
1. Coverage: % of spans mapped to enabled primitives
|
||||
2. Top primitives by frequency
|
||||
3. Disabled primitives appearing (potential misconfig)
|
||||
4. Weight effectiveness
|
||||
|
||||
Usage:
|
||||
python validate_l1_configs.py --sector ENTERTAINMENT --job-url "gokarts"
|
||||
python validate_l1_configs.py --sector AUTOMOTIVE --job-url "clickrent"
|
||||
python validate_l1_configs.py --all
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
# Paths (resolved relative to this script: <package>/data/...)
_SCRIPT_DIR = Path(__file__).parent
DATA_DIR = _SCRIPT_DIR.parent / "data"
CONFIGS_DIR = DATA_DIR.joinpath("primitive_configs", "l1")
BRIEFS_DIR = DATA_DIR.joinpath("sector_briefs")

# Primitive to URT domain mapping
# URT domains: O=Offering, P=People, J=Journey, E=Environment, A=Access, V=Value, R=Relationship
# Declared per domain and inverted below; no primitive is assigned to A or R.
_DOMAIN_PRIMITIVES = {
    # Quality -> Offering (O)
    "O": (
        "TASTE", "CRAFT", "FRESHNESS", "TEMPERATURE",
        "EFFECTIVENESS", "ACCURACY", "CONDITION", "CONSISTENCY",
    ),
    # Service -> People (P)
    "P": ("MANNER", "COMPETENCE", "ATTENTIVENESS", "COMMUNICATION"),
    # Process -> Journey (J)
    "J": ("SPEED", "FRICTION", "RELIABILITY", "AVAILABILITY"),
    # Environment -> Environment (E)
    "E": (
        "CLEANLINESS", "COMFORT", "SAFETY", "AMBIANCE",
        "ACCESSIBILITY", "DIGITAL_UX",
    ),
    # Value -> Value (V)
    "V": ("PRICE_LEVEL", "PRICE_FAIRNESS", "PRICE_TRANSPARENCY", "VALUE_FOR_MONEY"),
}
PRIMITIVE_TO_DOMAIN = {
    primitive: domain
    for domain, primitives in _DOMAIN_PRIMITIVES.items()
    for primitive in primitives
}

# URT code to primitive mapping (simplified - maps URT codes to closest primitive)
URT_TO_PRIMITIVE = {
    # Offering codes
    "O1.01": "CONSISTENCY", "O1.02": "CRAFT", "O1.03": "FRESHNESS",
    "O1.04": "EFFECTIVENESS", "O1.05": "TASTE", "O1.06": "CONDITION",
    "O2.01": "ACCURACY", "O2.02": "EFFECTIVENESS", "O2.03": "CRAFT",
    "O3.01": "ACCURACY", "O3.02": "CONSISTENCY", "O3.03": "EFFECTIVENESS",
    # People codes
    "P1.01": "MANNER", "P1.02": "MANNER", "P1.03": "ATTENTIVENESS",
    "P1.04": "COMMUNICATION", "P1.05": "ATTENTIVENESS",
    "P2.01": "COMPETENCE", "P2.02": "COMPETENCE", "P2.03": "COMPETENCE",
    "P3.01": "COMMUNICATION", "P3.02": "COMMUNICATION", "P3.03": "COMMUNICATION",
    # Journey codes
    "J1.01": "SPEED", "J1.02": "RELIABILITY", "J1.03": "FRICTION",
    "J1.04": "SPEED", "J1.05": "RELIABILITY",
    "J2.01": "RELIABILITY", "J2.02": "RELIABILITY", "J2.03": "FRICTION",
    "J3.01": "FRICTION", "J3.02": "FRICTION", "J3.03": "FRICTION",
    # Environment codes
    "E1.01": "CLEANLINESS", "E1.02": "COMFORT", "E1.03": "AMBIANCE",
    "E1.04": "AMBIANCE", "E1.05": "COMFORT",
    "E2.01": "AMBIANCE", "E2.02": "COMFORT", "E2.03": "COMFORT",
    "E2.04": "AMBIANCE", "E2.05": "DIGITAL_UX",
    "E3.01": "SAFETY", "E3.02": "SAFETY", "E3.03": "ACCESSIBILITY",
    "E4.01": "ACCESSIBILITY", "E4.02": "ACCESSIBILITY", "E4.03": "DIGITAL_UX",
    # Access codes
    "A1.01": "AVAILABILITY", "A1.02": "AVAILABILITY", "A1.03": "AVAILABILITY",
    "A1.04": "ACCESSIBILITY", "A1.05": "ACCESSIBILITY",
    "A2.01": "ACCESSIBILITY", "A2.02": "ACCESSIBILITY", "A2.03": "DIGITAL_UX",
    "A3.01": "ACCESSIBILITY", "A3.02": "ACCESSIBILITY", "A3.03": "SPEED",
    "A4.01": "ACCESSIBILITY", "A4.02": "ACCESSIBILITY", "A4.03": "AVAILABILITY",
    # Value codes
    "V1.01": "PRICE_LEVEL", "V1.02": "PRICE_FAIRNESS", "V1.03": "PRICE_TRANSPARENCY",
    "V2.01": "PRICE_FAIRNESS", "V2.02": "PRICE_TRANSPARENCY", "V2.03": "VALUE_FOR_MONEY",
    "V3.01": "VALUE_FOR_MONEY", "V3.02": "VALUE_FOR_MONEY", "V3.03": "PRICE_FAIRNESS",
    "V4.01": "VALUE_FOR_MONEY", "V4.02": "VALUE_FOR_MONEY", "V4.03": "VALUE_FOR_MONEY",
    # Relationship codes
    "R1.01": "RELIABILITY", "R1.02": "RELIABILITY", "R1.03": "RELIABILITY",
    "R2.01": "RELIABILITY", "R2.02": "CONSISTENCY", "R2.03": "RELIABILITY",
    "R3.01": "MANNER", "R3.02": "MANNER", "R3.03": "COMMUNICATION",
    "R4.01": "CONSISTENCY", "R4.02": "RELIABILITY", "R4.03": "CONSISTENCY",
}
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of validating one sector's L1 config against a set of spans.

    All fields are required and are filled in by the analysis step; the
    report printer only reads them.
    """

    # --- Identification / volume ---
    sector_code: str
    job_count: int
    review_count: int
    span_count: int

    # --- Coverage metrics ---
    enabled_coverage: float  # fraction of spans whose primitive is enabled
    disabled_hits: dict[str, int]  # disabled primitive -> number of spans
    unmapped_count: int  # spans whose URT code maps to no primitive

    # --- Distributions ---
    primitive_counts: dict[str, int]  # primitive -> number of spans
    domain_distribution: dict[str, int]  # domain letter (O, P, J, E, A, V, R) -> count
    valence_distribution: dict[str, int]  # valence label (V+, V-, V0, V±) -> count

    # --- Most frequent URT codes, as (code, count) pairs ---
    top_urt_codes: list[tuple[str, int]]

    # --- Human-readable tuning suggestions ---
    recommendations: list[str]
|
||||
|
||||
|
||||
def load_l1_config(sector_code: str) -> dict[str, Any] | None:
    """Load L1 config for a sector.

    Reads ``<sector_code lower-cased>_config.json`` from CONFIGS_DIR.

    Args:
        sector_code: Sector identifier, e.g. "ENTERTAINMENT".

    Returns:
        The parsed JSON document, or None when the file does not exist.
    """
    config_file = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
    try:
        # Decode explicitly as UTF-8 rather than the platform's locale
        # encoding, and open directly so there is no exists()/open() race.
        with open(config_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
|
||||
|
||||
|
||||
def load_sector_brief(sector_code: str) -> dict[str, Any] | None:
    """Load sector brief for a sector.

    Reads ``<sector_code lower-cased>_brief.json`` from BRIEFS_DIR.

    Args:
        sector_code: Sector identifier, e.g. "ENTERTAINMENT".

    Returns:
        The parsed JSON document, or None when the file does not exist.
    """
    brief_file = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
    try:
        # Decode explicitly as UTF-8 rather than the platform's locale
        # encoding, and open directly so there is no exists()/open() race.
        with open(brief_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
|
||||
|
||||
|
||||
def map_urt_to_primitive(urt_code: str) -> str | None:
    """Return the primitive registered for a URT code, or None if the code is unknown."""
    if urt_code in URT_TO_PRIMITIVE:
        return URT_TO_PRIMITIVE[urt_code]
    return None
|
||||
|
||||
|
||||
async def fetch_spans_for_jobs(pool: asyncpg.Pool, job_url_pattern: str) -> list[dict]:
    """Return every span whose job URL contains the pattern, newest first.

    The match is case-insensitive: both the stored URL and the pattern are
    lower-cased, and the pattern is wrapped in ``%`` wildcards. It is passed
    as a bound parameter, never interpolated into the SQL text.
    """
    sql = """
        SELECT
            rs.urt_primary,
            rs.valence,
            rs.intensity,
            rs.span_text,
            j.url
        FROM pipeline.review_spans rs
        JOIN pipeline.reviews_raw rr ON rs.review_id = rr.review_id
        JOIN public.jobs j ON rr.job_id = j.job_id
        WHERE LOWER(j.url) LIKE $1
        ORDER BY rs.created_at DESC
    """
    like_argument = "%" + job_url_pattern.lower() + "%"
    records = await pool.fetch(sql, like_argument)
    # Convert each returned row into a plain dict for the analysis code.
    return list(map(dict, records))
|
||||
|
||||
|
||||
async def fetch_all_spans(pool: asyncpg.Pool) -> list[dict]:
    """Return every row of pipeline.review_spans, newest first, as plain dicts.

    No sector or business filter is applied.
    """
    sql = """
        SELECT
            urt_primary,
            valence,
            intensity,
            span_text
        FROM pipeline.review_spans
        ORDER BY created_at DESC
    """
    records = await pool.fetch(sql)
    return list(map(dict, records))
|
||||
|
||||
|
||||
def analyze_spans(
    spans: list[dict],
    config: dict[str, Any],
) -> ValidationResult:
    """Analyze spans against L1 config.

    Each span's ``urt_primary`` code is mapped to a primitive, then counted
    per primitive, per domain and per valence. Coverage is the share of spans
    whose primitive is in the config's ``enabled`` list.

    Args:
        spans: Span rows; each must have ``urt_primary`` and may have ``valence``.
        config: L1 config; ``sector_code`` is required, ``enabled``,
            ``disabled`` and ``weights`` are optional.

    Returns:
        A ValidationResult. ``job_count`` and ``review_count`` are
        placeholders (1 and 0) because they are not tracked at span level.
    """
    sector_code = config["sector_code"]
    enabled = set(config.get("enabled", []))
    disabled = set(config.get("disabled", []))
    weights = config.get("weights", {})

    # Counters
    primitive_counts: Counter = Counter()
    domain_counts: Counter = Counter()
    valence_counts: Counter = Counter()
    urt_counts: Counter = Counter()
    disabled_hits: Counter = Counter()
    unmapped = 0
    enabled_hits = 0

    for span in spans:
        urt_code = span["urt_primary"]
        valence = span.get("valence", "V0")

        urt_counts[urt_code] += 1
        valence_counts[valence] += 1

        # The first character of a URT code is its domain letter. A NULL or
        # empty code has none, so it is bucketed under "?" instead of raising.
        code_domain = urt_code[0] if urt_code else "?"

        primitive = map_urt_to_primitive(urt_code)
        if not primitive:
            unmapped += 1
            # Still count domain from URT code
            domain_counts[code_domain] += 1
            continue

        primitive_counts[primitive] += 1

        # Mapped spans are counted under the primitive's own domain, which may
        # differ from the URT code's letter: PRIMITIVE_TO_DOMAIN assigns no
        # primitive to A or R.
        domain_counts[PRIMITIVE_TO_DOMAIN.get(primitive, code_domain)] += 1

        # Check if enabled or disabled
        if primitive in enabled:
            enabled_hits += 1
        elif primitive in disabled:
            disabled_hits[primitive] += 1

    # Calculate coverage
    total = len(spans)
    enabled_coverage = enabled_hits / total if total > 0 else 0

    # Generate recommendations
    recommendations = []

    # Check disabled primitives that appeared frequently
    for prim, count in disabled_hits.most_common(5):
        if count >= 10:
            pct = count / total * 100
            recommendations.append(
                f"ENABLE {prim}: Disabled but appeared {count} times ({pct:.1f}%)"
            )

    # Check for missing high-weight primitives
    for prim in set(weights):
        if primitive_counts[prim] == 0 and prim in enabled:
            recommendations.append(
                f"CHECK {prim}: Weighted ({weights[prim]}x) but no appearances"
            )

    # Check for frequently appearing unweighted primitives
    for prim, count in primitive_counts.most_common(10):
        if prim in enabled and prim not in weights and count >= total * 0.1:
            pct = count / total * 100
            recommendations.append(
                f"WEIGHT {prim}: High frequency ({count}, {pct:.1f}%) but not weighted"
            )

    return ValidationResult(
        sector_code=sector_code,
        job_count=1,  # Will be updated by caller
        review_count=0,  # Not tracked at span level
        span_count=total,
        enabled_coverage=enabled_coverage,
        disabled_hits=dict(disabled_hits),
        unmapped_count=unmapped,
        primitive_counts=dict(primitive_counts),
        domain_distribution=dict(domain_counts),
        valence_distribution=dict(valence_counts),
        top_urt_codes=urt_counts.most_common(15),
        recommendations=recommendations,
    )
|
||||
|
||||
|
||||
def print_validation_report(result: ValidationResult, config: dict, brief: dict | None):
    """Print formatted validation report.

    Writes the overview, config summary, domain and valence distributions,
    top primitives, top URT codes, disabled-but-appearing primitives,
    recommendations and, when a brief is given, its key judgment areas to
    stdout.

    Args:
        result: Analysis result to render.
        config: L1 config the result was computed against (``enabled``,
            ``disabled`` and ``weights`` are read).
        brief: Optional sector brief; only ``what_customers_judge`` is read.
    """
    span_count = result.span_count

    def pct_of_spans(count: int) -> float:
        # Share of all analysed spans as a percentage; 0 when there are none.
        return count / span_count * 100 if span_count > 0 else 0

    rule = "=" * 70
    print("\n" + rule)
    print(f"VALIDATION REPORT: {result.sector_code}")
    print(rule)

    # Overview
    print("\n📊 OVERVIEW")
    print(f" Spans analyzed: {span_count:,}")
    print(f" Enabled coverage: {result.enabled_coverage:.1%}")
    if span_count > 0:
        print(f" Unmapped spans: {result.unmapped_count} ({pct_of_spans(result.unmapped_count):.1f}%)")
    else:
        print(" No spans")

    # Config summary
    print("\n⚙️ CONFIG SUMMARY")
    print(f" Enabled: {len(config.get('enabled', []))} primitives")
    print(f" Disabled: {len(config.get('disabled', []))} primitives")
    print(f" Weighted: {len(config.get('weights', {}))} primitives")

    # Domain distribution
    print("\n📁 DOMAIN DISTRIBUTION")
    domain_names = {"O": "Offering", "P": "People", "J": "Journey",
                    "E": "Environment", "A": "Access", "V": "Value", "R": "Relationship"}
    for domain in "OPJEVRA":
        count = result.domain_distribution.get(domain, 0)
        pct = pct_of_spans(count)
        bar = "█" * int(pct / 2)  # one block per 2 percentage points
        print(f" {domain} {domain_names.get(domain, '?'):12} {count:4} ({pct:5.1f}%) {bar}")

    # Valence distribution
    print("\n😊 VALENCE DISTRIBUTION")
    for val in ["V+", "V-", "V0", "V±"]:
        count = result.valence_distribution.get(val, 0)
        print(f" {val}: {count:4} ({pct_of_spans(count):5.1f}%)")

    # Top primitives
    print("\n🔝 TOP PRIMITIVES")
    enabled_set = set(config.get("enabled", []))
    weights = config.get("weights", {})
    for prim, count in sorted(result.primitive_counts.items(), key=lambda x: -x[1])[:12]:
        status = "✓" if prim in enabled_set else "✗"
        weight = f"({weights[prim]}x)" if prim in weights else ""
        print(f" {status} {prim:20} {count:4} ({pct_of_spans(count):5.1f}%) {weight}")

    # Top URT codes
    print("\n📋 TOP URT CODES")
    for code, count in result.top_urt_codes[:10]:
        mapped = URT_TO_PRIMITIVE.get(code, "UNMAPPED")
        print(f" {code}: {count:4} ({pct_of_spans(count):5.1f}%) → {mapped}")

    # Disabled but appearing
    if result.disabled_hits:
        print("\n⚠️ DISABLED BUT APPEARING")
        for prim, count in sorted(result.disabled_hits.items(), key=lambda x: -x[1]):
            print(f" {prim}: {count} ({pct_of_spans(count):.1f}%)")

    # Recommendations
    if result.recommendations:
        print("\n💡 RECOMMENDATIONS")
        for rec in result.recommendations:
            print(f" • {rec}")

    # Brief signals check (if available)
    if brief:
        print("\n📝 BRIEF SIGNALS CHECK")
        what_customers_judge = brief.get("what_customers_judge", {})
        if isinstance(what_customers_judge, dict):
            items = what_customers_judge.get("items", [])
        else:
            items = what_customers_judge if isinstance(what_customers_judge, list) else []

        print(" Key judgment areas from brief:")
        for item in items[:5]:
            if isinstance(item, dict):
                # Look under "aspect" first, then "area", so aspect-keyed
                # items print their label instead of the whole dict.
                print(f" • {item.get('aspect', item.get('area', item))}")
            else:
                print(f" • {item}")

    print("\n" + rule)
|
||||
|
||||
|
||||
async def run_validation(
    sector_code: str,
    job_url_pattern: str | None = None,
    db_url: str | None = None,
):
    """Validate one sector's L1 config against spans from the database and print the report.

    Args:
        sector_code: Sector whose config (and optional brief) is loaded.
        job_url_pattern: When given, only spans of jobs whose URL contains
            it are analysed; otherwise every span in the table is used.
        db_url: Database URL; falls back to $DATABASE_URL, then to a
            built-in local default.

    Returns:
        The ValidationResult, or None when the config is missing or the
        pattern matched no spans.
    """
    config = load_l1_config(sector_code)
    if not config:
        print(f"❌ No L1 config found for {sector_code}")
        return None

    brief = load_sector_brief(sector_code)

    # NOTE(review): the last-resort DSN embeds credentials in source — confirm
    # it is only ever a local development database.
    dsn = db_url or os.environ.get(
        "DATABASE_URL",
        "postgresql://scraper:scraper123@localhost:5437/scraper"
    )
    pool = await asyncpg.create_pool(dsn)

    try:
        if job_url_pattern:
            fetch = fetch_spans_for_jobs(pool, job_url_pattern)
        else:
            fetch = fetch_all_spans(pool)
        spans = await fetch

        # An empty result only aborts the run when a filter was requested.
        if job_url_pattern and not spans:
            print(f"⚠️ No spans found for jobs matching '{job_url_pattern}'")
            return None

        result = analyze_spans(spans, config)
        print_validation_report(result, config, brief)
        return result
    finally:
        # Always release the pool, including on the early return above.
        await pool.close()
|
||||
|
||||
|
||||
async def run_all_validations(db_url: str | None = None):
    """Run run_validation for each known (sector, job pattern) pair, then print a summary.

    Pairs for which run_validation returns nothing are left out of the summary.
    """
    # Known jobs and their sectors
    jobs_by_sector = {
        "ENTERTAINMENT": ["gokarts", "soho"],
        "AUTOMOTIVE": ["clickrent"],
        "PERSONAL_SERVICES": ["fleitas"],
        "FOOD_DINING": ["fika"],
    }
    banner = "=" * 70

    # (sector, pattern, result) for every successful validation, in run order
    completed = []
    for sector in jobs_by_sector:
        print(f"\n{banner}")
        print(f"Validating {sector}...")
        print(f"{banner}")

        for pattern in jobs_by_sector[sector]:
            outcome = await run_validation(sector, pattern, db_url)
            if outcome:
                completed.append((sector, pattern, outcome))

    # Summary
    print("\n" + banner)
    print("VALIDATION SUMMARY")
    print(banner)

    for sector, pattern, outcome in completed:
        print(f"\n{sector} ({pattern}):")
        print(f" Coverage: {outcome.enabled_coverage:.1%}")
        print(f" Spans: {outcome.span_count}")
        if outcome.disabled_hits:
            print(f" ⚠️ Disabled hits: {sum(outcome.disabled_hits.values())}")
        if outcome.recommendations:
            print(f" Recommendations: {len(outcome.recommendations)}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the arguments and start the requested validation.

    ``--all`` takes precedence over ``--sector``; with neither, the help text
    is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Validate L1 primitive configs")
    parser.add_argument("--sector", help="Sector code (e.g., ENTERTAINMENT)")
    parser.add_argument("--job-url", help="Job URL pattern to filter (e.g., 'gokarts')")
    parser.add_argument("--all", action="store_true", help="Run all validations")
    parser.add_argument("--db-url", help="Database URL")

    args = parser.parse_args()

    # Nothing to do without --all or --sector.
    if not args.all and not args.sector:
        parser.print_help()
        sys.exit(1)

    if args.all:
        job = run_all_validations(args.db_url)
    else:
        job = run_validation(args.sector, args.job_url, args.db_url)
    asyncio.run(job)


if __name__ == "__main__":
    main()
|
||||
421
packages/reviewiq-pipeline/scripts/validate_l1_configs_v2.py
Normal file
421
packages/reviewiq-pipeline/scripts/validate_l1_configs_v2.py
Normal file
@@ -0,0 +1,421 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Wave 1 L1 Config Validation Script - V2 (Sector-Scoped)
|
||||
|
||||
Validates L1 primitive configs against SECTOR-SPECIFIC review data.
|
||||
Only validates sectors where we have real business data.
|
||||
|
||||
Key improvement over v1: spans are filtered by business → sector mapping,
|
||||
ensuring "TASTE in HEALTHCARE" noise doesn't pollute results.
|
||||
|
||||
Usage:
|
||||
python validate_l1_configs_v2.py --sector ENTERTAINMENT
|
||||
python validate_l1_configs_v2.py --sector AUTOMOTIVE
|
||||
python validate_l1_configs_v2.py --all
|
||||
python validate_l1_configs_v2.py --report # Summary only
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
# Paths (resolved relative to this script: <package>/data/...)
_SCRIPT_DIR = Path(__file__).parent
DATA_DIR = _SCRIPT_DIR.parent / "data"
CONFIGS_DIR = DATA_DIR.joinpath("primitive_configs", "l1")
BRIEFS_DIR = DATA_DIR.joinpath("sector_briefs")

# Business → Sector mapping (ground truth)
BUSINESS_TO_SECTOR = {
    "Go Karts Mar Menor": "ENTERTAINMENT",
    "ClickRent Gran Canaria | Alquiler de Coches y Furgonetas": "AUTOMOTIVE",
    "Soho Club": "ENTERTAINMENT",
    "Fika": "FOOD_DINING",
}

# Sectors with real data
SECTORS_WITH_DATA = {"ENTERTAINMENT", "AUTOMOTIVE", "FOOD_DINING"}

# URT code to primitive mapping
URT_TO_PRIMITIVE = {
    # Offering codes
    "O1.01": "CONSISTENCY", "O1.02": "CRAFT", "O1.03": "FRESHNESS",
    "O1.04": "EFFECTIVENESS", "O1.05": "TASTE", "O1.06": "CONDITION",
    "O2.01": "ACCURACY", "O2.02": "EFFECTIVENESS", "O2.03": "CRAFT",
    "O3.01": "ACCURACY", "O3.02": "CONSISTENCY", "O3.03": "EFFECTIVENESS",
    # People codes
    "P1.01": "MANNER", "P1.02": "MANNER", "P1.03": "ATTENTIVENESS",
    "P1.04": "COMMUNICATION", "P1.05": "ATTENTIVENESS",
    "P2.01": "COMPETENCE", "P2.02": "COMPETENCE", "P2.03": "COMPETENCE",
    "P3.01": "COMMUNICATION", "P3.02": "COMMUNICATION", "P3.03": "COMMUNICATION",
    # Journey codes
    "J1.01": "SPEED", "J1.02": "RELIABILITY", "J1.03": "FRICTION",
    "J1.04": "SPEED", "J1.05": "RELIABILITY",
    "J2.01": "RELIABILITY", "J2.02": "RELIABILITY", "J2.03": "FRICTION",
    "J3.01": "FRICTION", "J3.02": "FRICTION", "J3.03": "FRICTION",
    # Environment codes
    "E1.01": "CLEANLINESS", "E1.02": "COMFORT", "E1.03": "AMBIANCE",
    "E1.04": "AMBIANCE", "E1.05": "COMFORT",
    "E2.01": "AMBIANCE", "E2.02": "COMFORT", "E2.03": "COMFORT",
    "E2.04": "AMBIANCE", "E2.05": "DIGITAL_UX",
    "E3.01": "SAFETY", "E3.02": "SAFETY", "E3.03": "ACCESSIBILITY",
    "E4.01": "ACCESSIBILITY", "E4.02": "ACCESSIBILITY", "E4.03": "DIGITAL_UX",
    # Access codes
    "A1.01": "AVAILABILITY", "A1.02": "AVAILABILITY", "A1.03": "AVAILABILITY",
    "A1.04": "ACCESSIBILITY", "A1.05": "ACCESSIBILITY",
    "A2.01": "ACCESSIBILITY", "A2.02": "ACCESSIBILITY", "A2.03": "DIGITAL_UX",
    "A3.01": "ACCESSIBILITY", "A3.02": "ACCESSIBILITY", "A3.03": "SPEED",
    "A4.01": "ACCESSIBILITY", "A4.02": "ACCESSIBILITY", "A4.03": "AVAILABILITY",
    # Value codes
    "V1.01": "PRICE_LEVEL", "V1.02": "PRICE_FAIRNESS", "V1.03": "PRICE_TRANSPARENCY",
    "V2.01": "PRICE_FAIRNESS", "V2.02": "PRICE_TRANSPARENCY", "V2.03": "VALUE_FOR_MONEY",
    "V3.01": "VALUE_FOR_MONEY", "V3.02": "VALUE_FOR_MONEY", "V3.03": "PRICE_FAIRNESS",
    "V4.01": "VALUE_FOR_MONEY", "V4.02": "VALUE_FOR_MONEY", "V4.03": "VALUE_FOR_MONEY",
    # Relationship codes (map to meta - these should stay unmapped):
    # R1.01 .. R4.03, every one explicitly mapped to None.
    **{
        f"R{group}.{item:02d}": None
        for group in range(1, 5)
        for item in range(1, 4)
    },
}

# Minimum threshold for "enable" recommendations (% of sector spans)
ENABLE_THRESHOLD_PCT = 3.0  # Only recommend enable if >= 3% of sector spans
|
||||
|
||||
|
||||
@dataclass
class SectorValidation:
    """Sector-scoped validation outcome for one L1 config.

    Only the first four fields are required; every other field has an empty
    default so a result can be built up incrementally.
    """

    # --- Identification / volume ---
    sector_code: str
    businesses: list[str]
    span_count: int

    # --- Coverage ---
    enabled_coverage: float
    disabled_hits: dict[str, int] = field(default_factory=dict)
    unmapped_count: int = 0

    # --- Distributions ---
    primitive_counts: dict[str, int] = field(default_factory=dict)
    domain_distribution: dict[str, int] = field(default_factory=dict)
    valence_distribution: dict[str, int] = field(default_factory=dict)
    top_urt_codes: list[tuple[str, int]] = field(default_factory=list)

    # --- Recommendations (threshold-gated), as (primitive, pct) pairs ---
    recommended_enables: list[tuple[str, float]] = field(default_factory=list)
    recommended_disables: list[tuple[str, float]] = field(default_factory=list)
    weight_issues: list[str] = field(default_factory=list)

    # --- Metadata ---
    validated_at: str = ""
    config_version: str = ""
|
||||
|
||||
|
||||
def load_l1_config(sector_code: str) -> dict[str, Any] | None:
    """Load L1 config for a sector.

    Reads ``<sector_code lower-cased>_config.json`` from CONFIGS_DIR.

    Args:
        sector_code: Sector identifier, e.g. "ENTERTAINMENT".

    Returns:
        The parsed JSON document, or None when the file does not exist.
    """
    config_file = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
    try:
        # Decode explicitly as UTF-8 rather than the platform's locale
        # encoding, and open directly so there is no exists()/open() race.
        with open(config_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
|
||||
|
||||
|
||||
def get_businesses_for_sector(sector_code: str) -> list[str]:
    """Return the businesses that BUSINESS_TO_SECTOR assigns to the sector, in declaration order."""
    matching: list[str] = []
    for business, sector in BUSINESS_TO_SECTOR.items():
        if sector == sector_code:
            matching.append(business)
    return matching
|
||||
|
||||
|
||||
async def fetch_spans_for_businesses(pool: asyncpg.Pool, businesses: list[str]) -> list[dict]:
    """Return the spans of the given businesses only, newest first, as plain dicts.

    An empty ``businesses`` list returns an empty list without querying.
    """
    if len(businesses) == 0:
        return []

    sql = """
        SELECT
            business_id,
            urt_primary,
            valence,
            intensity,
            span_text
        FROM pipeline.review_spans
        WHERE business_id = ANY($1)
        ORDER BY created_at DESC
    """
    records = await pool.fetch(sql, businesses)
    return list(map(dict, records))
|
||||
|
||||
|
||||
def analyze_sector_spans(
    spans: list[dict],
    config: dict[str, Any],
    businesses: list[str],
) -> SectorValidation:
    """Analyze spans for a specific sector.

    Counts spans per URT code, valence, domain letter and mapped primitive,
    then derives threshold-gated enable recommendations and weight issues.

    Args:
        spans: Span rows; each must have ``urt_primary`` and may have ``valence``.
        config: L1 config; ``sector_code`` is required, ``enabled``,
            ``disabled``, ``weights`` and ``config_version`` are optional.
        businesses: Businesses the spans were fetched for (stored as-is).

    Returns:
        A SectorValidation. ``validated_at`` is a naive UTC ISO-8601 timestamp.
    """
    # Local import: only this function needs `timezone`.
    from datetime import timezone

    sector_code = config["sector_code"]
    enabled = set(config.get("enabled", []))
    disabled = set(config.get("disabled", []))
    weights = config.get("weights", {})
    config_version = config.get("config_version", "1.0")

    # Counters
    primitive_counts: Counter = Counter()
    domain_counts: Counter = Counter()
    valence_counts: Counter = Counter()
    urt_counts: Counter = Counter()
    disabled_hits: Counter = Counter()
    unmapped = 0
    enabled_hits = 0

    for span in spans:
        urt_code = span["urt_primary"]
        valence = span.get("valence", "V0")

        urt_counts[urt_code] += 1
        valence_counts[valence] += 1
        # The first character of a URT code is its domain letter. A NULL or
        # empty code has none, so it is bucketed under "?" instead of raising.
        domain_counts[urt_code[0] if urt_code else "?"] += 1

        primitive = URT_TO_PRIMITIVE.get(urt_code)
        if not primitive:
            # Covers unknown codes and codes explicitly mapped to None.
            unmapped += 1
            continue

        primitive_counts[primitive] += 1
        if primitive in enabled:
            enabled_hits += 1
        elif primitive in disabled:
            disabled_hits[primitive] += 1

    total = len(spans)
    enabled_coverage = enabled_hits / total if total > 0 else 0

    def pct_of_total(count: int) -> float:
        # Share of all sector spans as a percentage; 0 when there are none.
        return count / total * 100 if total > 0 else 0

    # Threshold-gated recommendations
    recommended_enables = [
        (prim, pct_of_total(count))
        for prim, count in disabled_hits.most_common()
        if pct_of_total(count) >= ENABLE_THRESHOLD_PCT
    ]

    # Weight issues
    weight_issues = []
    for prim in weights:
        if primitive_counts[prim] == 0 and prim in enabled:
            weight_issues.append(f"{prim} weighted ({weights[prim]}x) but 0 appearances")

    # High-frequency unweighted
    for prim, count in primitive_counts.most_common(5):
        pct = pct_of_total(count)
        if prim in enabled and prim not in weights and pct >= 10:
            weight_issues.append(f"{prim} high freq ({pct:.1f}%) but unweighted")

    # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
    # tzinfo so the stored string keeps the naive ISO format.
    validated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return SectorValidation(
        sector_code=sector_code,
        businesses=businesses,
        span_count=total,
        enabled_coverage=enabled_coverage,
        disabled_hits=dict(disabled_hits),
        unmapped_count=unmapped,
        primitive_counts=dict(primitive_counts),
        domain_distribution=dict(domain_counts),
        valence_distribution=dict(valence_counts),
        top_urt_codes=urt_counts.most_common(15),
        recommended_enables=recommended_enables,
        weight_issues=weight_issues,
        validated_at=validated_at,
        config_version=config_version,
    )
|
||||
|
||||
|
||||
def print_sector_report(result: SectorValidation, config: dict):
    """Print a detailed, human-readable validation report for one sector.

    Args:
        result: Validation outcome for the sector (counts, distributions,
            recommendations and weight issues).
        config: The sector's L1 config; only the ``enabled``, ``disabled``
            and ``weights`` keys are read here.

    All percentages are relative to ``result.span_count``. A zero span count
    yields 0% everywhere instead of raising ``ZeroDivisionError`` or emitting
    a stray blank line.
    """
    total = result.span_count

    def pct_of(count) -> float:
        # Single guarded percentage helper so every section treats an empty
        # span set the same way.
        return count / total * 100 if total > 0 else 0.0

    rule = "=" * 70

    print("\n" + rule)
    print(f"SECTOR-SCOPED VALIDATION: {result.sector_code}")
    print(rule)

    print("\n📊 DATA SOURCE")
    print(f" Businesses: {', '.join(result.businesses)}")
    print(f" Total spans: {total:,}")
    print(f" Config version: {result.config_version}")

    print("\n📈 COVERAGE")
    print(f" Enabled coverage: {result.enabled_coverage:.1%}")
    print(f" Unmapped (R-domain): {result.unmapped_count} ({pct_of(result.unmapped_count):.1f}%)")

    # Domain distribution
    print("\n📁 DOMAIN DISTRIBUTION")
    domain_names = {"O": "Offering", "P": "People", "J": "Journey",
                    "E": "Environment", "A": "Access", "V": "Value", "R": "Relationship"}
    for domain in "OPJEVRA":
        count = result.domain_distribution.get(domain, 0)
        pct = pct_of(count)
        bar = "█" * int(pct / 2)  # one bar cell per 2 percentage points
        print(f" {domain} {domain_names.get(domain, '?'):12} {count:4} ({pct:5.1f}%) {bar}")

    # Top primitives
    print("\n🔝 TOP PRIMITIVES (sector-scoped)")
    enabled_set = set(config.get("enabled", []))
    disabled_set = set(config.get("disabled", []))
    weights = config.get("weights", {})

    top_primitives = sorted(result.primitive_counts.items(), key=lambda x: -x[1])[:12]
    for prim, count in top_primitives:
        if prim in enabled_set:
            status = "✓"
        elif prim in disabled_set:
            status = "✗"
        else:
            # Present in the data but listed neither as enabled nor disabled.
            status = "?"
        weight = f"({weights[prim]}x)" if prim in weights else ""
        print(f" {status} {prim:20} {count:4} ({pct_of(count):5.1f}%) {weight}")

    # Threshold-gated recommendations
    if result.recommended_enables:
        print(f"\n⚠️ RECOMMENDED ENABLES (≥{ENABLE_THRESHOLD_PCT}% threshold)")
        for prim, pct in result.recommended_enables:
            count = result.disabled_hits.get(prim, 0)
            print(f" → ENABLE {prim}: {count} spans ({pct:.1f}%)")
    else:
        print(f"\n✅ No primitives exceed {ENABLE_THRESHOLD_PCT}% threshold for enabling")

    # Low-frequency disabled (info only)
    low_freq_disabled = [
        (prim, count)
        for prim, count in result.disabled_hits.items()
        if pct_of(count) < ENABLE_THRESHOLD_PCT
    ]
    if low_freq_disabled:
        print("\n📋 DISABLED BUT APPEARING (below threshold - no action)")
        for prim, count in sorted(low_freq_disabled, key=lambda x: -x[1])[:5]:
            print(f" {prim}: {count} ({pct_of(count):.1f}%)")

    # Weight issues
    if result.weight_issues:
        print("\n⚖️ WEIGHT ISSUES")
        for issue in result.weight_issues:
            print(f" • {issue}")

    print(f"\n⏱️ Validated at: {result.validated_at}")
    print(rule)
|
||||
|
||||
|
||||
async def validate_sector(
    sector_code: str,
    db_url: str | None = None,
    verbose: bool = True,
) -> SectorValidation | None:
    """Run sector-scoped validation for one sector.

    Args:
        sector_code: Sector to validate; must be in ``SECTORS_WITH_DATA``.
        db_url: Database URL. When falsy, ``DATABASE_URL`` from the
            environment is used, then a local development default.
        verbose: Print diagnostics and the full report when true.

    Returns:
        The validation result, or ``None`` when the sector has no data, no
        L1 config, no mapped businesses, or no spans.
    """

    def _say(message: str) -> None:
        # Diagnostics are only emitted in verbose mode.
        if verbose:
            print(message)

    if sector_code not in SECTORS_WITH_DATA:
        _say(f"⚠️ {sector_code}: No real business data available for validation")
        return None

    config = load_l1_config(sector_code)
    if not config:
        _say(f"❌ No L1 config found for {sector_code}")
        return None

    businesses = get_businesses_for_sector(sector_code)
    if not businesses:
        _say(f"⚠️ {sector_code}: No businesses mapped")
        return None

    # NOTE(review): the fallback DSN embeds credentials for a local dev
    # database; consider requiring DATABASE_URL outside development.
    dsn = db_url or os.environ.get(
        "DATABASE_URL",
        "postgresql://scraper:scraper123@localhost:5437/scraper"
    )

    # The pool is closed on every exit path from this block.
    async with asyncpg.create_pool(dsn) as pool:
        spans = await fetch_spans_for_businesses(pool, businesses)
        if not spans:
            _say(f"⚠️ {sector_code}: No spans found for businesses")
            return None

        result = analyze_sector_spans(spans, config, businesses)
        if verbose:
            print_sector_report(result, config)
        return result
|
||||
|
||||
|
||||
async def validate_all_sectors(db_url: str | None = None) -> dict[str, SectorValidation]:
    """Validate every sector in ``SECTORS_WITH_DATA`` and print a summary table.

    Sectors are validated one after another in verbose mode; sectors whose
    validation yields no result are left out of the returned mapping.

    Args:
        db_url: Optional database URL forwarded to ``validate_sector``.

    Returns:
        Mapping of sector code to its validation result.
    """
    results = {}
    for sector in SECTORS_WITH_DATA:
        outcome = await validate_sector(sector, db_url, verbose=True)
        if not outcome:
            continue
        results[sector] = outcome

    heavy_rule = "=" * 70
    light_rule = "-" * 50

    # Print summary
    print("\n" + heavy_rule)
    print("VALIDATION SUMMARY")
    print(heavy_rule)
    print(f"\n{'Sector':<20} {'Spans':>8} {'Coverage':>10} {'Enables':>10}")
    print(light_rule)

    for sector, result in results.items():
        enables = len(result.recommended_enables)
        if enables > 0:
            enables_str = f"{enables} recs"
        else:
            enables_str = "✓ OK"
        print(f"{sector:<20} {result.span_count:>8,} {result.enabled_coverage:>9.1%} {enables_str:>10}")

    print(light_rule)
    print(f"Sectors validated: {len(results)}/{len(SECTORS_WITH_DATA)}")
    # NOTE(review): 20 is presumably the total number of defined sectors -
    # confirm, and derive it from the sector registry if one exists.
    print(f"Sectors without data: {20 - len(SECTORS_WITH_DATA)}")

    return results
|
||||
|
||||
|
||||
async def generate_summary_report(db_url: str | None = None) -> dict:
    """Build a compact per-sector summary of validation results.

    Each sector in ``SECTORS_WITH_DATA`` is validated quietly; sectors that
    yield no result are omitted.

    Args:
        db_url: Optional database URL forwarded to ``validate_sector``.

    Returns:
        Mapping of sector code to a dict with span count, rounded enabled
        coverage, recommendations, weight issues, config version and
        validation timestamp.
    """
    summary = {}

    for sector in SECTORS_WITH_DATA:
        validation = await validate_sector(sector, db_url, verbose=False)
        if not validation:
            continue

        entry = {}
        entry["span_count"] = validation.span_count
        entry["enabled_coverage"] = round(validation.enabled_coverage, 3)
        entry["recommended_enables"] = validation.recommended_enables
        entry["weight_issues"] = validation.weight_issues
        entry["config_version"] = validation.config_version
        entry["validated_at"] = validation.validated_at
        summary[sector] = entry

    return summary
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for sector-scoped L1 config validation.

    Exactly one action runs, in this order of precedence: ``--report``
    (JSON summary), ``--all`` (every sector with data), ``--sector`` (one
    sector, upper-cased). With no action, help and the list of sectors that
    have data are printed.
    """
    parser = argparse.ArgumentParser(description="Sector-scoped L1 config validation")
    parser.add_argument("--sector", help="Validate specific sector")
    parser.add_argument("--all", action="store_true", help="Validate all sectors with data")
    parser.add_argument("--report", action="store_true", help="Generate JSON summary report")
    parser.add_argument("--db-url", help="Database URL")

    args = parser.parse_args()

    if args.report:
        summary = asyncio.run(generate_summary_report(args.db_url))
        print(json.dumps(summary, indent=2))
        return

    if args.all:
        asyncio.run(validate_all_sectors(args.db_url))
        return

    if args.sector:
        asyncio.run(validate_sector(args.sector.upper(), args.db_url))
        return

    # No action requested: show usage plus the sectors that can be validated.
    parser.print_help()
    print("\n\nSectors with real data:", ", ".join(sorted(SECTORS_WITH_DATA)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,733 @@
|
||||
"""
|
||||
Classification Pipeline - LLM-powered primitives classification.
|
||||
|
||||
Classifies reviews using the primitives taxonomy (MANNER, SPEED, VALUE_FOR_MONEY, etc.)
|
||||
and stores results in detected_spans_v2.
|
||||
|
||||
Stages:
|
||||
- fetch: Find reviews without classification
|
||||
- classify: LLM-powered span extraction with primitives
|
||||
- save: Store results to detected_spans_v2
|
||||
|
||||
Usage:
|
||||
pipeline = ClassificationPipeline()
|
||||
await pipeline.initialize()
|
||||
result = await pipeline.process({"business_id": "Go Karts Mar Menor", "limit": 100})
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import unicodedata
|
||||
import uuid
|
||||
from collections import Counter
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from pipeline_core import (
|
||||
BasePipeline,
|
||||
DashboardConfig,
|
||||
DashboardSection,
|
||||
PipelineMetadata,
|
||||
PipelineResult as BasePipelineResult,
|
||||
StageResult,
|
||||
WidgetConfig,
|
||||
)
|
||||
|
||||
from reviewiq_pipeline.config import Config
|
||||
from reviewiq_pipeline.db.connection import DatabasePool
|
||||
from reviewiq_pipeline.services.llm_client import LLMClient, LLMClientBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Stage names
|
||||
STAGE_NAMES = ["fetch", "classify", "save"]
|
||||
|
||||
# Primitives taxonomy - maps primitive to domain
|
||||
PRIMITIVES_BY_DOMAIN = {
    "O": [
        "TASTE", "CRAFT", "FRESHNESS", "TEMPERATURE",
        "EFFECTIVENESS", "ACCURACY", "CONDITION", "CONSISTENCY",
    ],
    "P": ["MANNER", "COMPETENCE", "ATTENTIVENESS", "COMMUNICATION"],
    "J": ["SPEED", "FRICTION", "RELIABILITY", "AVAILABILITY"],
    "E": [
        "CLEANLINESS", "COMFORT", "SAFETY",
        "AMBIANCE", "ACCESSIBILITY", "DIGITAL_UX",
    ],
    "V": ["PRICE_LEVEL", "PRICE_FAIRNESS", "PRICE_TRANSPARENCY", "VALUE_FOR_MONEY"],
    "meta": [
        "HONESTY", "ETHICS", "PROMISES", "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",
        "RETURN_INTENT", "RECOMMEND", "RECOGNITION", "UNMAPPED", "NON_INFORMATIVE",
    ],
}

# Flattened views of the taxonomy: every primitive in declaration order, and
# a reverse lookup from primitive name to its domain key.
ALL_PRIMITIVES = []
PRIMITIVE_TO_DOMAIN = {}
for domain, primitives in PRIMITIVES_BY_DOMAIN.items():
    ALL_PRIMITIVES.extend(primitives)
    for p in primitives:
        PRIMITIVE_TO_DOMAIN[p] = domain
|
||||
|
||||
# Classification prompt
|
||||
CLASSIFICATION_PROMPT = """You are a review classifier using primitive-based analysis.
|
||||
|
||||
## TASK
|
||||
Extract semantic spans from customer reviews and classify each span to exactly ONE primitive.
|
||||
|
||||
## PRIMITIVES (use ONLY these)
|
||||
### OUTPUT (O) - Product/Service Quality
|
||||
- TASTE: Flavor quality (food/beverage)
|
||||
- CRAFT: Skill of execution, craftsmanship
|
||||
- FRESHNESS: How fresh/new the product is
|
||||
- TEMPERATURE: Serving temperature
|
||||
- EFFECTIVENESS: Does it work/achieve purpose
|
||||
- ACCURACY: Correct execution of order
|
||||
- CONDITION: State at delivery
|
||||
- CONSISTENCY: Same quality each time
|
||||
|
||||
### PEOPLE (P) - Staff Interactions
|
||||
- MANNER: Friendliness and warmth
|
||||
- COMPETENCE: Knowledge and skill
|
||||
- ATTENTIVENESS: Being present and responsive
|
||||
- COMMUNICATION: Clarity and updates
|
||||
|
||||
### JOURNEY (J) - Process and Timing
|
||||
- SPEED: How fast things happen
|
||||
- FRICTION: Ease of process
|
||||
- RELIABILITY: Dependable service
|
||||
- AVAILABILITY: Access to service/staff
|
||||
|
||||
### ENVIRONMENT (E) - Physical/Digital Space
|
||||
- CLEANLINESS: Hygiene and tidiness
|
||||
- COMFORT: Physical ease
|
||||
- SAFETY: Physical safety
|
||||
- AMBIANCE: Overall mood/atmosphere
|
||||
- ACCESSIBILITY: Ease of access
|
||||
- DIGITAL_UX: Digital experience
|
||||
|
||||
### VALUE (V) - Cost and Worth
|
||||
- PRICE_LEVEL: Absolute cost
|
||||
- PRICE_FAIRNESS: Fair for what you get
|
||||
- PRICE_TRANSPARENCY: Clear about costs
|
||||
- VALUE_FOR_MONEY: Overall value assessment
|
||||
|
||||
### META - Trust and Sentiment
|
||||
- HONESTY: Truthfulness
|
||||
- ETHICS: Moral conduct
|
||||
- PROMISES: Keeping commitments
|
||||
- ACKNOWLEDGMENT: Recognizing issues
|
||||
- RESPONSE_QUALITY: How business responds
|
||||
- RECOVERY: Making amends
|
||||
- RETURN_INTENT: Would come back
|
||||
- RECOMMEND: Would suggest to others
|
||||
- RECOGNITION: Customer acknowledgment
|
||||
- UNMAPPED: Cannot classify (use sparingly)
|
||||
- NON_INFORMATIVE: No actionable content
|
||||
|
||||
## RULES
|
||||
1. Extract 1-5 spans per review (prefer fewer, larger spans about same topic)
|
||||
2. Each span gets exactly ONE primitive (most specific match)
|
||||
3. Valence: + (positive), - (negative), 0 (neutral), ± (mixed)
|
||||
4. Intensity: 1 (low), 2 (moderate), 3 (high/extreme)
|
||||
5. Detail: 1 (vague), 2 (some detail), 3 (specific/actionable)
|
||||
6. Confidence: 0.0 to 1.0
|
||||
|
||||
## OUTPUT FORMAT (JSON only)
|
||||
{
|
||||
"spans": [
|
||||
{
|
||||
"text": "exact text from review",
|
||||
"start": 0,
|
||||
"end": 25,
|
||||
"primitive": "MANNER",
|
||||
"valence": "+",
|
||||
"intensity": 2,
|
||||
"detail": 2,
|
||||
"confidence": 0.85,
|
||||
"entity": null,
|
||||
"entity_type": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Return valid JSON only, no markdown."""
|
||||
|
||||
# Non-informative detection
|
||||
# Matches text that is entirely punctuation/whitespace, entirely
# emoji/symbols, or only a Google-translate attribution line.
PURE_JUNK_RE = re.compile(
    r'^[\s\.\!\?\,\-\_\~\*\#\@]+$'
    r'|^[\U0001F300-\U0001F9FF\U0001FA00-\U0001FAFF\U00002600-\U000027BF\s\.\!\?]+$'
    r'|^(translated by google|traducido por google)[\.\s]*$',
    re.IGNORECASE
)


def is_non_informative(text: str) -> tuple[bool, str]:
    """Decide whether a review can be skipped without calling the LLM.

    Args:
        text: Raw review text; may be empty or ``None``.

    Returns:
        ``(True, reason)`` when the text carries no usable content, where
        ``reason`` is one of ``"empty"``, ``"junk_pattern"``,
        ``"no_content"`` or ``"pure_repetition"``; otherwise ``(False, "")``.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return True, "empty"

    if PURE_JUNK_RE.match(stripped) is not None:
        return True, "junk_pattern"

    # Unicode major category per character: "L" = letter, "N" = number.
    major_classes = [unicodedata.category(ch)[0] for ch in stripped]
    letters = major_classes.count("L")
    numerals = major_classes.count("N")
    if not (letters or numerals):
        return True, "no_content"

    # One short word repeated three or more times (case-insensitive).
    words = stripped.split()
    if len(words) >= 3 and letters < 20 and len({w.lower() for w in words}) == 1:
        return True, "pure_repetition"

    return False, ""
|
||||
|
||||
|
||||
def compute_review_hash(text: str) -> str:
    """Return a short, stable fingerprint of a review's text.

    The text is trimmed and lower-cased before hashing, so differences in
    surrounding whitespace or letter case do not change the result.

    Args:
        text: Review text.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the
        normalized text.
    """
    canonical = text.strip().lower().encode()
    digest = hashlib.sha256(canonical).hexdigest()
    return digest[:16]
|
||||
|
||||
|
||||
class ClassificationPipeline(BasePipeline):
    """
    Classification Pipeline - LLM-powered primitives classification.

    Processes reviews through LLM to extract semantic spans and classify
    them using the primitives taxonomy.

    Stages (see ``STAGE_NAMES``):
    - fetch: load reviews that have no rows in detected_spans_v2 yet
    - classify: pre-filter junk locally, send the rest to the LLM
    - save: insert one detected_spans_v2 row per span
    """

    def __init__(self, config: Config | None = None):
        """Initialize the pipeline; connections are opened by ``initialize``."""
        self._config = config or Config()
        self._db: DatabasePool | None = None
        self._llm: LLMClientBase | None = None
        self._initialized = False

    @property
    def config(self) -> Config:
        """Get pipeline configuration."""
        return self._config

    @property
    def metadata(self) -> PipelineMetadata:
        """Get pipeline metadata."""
        return PipelineMetadata(
            id="classification",
            name="Primitives Classification Pipeline",
            description="LLM-powered span extraction and primitives classification. Processes reviews and stores results in detected_spans_v2.",
            version="1.0.0",
            stages=STAGE_NAMES,
            input_type="BusinessInput",
        )

    async def initialize(self) -> None:
        """Initialize database and LLM connections (no-op if already done).

        If LLM setup fails after the database pool was opened, the pool is
        closed again before the error propagates, so a retry does not leak
        a pool.
        """
        if self._initialized:
            return

        logger.info("Initializing Classification pipeline...")

        db = DatabasePool(self._config)
        await db.initialize()

        try:
            llm = LLMClient.create(self._config)
            llm.set_prompt(CLASSIFICATION_PROMPT)
        except Exception:
            await db.close()
            raise

        self._db = db
        self._llm = llm
        self._initialized = True
        logger.info("Classification pipeline initialized")

    async def close(self) -> None:
        """Close all connections."""
        if self._llm:
            await self._llm.close()
            self._llm = None

        if self._db:
            await self._db.close()
            self._db = None

        self._initialized = False
        logger.info("Classification pipeline closed")

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since ``started`` (monotonic clock)."""
        return int((time.monotonic() - started) * 1000)

    @staticmethod
    def _coerce_job_uuid(job_id: Any) -> Any:
        """Return ``job_id`` as a UUID: strings are parsed, falsy becomes None.

        Raises:
            ValueError: If ``job_id`` is a string that is not a valid UUID.
        """
        if not job_id:
            return None
        return uuid.UUID(job_id) if isinstance(job_id, str) else job_id

    @staticmethod
    def _stage_succeeded(result: Any) -> bool:
        """Read the ``success`` flag from a stage result (mapping or object)."""
        if isinstance(result, dict):
            return bool(result.get("success", False))
        return bool(getattr(result, "success", False))

    def _failed_stage(self, stage: str, exc: Exception, started: float) -> StageResult:
        """Build the StageResult recorded when ``stage`` raised ``exc``."""
        return StageResult(
            stage=stage,
            success=False,
            data={},
            error=str(exc),
            duration_ms=self._elapsed_ms(started),
        )

    async def _resolve_business_id(self, job_id: Any) -> str | None:
        """Look up the business name recorded for ``job_id`` in ``jobs``.

        Best-effort: any failure (bad UUID, database error) is logged as a
        warning and ``None`` is returned.
        """
        try:
            async with self._db.pool.acquire() as conn:
                row = await conn.fetchrow(
                    "SELECT business_name FROM jobs WHERE job_id = $1",
                    self._coerce_job_uuid(job_id),
                )
        except Exception as e:
            logger.warning(f"Failed to resolve business_id: {e}")
            return None

        if row and row["business_name"]:
            business_id = row["business_name"]
            logger.info(f"Resolved business_id '{business_id}' from job_id")
            return business_id
        return None

    async def process(
        self,
        input_data: dict[str, Any],
        stages: list[str] | None = None,
    ) -> BasePipelineResult:
        """
        Process reviews through classification.

        Args:
            input_data: Must contain business_id OR job_id. Optional: limit, batch_size
            stages: List of stage names to run (default: all)

        Returns:
            BasePipelineResult with classification stats. ``success`` is
            true only if every stage that was attempted succeeded;
            ``stages_run`` lists only the stages that completed.
        """
        await self.initialize()

        stages = stages or STAGE_NAMES
        stages_run: list[str] = []
        stage_results: dict[str, Any] = {}

        business_id = input_data.get("business_id")
        job_id = input_data.get("job_id")
        limit = input_data.get("limit", 100)
        batch_size = input_data.get("batch_size", 10)

        # Resolve business_id from job_id
        if not business_id and job_id:
            business_id = await self._resolve_business_id(job_id)

        if not business_id:
            return BasePipelineResult(
                pipeline_id="classification",
                stages_run=[],
                stage_results={},
                success=False,
                error="business_id is required (provide business_id or job_id)",
            )

        # Generate run_id for this execution
        run_id = uuid.uuid4()
        reviews: list[dict[str, Any]] = []
        classified: list[dict[str, Any]] = []

        try:
            # Stage: Fetch unclassified reviews
            if "fetch" in stages:
                start = time.monotonic()
                logger.info(f"Fetching unclassified reviews for {business_id}")

                try:
                    reviews = await self._fetch_unclassified(business_id, limit)
                    stages_run.append("fetch")
                    stage_results["fetch"] = StageResult(
                        stage="fetch",
                        success=True,
                        data={"reviews_found": len(reviews)},
                        error=None,
                        duration_ms=self._elapsed_ms(start),
                    )
                    logger.info(f"Found {len(reviews)} unclassified reviews")
                except Exception as e:
                    # Nothing downstream can run without reviews: stop here.
                    logger.exception("Fetch failed")
                    stage_results["fetch"] = self._failed_stage("fetch", e, start)
                    return BasePipelineResult(
                        pipeline_id="classification",
                        stages_run=stages_run,
                        stage_results=stage_results,
                        success=False,
                        error=f"Fetch failed: {e}",
                    )

            # Stage: Classify reviews
            if "classify" in stages and reviews:
                start = time.monotonic()
                logger.info(f"Classifying {len(reviews)} reviews")

                try:
                    classified = await self._classify_reviews(
                        reviews,
                        business_id,
                        batch_size,
                    )
                    stages_run.append("classify")

                    total_spans = sum(len(c.get("spans", [])) for c in classified)
                    stage_results["classify"] = StageResult(
                        stage="classify",
                        success=True,
                        data={
                            "reviews_classified": len(classified),
                            "total_spans": total_spans,
                            "llm_cost_usd": self._llm.total_cost_usd if self._llm else 0,
                        },
                        error=None,
                        duration_ms=self._elapsed_ms(start),
                    )
                    logger.info(f"Classified {len(classified)} reviews, {total_spans} spans")
                except Exception as e:
                    logger.exception("Classification failed")
                    stage_results["classify"] = self._failed_stage("classify", e, start)

            # Stage: Save results
            if "save" in stages and classified:
                start = time.monotonic()
                logger.info(f"Saving {len(classified)} classifications")

                try:
                    saved_count = await self._save_classifications(
                        classified,
                        business_id,
                        job_id,
                        run_id,
                    )
                    stages_run.append("save")
                    stage_results["save"] = StageResult(
                        stage="save",
                        success=True,
                        data={"spans_saved": saved_count},
                        error=None,
                        duration_ms=self._elapsed_ms(start),
                    )
                    logger.info(f"Saved {saved_count} spans to detected_spans_v2")
                except Exception as e:
                    logger.exception("Save failed")
                    stage_results["save"] = self._failed_stage("save", e, start)

            # Failed stages are recorded in stage_results but never appended
            # to stages_run, so success must be derived from every recorded
            # result; otherwise a failed classify/save would be reported as
            # an overall success.
            return BasePipelineResult(
                pipeline_id="classification",
                stages_run=stages_run,
                stage_results=stage_results,
                success=all(self._stage_succeeded(r) for r in stage_results.values()),
            )

        except Exception as e:
            logger.exception("Pipeline failed")
            return BasePipelineResult(
                pipeline_id="classification",
                stages_run=stages_run,
                stage_results=stage_results,
                success=False,
                error=str(e),
            )

    async def _fetch_unclassified(
        self,
        business_id: str,
        limit: int,
    ) -> list[dict[str, Any]]:
        """Fetch reviews that haven't been classified yet.

        Returns up to ``limit`` reviews for ``business_id`` with non-empty
        text and no row in detected_spans_v2, newest first. A missing
        rating is replaced by 3.
        """
        async with self._db.pool.acquire() as conn:
            # Get reviews from reviews_latest that don't have spans in detected_spans_v2
            rows = await conn.fetch(
                """
                SELECT
                    r.review_id,
                    r.business_id,
                    r.text AS review_text,
                    r.rating,
                    r.review_time
                FROM pipeline.reviews_latest r
                LEFT JOIN (
                    SELECT DISTINCT review_id, business_id
                    FROM pipeline.detected_spans_v2
                ) s ON s.review_id = r.review_id AND s.business_id = r.business_id
                WHERE r.business_id = $1
                  AND s.review_id IS NULL
                  AND r.text IS NOT NULL
                  AND LENGTH(r.text) > 0
                ORDER BY r.review_time DESC
                LIMIT $2
                """,
                business_id,
                limit,
            )

        return [
            {
                "review_id": row["review_id"],
                "business_id": row["business_id"],
                "text": row["review_text"],
                "rating": row["rating"] or 3,
                "review_time": row["review_time"],
            }
            for row in rows
        ]

    @staticmethod
    def _synthetic_result(
        review: dict[str, Any],
        business_id: str,
        primitive: str,
        confidence: float,
        mode: str,
    ) -> dict[str, Any]:
        """Build a one-span classification covering the whole review text.

        Used for reviews that bypass the LLM (junk) or whose LLM call failed.
        """
        text = review.get("text", "")
        return {
            "review_id": review["review_id"],
            "business_id": business_id,
            "text": text,
            "rating": review.get("rating", 3),
            "spans": [{
                "text": text,
                "start": 0,
                "end": len(text),
                "primitive": primitive,
                "valence": "0",
                "intensity": 1,
                "detail": 1,
                "confidence": confidence,
                "entity": None,
                "entity_type": None,
                "mode": mode,
            }],
            "review_hash": compute_review_hash(text),
        }

    async def _classify_reviews(
        self,
        reviews: list[dict[str, Any]],
        business_id: str,
        batch_size: int,
    ) -> list[dict[str, Any]]:
        """Classify reviews using LLM.

        Non-informative reviews get a single NON_INFORMATIVE span without an
        LLM call. If the LLM call for a review fails, the review gets a
        single UNMAPPED span with confidence 0.0 and mode ``llm_error``.

        Args:
            reviews: Review dicts as produced by ``_fetch_unclassified``.
            business_id: Business the reviews belong to.
            batch_size: Accepted for interface compatibility; reviews are
                currently classified one at a time.

        Returns:
            One classification dict per input review, in input order.
        """
        results = []

        for review in reviews:
            text = review.get("text", "")
            rating = review.get("rating", 3)

            # Check for non-informative
            is_junk, reason = is_non_informative(text)
            if is_junk:
                results.append(
                    self._synthetic_result(review, business_id, "NON_INFORMATIVE", 1.0, reason)
                )
                continue

            # Classify with LLM
            # NOTE(review): only the review text is sent; the rating is not
            # part of the LLM input. Confirm whether that is intended.
            try:
                response, metadata = await self._llm.classify(text)

                spans = response.get("spans", [])

                # Validate primitives
                for span in spans:
                    primitive = span.get("primitive")
                    if primitive not in ALL_PRIMITIVES:
                        # Keep the rejected label before overwriting it;
                        # otherwise the keyword would always be "UNMAPPED".
                        span["unmapped_keywords"] = [
                            "unknown" if primitive is None else str(primitive)
                        ]
                        span["primitive"] = "UNMAPPED"

                results.append({
                    "review_id": review["review_id"],
                    "business_id": business_id,
                    "text": text,
                    "rating": rating,
                    "spans": spans,
                    "review_hash": compute_review_hash(text),
                    "model": metadata.get("model"),
                })

            except Exception as e:
                logger.warning(f"LLM classification failed for review {review['review_id']}: {e}")
                # Fallback to UNMAPPED
                results.append(
                    self._synthetic_result(review, business_id, "UNMAPPED", 0.0, "llm_error")
                )

        return results

    async def _save_classifications(
        self,
        classifications: list[dict[str, Any]],
        business_id: str,
        job_id: str | uuid.UUID | None,
        run_id: uuid.UUID,
    ) -> int:
        """Save classification results to detected_spans_v2.

        One row is inserted per span. A span that fails to insert is logged
        and skipped so the remaining spans are still saved.

        Args:
            classifications: Output of ``_classify_reviews``.
            business_id: Business the spans belong to.
            job_id: Optional job identifier, as a UUID or its string form.
            run_id: Identifier of the current pipeline run.

        Returns:
            Number of spans inserted.

        Raises:
            ValueError: If ``job_id`` is a string that is not a valid UUID.
        """
        saved_count = 0
        config_version = f"primitives_v1_{datetime.utcnow().strftime('%Y%m%d')}"
        # process() accepts job_id as str or UUID; uuid.UUID() rejects UUID
        # instances, so coerce once up front instead of failing every insert.
        job_uuid = self._coerce_job_uuid(job_id)

        async with self._db.pool.acquire() as conn:
            # Get GBP path for business
            gbp_row = await conn.fetchrow(
                """
                SELECT gbp_category_path
                FROM jobs
                WHERE business_name = $1
                  AND gbp_category_path IS NOT NULL
                ORDER BY created_at DESC
                LIMIT 1
                """,
                business_id,
            )
            gbp_path = str(gbp_row["gbp_category_path"]) if gbp_row and gbp_row["gbp_category_path"] else "unknown"
            # Sector code is the first dot-separated label of the path (the
            # whole path when it contains no dot).
            sector_code = gbp_path.partition(".")[0]

            for classification in classifications:
                review_id = classification["review_id"]
                review_hash = classification.get("review_hash")
                model = classification.get("model")

                for span in classification.get("spans", []):
                    try:
                        await conn.execute(
                            """
                            INSERT INTO pipeline.detected_spans_v2 (
                                job_id, business_id, review_id, gbp_path, sector_code,
                                config_version, primitive, valence, intensity, detail, mode,
                                confidence, span_text, span_start, span_end,
                                unmapped_keywords, entity, entity_type,
                                model, review_hash, run_id, created_at
                            ) VALUES (
                                $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                                $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW()
                            )
                            """,
                            job_uuid,
                            business_id,
                            review_id,
                            gbp_path,
                            sector_code,
                            config_version,
                            span.get("primitive", "UNMAPPED"),
                            span.get("valence", "0"),
                            span.get("intensity", 1),
                            span.get("detail", 1),
                            span.get("mode"),
                            span.get("confidence", 0.5),
                            span.get("text", ""),
                            span.get("start", 0),
                            span.get("end", 0),
                            span.get("unmapped_keywords"),
                            span.get("entity"),
                            span.get("entity_type"),
                            model,
                            review_hash,
                            run_id,
                        )
                        saved_count += 1
                    except Exception as e:
                        logger.warning(f"Failed to save span: {e}")

        return saved_count

    def get_dashboard_config(self) -> DashboardConfig:
        """Get dashboard configuration."""
        return DashboardConfig(
            pipeline_id="classification",
            title="Classification Pipeline",
            description="Monitor classification progress and quality",
            sections=[
                DashboardSection(
                    id="stats",
                    title="Classification Stats",
                    widgets=[
                        WidgetConfig(
                            id="reviews_classified",
                            type="stat_card",
                            title="Reviews Classified",
                            grid={"x": 0, "y": 0, "w": 3, "h": 1},
                            config={"value_key": "reviews_classified"},
                        ),
                        WidgetConfig(
                            id="total_spans",
                            type="stat_card",
                            title="Total Spans",
                            grid={"x": 3, "y": 0, "w": 3, "h": 1},
                            config={"value_key": "total_spans"},
                        ),
                        WidgetConfig(
                            id="llm_cost",
                            type="stat_card",
                            title="LLM Cost",
                            grid={"x": 6, "y": 0, "w": 3, "h": 1},
                            config={"value_key": "llm_cost_usd", "format": "${value:.4f}"},
                        ),
                    ],
                ),
            ],
            default_time_range="7d",
            refresh_interval=60,
        )

    async def get_widget_data(
        self,
        widget_id: str,
        params: dict[str, Any],
    ) -> dict[str, Any]:
        """Get data for dashboard widgets.

        The same aggregate payload is returned for every ``widget_id``; each
        widget picks its value through its configured ``value_key``.

        Args:
            widget_id: Widget requesting data (currently not used to select
                the payload).
            params: Must contain ``business_id``.

        Returns:
            Counts for the business, or ``{"error": ...}`` when
            ``business_id`` is missing.
        """
        await self.initialize()

        business_id = params.get("business_id")
        if not business_id:
            return {"error": "business_id required"}

        async with self._db.pool.acquire() as conn:
            row = await conn.fetchrow(
                """
                SELECT
                    COUNT(DISTINCT review_id) as reviews_classified,
                    COUNT(*) as total_spans
                FROM pipeline.detected_spans_v2
                WHERE business_id = $1
                """,
                business_id,
            )

        return {
            "reviews_classified": row["reviews_classified"] or 0,
            "total_spans": row["total_spans"] or 0,
            "llm_cost_usd": 0,  # Would need to track this
        }

    async def health_check(self) -> dict[str, Any]:
        """Check pipeline health.

        Only the database check affects ``healthy``; the LLM entry is
        informational (configured provider/model or ``not_initialized``).
        """
        await self.initialize()

        checks = {}
        healthy = True

        # Check database
        try:
            async with self._db.pool.acquire() as conn:
                await conn.fetchval("SELECT 1")
            checks["database"] = "ok"
        except Exception as e:
            checks["database"] = str(e)
            healthy = False

        # Check LLM
        try:
            if self._llm:
                checks["llm"] = f"{self._config.llm_provider}/{self._config.llm_model}"
            else:
                checks["llm"] = "not_initialized"
        except Exception as e:
            checks["llm"] = str(e)

        return {"healthy": healthy, "checks": checks}
|
||||
@@ -76,6 +76,51 @@ class Config(BaseSettings):
|
||||
batch_size: int = Field(default=50, ge=1, le=500)
|
||||
trust_score_floor: float = Field(default=0.2, ge=0.0, le=1.0)
|
||||
|
||||
# Batched Classification
|
||||
classification_batch_size: int = Field(
|
||||
default=0,
|
||||
ge=0,
|
||||
le=200,
|
||||
description="Number of reviews per LLM call. 0 = auto-calculate based on context window",
|
||||
)
|
||||
classification_max_concurrent: int = Field(
|
||||
default=0,
|
||||
ge=0,
|
||||
description="Maximum concurrent batch requests. 0 = unlimited (run all batches in parallel)",
|
||||
)
|
||||
classification_target_utilization: float = Field(
|
||||
default=0.70,
|
||||
ge=0.3,
|
||||
le=0.85,
|
||||
description="Target context window utilization. Optimal: 0.60-0.75. Above 0.85 causes ~23% quality degradation.",
|
||||
)
|
||||
use_prompt_caching: bool = Field(
|
||||
default=True,
|
||||
description="Enable prompt caching for cost reduction (OpenAI/Anthropic)",
|
||||
)
|
||||
|
||||
# Smart Review Router (cost optimization)
|
||||
router_enabled: bool = Field(
|
||||
default=False,
|
||||
description="Enable smart review routing to skip/route trivial reviews",
|
||||
)
|
||||
router_skip_enabled: bool = Field(
|
||||
default=True,
|
||||
description="Allow SKIP tier (no LLM, assign generic code)",
|
||||
)
|
||||
router_cheap_model_enabled: bool = Field(
|
||||
default=True,
|
||||
description="Allow CHEAP tier (use Haiku instead of Sonnet)",
|
||||
)
|
||||
router_cheap_model: str = Field(
|
||||
default="claude-3-5-haiku-20241022",
|
||||
description="Model to use for CHEAP tier routing",
|
||||
)
|
||||
router_conservative: bool = Field(
|
||||
default=True,
|
||||
description="Use conservative routing (fewer false negatives)",
|
||||
)
|
||||
|
||||
# Migrations
|
||||
migrations_path: str = Field(
|
||||
default="",
|
||||
|
||||
@@ -7,6 +7,7 @@ enabling independent development and validation of each stage.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Any, Literal, TypedDict
|
||||
|
||||
|
||||
@@ -181,13 +182,14 @@ class ReviewToClassify(TypedDict):
|
||||
review_time: str
|
||||
|
||||
|
||||
class ClassificationConfig(TypedDict):
|
||||
class ClassificationConfig(TypedDict, total=False):
|
||||
"""Configuration for LLM classification."""
|
||||
|
||||
model: str
|
||||
taxonomy_version: str
|
||||
profile: ProfileType
|
||||
max_spans_per_review: int
|
||||
job_id: str | None # Optional job_id for tracking
|
||||
|
||||
|
||||
class Stage2Input(TypedDict):
|
||||
@@ -329,6 +331,7 @@ class Stage3Input(TypedDict):
|
||||
"""Input to Stage 3 issue routing."""
|
||||
|
||||
spans: list[SpanToRoute]
|
||||
job_id: str | None # Optional job_id for linking issues to pipeline executions
|
||||
|
||||
|
||||
class RoutedSpan(TypedDict):
|
||||
@@ -379,7 +382,7 @@ class FactRecord(TypedDict, total=False):
|
||||
# Keys
|
||||
business_id: str
|
||||
place_id: str
|
||||
period_date: str
|
||||
period_date: date
|
||||
bucket_type: str
|
||||
subject_type: SubjectType
|
||||
subject_id: str
|
||||
@@ -574,7 +577,7 @@ class FactTimeseries(TypedDict, total=False):
|
||||
id: int
|
||||
business_id: str
|
||||
place_id: str
|
||||
period_date: str
|
||||
period_date: date
|
||||
bucket_type: BucketType
|
||||
subject_type: SubjectType
|
||||
subject_id: str
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
-- Migration: 006_add_job_id_to_issues.sql
-- Purpose: Add job_id column to issues table for tracking pipeline execution context

-- The column is added without NOT NULL or a default, so existing rows get NULL.
ALTER TABLE pipeline.issues
    ADD COLUMN IF NOT EXISTS job_id UUID;

-- Index for filtering issues by job_id.
CREATE INDEX IF NOT EXISTS idx_issues_job_id
    ON pipeline.issues (job_id);

COMMENT ON COLUMN pipeline.issues.job_id IS
    'References the scraper job that triggered the pipeline execution';
|
||||
@@ -0,0 +1,352 @@
|
||||
-- Migration: Implement URT taxonomy with PostgreSQL ltree
|
||||
-- Benefits:
|
||||
-- 1. Hierarchical queries (find all codes under a domain/category)
|
||||
-- 2. Index-assisted ancestor/descendant lookups (no recursive joins needed)
|
||||
-- 3. Pattern matching on paths (e.g., 'O.*' for all Offering codes)
|
||||
-- 4. Efficient GiST indexing for tree operations
|
||||
-- 5. Aggregations at any level of hierarchy
|
||||
|
||||
-- Enable ltree extension
|
||||
CREATE EXTENSION IF NOT EXISTS ltree;
|
||||
|
||||
-- ============================================================================
|
||||
-- NEW UNIFIED TAXONOMY TABLE
|
||||
-- ============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS pipeline.urt_taxonomy (
    id                  SERIAL PRIMARY KEY,

    -- Position in the hierarchy as an ltree path: Domain.Category.Subcode
    -- (e.g. 'O.O1.O1_01').
    path                ltree NOT NULL UNIQUE,

    -- Human-readable code (e.g. 'O1.01').
    code                VARCHAR(10) NOT NULL UNIQUE,

    -- Kind of node, used for filtering.
    node_type           VARCHAR(20) NOT NULL
                        CHECK (node_type IN ('domain', 'category', 'subcode')),

    -- Depth in the hierarchy (1=domain, 2=category, 3=subcode), derived from
    -- the number of labels in path.
    level               INT GENERATED ALWAYS AS (nlevel(path)) STORED,

    -- Display name and definition.
    name                VARCHAR(100) NOT NULL,
    definition          TEXT,

    -- Examples (populated for subcodes).
    positive_example    TEXT,
    negative_example    TEXT,

    -- Actionability (populated for subcodes).
    solution            TEXT,
    solution_complexity VARCHAR(10) DEFAULT 'medium',
    marketing_angle     TEXT,

    -- Owner routing.
    default_owner       VARCHAR(50),

    -- Row metadata.
    is_active           BOOLEAN DEFAULT TRUE,
    created_at          TIMESTAMP DEFAULT NOW(),
    updated_at          TIMESTAMP DEFAULT NOW()
);
|
||||
|
||||
-- ============================================================================
|
||||
-- INDEXES FOR LTREE OPERATIONS
|
||||
-- ============================================================================
|
||||
|
||||
-- All indexes use IF NOT EXISTS so the migration can be re-run safely, matching
-- the CREATE TABLE IF NOT EXISTS above them.

-- GiST index for ltree operations (ancestor, descendant, pattern matching)
CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_path_gist
    ON pipeline.urt_taxonomy USING GIST (path);

-- B-tree index for exact path lookups and sorting
-- NOTE(review): path is declared UNIQUE on the table, which already creates a
-- B-tree index; this one may be redundant.
CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_path_btree
    ON pipeline.urt_taxonomy USING BTREE (path);

-- Index for code lookups (most common operation)
-- NOTE(review): code is declared UNIQUE as well, so the same caveat applies.
CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_code
    ON pipeline.urt_taxonomy (code);

-- Index for node type filtering
CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_node_type
    ON pipeline.urt_taxonomy (node_type);
|
||||
|
||||
-- ============================================================================
|
||||
-- MIGRATE EXISTING DATA
|
||||
-- ============================================================================
|
||||
|
||||
-- Level 1: domains. The path is the domain code itself. Definitions (from the
-- spec) and default owners are supplied inline, keyed by domain code; a domain
-- code missing from the list gets NULL for both.
INSERT INTO pipeline.urt_taxonomy
    (path, code, node_type, name, definition, default_owner)
SELECT
    d.code::ltree,
    d.code,
    'domain',
    d.name,
    meta.definition,
    meta.default_owner
FROM pipeline.urt_domains AS d
LEFT JOIN (
    VALUES
        ('O', 'Does the core product/service deliver?', 'Product/Operations'),
        ('P', 'How do personnel behave and perform?',   'HR/Training'),
        ('J', 'Is the process smooth and timely?',      'Operations/Process'),
        ('E', 'Is the space functional and pleasant?',  'Facilities/IT'),
        ('A', 'Can everyone participate fully?',        'Compliance/Design'),
        ('V', 'Is the exchange fair and transparent?',  'Finance/Pricing'),
        ('R', 'Is trust built and maintained?',         'Leadership/CX')
) AS meta (code, definition, default_owner)
    ON meta.code = d.code
ON CONFLICT (code) DO NOTHING;

-- Level 2: categories. The path is <domain>.<category>.
INSERT INTO pipeline.urt_taxonomy
    (path, code, node_type, name, definition)
SELECT
    (c.domain_code || '.' || c.code)::ltree,
    c.code,
    'category',
    c.name,
    NULL  -- Categories don't have definitions in current schema
FROM pipeline.urt_categories AS c
ON CONFLICT (code) DO NOTHING;

-- Level 3: subcodes. The path is <domain>.<category>.<subcode>, with the '.'
-- inside the subcode replaced by '_' (e.g. code 'O1.01' -> label 'O1_01').
INSERT INTO pipeline.urt_taxonomy (
    path, code, node_type, name, definition,
    positive_example, negative_example,
    solution, solution_complexity, marketing_angle
)
SELECT
    (s.domain_code || '.' || s.category_code || '.' || replace(s.code, '.', '_'))::ltree,
    s.code,
    'subcode',
    s.name,
    s.definition,
    s.positive_example,
    s.negative_example,
    s.solution,
    s.solution_complexity,
    s.marketing_angle
FROM pipeline.urt_subcodes AS s
ON CONFLICT (code) DO NOTHING;
|
||||
|
||||
-- ============================================================================
|
||||
-- HELPER FUNCTIONS
|
||||
-- ============================================================================
|
||||
|
||||
-- Get all ancestors of a code (e.g., O1.01 -> [O, O1]), ordered from the
-- top of the hierarchy down. Returns no rows when p_code is unknown.
--
-- Every column reference is table-qualified: the RETURNS TABLE output columns
-- (code, name, node_type, level) are also PL/pgSQL variables inside the body,
-- so an unqualified "code" would raise "column reference is ambiguous".
CREATE OR REPLACE FUNCTION pipeline.urt_ancestors(p_code VARCHAR)
RETURNS TABLE(code VARCHAR, name VARCHAR, node_type VARCHAR, level INT) AS $$
BEGIN
    RETURN QUERY
    SELECT t.code, t.name, t.node_type, t.level
    FROM pipeline.urt_taxonomy t
    WHERE t.path @> (
              SELECT n.path
              FROM pipeline.urt_taxonomy n
              WHERE n.code = p_code
          )
      AND t.code != p_code  -- @> also matches the node itself; exclude it
    ORDER BY t.level;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Get all descendants of a code (e.g., O -> all O* codes), ordered by path.
-- Returns no rows when p_code is unknown.
--
-- Every column reference is table-qualified: the RETURNS TABLE output columns
-- (code, name, node_type, level) are also PL/pgSQL variables inside the body,
-- so an unqualified "code" would raise "column reference is ambiguous".
CREATE OR REPLACE FUNCTION pipeline.urt_descendants(p_code VARCHAR)
RETURNS TABLE(code VARCHAR, name VARCHAR, node_type VARCHAR, level INT) AS $$
BEGIN
    RETURN QUERY
    SELECT t.code, t.name, t.node_type, t.level
    FROM pipeline.urt_taxonomy t
    WHERE t.path <@ (
              SELECT n.path
              FROM pipeline.urt_taxonomy n
              WHERE n.code = p_code
          )
      AND t.code != p_code  -- <@ also matches the node itself; exclude it
    ORDER BY t.path;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Get siblings (nodes sharing the same parent path), excluding the node
-- itself, ordered by path. Returns no rows when p_code is unknown.
--
-- Every column reference is table-qualified: the RETURNS TABLE output columns
-- (code, name, level) are also PL/pgSQL variables inside the body, so an
-- unqualified "code" would raise "column reference is ambiguous".
CREATE OR REPLACE FUNCTION pipeline.urt_siblings(p_code VARCHAR)
RETURNS TABLE(code VARCHAR, name VARCHAR, level INT) AS $$
DECLARE
    v_parent ltree;
BEGIN
    -- Parent path = the node's path minus its last label.
    SELECT subpath(n.path, 0, nlevel(n.path) - 1) INTO v_parent
    FROM pipeline.urt_taxonomy n
    WHERE n.code = p_code;

    RETURN QUERY
    SELECT t.code, t.name, t.level
    FROM pipeline.urt_taxonomy t
    WHERE subpath(t.path, 0, nlevel(t.path) - 1) = v_parent
      AND t.code != p_code
    ORDER BY t.path;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Get the domain code for any code: the node of type 'domain' whose path is
-- an ancestor of (or equal to) the given code's path.
CREATE OR REPLACE FUNCTION pipeline.urt_domain(p_code VARCHAR)
RETURNS VARCHAR AS $$
    SELECT anc.code
    FROM pipeline.urt_taxonomy AS node
    JOIN pipeline.urt_taxonomy AS anc
      ON anc.path @> node.path
    WHERE node.code = p_code
      AND anc.node_type = 'domain';
$$ LANGUAGE SQL;
|
||||
|
||||
-- Get the category code for a subcode: the node of type 'category' whose path
-- is an ancestor of (or equal to) the given code's path.
CREATE OR REPLACE FUNCTION pipeline.urt_category(p_code VARCHAR)
RETURNS VARCHAR AS $$
    SELECT anc.code
    FROM pipeline.urt_taxonomy AS node
    JOIN pipeline.urt_taxonomy AS anc
      ON anc.path @> node.path
    WHERE node.code = p_code
      AND anc.node_type = 'category';
$$ LANGUAGE SQL;
|
||||
|
||||
-- ============================================================================
|
||||
-- VIEW: FLATTENED TAXONOMY WITH HIERARCHY INFO
|
||||
-- ============================================================================
|
||||
|
||||
CREATE OR REPLACE VIEW pipeline.v_urt_taxonomy AS
SELECT
    t.id,
    t.path,
    t.code,
    t.node_type,
    t.level,
    t.name,
    t.definition,
    -- Parent path: everything but the last label; NULL for domains.
    CASE
        WHEN t.level > 1 THEN subpath(t.path, 0, t.level - 1)::text
    END AS parent_path,
    -- Domain info (for rollups): first label of the path and that node's name.
    subpath(t.path, 0, 1)::text AS domain_code,
    d.name AS domain_name,
    -- Category path: first two labels; NULL for domains.
    CASE
        WHEN t.level >= 2 THEN subpath(t.path, 0, 2)::text
    END AS category_path,
    -- Full path as breadcrumb
    t.path::text AS full_path,
    -- Actionability
    t.solution,
    t.default_owner,
    t.is_active
FROM pipeline.urt_taxonomy AS t
-- path is UNIQUE, so this join matches at most one domain row per node.
LEFT JOIN pipeline.urt_taxonomy AS d
       ON d.path = subpath(t.path, 0, 1)
ORDER BY t.path;
|
||||
|
||||
-- ============================================================================
|
||||
-- UPDATE REVIEW_SPANS TO USE LTREE
|
||||
-- ============================================================================
|
||||
|
||||
-- Give review_spans its own ltree column so hierarchy queries can run on spans.
ALTER TABLE pipeline.review_spans
    ADD COLUMN IF NOT EXISTS urt_path ltree;

-- Backfill: copy the taxonomy path of each span's primary code. Only rows that
-- have no path yet are touched.
UPDATE pipeline.review_spans AS rs
SET urt_path = t.path
FROM pipeline.urt_taxonomy AS t
WHERE rs.urt_primary = t.code
  AND rs.urt_path IS NULL;

-- GiST index for hierarchy queries on spans.
CREATE INDEX IF NOT EXISTS idx_review_spans_urt_path_gist
    ON pipeline.review_spans USING GIST (urt_path);
|
||||
|
||||
-- ============================================================================
|
||||
-- EXAMPLE QUERIES (for reference)
|
||||
-- ============================================================================
|
||||
|
||||
-- These are example queries, not executed:
|
||||
/*
|
||||
|
||||
-- 1. Find all subcodes under "People" domain
|
||||
SELECT code, name FROM pipeline.urt_taxonomy
|
||||
WHERE path <@ 'P' AND node_type = 'subcode';
|
||||
|
||||
-- 2. Find all codes matching pattern (e.g., all Value subcodes)
|
||||
SELECT code, name FROM pipeline.urt_taxonomy
|
||||
WHERE path ~ 'V.*' AND node_type = 'subcode';
|
||||
|
||||
-- 3. Aggregate span counts by domain
|
||||
SELECT
|
||||
subpath(urt_path, 0, 1)::text as domain,
|
||||
COUNT(*) as span_count
|
||||
FROM pipeline.review_spans
|
||||
WHERE urt_path IS NOT NULL
|
||||
GROUP BY subpath(urt_path, 0, 1)
|
||||
ORDER BY span_count DESC;
|
||||
|
||||
-- 4. Aggregate by category within a domain
|
||||
SELECT
|
||||
subpath(urt_path, 0, 2)::text as category,
|
||||
COUNT(*) as span_count
|
||||
FROM pipeline.review_spans
|
||||
WHERE urt_path <@ 'O' -- All Offering codes
|
||||
GROUP BY subpath(urt_path, 0, 2)
|
||||
ORDER BY span_count DESC;
|
||||
|
||||
-- 5. Get ancestors of a specific code
|
||||
SELECT * FROM pipeline.urt_ancestors('O1.01');
|
||||
-- Returns: O (Offering), O1 (Function)
|
||||
|
||||
-- 6. Get all descendants of a category
|
||||
SELECT * FROM pipeline.urt_descendants('O1');
|
||||
-- Returns: O1.01, O1.02, O1.03, O1.04, O1.05
|
||||
|
||||
-- 7. Find the domain owner for a code
|
||||
SELECT pipeline.urt_domain('P1.01');
|
||||
-- Returns: 'P' (the code of the People domain)
|
||||
|
||||
-- 8. Drill-down query: Domain -> Category -> Subcode
|
||||
WITH RECURSIVE tree AS (
|
||||
SELECT path, code, name, level
|
||||
FROM pipeline.urt_taxonomy
|
||||
WHERE node_type = 'domain' AND code = 'O'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT t.path, t.code, t.name, t.level
|
||||
FROM pipeline.urt_taxonomy t
|
||||
JOIN tree ON t.path <@ tree.path AND nlevel(t.path) = nlevel(tree.path) + 1
|
||||
)
|
||||
SELECT * FROM tree ORDER BY path;
|
||||
|
||||
*/
|
||||
|
||||
-- ============================================================================
|
||||
-- TRIGGER: Auto-update urt_path on review_spans
|
||||
-- ============================================================================
|
||||
|
||||
-- Trigger function: derive urt_path from urt_primary by looking the code up in
-- the taxonomy. When the code is not found, urt_path is set to NULL.
CREATE OR REPLACE FUNCTION pipeline.set_urt_path()
RETURNS TRIGGER AS $$
BEGIN
    SELECT t.path
      INTO NEW.urt_path
      FROM pipeline.urt_taxonomy AS t
     WHERE t.code = NEW.urt_primary;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger so the migration can be re-run.
DROP TRIGGER IF EXISTS trg_set_urt_path ON pipeline.review_spans;

-- Fires on every insert, and on updates that set urt_primary.
CREATE TRIGGER trg_set_urt_path
    BEFORE INSERT OR UPDATE OF urt_primary
    ON pipeline.review_spans
    FOR EACH ROW
    EXECUTE FUNCTION pipeline.set_urt_path();
|
||||
|
||||
-- ============================================================================
|
||||
-- MATERIALIZED VIEW: Pre-computed hierarchy rollups
|
||||
-- ============================================================================
|
||||
|
||||
-- Per-domain, per-valence rollup of review spans: span count, distinct review
-- count, and average intensity (I1/I2/I3 mapped to 1/2/3; other values are
-- ignored by AVG because the CASE yields NULL for them).
CREATE MATERIALIZED VIEW IF NOT EXISTS pipeline.mv_urt_domain_stats AS
SELECT
    subpath(rs.urt_path, 0, 1)::text AS domain_code,
    t.name AS domain_name,
    rs.valence,
    COUNT(*) AS span_count,
    COUNT(DISTINCT rs.review_id) AS review_count,
    AVG(CASE rs.intensity
            WHEN 'I1' THEN 1
            WHEN 'I2' THEN 2
            WHEN 'I3' THEN 3
        END) AS avg_intensity
FROM pipeline.review_spans rs
JOIN pipeline.urt_taxonomy t ON subpath(rs.urt_path, 0, 1) = t.path
WHERE rs.urt_path IS NOT NULL
GROUP BY subpath(rs.urt_path, 0, 1), t.name, rs.valence;

-- Unique index on the view (REFRESH ... CONCURRENTLY requires one).
-- The index is named explicitly and created with IF NOT EXISTS so that
-- re-running the migration does not add a duplicate index each time. The name
-- matches the one PostgreSQL auto-generates for an unnamed index on these
-- columns, so an index created by an earlier run is recognised.
CREATE UNIQUE INDEX IF NOT EXISTS mv_urt_domain_stats_domain_code_valence_idx
    ON pipeline.mv_urt_domain_stats (domain_code, valence);
|
||||
|
||||
-- Refresh command (run periodically):
|
||||
-- REFRESH MATERIALIZED VIEW CONCURRENTLY pipeline.mv_urt_domain_stats;
|
||||
|
||||
COMMENT ON TABLE pipeline.urt_taxonomy IS 'Unified URT taxonomy using ltree for hierarchical queries. Replaces urt_domains, urt_categories, urt_subcodes.';
|
||||
@@ -70,16 +70,18 @@ class ReviewRepository:
|
||||
self,
|
||||
review: NormalizedReview,
|
||||
raw_id: int,
|
||||
job_id: str | None = None,
|
||||
) -> int:
|
||||
"""Insert an enriched review stub (pre-classification)."""
|
||||
query = """
|
||||
INSERT INTO pipeline.reviews_enriched (
|
||||
source, review_id, review_version, is_latest, raw_id,
|
||||
business_id, place_id, text, text_normalized, rating, review_time,
|
||||
language, taxonomy_version
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
|
||||
language, taxonomy_version, job_id
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14::uuid)
|
||||
ON CONFLICT (source, review_id, review_version) DO UPDATE SET
|
||||
is_latest = EXCLUDED.is_latest
|
||||
is_latest = EXCLUDED.is_latest,
|
||||
job_id = COALESCE(EXCLUDED.job_id, pipeline.reviews_enriched.job_id)
|
||||
RETURNING id
|
||||
"""
|
||||
enriched_id = await self.db.fetchval(
|
||||
@@ -97,6 +99,7 @@ class ReviewRepository:
|
||||
review["review_time"],
|
||||
review["text_language"],
|
||||
"v5.1", # taxonomy_version - will be updated by Stage 2
|
||||
job_id,
|
||||
)
|
||||
return enriched_id
|
||||
|
||||
@@ -213,6 +216,7 @@ class SpanRepository:
|
||||
batch_id: str,
|
||||
model_version: str,
|
||||
taxonomy_version: str,
|
||||
job_id: str | None = None,
|
||||
) -> None:
|
||||
"""Insert a span into the database."""
|
||||
query = """
|
||||
@@ -224,15 +228,17 @@ class SpanRepository:
|
||||
entity, entity_type, entity_normalized,
|
||||
relation_type, related_span_id, causal_chain,
|
||||
is_primary, is_active, review_time,
|
||||
confidence, usn, taxonomy_version, model_version, ingest_batch_id
|
||||
confidence, usn, taxonomy_version, model_version, ingest_batch_id,
|
||||
job_id
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
||||
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
|
||||
$31, $32, $33, $34
|
||||
$31, $32, $33, $34, $35::uuid
|
||||
)
|
||||
ON CONFLICT (span_id) DO UPDATE SET
|
||||
is_active = EXCLUDED.is_active
|
||||
is_active = EXCLUDED.is_active,
|
||||
job_id = COALESCE(EXCLUDED.job_id, pipeline.review_spans.job_id)
|
||||
"""
|
||||
# Build related_span_id from index if needed
|
||||
related_span_id = None
|
||||
@@ -276,6 +282,7 @@ class SpanRepository:
|
||||
taxonomy_version,
|
||||
model_version,
|
||||
batch_id,
|
||||
job_id,
|
||||
)
|
||||
|
||||
async def get_unrouted_negative_spans(
|
||||
@@ -312,6 +319,24 @@ class SpanRepository:
|
||||
row = await self.db.fetchrow(query, span_id)
|
||||
return dict(row) if row else None
|
||||
|
||||
async def deactivate_spans_for_job(self, job_id: str) -> int:
    """Mark every active span of a job as inactive (used before reclassification).

    Args:
        job_id: Job identifier; the query casts it to ``uuid``.

    Returns:
        The number of spans deactivated, parsed from the command status
        string (e.g. ``"UPDATE 42"``), or 0 when no such status comes back.
    """
    sql = """
        UPDATE pipeline.review_spans
        SET is_active = FALSE
        WHERE job_id = $1::uuid AND is_active = TRUE
        """
    status = await self.db.execute(sql, job_id)

    # Anything that is not an "UPDATE <n>" status counts as zero rows.
    if not status or not status.startswith("UPDATE "):
        return 0
    return int(status.split()[1])
|
||||
|
||||
|
||||
class IssueRepository:
|
||||
"""Repository for issue data operations."""
|
||||
@@ -329,6 +354,7 @@ class IssueRepository:
|
||||
entity: str | None,
|
||||
entity_normalized: str | None,
|
||||
taxonomy_version: str,
|
||||
job_id: str | None = None,
|
||||
) -> bool:
|
||||
"""Create or update an issue. Returns True if newly created."""
|
||||
# First check if exists
|
||||
@@ -363,8 +389,8 @@ class IssueRepository:
|
||||
INSERT INTO pipeline.issues (
|
||||
issue_id, business_id, place_id, primary_subcode, domain,
|
||||
state, priority_score, confidence_score, span_count, max_intensity,
|
||||
entity, entity_normalized, taxonomy_version
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
|
||||
entity, entity_normalized, taxonomy_version, job_id
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14::uuid)
|
||||
""",
|
||||
issue_id,
|
||||
business_id,
|
||||
@@ -379,6 +405,7 @@ class IssueRepository:
|
||||
entity,
|
||||
entity_normalized,
|
||||
taxonomy_version,
|
||||
job_id,
|
||||
)
|
||||
return True
|
||||
|
||||
@@ -448,6 +475,41 @@ class IssueRepository:
|
||||
span_id,
|
||||
)
|
||||
|
||||
async def delete_issues_for_job(self, job_id: str) -> int:
    """Delete every issue created by a job (used before reclassification).

    The related ``issue_spans`` and ``issue_events`` rows are removed first,
    then the issues themselves. Each delete is a separate statement.

    Args:
        job_id: Job identifier; the queries cast it to ``uuid``.

    Returns:
        The number of issues deleted, parsed from the command status string
        (e.g. ``"DELETE 42"``), or 0 when no such status comes back.
    """
    # Related records go first, in the same order for every job.
    for related_table in ("issue_spans", "issue_events"):
        await self.db.execute(
            f"""
            DELETE FROM pipeline.{related_table}
            WHERE issue_id IN (
                SELECT issue_id FROM pipeline.issues WHERE job_id = $1::uuid
            )
            """,
            job_id,
        )

    # Then the issues themselves; only this statement's count is reported.
    status = await self.db.execute(
        "DELETE FROM pipeline.issues WHERE job_id = $1::uuid",
        job_id,
    )
    if not status or not status.startswith("DELETE "):
        return 0
    return int(status.split()[1])
|
||||
|
||||
|
||||
class FactRepository:
|
||||
"""Repository for fact time series operations."""
|
||||
|
||||
@@ -0,0 +1,764 @@
|
||||
"""
|
||||
Reputation Pipeline - Primitives-based classification and reputation analytics.
|
||||
|
||||
This pipeline uses the new primitives taxonomy (MANNER, SPEED, VALUE_FOR_MONEY, etc.)
|
||||
instead of the legacy URT codes. It powers the Reputation Report product.
|
||||
|
||||
Stages:
|
||||
- classify: LLM-powered span extraction with primitives (stored in detected_spans_v2)
|
||||
- report: Generate reputation report JSON
|
||||
|
||||
Usage:
|
||||
pipeline = ReputationPipeline()
|
||||
await pipeline.initialize()
|
||||
result = await pipeline.process({"business_id": "Go Karts Mar Menor", "days": 365})
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from pipeline_core import (
|
||||
BasePipeline,
|
||||
DashboardConfig,
|
||||
DashboardSection,
|
||||
PipelineMetadata,
|
||||
PipelineResult as BasePipelineResult,
|
||||
StageResult,
|
||||
WidgetConfig,
|
||||
)
|
||||
|
||||
from reviewiq_pipeline.config import Config
|
||||
from reviewiq_pipeline.db.connection import DatabasePool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Stage names, in execution order
STAGE_NAMES = ["classify", "report"]

# Primitives grouped by the domain key they roll up to
_PRIMITIVES_BY_DOMAIN = (
    # Output/Product (O)
    ("O", (
        "TASTE", "CRAFT", "FRESHNESS", "TEMPERATURE",
        "EFFECTIVENESS", "ACCURACY", "CONDITION", "CONSISTENCY",
    )),
    # People/Service (P)
    ("P", ("MANNER", "COMPETENCE", "ATTENTIVENESS", "COMMUNICATION")),
    # Journey/Process (J)
    ("J", ("SPEED", "FRICTION", "RELIABILITY", "AVAILABILITY")),
    # Environment (E)
    ("E", (
        "CLEANLINESS", "COMFORT", "SAFETY", "AMBIANCE",
        "ACCESSIBILITY", "DIGITAL_UX",
    )),
    # Value (V)
    ("V", (
        "PRICE_LEVEL", "PRICE_FAIRNESS", "PRICE_TRANSPARENCY",
        "VALUE_FOR_MONEY",
    )),
    # Meta
    ("meta", (
        "HONESTY", "ETHICS", "PROMISES",
        "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",
        "RETURN_INTENT", "RECOMMEND", "RECOGNITION",
        "UNMAPPED", "NON_INFORMATIVE",
    )),
)

# Domain mapping for primitives: primitive name -> domain key
DOMAIN_MAP = {
    primitive: domain
    for domain, primitives in _PRIMITIVES_BY_DOMAIN
    for primitive in primitives
}

# Display name for each domain key used in DOMAIN_MAP
DOMAIN_NAMES = dict(
    [
        ("O", "Output/Product"),
        ("P", "People/Service"),
        ("J", "Journey/Process"),
        ("E", "Environment"),
        ("V", "Value"),
        ("meta", "Meta"),
    ]
)
|
||||
|
||||
|
||||
class ReputationPipeline(BasePipeline):
|
||||
"""
|
||||
Reputation Pipeline - Primitives-based classification and analytics.
|
||||
|
||||
Uses the new primitives taxonomy (37 primitives across 5 domains + meta)
|
||||
for more actionable, business-friendly insights.
|
||||
"""
|
||||
|
||||
def __init__(self, config: Config | None = None):
|
||||
"""Initialize the pipeline."""
|
||||
self._config = config or Config()
|
||||
self._db: DatabasePool | None = None
|
||||
self._initialized = False
|
||||
|
||||
@property
|
||||
def config(self) -> Config:
|
||||
"""Get pipeline configuration."""
|
||||
return self._config
|
||||
|
||||
@property
def metadata(self) -> PipelineMetadata:
    """Return the static descriptor of this pipeline (id, name, version, stages)."""
    summary = (
        "Primitives-based classification and reputation scoring. "
        "Generates business-facing analytics reports."
    )
    return PipelineMetadata(
        id="reputation",
        name="Reputation Analytics Pipeline",
        description=summary,
        version="2.0.0",
        stages=STAGE_NAMES,
        input_type="BusinessInput",
    )
|
||||
|
||||
async def initialize(self) -> None:
    """Create and initialize the database pool.

    Does nothing when the pipeline has already been initialized.
    """
    if not self._initialized:
        logger.info("Initializing Reputation pipeline...")

        self._db = DatabasePool(self._config)
        await self._db.initialize()

        # Only flagged once the pool came up without raising.
        self._initialized = True
        logger.info("Reputation pipeline initialized")
|
||||
|
||||
async def close(self) -> None:
    """Close the database pool, if any, and mark the pipeline uninitialized."""
    if self._db:
        await self._db.close()
        # Drop the reference so a later initialize() builds a fresh pool.
        self._db = None

    self._initialized = False
    logger.info("Reputation pipeline closed")
|
||||
|
||||
async def process(
    self,
    input_data: dict[str, Any],
    stages: list[str] | None = None,
) -> BasePipelineResult:
    """
    Process input data through the pipeline.

    Args:
        input_data: Must contain ``business_id`` OR ``job_id`` (the latter is
            looked up in the ``jobs`` table to find the business). Optional
            keys: ``days`` (window length, default 365), ``start`` and
            ``end`` (ISO-format strings). The window ends at ``end`` (or now)
            and, unless ``start`` is given, begins ``days`` before that end.
        stages: List of stage names to run (default: all). Stages always
            run in the order classify, report.

    Returns:
        BasePipelineResult with stage outputs. ``stages_run`` lists only the
        stages that succeeded; ``stage_results`` also holds failed ones.

    Raises:
        ValueError: If ``start`` or ``end`` is not a valid ISO-format string.
    """
    await self.initialize()

    stages = stages or STAGE_NAMES
    stages_run: list[str] = []
    stage_results: dict[str, StageResult] = {}

    business_id = input_data.get("business_id")
    job_id = input_data.get("job_id")

    # Resolve business_id from job_id if not provided directly
    if not business_id and job_id:
        business_id = await self._resolve_business_id(job_id)

    if not business_id:
        return BasePipelineResult(
            pipeline_id="reputation",
            stages_run=[],
            stage_results={},
            success=False,
            error="business_id is required (provide business_id or job_id)",
        )

    # Parse time window. The end is fixed first so that the default start is
    # anchored to it: previously the default start was always computed from
    # "now", which produced a wrong (possibly inverted) window whenever an
    # explicit "end" was combined with "days".
    # NOTE(review): utcnow()/fromisoformat() give naive datetimes here;
    # confirm that matches the type of review_time_utc.
    end_raw = input_data.get("end")
    start_raw = input_data.get("start")
    end_date = datetime.fromisoformat(end_raw) if end_raw else datetime.utcnow()
    if start_raw:
        start_date = datetime.fromisoformat(start_raw)
    else:
        start_date = end_date - timedelta(days=input_data.get("days", 365))

    # (stage name, coroutine, start-of-stage log prefix, failure log message)
    stage_plan = (
        (
            "classify",
            self._check_classification,
            "Running Classification check for",
            "Classification check failed",
        ),
        (
            "report",
            self._generate_report,
            "Generating Reputation Report for",
            "Report generation failed",
        ),
    )

    try:
        for stage, runner, announce, failure_message in stage_plan:
            if stage not in stages:
                continue

            logger.info(f"{announce} {business_id}")
            outcome = await self._run_stage(
                stage, runner, business_id, start_date, end_date, failure_message
            )
            stage_results[stage] = outcome
            # A failed stage is recorded but does not stop later stages.
            if outcome["success"]:
                stages_run.append(stage)

        return BasePipelineResult(
            pipeline_id="reputation",
            stages_run=stages_run,
            stage_results=stage_results,
            success=all(r["success"] for r in stage_results.values()),
        )

    except Exception as e:
        # Safety net for errors raised outside the per-stage handling.
        logger.exception("Pipeline failed with unexpected error")
        return BasePipelineResult(
            pipeline_id="reputation",
            stages_run=stages_run,
            stage_results=stage_results,
            success=False,
            error=str(e),
        )

async def _resolve_business_id(self, job_id: Any) -> Any:
    """Look up the business name recorded for a job; None when it cannot be resolved."""
    try:
        async with self._db.pool.acquire() as conn:
            row = await conn.fetchrow(
                "SELECT business_name FROM jobs WHERE job_id = $1",
                uuid.UUID(job_id) if isinstance(job_id, str) else job_id,
            )
            if row and row["business_name"]:
                business_id = row["business_name"]
                logger.info(f"Resolved business_id '{business_id}' from job_id '{job_id}'")
                return business_id
    except Exception as e:
        # Best effort: a failed lookup is reported by the caller as a
        # missing business_id rather than as a crash.
        logger.warning(f"Failed to resolve business_id from job_id: {e}")
    return None

async def _run_stage(
    self,
    stage: str,
    runner: Any,
    business_id: str,
    start_date: datetime,
    end_date: datetime,
    failure_message: str,
) -> StageResult:
    """Run one stage coroutine, time it, and wrap its outcome in a StageResult."""
    started = time.time()
    try:
        data = await runner(business_id, start_date, end_date)
        return StageResult(
            stage=stage,
            success=True,
            data=data,
            error=None,
            duration_ms=int((time.time() - started) * 1000),
        )
    except Exception as e:
        logger.exception(failure_message)
        return StageResult(
            stage=stage,
            success=False,
            data={},
            error=str(e),
            duration_ms=int((time.time() - started) * 1000),
        )
|
||||
|
||||
async def _check_classification(
|
||||
self,
|
||||
business_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
) -> dict[str, Any]:
|
||||
"""Check classification coverage for the business."""
|
||||
if not self._db:
|
||||
return {"error": "Database not initialized"}
|
||||
|
||||
async with self._db.pool.acquire() as conn:
|
||||
# Get span counts
|
||||
row = await conn.fetchrow(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(*) as total_spans,
|
||||
COUNT(*) FILTER (WHERE valence = '+') as positive,
|
||||
COUNT(*) FILTER (WHERE valence = '-') as negative,
|
||||
COUNT(*) FILTER (WHERE valence = '0') as neutral,
|
||||
COUNT(*) FILTER (WHERE valence = '±') as mixed,
|
||||
COUNT(*) FILTER (WHERE primitive = 'UNMAPPED') as unmapped,
|
||||
COUNT(*) FILTER (WHERE primitive = 'NON_INFORMATIVE') as non_informative,
|
||||
COUNT(DISTINCT s.review_id) as reviews_with_spans
|
||||
FROM pipeline.detected_spans_v2 s
|
||||
JOIN pipeline.review_facts_v1 f
|
||||
ON f.review_id = s.review_id AND f.business_id = s.business_id
|
||||
WHERE s.business_id = $1
|
||||
AND f.review_time_utc >= $2
|
||||
AND f.review_time_utc < $3
|
||||
""",
|
||||
business_id,
|
||||
start_date,
|
||||
end_date,
|
||||
)
|
||||
|
||||
if not row or row["total_spans"] == 0:
|
||||
return {
|
||||
"status": "no_data",
|
||||
"message": "No classified spans found for this business/period",
|
||||
"total_spans": 0,
|
||||
}
|
||||
|
||||
total = row["total_spans"]
|
||||
unmapped_rate = row["unmapped"] / total if total > 0 else 0
|
||||
|
||||
return {
|
||||
"status": "ok" if unmapped_rate < 0.10 else "needs_attention",
|
||||
"total_spans": total,
|
||||
"reviews_with_spans": row["reviews_with_spans"],
|
||||
"positive_count": row["positive"],
|
||||
"negative_count": row["negative"],
|
||||
"neutral_count": row["neutral"],
|
||||
"mixed_count": row["mixed"],
|
||||
"unmapped_count": row["unmapped"],
|
||||
"non_informative_count": row["non_informative"],
|
||||
"unmapped_rate": round(unmapped_rate * 100, 1),
|
||||
}
|
||||
|
||||
async def _generate_report(
    self,
    business_id: str,
    start_date: datetime,
    end_date: datetime,
) -> dict[str, Any]:
    """Generate a reputation report summary.

    Aggregates spans from ``pipeline.detected_spans_v2`` joined to
    ``pipeline.review_facts_v1`` for ``business_id`` where the review time is
    in ``[start_date, end_date)``. Spans whose primitive is ``UNMAPPED`` or
    ``NON_INFORMATIVE`` are excluded from scores and driver rankings.

    Args:
        business_id: Business whose spans are aggregated.
        start_date: Inclusive lower bound on ``review_time_utc``.
        end_date: Exclusive upper bound on ``review_time_utc``.

    Returns:
        ``{"error": ...}`` when the database handle is missing,
        ``{"status": "no_data", ...}`` when no content spans exist, otherwise
        a dict with ``status``, ``business_id``, ``window``, ``scores``,
        ``domains``, ``primitives`` and ``drivers``. All scores are floats.
    """
    if not self._db:
        return {"error": "Database not initialized"}

    async def fetch_top_drivers(conn, valence: str):
        """Return the five most frequent content primitives for one valence.

        ``impact`` is the primitive's share (percent) of all spans with that
        valence in the window; the denominator is NULLIF-guarded.
        """
        return await conn.fetch(
            """
            SELECT
                s.primitive,
                COUNT(*) as count,
                ROUND(100.0 * COUNT(*) / NULLIF((
                    SELECT COUNT(*) FROM pipeline.detected_spans_v2 s2
                    JOIN pipeline.review_facts_v1 f2 ON f2.review_id = s2.review_id AND f2.business_id = s2.business_id
                    WHERE s2.business_id = $1 AND s2.valence = $4
                    AND f2.review_time_utc >= $2 AND f2.review_time_utc < $3
                ), 0), 1) as impact
            FROM pipeline.detected_spans_v2 s
            JOIN pipeline.review_facts_v1 f
                ON f.review_id = s.review_id AND f.business_id = s.business_id
            WHERE s.business_id = $1 AND s.valence = $4
                AND f.review_time_utc >= $2 AND f.review_time_utc < $3
                AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
            GROUP BY s.primitive
            ORDER BY count DESC
            LIMIT 5
            """,
            business_id,
            start_date,
            end_date,
            valence,
        )

    def as_driver(record) -> dict[str, Any]:
        """Convert a driver row into a JSON-friendly dict."""
        return {
            "primitive": record["primitive"],
            "count": record["count"],
            "impact": float(record["impact"]) if record["impact"] else 0,
        }

    async with self._db.pool.acquire() as conn:
        # Get overall scores
        row = await conn.fetchrow(
            """
            WITH span_data AS (
                SELECT
                    s.primitive,
                    s.valence,
                    s.confidence,
                    s.intensity,
                    CASE s.valence
                        WHEN '+' THEN 1
                        WHEN '-' THEN -1
                        ELSE 0
                    END as valence_num
                FROM pipeline.detected_spans_v2 s
                JOIN pipeline.review_facts_v1 f
                    ON f.review_id = s.review_id AND f.business_id = s.business_id
                WHERE s.business_id = $1
                    AND f.review_time_utc >= $2
                    AND f.review_time_utc < $3
                    AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
            )
            SELECT
                COUNT(*) as content_spans,
                ROUND(
                    100.0 * SUM(valence_num * confidence * intensity) /
                    NULLIF(SUM(confidence * intensity), 0),
                    1
                ) as overall_score,
                ROUND(100.0 * COUNT(*) FILTER (WHERE valence = '+') / NULLIF(COUNT(*), 0), 1) as positive_share
            FROM span_data
            """,
            business_id,
            start_date,
            end_date,
        )

        if not row or row["content_spans"] == 0:
            return {
                "status": "no_data",
                "message": "No content spans found",
            }

        # Get per-primitive breakdown
        domain_rows = await conn.fetch(
            """
            SELECT
                s.primitive,
                COUNT(*) as count,
                ROUND(
                    100.0 * SUM(
                        CASE s.valence WHEN '+' THEN 1 WHEN '-' THEN -1 ELSE 0 END
                        * s.confidence * s.intensity
                    ) / NULLIF(SUM(s.confidence * s.intensity), 0),
                    1
                ) as score
            FROM pipeline.detected_spans_v2 s
            JOIN pipeline.review_facts_v1 f
                ON f.review_id = s.review_id AND f.business_id = s.business_id
            WHERE s.business_id = $1
                AND f.review_time_utc >= $2
                AND f.review_time_utc < $3
                AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
            GROUP BY s.primitive
            ORDER BY count DESC
            """,
            business_id,
            start_date,
            end_date,
        )

        # Roll primitives up into domains; primitives missing from DOMAIN_MAP
        # land in "meta".
        domain_totals: dict[str, dict[str, float]] = {}
        primitive_scores: dict[str, dict[str, Any]] = {}
        for r in domain_rows:
            prim = r["primitive"]
            domain = DOMAIN_MAP.get(prim, "meta")
            # Convert once to float so the volume-weighted domain average
            # below is a float too, not a Decimal.
            score = float(r["score"] or 0)
            volume = r["count"]

            primitive_scores[prim] = {
                "domain": domain,
                "score": score,
                "volume": volume,
            }

            totals = domain_totals.setdefault(
                domain, {"total_score": 0.0, "total_volume": 0}
            )
            totals["total_score"] += score * volume
            totals["total_volume"] += volume

        # Calculate volume-weighted domain averages
        domains = {
            domain: {
                "name": DOMAIN_NAMES.get(domain, domain),
                "score": round(data["total_score"] / data["total_volume"], 1),
                "volume": data["total_volume"],
            }
            for domain, data in domain_totals.items()
            if data["total_volume"] > 0
        }

        # Get top drivers
        top_positive = await fetch_top_drivers(conn, "+")
        top_negative = await fetch_top_drivers(conn, "-")

    return {
        "status": "ok",
        "business_id": business_id,
        "window": {
            "start": start_date.isoformat(),
            "end": end_date.isoformat(),
        },
        "scores": {
            "overall": float(row["overall_score"]) if row["overall_score"] else 0,
            "positive_share": float(row["positive_share"]) if row["positive_share"] else 0,
            "content_spans": row["content_spans"],
        },
        "domains": domains,
        "primitives": primitive_scores,
        "drivers": {
            "positives": [as_driver(r) for r in top_positive],
            "negatives": [as_driver(r) for r in top_negative],
        },
    }
|
||||
|
||||
def get_dashboard_config(self) -> DashboardConfig:
    """Build the static dashboard layout for the Reputation Pipeline.

    The dashboard has four sections: overview stat cards, domain charts,
    driver charts, and a collapsed primitives table. Widget ids match the
    cases handled by ``get_widget_data``.
    """

    def stat_card(widget_id, title, column, value_key, fmt, icon, color):
        # Overview cards are 3 columns wide on a single row.
        return WidgetConfig(
            id=widget_id,
            type="stat_card",
            title=title,
            grid={"x": column, "y": 0, "w": 3, "h": 1},
            config={
                "value_key": value_key,
                "format": fmt,
                "icon": icon,
                "color": color,
            },
        )

    def driver_chart(widget_id, title, column, bar_color):
        # Driver charts plot each primitive's impact percentage.
        return WidgetConfig(
            id=widget_id,
            type="bar_chart",
            title=title,
            grid={"x": column, "y": 0, "w": 6, "h": 2},
            config={
                "x_axis": {"key": "primitive", "type": "category"},
                "y_axis": {"key": "impact", "label": "Impact %"},
                "series": [{"key": "impact", "name": "Impact", "color": bar_color}],
            },
        )

    overview = DashboardSection(
        id="overview",
        title="Reputation Overview",
        description="Overall reputation score and key metrics",
        widgets=[
            stat_card("reputation_score", "Reputation Score", 0,
                      "overall_score", "{value:.0f}", "trending-up", "blue"),
            stat_card("positive_share", "Positive Share", 3,
                      "positive_share", "{value:.1f}%", "thumbs-up", "green"),
            stat_card("content_spans", "Content Spans", 6,
                      "content_spans", "{value:,}", "message-square", "purple"),
            stat_card("unmapped_rate", "Unmapped Rate", 9,
                      "unmapped_rate", "{value:.1f}%", "alert-circle", "orange"),
        ],
        collapsed=False,
    )

    domain_section = DashboardSection(
        id="domains",
        title="Domain Breakdown",
        description="Performance across experience domains",
        widgets=[
            WidgetConfig(
                id="domain_scores",
                type="bar_chart",
                title="Domain Scores",
                grid={"x": 0, "y": 0, "w": 6, "h": 2},
                config={
                    "x_axis": {"key": "domain", "type": "category"},
                    "y_axis": {"key": "score", "label": "Score"},
                    "series": [{"key": "score", "name": "Score"}],
                },
            ),
            WidgetConfig(
                id="domain_volume",
                type="pie_chart",
                title="Mentions by Domain",
                grid={"x": 6, "y": 0, "w": 6, "h": 2},
                config={
                    "value_key": "volume",
                    "label_key": "name",
                    "show_legend": True,
                },
            ),
        ],
        collapsed=False,
    )

    driver_section = DashboardSection(
        id="drivers",
        title="Key Drivers",
        description="Top positive and negative drivers",
        widgets=[
            driver_chart("positive_drivers", "Top Strengths", 0, "#22c55e"),
            driver_chart("negative_drivers", "Top Weaknesses", 6, "#ef4444"),
        ],
        collapsed=False,
    )

    table_columns = [
        {"key": "primitive", "header": "Primitive", "width": 150},
        {"key": "domain", "header": "Domain", "width": 100},
        {"key": "score", "header": "Score", "width": 80, "align": "right"},
        {"key": "volume", "header": "Mentions", "width": 80, "align": "right"},
    ]
    primitive_section = DashboardSection(
        id="primitives",
        title="Primitive Analysis",
        description="Detailed breakdown by primitive",
        widgets=[
            WidgetConfig(
                id="primitives_table",
                type="table",
                title="All Primitives",
                grid={"x": 0, "y": 0, "w": 12, "h": 3},
                config={
                    "columns": table_columns,
                    "row_key": "primitive",
                    "page_size": 15,
                    "sortable": True,
                },
            ),
        ],
        collapsed=True,
    )

    return DashboardConfig(
        pipeline_id="reputation",
        title="Reputation Analytics",
        description="Primitives-based reputation scoring and business insights",
        sections=[overview, domain_section, driver_section, primitive_section],
        default_time_range="365d",
        refresh_interval=600,
    )
|
||||
|
||||
async def get_widget_data(
|
||||
self,
|
||||
widget_id: str,
|
||||
params: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
"""Get data for a specific dashboard widget."""
|
||||
await self.initialize()
|
||||
|
||||
business_id = params.get("business_id")
|
||||
if not business_id:
|
||||
return {"error": "business_id required"}
|
||||
|
||||
days = 365
|
||||
time_range = params.get("time_range", "365d")
|
||||
if time_range.endswith("d"):
|
||||
days = int(time_range[:-1])
|
||||
|
||||
end_date = datetime.utcnow()
|
||||
start_date = end_date - timedelta(days=days)
|
||||
|
||||
# Get classification check data
|
||||
classify_data = await self._check_classification(business_id, start_date, end_date)
|
||||
|
||||
# Get report data
|
||||
report_data = await self._generate_report(business_id, start_date, end_date)
|
||||
|
||||
match widget_id:
|
||||
# Overview stats
|
||||
case "reputation_score":
|
||||
return {"overall_score": report_data.get("scores", {}).get("overall", 0)}
|
||||
case "positive_share":
|
||||
return {"positive_share": report_data.get("scores", {}).get("positive_share", 0)}
|
||||
case "content_spans":
|
||||
return {"content_spans": report_data.get("scores", {}).get("content_spans", 0)}
|
||||
case "unmapped_rate":
|
||||
return {"unmapped_rate": classify_data.get("unmapped_rate", 0)}
|
||||
|
||||
# Domain charts
|
||||
case "domain_scores":
|
||||
domains = report_data.get("domains", {})
|
||||
return {"data": [{"domain": k, **v} for k, v in domains.items()]}
|
||||
case "domain_volume":
|
||||
domains = report_data.get("domains", {})
|
||||
return {"data": [{"name": v["name"], "volume": v["volume"]} for v in domains.values()]}
|
||||
|
||||
# Driver charts
|
||||
case "positive_drivers":
|
||||
return {"data": report_data.get("drivers", {}).get("positives", [])}
|
||||
case "negative_drivers":
|
||||
return {"data": report_data.get("drivers", {}).get("negatives", [])}
|
||||
|
||||
# Primitives table
|
||||
case "primitives_table":
|
||||
primitives = report_data.get("primitives", {})
|
||||
return {
|
||||
"data": [
|
||||
{"primitive": k, **v}
|
||||
for k, v in primitives.items()
|
||||
],
|
||||
"total": len(primitives),
|
||||
}
|
||||
|
||||
case _:
|
||||
logger.warning(f"Unknown widget: {widget_id}")
|
||||
return {"error": f"Unknown widget: {widget_id}"}
|
||||
|
||||
async def health_check(self) -> dict[str, Any]:
    """Check pipeline health.

    Runs two probes against the database pool: a trivial ``SELECT 1`` and a
    single-row read from ``pipeline.detected_spans_v2``, which fails if the
    table is missing or unreadable.

    Returns:
        ``{"healthy": bool, "checks": {...}}`` where each check is ``"ok"``,
        the error text of the failed probe, or, when no database handle is
        available, ``"not_initialized"`` / ``"skipped"``.
    """
    await self.initialize()

    checks: dict[str, str] = {}

    if not self._db:
        checks["database"] = "not_initialized"
        # Make the omitted probe visible instead of silently dropping it.
        checks["spans_table"] = "skipped"
        return {"healthy": False, "checks": checks}

    probes = (
        ("database", "SELECT 1"),
        # Reading one row proves the table exists without the full scan that
        # COUNT(*) needs (LIMIT does not bound an aggregate).
        ("spans_table", "SELECT 1 FROM pipeline.detected_spans_v2 LIMIT 1"),
    )

    healthy = True
    for check_name, query in probes:
        try:
            async with self._db.pool.acquire() as conn:
                await conn.fetchval(query)
            checks[check_name] = "ok"
        except Exception as e:  # health boundary: report, never raise
            checks[check_name] = str(e)
            healthy = False

    return {
        "healthy": healthy,
        "checks": checks,
    }
|
||||
@@ -2,10 +2,22 @@
|
||||
|
||||
from reviewiq_pipeline.services.embeddings import EmbeddingService
|
||||
from reviewiq_pipeline.services.llm_client import LLMClient
|
||||
from reviewiq_pipeline.services.review_router import (
|
||||
ReviewRouter,
|
||||
RouterConfig,
|
||||
RoutingDecision,
|
||||
RoutingTier,
|
||||
create_router,
|
||||
)
|
||||
from reviewiq_pipeline.services.text_processor import TextProcessor
|
||||
|
||||
__all__ = [
|
||||
"LLMClient",
|
||||
"EmbeddingService",
|
||||
"TextProcessor",
|
||||
"ReviewRouter",
|
||||
"RouterConfig",
|
||||
"RoutingDecision",
|
||||
"RoutingTier",
|
||||
"create_router",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,392 @@
|
||||
"""
|
||||
Category Resolver Service
|
||||
|
||||
Resolves business categories to the deepest node in the GBP taxonomy.
|
||||
Uses a multi-phase approach:
|
||||
1. Exact match from Google's category
|
||||
2. LLM matching when no exact match
|
||||
3. Hierarchical LLM classification when no Google category
|
||||
|
||||
This is critical for the classification pipeline as it provides context
|
||||
for understanding and categorizing reviews.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import asyncpg
|
||||
|
||||
from .llm_client import LLMClient
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ResolvedCategory:
    """A taxonomy node chosen for a business, plus how it was chosen."""

    # Identifier of the matched taxonomy row.
    category_id: int
    # ltree path rendered as a string.
    path: str
    # Category name as stored in the taxonomy.
    name: str
    # Depth of the node in the taxonomy.
    level: int
    # Resolution strategy: 'exact', 'llm' or 'hierarchical'.
    method: str
    # Confidence in the match, from 0.0 to 1.0.
    confidence: float
|
||||
|
||||
|
||||
class CategoryResolver:
    """
    Resolves business categories to GBP taxonomy nodes.

    Resolution runs in phases and stops at the first hit:
    exact/substring lookup of the Google category, LLM choice among
    word-matched candidates, then an LLM-guided walk down the taxonomy
    using the business name.

    Usage:
        resolver = CategoryResolver(db_pool, llm_client)

        # With Google category
        result = await resolver.resolve("Toy store")
        # -> ResolvedCategory(path="Retail.Stores.Toy_store", method="exact")

        # Without Google category (infer from name)
        result = await resolver.resolve(None, business_name="Pura Vida Hostel")
        # -> ResolvedCategory(path="Travel_Hospitality.Hotels.Hostel", method="hierarchical")
    """

    def __init__(self, pool: asyncpg.Pool, llm_client: Optional[LLMClient] = None):
        self.pool = pool
        self.llm = llm_client
        # Per-instance caches of taxonomy rows; levels 2 and 3 are keyed by
        # the parent path.
        self._level1_cache: list[dict] = []
        self._level2_cache: dict[str, list[dict]] = {}
        self._level3_cache: dict[str, list[dict]] = {}

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE metacharacters so *text* is matched literally.

        Uses backslash, PostgreSQL's default LIKE escape character.
        """
        return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    @staticmethod
    def _to_resolved(row, method: str, confidence: float) -> ResolvedCategory:
        """Build a ResolvedCategory from a taxonomy row (record or dict)."""
        return ResolvedCategory(
            category_id=row['id'],
            path=row['path'],
            name=row['name'],
            level=row['level'],
            method=method,
            confidence=confidence,
        )

    async def resolve(
        self,
        google_category: Optional[str] = None,
        business_name: Optional[str] = None,
        business_address: Optional[str] = None
    ) -> Optional[ResolvedCategory]:
        """
        Resolve to the deepest taxonomy node.

        Args:
            google_category: Category from Google Maps (e.g., "Toy store")
            business_name: Business name for inference if no Google category
            business_address: Address for additional context

        Returns:
            ResolvedCategory or None if resolution failed
        """
        if google_category:
            # Phase 1: Try exact match if we have Google category
            result = await self._exact_match(google_category)
            if result:
                log.info(f"Exact match: '{google_category}' -> {result.path}")
                return result

            # Phase 2: LLM matching for Google category
            if self.llm:
                result = await self._llm_match(google_category)
                if result:
                    log.info(f"LLM match: '{google_category}' -> {result.path}")
                    return result

        # Phase 3: Hierarchical classification from business name
        if business_name and self.llm:
            result = await self._hierarchical_classify(
                business_name=business_name,
                business_address=business_address,
                google_category=google_category  # May be None or unmatched
            )
            if result:
                log.info(f"Hierarchical: '{business_name}' -> {result.path}")
                return result

        log.warning(f"Could not resolve category for: {google_category or business_name}")
        return None

    async def _exact_match(self, google_category: str) -> Optional[ResolvedCategory]:
        """Look the Google category up directly in the level-3 taxonomy.

        Tries a case-insensitive equality match first (confidence 1.0), then
        a case-insensitive substring match preferring the shortest name
        (confidence 0.9). Both results are reported with method 'exact'.
        """
        async with self.pool.acquire() as conn:
            # Try exact match (case-insensitive)
            row = await conn.fetchrow("""
                SELECT id, name, path::text as path, level
                FROM gbp_categories
                WHERE LOWER(name) = LOWER($1) AND level = 3
            """, google_category)

            if row:
                return self._to_resolved(row, 'exact', 1.0)

            # Try fuzzy match (contains). The user text is escaped so that a
            # literal '%' or '_' in it cannot act as a wildcard.
            row = await conn.fetchrow("""
                SELECT id, name, path::text as path, level
                FROM gbp_categories
                WHERE LOWER(name) LIKE LOWER($1) AND level = 3
                ORDER BY length(name) ASC
                LIMIT 1
            """, f"%{self._escape_like(google_category)}%")

            if row:
                return self._to_resolved(row, 'exact', 0.9)

        return None

    async def _llm_match(self, google_category: str) -> Optional[ResolvedCategory]:
        """Use LLM to match Google category to taxonomy.

        Candidates are level-3 categories whose name contains any word of
        the Google category longer than two characters; if none match, a
        random sample of 50 level-3 categories is offered instead. The LLM
        must answer with an exact candidate name or "NONE".
        """
        words = google_category.lower().split()
        # Words travel as a bound array parameter, never as SQL text, so
        # quotes in the category cannot alter the query.
        patterns = [f"%{self._escape_like(w)}%" for w in words if len(w) > 2]
        if not patterns:
            return None

        # Get candidate categories (level 3) that might match
        async with self.pool.acquire() as conn:
            candidates = await conn.fetch("""
                SELECT id, name, path::text as path, level
                FROM gbp_categories
                WHERE LOWER(name) LIKE ANY($1::text[]) AND level = 3
                ORDER BY name
                LIMIT 20
            """, patterns)

            if not candidates:
                # Get random sample for LLM to choose from
                candidates = await conn.fetch("""
                    SELECT id, name, path::text as path, level
                    FROM gbp_categories
                    WHERE level = 3
                    ORDER BY RANDOM()
                    LIMIT 50
                """)

        if not candidates:
            return None

        # Ask LLM to pick best match (after the connection is released)
        candidate_list = "\n".join([f"- {c['name']} ({c['path']})" for c in candidates])

        prompt = f"""Given the Google Maps business category "{google_category}", select the BEST matching category from this taxonomy list.

Candidates:
{candidate_list}

Respond with ONLY the exact category name from the list, nothing else.
If none match well, respond with "NONE"."""

        response = await self.llm.complete(prompt, max_tokens=50)
        selected_name = response.strip().strip('"').strip("'")

        if selected_name == "NONE":
            return None

        # Find the selected category
        wanted = selected_name.lower()
        for c in candidates:
            if c['name'].lower() == wanted:
                return self._to_resolved(c, 'llm', 0.8)

        return None

    async def _hierarchical_classify(
        self,
        business_name: str,
        business_address: Optional[str] = None,
        google_category: Optional[str] = None
    ) -> Optional[ResolvedCategory]:
        """
        Walk down the taxonomy tree using LLM at each level.

        Level 1 (16 sectors) -> Level 2 (91 types) -> Level 3 (4034 categories)

        Returns None as soon as any level yields no selection; otherwise the
        level-3 node with method 'hierarchical' and confidence 0.7.
        """
        context = f"Business: {business_name}"
        if business_address:
            context += f"\nAddress: {business_address}"
        if google_category:
            context += f"\nGoogle category hint: {google_category}"

        # Level 1: Select sector
        level1_categories = await self._get_level_categories(1)
        sector = await self._llm_select_category(
            context=context,
            categories=level1_categories,
            level_name="sector"
        )
        if not sector:
            return None

        # Level 2: Select business type within sector
        level2_categories = await self._get_level_categories(2, parent_path=sector['path'])
        business_type = await self._llm_select_category(
            context=context,
            categories=level2_categories,
            level_name="business type",
            parent=sector['name']
        )
        if not business_type:
            return None

        # Level 3: Select specific category
        level3_categories = await self._get_level_categories(3, parent_path=business_type['path'])
        specific = await self._llm_select_category(
            context=context,
            categories=level3_categories,
            level_name="specific category",
            parent=business_type['name']
        )
        if not specific:
            return None

        return self._to_resolved(specific, 'hierarchical', 0.7)

    async def _get_level_categories(
        self,
        level: int,
        parent_path: Optional[str] = None
    ) -> list[dict]:
        """Get categories at a specific level, optionally filtered by parent.

        Results are cached on the instance: level 1 as a single list,
        levels 2 and 3 per parent path.
        """
        # Check cache
        if level == 1 and self._level1_cache:
            return self._level1_cache
        if level == 2 and parent_path in self._level2_cache:
            return self._level2_cache[parent_path]
        if level == 3 and parent_path in self._level3_cache:
            return self._level3_cache[parent_path]

        async with self.pool.acquire() as conn:
            if parent_path:
                rows = await conn.fetch("""
                    SELECT id, name, path::text as path, level
                    FROM gbp_categories
                    WHERE level = $1 AND path <@ $2::ltree
                    ORDER BY name
                """, level, parent_path)
            else:
                rows = await conn.fetch("""
                    SELECT id, name, path::text as path, level
                    FROM gbp_categories
                    WHERE level = $1
                    ORDER BY name
                """, level)

        result = [dict(r) for r in rows]

        # Cache results
        if level == 1:
            self._level1_cache = result
        elif level == 2 and parent_path:
            self._level2_cache[parent_path] = result
        elif level == 3 and parent_path:
            self._level3_cache[parent_path] = result

        return result

    async def _llm_select_category(
        self,
        context: str,
        categories: list[dict],
        level_name: str,
        parent: Optional[str] = None
    ) -> Optional[dict]:
        """Ask LLM to select best category from list.

        Returns None for an empty list and the sole entry for a one-item
        list without calling the LLM. Otherwise the answer is matched by
        case-insensitive equality, then by substring in either direction;
        if nothing matches, the first option is returned with a warning.
        """
        if not categories:
            return None

        # If only one option, return it
        if len(categories) == 1:
            return categories[0]

        category_list = "\n".join([f"- {c['name']}" for c in categories])
        parent_context = f" within {parent}" if parent else ""

        prompt = f"""{context}

Select the most appropriate {level_name}{parent_context} for this business.

Options:
{category_list}

Respond with ONLY the exact category name from the list, nothing else."""

        response = await self.llm.complete(prompt, max_tokens=50)
        selected_name = response.strip().strip('"').strip("'")
        wanted = selected_name.lower()

        # Find the selected category
        for c in categories:
            if c['name'].lower() == wanted:
                return c

        # Fuzzy match if exact not found. Skipped for an empty answer: ""
        # is a substring of every name, which would pick the first option
        # without the warning below.
        if wanted:
            for c in categories:
                name = c['name'].lower()
                if wanted in name or name in wanted:
                    return c

        # Return first as fallback
        log.warning(f"LLM selected '{selected_name}' not in list, using first option")
        return categories[0]
|
||||
|
||||
|
||||
async def resolve_job_category(
    pool: asyncpg.Pool,
    llm_client: LLMClient,
    job_id: str,
    google_category: Optional[str],
    business_name: Optional[str],
    business_address: Optional[str] = None
) -> Optional[ResolvedCategory]:
    """
    Resolve a job's category and persist it on the ``jobs`` row.

    Main entry point for pre-flight category resolution. A fresh
    CategoryResolver is created per call. When resolution fails nothing is
    written and the falsy result is returned as-is.
    """
    resolved = await CategoryResolver(pool, llm_client).resolve(
        google_category=google_category,
        business_name=business_name,
        business_address=business_address
    )

    # Nothing to persist when no category could be resolved.
    if not resolved:
        return resolved

    # Save to database
    update_sql = """
        UPDATE jobs
        SET gbp_category_id = $2,
            gbp_category_path = $3::ltree,
            category_resolution_method = $4,
            updated_at = NOW()
        WHERE job_id = $1::uuid
    """
    async with pool.acquire() as conn:
        await conn.execute(
            update_sql,
            job_id,
            resolved.category_id,
            resolved.path,
            resolved.method,
        )

    log.info(f"Job {job_id}: resolved category to {resolved.path} ({resolved.method})")
    return resolved
|
||||
@@ -0,0 +1,210 @@
|
||||
"""
|
||||
Classification validator for post-LLM validation.
|
||||
|
||||
Catches common misclassification patterns based on keyword detection
|
||||
and suggests corrections before persisting to database.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Price/money indicators → should be V codes
PRICE_PATTERNS = [
    # Currency symbols are non-word characters, so a `\b` placed directly
    # beside one only matches when a letter or digit is glued to the symbol.
    # Lookarounds express the intent: the symbol must not touch a word char.
    r'\b\d+\s*[€$£](?!\w)',    # "50€", "100$"
    r'(?<!\w)[€$£]\s*\d+\b',   # "€50", "$100"
    r'\beur(o|os)?\b',
    r'\bprice[sd]?\b',
    r'\bcost[s]?\b',
    r'\bfee[s]?\b',
    r'\bcharge[sd]?\b',
    r'\bdeposit[s]?\b',
    r'\brefund[s]?\b',
    r'\bcheap\b',
    r'\bexpensive\b',
    r'\baffordable\b',
    r'\bpreis\b',  # German
    r'\bprecio[s]?\b',  # Spanish
    r'\bgünstig\b',  # German "cheap"
    r'\bteuer\b',  # German "expensive"
    r'\bbarato\b',  # Spanish "cheap"
    r'\bcaro\b',  # Spanish "expensive"
]

# Staff behavior indicators → should be P codes
STAFF_PATTERNS = [
    r'\bfriendly\b',
    r'\brude\b',
    r'\bhelpful\b',
    r'\bpatient\b',
    r'\bimpatient\b',
    r'\bwelcoming\b',
    r'\battentive\b',
    r'\bprofessional\b',
    r'\bunprofessional\b',
    r'\bamable\b',  # Spanish "friendly"
    r'\bsimpático\b',  # Spanish
    r'\bmuy amable\b',
    r'\bnett\b',  # German "nice"
    r'\bfreundlich\b',  # German "friendly"
    r'\bunfreundlich\b',  # German "unfriendly"
    r'\bgentil\b',  # French/Spanish
]

# Scam/ethics indicators → should be R codes
ETHICS_PATTERNS = [
    r'\bscam\b',
    r'\bfraud\b',
    r'\bcheat\b',
    r'\bdishonest\b',
    r'\blied\b',
    r'\blie[s]?\b',
    r'\bscammer[s]?\b',
    r'\bsteal\b',
    r'\bstole\b',
    r'\brobber[y]?\b',
    r'\bestafa\b',  # Spanish "scam"
    r'\btramp[a]?\b',  # Spanish "trap/trick"
    r'\bengaño\b',  # Spanish "deception"
    r'\bAbzocker\b',  # German "rip-off"
    r'\bBetrug\b',  # German "fraud"
    r'\barnaque\b',  # French "scam"
    r'\bvoleur[s]?\b',  # French "thief"
]

# Wayfinding indicators → should be A1.04
WAYFINDING_PATTERNS = [
    r"\bcouldn'?t find\b",
    r'\bhard to find\b',
    r'\bdifficult to find\b',
    r'\bconfusing\b.*\b(direction|location|shuttle)\b',
    r'\blost\b',
    r'\bno signs?\b',
    r'\bno señal\b',  # Spanish
    r'\bkeine Schilder\b',  # German
]
|
||||
|
||||
|
||||
def validate_classification(
    span_text: str,
    urt_code: str,
    valence: str,
) -> dict[str, Any] | None:
    """
    Validate a classification and suggest correction if needed.

    Rules are checked in order (price, staff behavior, ethics, wayfinding)
    and the first rule with a matching keyword pattern wins. A rule is
    skipped when the code already sits in that rule's target domain. An
    empty or missing ``urt_code`` belongs to no domain, so every rule is
    eligible.

    Args:
        span_text: The span text
        urt_code: The assigned URT code; its first character is the domain
        valence: The assigned valence (not consulted by the current rules)

    Returns:
        Correction dict with 'suggested_urt', 'reason' and the matching
        'pattern' if misclassified, None if OK
    """
    text_lower = (span_text or "").lower()
    # Slicing keeps an empty code from raising IndexError.
    domain = urt_code[:1] if urt_code else ""

    def first_hit(patterns):
        # First pattern of the list found in the text, or None.
        return next(
            (p for p in patterns if re.search(p, text_lower, re.IGNORECASE)),
            None,
        )

    def mentions(keywords):
        # True when any keyword regex occurs in the text.
        return any(re.search(k, text_lower, re.I) for k in keywords)

    # Rule 1: Price mentions should be V codes
    if domain != 'V':
        pattern = first_hit(PRICE_PATTERNS)
        if pattern is not None:
            # Determine which V code
            if mentions([r'hidden', r'extra', r'surprise', r'unexpected', r'trampa']):
                suggested = 'V1.03'
            elif mentions([r'overcharge', r'wrong.*charge', r'billing']):
                suggested = 'V4.04'
            else:
                suggested = 'V1.01'

            logger.debug(f"Validation: {urt_code} → {suggested} (price mention)")
            return {
                'suggested_urt': suggested,
                'reason': 'price_mention',
                'pattern': pattern,
            }

    # Rule 2: Staff behavior should be P codes
    if domain != 'P':
        pattern = first_hit(STAFF_PATTERNS)
        if pattern is not None:
            # Determine which P code
            if mentions([r'rude', r'unfriendly', r'disrespect', r'unfreundlich']):
                suggested = 'P1.02'
            elif mentions([r'impatient', r'rushed']):
                suggested = 'P1.03'
            else:
                suggested = 'P1.01'

            logger.debug(f"Validation: {urt_code} → {suggested} (staff behavior)")
            return {
                'suggested_urt': suggested,
                'reason': 'staff_behavior',
                'pattern': pattern,
            }

    # Rule 3: Scam/ethics should be R codes
    if domain != 'R':
        pattern = first_hit(ETHICS_PATTERNS)
        if pattern is not None:
            if mentions([r'scam', r'fraud', r'cheat', r'estafa', r'Betrug', r'arnaque']):
                suggested = 'R1.02'
            else:
                suggested = 'R1.01'

            logger.debug(f"Validation: {urt_code} → {suggested} (ethics issue)")
            return {
                'suggested_urt': suggested,
                'reason': 'ethics_issue',
                'pattern': pattern,
            }

    # Rule 4: Wayfinding should be A1.04 (A4.01 is also left untouched)
    if urt_code not in ('A1.04', 'A4.01'):
        pattern = first_hit(WAYFINDING_PATTERNS)
        if pattern is not None:
            logger.debug(f"Validation: {urt_code} → A1.04 (wayfinding)")
            return {
                'suggested_urt': 'A1.04',
                'reason': 'wayfinding',
                'pattern': pattern,
            }

    return None  # Classification looks OK
|
||||
|
||||
|
||||
def validate_and_fix_spans(spans: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Validate and fix a list of spans.

    Each span is passed through validate_classification(); when that returns
    a correction, the span's 'urt_primary' is overwritten with the suggested
    code and a '_validation_correction' entry records the previous code and
    the reason. Spans are modified in place.

    Args:
        spans: List of span dicts with 'span_text', 'urt_primary', 'valence'.
            Missing keys fall back to '', 'O1.01' and 'V0' respectively.

    Returns:
        The same list object, with corrections applied to its spans
    """
    corrections_made = 0

    for span in spans:
        # Resolve the code once so the value handed to the validator and the
        # value recorded as "original" always agree. Reading span['urt_primary']
        # afterwards would raise KeyError for spans that lack the key.
        current_code = span.get('urt_primary', 'O1.01')

        correction = validate_classification(
            span.get('span_text', ''),
            current_code,
            span.get('valence', 'V0'),
        )
        if not correction:
            continue

        span['urt_primary'] = correction['suggested_urt']
        span['_validation_correction'] = {
            'original': current_code,
            'reason': correction['reason'],
        }
        corrections_made += 1

    if corrections_made:
        logger.info(f"Validation corrected {corrections_made} spans")

    return spans
|
||||
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Config Resolver - Resolves L1 config + sector brief for classification.
|
||||
|
||||
Builds a single JSON payload per business containing:
|
||||
- Enabled primitives (L1 + always-on meta)
|
||||
- Weights
|
||||
- Sector brief (language/signals)
|
||||
- Minimal primitive dictionary
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths
|
||||
DATA_DIR = Path(__file__).parent.parent.parent.parent / "data"
|
||||
CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"
|
||||
BRIEFS_DIR = DATA_DIR / "sector_briefs"
|
||||
PRIMITIVES_FILE = DATA_DIR / "primitives.json"
|
||||
|
||||
# Meta primitives - always enabled, never weighted
|
||||
META_PRIMITIVES = frozenset([
|
||||
"HONESTY", "ETHICS", "PROMISES", # Trust
|
||||
"ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY", # Resolution
|
||||
"RETURN_INTENT", "RECOMMEND", "RECOGNITION", # Loyalty
|
||||
"UNMAPPED", # Escape
|
||||
])
|
||||
|
||||
# Core primitives dictionary (26 entries here; the "frozen 36" presumably also counts the 10 META_PRIMITIVES above - minimal for prompt)
|
||||
CORE_PRIMITIVES = {
|
||||
# Quality (8)
|
||||
"TASTE": {"domain": "O", "name": "Taste/Flavor", "def": "Sensory quality of food/beverage"},
|
||||
"CRAFT": {"domain": "O", "name": "Craftsmanship", "def": "Skill of execution/preparation"},
|
||||
"FRESHNESS": {"domain": "O", "name": "Freshness", "def": "Newness, not stale or old"},
|
||||
"TEMPERATURE": {"domain": "O", "name": "Temperature", "def": "Hot/cold as expected"},
|
||||
"EFFECTIVENESS": {"domain": "O", "name": "Effectiveness", "def": "Achieves intended purpose"},
|
||||
"ACCURACY": {"domain": "O", "name": "Accuracy", "def": "Correct, as ordered/specified"},
|
||||
"CONDITION": {"domain": "O", "name": "Condition", "def": "Physical state, wear, damage"},
|
||||
"CONSISTENCY": {"domain": "O", "name": "Consistency", "def": "Same quality each time"},
|
||||
# Service (4)
|
||||
"MANNER": {"domain": "P", "name": "Manner/Attitude", "def": "Friendliness, respect, warmth"},
|
||||
"COMPETENCE": {"domain": "P", "name": "Competence", "def": "Knowledge and skill of staff"},
|
||||
"ATTENTIVENESS": {"domain": "P", "name": "Attentiveness", "def": "Being present, responsive"},
|
||||
"COMMUNICATION": {"domain": "P", "name": "Communication", "def": "Clarity, listening, updates"},
|
||||
# Process (4)
|
||||
"SPEED": {"domain": "J", "name": "Speed/Wait", "def": "Time to service, waiting"},
|
||||
"FRICTION": {"domain": "J", "name": "Friction", "def": "Obstacles, hassles, complexity"},
|
||||
"RELIABILITY": {"domain": "J", "name": "Reliability", "def": "Dependable, keeps promises"},
|
||||
"AVAILABILITY": {"domain": "J", "name": "Availability", "def": "Open when needed, bookable"},
|
||||
# Environment (6)
|
||||
"CLEANLINESS": {"domain": "E", "name": "Cleanliness", "def": "Hygiene, tidiness"},
|
||||
"COMFORT": {"domain": "E", "name": "Comfort", "def": "Physical ease, seating"},
|
||||
"SAFETY": {"domain": "E", "name": "Safety", "def": "Free from harm/danger"},
|
||||
"AMBIANCE": {"domain": "E", "name": "Ambiance", "def": "Atmosphere, mood, vibe"},
|
||||
"ACCESSIBILITY": {"domain": "E", "name": "Accessibility", "def": "Easy to reach, navigate"},
|
||||
"DIGITAL_UX": {"domain": "E", "name": "Digital Experience", "def": "Website, app, online"},
|
||||
# Value (4)
|
||||
"PRICE_LEVEL": {"domain": "V", "name": "Price Level", "def": "Absolute cost (cheap/expensive)"},
|
||||
"PRICE_FAIRNESS": {"domain": "V", "name": "Price Fairness", "def": "Reasonable for what you get"},
|
||||
"PRICE_TRANSPARENCY": {"domain": "V", "name": "Price Transparency", "def": "No hidden fees, clear pricing"},
|
||||
"VALUE_FOR_MONEY": {"domain": "V", "name": "Value for Money", "def": "Worth what you paid"},
|
||||
}
|
||||
|
||||
|
||||
class ConfigResolver:
    """
    Resolves classification config for a business.

    Looks up the business's sector in the database, loads that sector's L1
    config and sector brief from JSON files (cached per resolver instance),
    and assembles one payload dict for classification.

    Usage:
        resolver = ConfigResolver()
        payload = await resolver.resolve("Go Karts Mar Menor", pool)
    """

    def __init__(self):
        # Both caches are keyed by the sector code exactly as passed in.
        self._l1_cache: dict[str, dict] = {}
        self._brief_cache: dict[str, dict] = {}

    @staticmethod
    def _read_json(path: Path) -> dict[str, Any]:
        """Parse a JSON file as UTF-8, independent of the locale encoding."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _load_l1_config(self, sector_code: str) -> dict[str, Any] | None:
        """Load L1 config from file.

        Returns the cached config when available, otherwise reads
        ``<sector_code lowercased>_config.json`` from CONFIGS_DIR. Returns
        None (after logging a warning) when the file does not exist; missing
        files are not cached.
        """
        if sector_code in self._l1_cache:
            return self._l1_cache[sector_code]

        config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
        if not config_path.exists():
            logger.warning(f"No L1 config for sector {sector_code}")
            return None

        config = self._read_json(config_path)
        self._l1_cache[sector_code] = config
        return config

    def _load_sector_brief(self, sector_code: str) -> dict[str, Any] | None:
        """Load sector brief from file.

        Returns the cached brief when available, otherwise reads
        ``<sector_code lowercased>_brief.json`` from BRIEFS_DIR. Returns None
        (after logging a warning) when the file does not exist.
        """
        if sector_code in self._brief_cache:
            return self._brief_cache[sector_code]

        brief_path = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
        if not brief_path.exists():
            logger.warning(f"No sector brief for {sector_code}")
            return None

        brief = self._read_json(brief_path)
        self._brief_cache[sector_code] = brief
        return brief

    async def get_business_mapping(
        self,
        pool: asyncpg.Pool,
        business_id: str,
    ) -> dict[str, Any] | None:
        """Get business → taxonomy mapping from database.

        Returns a dict with 'business_id', 'gbp_path' (cast to text) and
        'sector_code', or None when no row matches business_id.
        """
        query = """
            SELECT business_id, gbp_path::text, sector_code
            FROM pipeline.business_taxonomy_map
            WHERE business_id = $1
        """
        row = await pool.fetchrow(query, business_id)
        if not row:
            return None
        return dict(row)

    def resolve_enabled_set(self, l1_config: dict) -> set[str]:
        """
        Compute final enabled primitive set.

        = L1.enabled + META_PRIMITIVES (always-on)
        """
        enabled = set(l1_config.get("enabled", []))
        enabled.update(META_PRIMITIVES)
        return enabled

    def resolve_weights(self, l1_config: dict) -> dict[str, float]:
        """Get weights from L1 config (as a new dict; empty if absent)."""
        return dict(l1_config.get("weights", {}))

    def build_primitives_for_prompt(
        self,
        enabled: set[str],
        weights: dict[str, float],
    ) -> dict[str, dict]:
        """
        Build minimal primitives dict for prompt.

        Only includes enabled primitives with their definitions. Enabled
        codes found in neither CORE_PRIMITIVES nor META_PRIMITIVES are
        omitted.
        """
        result = {}
        # Iterate in sorted order: set iteration order for strings is not
        # stable across interpreter runs, and this dict is serialized into
        # the prompt payload, so the key order should be reproducible.
        for prim in sorted(enabled):
            if prim in CORE_PRIMITIVES:
                entry = CORE_PRIMITIVES[prim].copy()
                if prim in weights:
                    entry["weight"] = weights[prim]
                result[prim] = entry
            elif prim in META_PRIMITIVES:
                # Meta primitives - minimal entry
                result[prim] = {"domain": "M", "name": prim.replace("_", " ").title(), "meta": True}
        return result

    def extract_brief_signals(self, brief: dict | None) -> dict[str, Any]:
        """
        Extract relevant signals from sector brief for prompt.

        Keeps it minimal to avoid bloating context. Returns {} when the
        brief is missing or empty; keys absent from the brief map to None.
        """
        if not brief:
            return {}

        return {
            "sector": brief.get("sector_code"),
            "what_customers_judge": brief.get("what_customers_judge"),
            "critical_pain_points": brief.get("critical_pain_points"),
            "industry_terminology": brief.get("industry_terminology"),
        }

    async def resolve(
        self,
        business_id: str,
        pool: asyncpg.Pool,
        mode: str | None = None,
    ) -> dict[str, Any] | None:
        """
        Resolve full classification payload for a business.

        Args:
            business_id: Business identifier
            pool: Database connection pool
            mode: Optional service mode (e.g., "dine_in", "delivery");
                defaults to "in_person" in the payload when not given

        Returns:
            Classification payload or None if business not mapped
        """
        # Get business mapping
        mapping = await self.get_business_mapping(pool, business_id)
        if not mapping:
            logger.warning(f"Business not mapped: {business_id}")
            return None

        sector_code = mapping["sector_code"]
        gbp_path = mapping["gbp_path"]

        # Load L1 config; fall back to every core primitive, unweighted
        l1_config = self._load_l1_config(sector_code)
        if not l1_config:
            logger.warning(f"No L1 config for {sector_code}, using defaults")
            l1_config = {"enabled": list(CORE_PRIMITIVES.keys()), "weights": {}}

        # Load sector brief (may be None)
        brief = self._load_sector_brief(sector_code)

        # Resolve enabled set and weights
        enabled = self.resolve_enabled_set(l1_config)
        weights = self.resolve_weights(l1_config)

        # Build primitives for prompt
        primitives = self.build_primitives_for_prompt(enabled, weights)

        # Extract brief signals
        brief_signals = self.extract_brief_signals(brief)

        # Build payload
        payload = {
            "business_id": business_id,
            "gbp_path": gbp_path,
            "sector_code": sector_code,
            "config_version": l1_config.get("config_version", "1.0"),
            "modes": [mode] if mode else ["in_person"],
            "default_mode": mode or "in_person",
            "enabled_primitives": sorted(enabled),
            "disabled_primitives": sorted(l1_config.get("disabled", [])),
            "weights": weights,
            "brief": brief_signals,
            "primitives": primitives,
        }

        logger.info(
            f"Resolved config for {business_id}: "
            f"sector={sector_code}, enabled={len(enabled)}, weights={len(weights)}"
        )

        return payload
|
||||
|
||||
|
||||
# Convenience function
|
||||
async def resolve_business_config(
    business_id: str,
    pool: asyncpg.Pool,
    mode: str | None = None,
) -> dict[str, Any] | None:
    """One-shot helper: resolve the classification config for a business.

    A new ConfigResolver is created for every call, so its per-instance file
    caches are not shared between calls.

    Args:
        business_id: Business identifier
        pool: Database connection pool
        mode: Optional service mode forwarded to ConfigResolver.resolve

    Returns:
        Whatever ConfigResolver.resolve returns: the payload dict, or None
        when the business is not mapped
    """
    return await ConfigResolver().resolve(business_id, pool, mode)
|
||||
@@ -0,0 +1,571 @@
|
||||
"""
|
||||
LLM prompts for generating sparse primitive config deltas for GBP hierarchy nodes.
|
||||
|
||||
These prompts are used to populate L1 (sector) and L2 (category) nodes in the
|
||||
GBP category tree with business-specific primitive configurations.
|
||||
|
||||
The output is a sparse delta that only includes primitives that need overrides
|
||||
for that specific business type. Configuration inheritance handles the rest.
|
||||
"""
|
||||
|
||||
# =============================================================================
|
||||
# SYSTEM PROMPT
|
||||
# =============================================================================
|
||||
|
||||
SYSTEM_PROMPT_GBP_PRIMITIVE_CONFIG = """You are a customer experience taxonomy configuration specialist. Your task is to generate sparse primitive configuration deltas for Google Business Profile (GBP) category nodes.
|
||||
|
||||
## YOUR ROLE
|
||||
|
||||
You configure how the Universal Review Taxonomy (URT) primitives should be weighted, labeled, and detected for specific business types. Each primitive represents a distinct dimension of customer experience that appears in reviews.
|
||||
|
||||
## THE 36 PRIMITIVES (Grouped by Domain)
|
||||
|
||||
### OFFERING (O) - What the business provides
|
||||
- WORKS: Does the product/service function correctly?
|
||||
- PERFORMANCE: How well does it perform?
|
||||
- DURABILITY: How long does it last?
|
||||
- RELIABILITY: Is it consistent over time?
|
||||
- OUTCOME: Did the customer achieve their goal?
|
||||
- MATERIALS: Quality of ingredients/components
|
||||
- CRAFTSMANSHIP: Skill of construction/execution
|
||||
- PRESENTATION: Visual/aesthetic quality
|
||||
- ATTENTION_TO_DETAIL: Finishing touches
|
||||
- CONDITION: State at delivery
|
||||
- COMPLETENESS: All components present?
|
||||
- FEATURES: Promised features available?
|
||||
- SCOPE: Full scope delivered?
|
||||
- DOCUMENTATION: Supporting materials
|
||||
- SPEC_MATCH: Matches what was ordered?
|
||||
- PERSONALIZATION: Adapted to individual
|
||||
- FLEXIBILITY: Can be modified?
|
||||
- APPROPRIATENESS: Right solution for need?
|
||||
|
||||
### PEOPLE (P) - Staff interactions
|
||||
- WARMTH: Friendly manner
|
||||
- RESPECT: Dignity and courtesy
|
||||
- EMPATHY: Understanding feelings
|
||||
- PATIENCE: Calm and tolerant
|
||||
- ENTHUSIASM: Energy and engagement
|
||||
- KNOWLEDGE: Expertise level
|
||||
- SKILL: Technical ability
|
||||
- PROBLEM_SOLVING: Finding solutions
|
||||
- PROFESSIONALISM: Conduct standards
|
||||
- EXPERIENCE: Depth of expertise
|
||||
- ATTENTIVENESS: Being present
|
||||
- INITIATIVE: Proactive help
|
||||
- AVAILABILITY: Present when needed
|
||||
- FOLLOW_THROUGH: Completing promises
|
||||
- URGENCY: Appropriate prioritization
|
||||
- CLARITY: Clear communication
|
||||
- LISTENING: Understanding needs
|
||||
- PROACTIVE_UPDATES: Keeping informed
|
||||
- ACCURACY: Correct information
|
||||
- TONE: Communication style
|
||||
|
||||
### JOURNEY (J) - Process and timing
|
||||
- WAIT_TIME: Time spent waiting
|
||||
- SPEED: How fast things happen
|
||||
- RESPONSE_TIME: Time to respond
|
||||
- PUNCTUALITY: On-time delivery
|
||||
- PACING: Appropriate speed
|
||||
- SIMPLICITY: Easy process
|
||||
- NAVIGATION: Finding things
|
||||
- PAPERWORK: Documentation burden
|
||||
- HANDOFFS: Transitions
|
||||
- SELF_SERVICE: Autonomy options
|
||||
- CONSISTENCY: Same each time
|
||||
- PROCESS_ACCURACY: Correct execution
|
||||
- UPTIME: System availability
|
||||
- PREDICTABILITY: Expectations met
|
||||
- ERROR_RATE: Frequency of mistakes
|
||||
- ACKNOWLEDGMENT: Recognizing issues
|
||||
- RESOLUTION_PROCESS: How problems handled
|
||||
- RESOLUTION_SPEED: Time to fix
|
||||
- RESOLUTION_QUALITY: Adequacy of fix
|
||||
- PREVENTION: Avoiding recurrence
|
||||
|
||||
### ENVIRONMENT (E) - Physical and digital space
|
||||
- CLEANLINESS: Hygiene and tidiness
|
||||
- MAINTENANCE: Condition and upkeep
|
||||
- LAYOUT: Functional arrangement
|
||||
- EQUIPMENT: Tools and amenities
|
||||
- SIGNAGE: Navigation aids
|
||||
- INTERFACE_DESIGN: Digital UX
|
||||
- DIGITAL_FUNCTIONALITY: Features working
|
||||
- DIGITAL_PERFORMANCE: Speed/responsiveness
|
||||
- DIGITAL_NAVIGATION: Finding things online
|
||||
- MOBILE_EXPERIENCE: Smartphone optimization
|
||||
- ATMOSPHERE: Overall mood
|
||||
- NOISE: Sound environment
|
||||
- TEMPERATURE: Climate comfort
|
||||
- CROWDING: Density/space
|
||||
- AESTHETICS: Visual appeal
|
||||
- PHYSICAL_SAFETY: Protection from harm
|
||||
- HEALTH_HYGIENE: Sanitation standards
|
||||
- SECURITY: Protection of person/property
|
||||
- COMFORT: Physical ease
|
||||
- EMERGENCY_READINESS: Preparedness
|
||||
|
||||
### ACCESS (A) - Availability and accessibility
|
||||
- HOURS: Operating hours
|
||||
- BOOKING: Appointment access
|
||||
- INVENTORY: Product availability
|
||||
- STAFFING: Personnel available
|
||||
- GEOGRAPHIC_REACH: Service area
|
||||
- PHYSICAL_ACCESSIBILITY: Mobility access
|
||||
- VISUAL_ACCESSIBILITY: Sight accommodations
|
||||
- HEARING_ACCESSIBILITY: Audio accommodations
|
||||
- COGNITIVE_ACCESSIBILITY: Mental accommodations
|
||||
- DIGITAL_ACCESSIBILITY: Assistive tech support
|
||||
- LANGUAGE_SUPPORT: Multiple languages
|
||||
- CULTURAL_SENSITIVITY: Background respect
|
||||
- DIETARY_MEDICAL: Restriction accommodations
|
||||
- FAMILY_FRIENDLY: Children accommodation
|
||||
- EQUAL_TREATMENT: Non-discrimination
|
||||
- LOCATION: Convenience
|
||||
- PARKING: Vehicle accommodation
|
||||
- TRANSIT: Public transport
|
||||
- PAYMENT_OPTIONS: How you can pay
|
||||
- CONTACT_OPTIONS: Ways to reach
|
||||
|
||||
### VALUE (V) - Cost and worth
|
||||
- ABSOLUTE_PRICE: The actual cost
|
||||
- PRICE_VS_EXPECTATION: Compared to anticipated
|
||||
- PRICE_VS_MARKET: Compared to competitors
|
||||
- HIDDEN_COSTS: Unexpected charges
|
||||
- PAYMENT_FLEXIBILITY: Terms and options
|
||||
- PRICING_CLARITY: Understanding costs
|
||||
- FEE_DISCLOSURE: Upfront about charges
|
||||
- ADVERTISING_ACCURACY: Marketing matches reality
|
||||
- TERMS_FAIRNESS: Policy reasonableness
|
||||
- HONEST_REPRESENTATION: Truthful claims
|
||||
- TIME_INVESTMENT: Hours required
|
||||
- MENTAL_EFFORT: Cognitive load
|
||||
- PHYSICAL_EFFORT: Bodily exertion
|
||||
- HASSLE_FACTOR: Cumulative frustration
|
||||
- OPPORTUNITY_COST: What else could be done
|
||||
- OVERALL_VALUE: Total assessment
|
||||
- QUALITY_PRICE_RATIO: What you get for what you pay
|
||||
- SATISFACTION: Contentment with exchange
|
||||
- RECOMMENDATION: Would suggest to others
|
||||
- RETURN_INTENT: Would come back
|
||||
|
||||
### RELATIONSHIP (R) - Trust and loyalty
|
||||
- TRUTHFULNESS: Accurate representations
|
||||
- PROMISE_KEEPING: Honoring commitments
|
||||
- TRANSPARENCY: Openness about practices
|
||||
- ETHICS: Moral business conduct
|
||||
- FAIR_DEALING: Equitable treatment
|
||||
- TRACK_RECORD: Historical performance
|
||||
- DEPENDABILITY: Same over time
|
||||
- STABILITY: Organizational continuity
|
||||
- TRUSTWORTHINESS: Warranting confidence
|
||||
- GUARANTEE_HONOR: Standing behind product
|
||||
- ERROR_ACKNOWLEDGMENT: Admitting failures
|
||||
- APOLOGY: Expression of regret
|
||||
- COMPENSATION: Making amends
|
||||
- IMPROVEMENT: Actions to prevent recurrence
|
||||
- OWNERSHIP: Taking responsibility
|
||||
- RECOGNITION: Acknowledging customers
|
||||
- REWARDS: Loyalty benefits
|
||||
- RELATIONSHIP_BUILDING: Investment in connection
|
||||
- ONGOING_COMMUNICATION: Contact quality
|
||||
- COMMUNITY: Belonging and connection
|
||||
|
||||
## META PRIMITIVES (DO NOT INCLUDE IN OUTPUT)
|
||||
|
||||
These are always globally active and should NEVER appear in your output:
|
||||
- HONESTY, ETHICS, PROMISES, ACKNOWLEDGMENT, RESPONSE_QUALITY
|
||||
- RECOVERY, RETURN_INTENT, RECOMMEND, RECOGNITION, UNMAPPED
|
||||
|
||||
## OUTPUT RULES
|
||||
|
||||
1. **SPARSE OUTPUT ONLY**: Only include primitives that DIFFER from parent configuration
|
||||
- If parent has WAIT_TIME at "normal" priority and this business needs "critical", include it
|
||||
- If parent already has the right configuration, do NOT include it
|
||||
|
||||
2. **PRIORITY LEVELS** (use exact strings):
|
||||
- "critical": Essential for this business (top 3-5 per business)
|
||||
- "high": Very important (next 5-8)
|
||||
- "normal": Standard relevance (default)
|
||||
- "low": Less common for this business
|
||||
- "very_low": Rarely relevant (prefer over active: false)
|
||||
|
||||
3. **WHEN TO SET active: false**:
|
||||
- Only when a primitive is truly IRRELEVANT (not just uncommon)
|
||||
- Example: PARKING for an online-only business
|
||||
- Prefer priority: "very_low" unless truly N/A
|
||||
|
||||
4. **SIGNALS**: 5-15 realistic customer phrases per side
|
||||
- Use actual language customers use in reviews
|
||||
- Include colloquial expressions, not formal descriptions
|
||||
- Positive and negative should be opposites of the same dimension
|
||||
- Use __replace__: true ONLY if parent signals are wrong (rare)
|
||||
|
||||
5. **MODES**: Only include if this business has distinct service modes
|
||||
- Examples: "dine_in" vs "delivery" for restaurants
|
||||
- "in_store" vs "online" for retailers
|
||||
- Most businesses: omit modes entirely
|
||||
|
||||
6. **business_context**: Include for L1 sectors and leaf categories
|
||||
- name: Human-friendly display name
|
||||
- description: 1-2 sentence description
|
||||
- modes: Array of applicable modes (if any)
|
||||
- default_mode: Primary mode (if modes exist)
|
||||
|
||||
## VALIDATION RULES
|
||||
|
||||
Your output MUST:
|
||||
- Be valid JSON only (no markdown, no explanations)
|
||||
- Use ONLY primitive codes from the dictionary provided
|
||||
- NOT create new primitive codes
|
||||
- NOT include meta primitives
|
||||
- NOT include playbooks, solutions, or action recommendations
|
||||
- Have at least one primitive_config entry
|
||||
- Use exact priority strings: "critical", "high", "normal", "low", "very_low"
|
||||
|
||||
## OUTPUT SCHEMA
|
||||
|
||||
```json
|
||||
{
|
||||
"business_context": {
|
||||
"name": "Human-Friendly Name",
|
||||
"description": "What this business type does and what matters to customers",
|
||||
"modes": ["mode1", "mode2"],
|
||||
"default_mode": "mode1"
|
||||
},
|
||||
"primitive_configs": {
|
||||
"PRIMITIVE_CODE": {
|
||||
"active": true,
|
||||
"priority": "critical|high|normal|low|very_low",
|
||||
"label": "Business-specific label for this primitive",
|
||||
"description": "What this primitive means for this specific business",
|
||||
"signals": {
|
||||
"positive": ["signal 1", "signal 2", "..."],
|
||||
"negative": ["signal 1", "signal 2", "..."],
|
||||
"__replace__": false
|
||||
},
|
||||
"modes": {
|
||||
"mode_name": {
|
||||
"applicable": true,
|
||||
"label": "Mode-specific label"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Return ONLY the JSON object. No preamble, no explanation, no markdown."""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# USER PROMPT TEMPLATE
|
||||
# =============================================================================
|
||||
|
||||
USER_PROMPT_TEMPLATE = """Generate a sparse primitive configuration delta for this GBP node.
|
||||
|
||||
## NODE INFORMATION
|
||||
|
||||
**GBP Path**: {gbp_path}
|
||||
**Node Name**: {node_name}
|
||||
**Node Description**: {node_description}
|
||||
**Node Level**: {node_level} (L1=Sector, L2=Category, L3=Subcategory, L4=Leaf)
|
||||
|
||||
## PARENT RESOLVED CONFIGURATION
|
||||
|
||||
This is the already-resolved configuration from all ancestors. Only include primitives that need to CHANGE from this:
|
||||
|
||||
```json
|
||||
{parent_resolved_config}
|
||||
```
|
||||
|
||||
## PRIMITIVE DICTIONARY
|
||||
|
||||
Reference for all available primitives with their base definitions:
|
||||
|
||||
```json
|
||||
{primitive_dictionary}
|
||||
```
|
||||
|
||||
## YOUR TASK
|
||||
|
||||
Generate a sparse delta configuration for "{node_name}" that:
|
||||
|
||||
1. Identifies the 5-10 MOST CRITICAL primitives for this business type
|
||||
2. Adjusts priority levels to reflect what customers actually care about
|
||||
3. Provides business-specific labels and signals where helpful
|
||||
4. Only includes primitives that DIFFER from parent_resolved_config
|
||||
5. Uses realistic customer language for signals
|
||||
|
||||
Think about:
|
||||
- What do customers of {node_name} businesses typically praise or complain about?
|
||||
- Which URT primitives are most actionable for this business type?
|
||||
- What unique aspects distinguish this business type from others?
|
||||
|
||||
Return ONLY valid JSON matching the output schema."""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def build_user_prompt(
    gbp_path: str,
    node_name: str,
    node_description: str,
    node_level: int,
    parent_resolved_config: dict,
    primitive_dictionary: dict,
) -> str:
    """
    Build the user prompt with actual values substituted.

    Args:
        gbp_path: The ltree path (e.g., "Food_Beverage" or "Food_Beverage.Restaurants")
        node_name: Human-readable name (e.g., "Food & Beverage" or "Restaurants")
        node_description: Brief description of this business category
        node_level: 1-4 indicating hierarchy depth; other values are
            rendered as "L<node_level>"
        parent_resolved_config: Already-resolved config from ancestors (or {} for L1)
        primitive_dictionary: All primitives with definitions and base signals

    Returns:
        Formatted user prompt string
    """
    import json

    level_labels = {
        1: "L1=Sector",
        2: "L2=Category",
        3: "L3=Subcategory",
        4: "L4=Leaf",
    }

    def _as_prompt_json(value: dict) -> str:
        # ensure_ascii=False keeps non-ASCII labels and signals readable in
        # the prompt instead of emitting \uXXXX escape sequences.
        return json.dumps(value, indent=2, ensure_ascii=False)

    return USER_PROMPT_TEMPLATE.format(
        gbp_path=gbp_path,
        node_name=node_name,
        node_description=node_description,
        node_level=level_labels.get(node_level, f"L{node_level}"),
        parent_resolved_config=_as_prompt_json(parent_resolved_config),
        primitive_dictionary=_as_prompt_json(primitive_dictionary),
    )
|
||||
|
||||
|
||||
def validate_primitive_config_output(output: dict, primitive_codes: set[str]) -> list[str]:
    """
    Validate the LLM output against schema and rules.

    Never raises on malformed values: every problem is reported as a message
    in the returned list.

    Args:
        output: Parsed JSON output from LLM
        primitive_codes: Set of valid primitive codes

    Returns:
        List of validation errors (empty if valid)
    """
    errors: list[str] = []

    # Meta primitives that should never appear
    META_PRIMITIVES = {
        "HONESTY", "ETHICS", "PROMISES", "ACKNOWLEDGMENT", "RESPONSE_QUALITY",
        "RECOVERY", "RETURN_INTENT", "RECOMMEND", "RECOGNITION", "UNMAPPED"
    }

    VALID_PRIORITIES = {"critical", "high", "normal", "low", "very_low"}

    # Check required structure
    if not isinstance(output, dict):
        errors.append("Output must be a JSON object")
        return errors

    primitive_configs = output.get("primitive_configs", {})
    if not primitive_configs:
        errors.append("primitive_configs is required and must not be empty")

    if not isinstance(primitive_configs, dict):
        errors.append("primitive_configs must be an object")
        return errors

    for code, config in primitive_configs.items():
        # Meta check comes first: if primitive_codes does not contain the
        # meta codes, checking "unknown" first would mask this more specific
        # error behind "Unknown primitive code".
        if code in META_PRIMITIVES:
            errors.append(f"Meta primitive should not appear: {code}")
        elif code not in primitive_codes:
            errors.append(f"Unknown primitive code: {code}")
            continue

        if not isinstance(config, dict):
            errors.append(f"{code}: config must be an object")
            continue

        # Check priority if present. The isinstance guard runs before the
        # set lookup so an unhashable value (e.g. a list) is reported rather
        # than raising TypeError; "is not None" also rejects "" and 0.
        priority = config.get("priority")
        if priority is not None and (
            not isinstance(priority, str) or priority not in VALID_PRIORITIES
        ):
            errors.append(
                f"{code}: invalid priority {priority!r}, "
                f"must be one of {sorted(VALID_PRIORITIES)}"
            )

        # Check signals structure if present
        signals = config.get("signals")
        if signals is not None:
            if not isinstance(signals, dict):
                errors.append(f"{code}: signals must be an object")
            else:
                pos = signals.get("positive")
                neg = signals.get("negative")
                if pos is not None and not isinstance(pos, list):
                    errors.append(f"{code}: signals.positive must be an array")
                if neg is not None and not isinstance(neg, list):
                    errors.append(f"{code}: signals.negative must be an array")

    # Check business_context if present
    business_context = output.get("business_context")
    if business_context is not None:
        if not isinstance(business_context, dict):
            errors.append("business_context must be an object")
        else:
            modes = business_context.get("modes")
            if modes is not None and not isinstance(modes, list):
                errors.append("business_context.modes must be an array")

    return errors
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# EXAMPLE PRIMITIVE DICTIONARY (subset for reference)
|
||||
# =============================================================================
|
||||
|
||||
EXAMPLE_PRIMITIVE_DICTIONARY = {
|
||||
"WAIT_TIME": {
|
||||
"code": "WAIT_TIME",
|
||||
"domain": "J",
|
||||
"category": "Timing",
|
||||
"name": "Wait Time",
|
||||
"definition": "Time spent waiting for service",
|
||||
"base_signals": {
|
||||
"positive": ["no wait", "seated immediately", "right away", "quick turnaround"],
|
||||
"negative": ["long wait", "waited forever", "45 minutes", "hours to be seen"]
|
||||
}
|
||||
},
|
||||
"WARMTH": {
|
||||
"code": "WARMTH",
|
||||
"domain": "P",
|
||||
"category": "Attitude",
|
||||
"name": "Warmth/Friendliness",
|
||||
"definition": "Approachability and pleasantness of staff",
|
||||
"base_signals": {
|
||||
"positive": ["so friendly", "welcoming", "made us feel at home", "warm greeting"],
|
||||
"negative": ["cold", "unfriendly", "rude", "didn't acknowledge us"]
|
||||
}
|
||||
},
|
||||
"CRAFTSMANSHIP": {
|
||||
"code": "CRAFTSMANSHIP",
|
||||
"domain": "O",
|
||||
"category": "Quality",
|
||||
"name": "Craftsmanship",
|
||||
"definition": "Skill of construction or execution",
|
||||
"base_signals": {
|
||||
"positive": ["beautifully made", "expert work", "attention to detail", "quality craftsmanship"],
|
||||
"negative": ["sloppy work", "poorly made", "amateur job", "uneven"]
|
||||
}
|
||||
},
|
||||
# ... more primitives would be included in full dictionary
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# EXAMPLE OUTPUT (for reference and testing)
|
||||
# =============================================================================
|
||||
|
||||
EXAMPLE_OUTPUT_RESTAURANT = {
|
||||
"business_context": {
|
||||
"name": "Restaurants",
|
||||
"description": "Food service establishments where customers dine on-premises or order for delivery/takeout",
|
||||
"modes": ["dine_in", "takeout", "delivery"],
|
||||
"default_mode": "dine_in"
|
||||
},
|
||||
"primitive_configs": {
|
||||
"WAIT_TIME": {
|
||||
"priority": "critical",
|
||||
"label": "Wait for Table/Food",
|
||||
"description": "Time waiting to be seated and for food to arrive",
|
||||
"signals": {
|
||||
"positive": [
|
||||
"seated immediately",
|
||||
"food came out fast",
|
||||
"no wait for a table",
|
||||
"quick service",
|
||||
"didn't have to wait long"
|
||||
],
|
||||
"negative": [
|
||||
"waited 45 minutes for a table",
|
||||
"food took forever",
|
||||
"an hour for appetizers",
|
||||
"still waiting for our entrees",
|
||||
"had to flag down the waiter"
|
||||
]
|
||||
},
|
||||
"modes": {
|
||||
"dine_in": {
|
||||
"applicable": True,
|
||||
"label": "Wait for Table & Food"
|
||||
},
|
||||
"takeout": {
|
||||
"applicable": True,
|
||||
"label": "Order Ready Time"
|
||||
},
|
||||
"delivery": {
|
||||
"applicable": True,
|
||||
"label": "Delivery Time"
|
||||
}
|
||||
}
|
||||
},
|
||||
"CRAFTSMANSHIP": {
|
||||
"priority": "critical",
|
||||
"label": "Food Preparation Quality",
|
||||
"description": "Skill and care in cooking and food preparation",
|
||||
"signals": {
|
||||
"positive": [
|
||||
"cooked to perfection",
|
||||
"beautifully plated",
|
||||
"chef knows what they're doing",
|
||||
"perfectly seasoned",
|
||||
"amazing flavor"
|
||||
],
|
||||
"negative": [
|
||||
"overcooked",
|
||||
"bland and tasteless",
|
||||
"clearly microwaved",
|
||||
"burnt edges",
|
||||
"undercooked chicken"
|
||||
]
|
||||
}
|
||||
},
|
||||
"WARMTH": {
|
||||
"priority": "high",
|
||||
"label": "Server Friendliness",
|
||||
"description": "Warmth and hospitality from hosts, servers, and staff"
|
||||
},
|
||||
"CLEANLINESS": {
|
||||
"priority": "high",
|
||||
"label": "Restaurant Cleanliness",
|
||||
"description": "Hygiene of dining area, bathrooms, and visible kitchen areas"
|
||||
},
|
||||
"ATMOSPHERE": {
|
||||
"priority": "high",
|
||||
"label": "Dining Ambiance",
|
||||
"description": "Overall mood, decor, lighting, and vibe of the restaurant"
|
||||
},
|
||||
"PARKING": {
|
||||
"priority": "normal",
|
||||
"modes": {
|
||||
"dine_in": {"applicable": True},
|
||||
"takeout": {"applicable": True},
|
||||
"delivery": {"applicable": False}
|
||||
}
|
||||
},
|
||||
"DIGITAL_ACCESSIBILITY": {
|
||||
"priority": "very_low",
|
||||
"description": "Screen reader support and digital accessibility - rarely mentioned in restaurant reviews"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ Provides a unified interface for classification requests with:
|
||||
- Structured output (JSON mode)
|
||||
- Retry handling
|
||||
- Cost tracking
|
||||
- Adaptive batch sizing based on context window
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -14,7 +15,8 @@ import json
|
||||
import logging
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, TypedDict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from reviewiq_pipeline.config import Config
|
||||
@@ -22,6 +24,240 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Exceptions
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class PartialBatchResult(Exception):
    """
    Signal that a batch response could only be parsed in part.

    Raised when batch JSON parsing fails but some review objects were still
    recovered. The recovered objects and the indices that are still missing
    travel with the exception, so a caller can reprocess only the missing
    reviews instead of the whole batch.

    Attributes:
        partial_results: Review objects that were recovered.
        missing_indices: Batch positions for which nothing was recovered.
        metadata: Extra information supplied by the raiser ({} when omitted).
    """

    def __init__(
        self,
        message: str,
        partial_results: list[dict[str, Any]],
        missing_indices: list[int],
        metadata: dict[str, Any] | None = None,
    ):
        super().__init__(message)
        # A falsy metadata value (None or an empty dict) becomes a fresh dict.
        self.metadata = metadata if metadata else {}
        self.missing_indices = missing_indices
        self.partial_results = partial_results
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Model Context Windows and Token Estimation
|
||||
# =============================================================================
|
||||
|
||||
# Context window size, in tokens, keyed by model identifier.
MODEL_CONTEXT_WINDOWS = {
    # OpenAI models
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "gpt-4-turbo": 128_000,
    "gpt-4": 8_192,
    "gpt-3.5-turbo": 16_385,
    # Anthropic models (every listed model has the same window)
    **dict.fromkeys(
        (
            "claude-3-opus-20240229",
            "claude-3-sonnet-20240229",
            "claude-3-haiku-20240307",
            "claude-3-5-sonnet-20241022",
            "claude-sonnet-4-20250514",
        ),
        200_000,
    ),
}
|
||||
|
||||
# Average characters per token (rough estimate, varies by language)
|
||||
CHARS_PER_TOKEN = 4
|
||||
|
||||
# Output tokens per review (classification response)
|
||||
OUTPUT_TOKENS_PER_REVIEW = 450 # Conservative estimate
|
||||
|
||||
|
||||
@dataclass
class BatchSizeCalculation:
    """Outcome of one batch-size computation, with the inputs that produced it."""

    # Number of reviews recommended for the next batch.
    batch_size: int
    # Token count of the system prompt that was budgeted for.
    system_prompt_tokens: int
    # Average input tokens per review used in the computation.
    avg_tokens_per_review: int
    # Output tokens set aside for the whole batch.
    output_tokens_reserved: int
    # Context window of the model, in tokens.
    context_window: int
    # Fraction of the context window the computation aimed to use.
    utilization_target: float
    # Human-readable explanation of how batch_size was chosen.
    reasoning: str
|
||||
|
||||
|
||||
@dataclass
class TokenStats:
    """Accumulates per-review token observations for later averaging."""

    total_reviews: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # Starts high so the first observation always becomes the minimum.
    min_review_tokens: int = 999999
    max_review_tokens: int = 0

    def update(self, review_tokens: int, output_tokens: int):
        """Fold one review's input and output token counts into the totals."""
        self.total_reviews += 1
        self.total_input_tokens += review_tokens
        self.total_output_tokens += output_tokens
        if review_tokens < self.min_review_tokens:
            self.min_review_tokens = review_tokens
        if review_tokens > self.max_review_tokens:
            self.max_review_tokens = review_tokens

    @property
    def avg_review_tokens(self) -> int:
        """Mean input tokens per review (floored); 150 before any observation."""
        seen = self.total_reviews
        return self.total_input_tokens // seen if seen else 150

    @property
    def avg_output_tokens(self) -> int:
        """Mean output tokens per review (floored); the module default before any observation."""
        seen = self.total_reviews
        return self.total_output_tokens // seen if seen else OUTPUT_TOKENS_PER_REVIEW
|
||||
|
||||
|
||||
class BatchSizer:
    """
    Calculates a batch size from the context window and observed token usage.

    Estimates start from per-review text length and are blended with the
    running averages recorded through ``update_from_response``.
    """

    def __init__(
        self,
        model: str,
        system_prompt_tokens: int,
        target_utilization: float = 0.6,
    ):
        """
        Args:
            model: Model identifier used to look up the context window.
            system_prompt_tokens: Tokens consumed by the system prompt.
            target_utilization: Fraction of the context window to aim for.
        """
        self.model = model
        self.system_prompt_tokens = system_prompt_tokens
        self.target_utilization = target_utilization
        # Models missing from the table fall back to a 128k-token window.
        self.context_window = MODEL_CONTEXT_WINDOWS.get(model, 128_000)
        self.stats = TokenStats()

    def estimate_tokens(self, text: str) -> int:
        """Estimate the token count of ``text`` from its length (never below 1)."""
        # Length-based heuristic; a real tokenizer would be more accurate
        # but slower.
        return max(1, len(text) // CHARS_PER_TOKEN)

    def calculate_batch_size(
        self,
        reviews: list[dict],
        fixed_size: int | None = None,
    ) -> BatchSizeCalculation:
        """
        Calculate the batch size for a set of reviews.

        Args:
            reviews: List of reviews with 'text' field
            fixed_size: If set (and positive), use this size and skip the
                calculation; it is still clamped to ``len(reviews)``

        Returns:
            BatchSizeCalculation with recommended size and reasoning
        """
        if fixed_size and fixed_size > 0:
            batch_size = min(fixed_size, len(reviews))
            return BatchSizeCalculation(
                batch_size=batch_size,
                system_prompt_tokens=self.system_prompt_tokens,
                avg_tokens_per_review=self.stats.avg_review_tokens,
                # Reserve output for the reviews actually sent, not for the
                # requested size, so the two fields stay consistent.
                output_tokens_reserved=batch_size * self.stats.avg_output_tokens,
                context_window=self.context_window,
                utilization_target=self.target_utilization,
                reasoning=f"Fixed batch size: {fixed_size}",
            )

        # Estimate tokens for these reviews; a missing or None text counts
        # as empty instead of raising.
        review_tokens = [self.estimate_tokens(r.get("text") or "") for r in reviews]
        avg_review_tokens = sum(review_tokens) // len(review_tokens) if review_tokens else 150
        max_review_tokens = max(review_tokens) if review_tokens else 300

        # Blend the learned average with the current batch once history exists.
        effective_avg = (
            (self.stats.avg_review_tokens + avg_review_tokens) // 2
            if self.stats.total_reviews > 0
            else avg_review_tokens
        )

        # Use learned output average
        output_per_review = self.stats.avg_output_tokens

        # Calculate available space
        available = int(self.context_window * self.target_utilization)
        available -= self.system_prompt_tokens
        available -= 1000  # Safety buffer for JSON overhead

        # Add 30% of the gap between the average and the longest review as
        # headroom. The gap is clamped at zero: when the learned average is
        # above this batch's maximum the margin must not shrink the estimate.
        headroom = int(max(0, max_review_tokens - effective_avg) * 0.3)
        variance_adjusted = effective_avg + headroom

        # Each review needs its input tokens plus its output tokens; keep the
        # divisor at 1 or more so degenerate statistics cannot divide by zero.
        tokens_per_review_safe = max(1, variance_adjusted + output_per_review)

        batch_size = max(1, available // tokens_per_review_safe)

        # Cap at reasonable limits
        batch_size = min(batch_size, 100, len(reviews))

        reasoning = (
            f"Context: {self.context_window:,} | "
            f"System: {self.system_prompt_tokens:,} | "
            f"Avg review: {effective_avg} (variance-adjusted: {variance_adjusted}) | "
            f"Output/review: {output_per_review} | "
            f"Target utilization: {self.target_utilization:.0%} | "
            f"→ Batch size: {batch_size}"
        )

        return BatchSizeCalculation(
            batch_size=batch_size,
            system_prompt_tokens=self.system_prompt_tokens,
            avg_tokens_per_review=effective_avg,
            output_tokens_reserved=batch_size * output_per_review,
            context_window=self.context_window,
            utilization_target=self.target_utilization,
            reasoning=reasoning,
        )

    def update_from_response(self, batch_size: int, input_tokens: int, output_tokens: int):
        """
        Update statistics from actual LLM response.

        Call this after each batch to improve future estimates. Does nothing
        when ``batch_size`` is not positive.
        """
        if batch_size > 0:
            # NOTE(review): if callers pass the provider's total prompt tokens,
            # the system prompt is averaged into every review here and also
            # subtracted separately in calculate_batch_size - confirm.
            avg_input = input_tokens // batch_size
            avg_output = output_tokens // batch_size

            # Record one observation per review, each with the batch average.
            for _ in range(batch_size):
                self.stats.update(avg_input, avg_output)

            logger.debug(
                "BatchSizer updated: %s reviews, avg input=%s, avg output=%s, "
                "running avg input=%s, running avg output=%s",
                batch_size,
                avg_input,
                avg_output,
                self.stats.avg_review_tokens,
                self.stats.avg_output_tokens,
            )

    def get_stats_summary(self) -> dict:
        """Get current statistics summary (min/max report 0 until data exists)."""
        has_data = self.stats.total_reviews > 0
        return {
            "total_reviews_processed": self.stats.total_reviews,
            "avg_input_tokens": self.stats.avg_review_tokens,
            "avg_output_tokens": self.stats.avg_output_tokens,
            "min_review_tokens": self.stats.min_review_tokens if has_data else 0,
            "max_review_tokens": self.stats.max_review_tokens if has_data else 0,
            "model": self.model,
            "context_window": self.context_window,
        }
|
||||
|
||||
# System prompt for URT classification
|
||||
SYSTEM_PROMPT = """You are a review classification system using URT (Universal Review Taxonomy) v5.1.
|
||||
|
||||
@@ -329,6 +565,18 @@ Return valid JSON matching this schema. No markdown, no explanations.
|
||||
}"""
|
||||
|
||||
|
||||
class BatchReviewInput(TypedDict):
    """One review as passed to batch classification."""

    # Identifier of the review.
    review_id: str
    # Review text to classify.
    text: str
    # Rating attached to the review.
    rating: int
|
||||
|
||||
|
||||
class BatchClassificationResponse(TypedDict):
    """Shape of the JSON document returned for a batch classification."""

    # One entry per review; each contains review_index, spans, review_summary.
    reviews: list[dict[str, Any]]
|
||||
|
||||
|
||||
class LLMClientBase(ABC):
|
||||
"""Abstract base class for LLM clients."""
|
||||
|
||||
@@ -337,18 +585,24 @@ class LLMClientBase(ABC):
|
||||
self.total_tokens_used = 0
|
||||
self.total_cost_usd = 0.0
|
||||
self._custom_prompt: str | None = None
|
||||
self._custom_prompt_batch: str | None = None
|
||||
self._cached_tokens: int = 0 # Track cached token usage
|
||||
|
||||
def set_prompt(self, prompt: str, batch_prompt: str | None = None) -> None:
    """
    Install custom system prompts (e.g., built dynamically from database).

    Args:
        prompt: System prompt used for single review classification.
        batch_prompt: System prompt used for batch classification. When it
            is omitted or empty, ``prompt`` is used for batches as well.
    """
    self._custom_prompt = prompt
    if batch_prompt:
        self._custom_prompt_batch = batch_prompt
    else:
        self._custom_prompt_batch = prompt
|
||||
|
||||
def get_prompt(self, batch_mode: bool = False) -> str:
    """
    Return the system prompt currently in effect.

    In batch mode the custom batch prompt wins when set; otherwise the custom
    single-review prompt is used, and the default SYSTEM_PROMPT is the last
    resort.
    """
    if batch_mode and self._custom_prompt_batch:
        return self._custom_prompt_batch
    if self._custom_prompt:
        return self._custom_prompt
    return SYSTEM_PROMPT
|
||||
|
||||
@abstractmethod
|
||||
@@ -358,7 +612,7 @@ class LLMClientBase(ABC):
|
||||
profile: str = "standard",
|
||||
) -> tuple[LLMClassificationResponse, dict[str, Any]]:
|
||||
"""
|
||||
Classify a review and extract spans.
|
||||
Classify a single review and extract spans.
|
||||
|
||||
Args:
|
||||
review_text: The review text to classify
|
||||
@@ -369,6 +623,24 @@ class LLMClientBase(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def classify_batch(
    self,
    reviews: list[BatchReviewInput],
    profile: str = "standard",
) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
    """
    Classify several reviews with one LLM request.

    Subclasses provide the implementation; this declaration only fixes the
    interface.

    Args:
        reviews: Reviews to classify, each with review_id, text, and rating
        profile: Classification profile (lite/core/standard/full)

    Returns:
        Tuple of (list of classification responses, aggregated metadata)
    """
|
||||
|
||||
@abstractmethod
|
||||
async def generate(
|
||||
self,
|
||||
@@ -396,16 +668,31 @@ class LLMClientBase(ABC):
|
||||
"""Close the client and cleanup resources."""
|
||||
pass
|
||||
|
||||
def _build_batch_user_prompt(self, reviews: list[BatchReviewInput]) -> str:
|
||||
"""Build user prompt for batch classification."""
|
||||
lines = [
|
||||
f"Classify these {len(reviews)} reviews. Return JSON with 'reviews' array.",
|
||||
""
|
||||
]
|
||||
|
||||
for i, review in enumerate(reviews):
|
||||
lines.append(f"---REVIEW {i} (rating={review['rating']}★)---")
|
||||
lines.append(review["text"])
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
class OpenAIClient(LLMClientBase):
|
||||
"""OpenAI LLM client implementation."""
|
||||
"""OpenAI LLM client implementation with batch support and prompt caching."""
|
||||
|
||||
# Pricing per 1M tokens (as of 2024)
|
||||
# Cached input tokens are 50% cheaper
|
||||
PRICING = {
|
||||
"gpt-4o": {"input": 5.0, "output": 15.0},
|
||||
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
|
||||
"gpt-4-turbo": {"input": 10.0, "output": 30.0},
|
||||
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
|
||||
"gpt-4o": {"input": 2.50, "cached_input": 1.25, "output": 10.0},
|
||||
"gpt-4o-mini": {"input": 0.15, "cached_input": 0.075, "output": 0.60},
|
||||
"gpt-4-turbo": {"input": 10.0, "cached_input": 5.0, "output": 30.0},
|
||||
"gpt-3.5-turbo": {"input": 0.50, "cached_input": 0.25, "output": 1.50},
|
||||
}
|
||||
|
||||
def __init__(self, config: Config):
|
||||
@@ -420,7 +707,7 @@ class OpenAIClient(LLMClientBase):
|
||||
review_text: str,
|
||||
profile: str = "standard",
|
||||
) -> tuple[LLMClassificationResponse, dict[str, Any]]:
|
||||
"""Classify using OpenAI."""
|
||||
"""Classify a single review using OpenAI."""
|
||||
start_time = time.time()
|
||||
|
||||
messages = [
|
||||
@@ -446,27 +733,154 @@ class OpenAIClient(LLMClientBase):
|
||||
|
||||
result = json.loads(content)
|
||||
|
||||
# Calculate costs
|
||||
# Calculate costs (with caching support)
|
||||
metadata = self._calculate_openai_costs(response, start_time)
|
||||
|
||||
return result, metadata
|
||||
|
||||
async def classify_batch(
    self,
    reviews: list[BatchReviewInput],
    profile: str = "standard",
) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
    """
    Classify multiple reviews in a single OpenAI call.

    The system prompt is identical across calls, which is what allows the
    provider to serve it from its prompt cache.

    Args:
        reviews: Reviews to classify, each with review_id, text, and rating.
        profile: Classification profile; not used by this method.

    Returns:
        Tuple of (one classification per input review, cost/usage metadata).
        An empty input returns ``([], {"error": "No reviews provided"})``
        without calling the API.

    Raises:
        ValueError: The API returned no content.
        PartialBatchResult: The JSON was malformed but some reviews were
            recovered; carries the recovered reviews and missing indices.
        json.JSONDecodeError: The JSON was malformed and nothing was recovered.
    """
    if not reviews:
        return [], {"error": "No reviews provided"}

    start_time = time.time()

    # Build batch user prompt
    user_prompt = self._build_batch_user_prompt(reviews)

    messages = [
        {"role": "system", "content": self.get_prompt(batch_mode=True)},
        {"role": "user", "content": user_prompt},
    ]

    # Output budget: 500 tokens per review, capped at 16,000 for the batch.
    max_output_tokens = min(16000, len(reviews) * 500)

    response = await self.client.chat.completions.create(
        model=self.model,
        messages=messages,
        temperature=self.config.llm_temperature,
        response_format={"type": "json_object"},
        max_tokens=max_output_tokens,
        timeout=self.config.llm_timeout_seconds * 2,  # Longer timeout for batch
    )

    # An empty choices list is reported the same way as empty content
    # instead of surfacing as an IndexError.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if not content:
        raise ValueError("Empty response from OpenAI")

    metadata = self._calculate_openai_costs(response, start_time, batch_size=len(reviews))

    # Try full JSON parse first
    try:
        batch_result = json.loads(content)
    except json.JSONDecodeError as e:
        logger.warning("Full JSON parse failed: %s, attempting partial recovery...", e)

        # NOTE(review): the recovery helper is only visibly defined on
        # AnthropicClient. If this client lacks it, re-raise the parse error
        # rather than masking it with AttributeError - confirm placement.
        recover = getattr(self, "_extract_partial_batch_json", None)
        if recover is None:
            raise

        partial_reviews, missing_indices = recover(content, len(reviews))
        if not partial_reviews:
            # Nothing recovered - re-raise the original parse error.
            raise

        raise PartialBatchResult(
            message=f"Recovered {len(partial_reviews)}/{len(reviews)} reviews from malformed JSON",
            partial_results=partial_reviews,
            missing_indices=missing_indices,
            metadata=metadata,
        ) from e

    review_results = self._parse_batch_response(batch_result, reviews)
    return review_results, metadata
|
||||
|
||||
def _calculate_openai_costs(
|
||||
self,
|
||||
response: Any,
|
||||
start_time: float,
|
||||
batch_size: int = 1,
|
||||
) -> dict[str, Any]:
|
||||
"""Calculate costs from OpenAI response, accounting for cached tokens."""
|
||||
input_tokens = response.usage.prompt_tokens if response.usage else 0
|
||||
output_tokens = response.usage.completion_tokens if response.usage else 0
|
||||
total_tokens = input_tokens + output_tokens
|
||||
|
||||
pricing = self.PRICING.get(self.model, {"input": 0.15, "output": 0.60})
|
||||
cost = (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
|
||||
# Check for cached tokens (OpenAI returns this in newer API versions)
|
||||
cached_tokens = 0
|
||||
if hasattr(response.usage, "prompt_tokens_details") and response.usage.prompt_tokens_details:
|
||||
cached_tokens = getattr(response.usage.prompt_tokens_details, "cached_tokens", 0)
|
||||
|
||||
uncached_input = input_tokens - cached_tokens
|
||||
|
||||
pricing = self.PRICING.get(self.model, {"input": 0.15, "cached_input": 0.075, "output": 0.60})
|
||||
cost = (
|
||||
uncached_input * pricing["input"]
|
||||
+ cached_tokens * pricing.get("cached_input", pricing["input"] * 0.5)
|
||||
+ output_tokens * pricing["output"]
|
||||
) / 1_000_000
|
||||
|
||||
self.total_tokens_used += total_tokens
|
||||
self.total_cost_usd += cost
|
||||
self._cached_tokens += cached_tokens
|
||||
|
||||
metadata = {
|
||||
return {
|
||||
"model": self.model,
|
||||
"input_tokens": input_tokens,
|
||||
"cached_tokens": cached_tokens,
|
||||
"output_tokens": output_tokens,
|
||||
"total_tokens": total_tokens,
|
||||
"cost_usd": cost,
|
||||
"latency_ms": int((time.time() - start_time) * 1000),
|
||||
"batch_size": batch_size,
|
||||
"tokens_per_review": total_tokens / batch_size if batch_size > 0 else 0,
|
||||
}
|
||||
|
||||
return result, metadata
|
||||
def _parse_batch_response(
|
||||
self,
|
||||
batch_result: dict[str, Any],
|
||||
original_reviews: list[BatchReviewInput],
|
||||
) -> list[LLMClassificationResponse]:
|
||||
"""Parse batch response into individual review results."""
|
||||
results: list[LLMClassificationResponse] = []
|
||||
|
||||
# Handle both formats: {"reviews": [...]} and direct list
|
||||
review_data = batch_result.get("reviews", [])
|
||||
if not review_data and isinstance(batch_result, list):
|
||||
review_data = batch_result
|
||||
|
||||
# Create a lookup by review_index
|
||||
results_by_index = {r.get("review_index", i): r for i, r in enumerate(review_data)}
|
||||
|
||||
for i, original in enumerate(original_reviews):
|
||||
if i in results_by_index:
|
||||
review_result = results_by_index[i]
|
||||
# Convert to standard format
|
||||
results.append({
|
||||
"spans": review_result.get("spans", []),
|
||||
"review_summary": review_result.get("review_summary", {
|
||||
"dominant_valence": "V0",
|
||||
"dominant_domain": "O",
|
||||
"span_count": len(review_result.get("spans", [])),
|
||||
"has_comparative": False,
|
||||
"has_entity": False,
|
||||
}),
|
||||
})
|
||||
else:
|
||||
# Missing review - create fallback
|
||||
logger.warning(f"Review index {i} missing from batch response, using fallback")
|
||||
results.append(create_fallback_response(original["text"]))
|
||||
|
||||
return results
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
@@ -511,14 +925,16 @@ class OpenAIClient(LLMClientBase):
|
||||
|
||||
|
||||
class AnthropicClient(LLMClientBase):
|
||||
"""Anthropic LLM client implementation."""
|
||||
"""Anthropic LLM client implementation with batch support and prompt caching."""
|
||||
|
||||
# Pricing per 1M tokens (as of 2024)
|
||||
# Cached input tokens are 90% cheaper with Anthropic
|
||||
PRICING = {
|
||||
"claude-3-opus-20240229": {"input": 15.0, "output": 75.0},
|
||||
"claude-3-sonnet-20240229": {"input": 3.0, "output": 15.0},
|
||||
"claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
|
||||
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
|
||||
"claude-3-opus-20240229": {"input": 15.0, "cached_input": 1.50, "output": 75.0},
|
||||
"claude-3-sonnet-20240229": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
|
||||
"claude-3-haiku-20240307": {"input": 0.25, "cached_input": 0.03, "output": 1.25},
|
||||
"claude-3-5-sonnet-20241022": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
|
||||
"claude-sonnet-4-20250514": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
|
||||
}
|
||||
|
||||
def __init__(self, config: Config):
|
||||
@@ -533,13 +949,16 @@ class AnthropicClient(LLMClientBase):
|
||||
review_text: str,
|
||||
profile: str = "standard",
|
||||
) -> tuple[LLMClassificationResponse, dict[str, Any]]:
|
||||
"""Classify using Anthropic."""
|
||||
"""Classify a single review using Anthropic."""
|
||||
start_time = time.time()
|
||||
|
||||
# Use cache_control for prompt caching
|
||||
system_content = self._build_cached_system(self.get_prompt())
|
||||
|
||||
response = await self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=4096,
|
||||
system=self.get_prompt(),
|
||||
system=system_content,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
@@ -554,30 +973,161 @@ class AnthropicClient(LLMClientBase):
|
||||
if not content:
|
||||
raise ValueError("Empty response from Anthropic")
|
||||
|
||||
# Try to extract JSON from response
|
||||
result = self._extract_json(content)
|
||||
metadata = self._calculate_anthropic_costs(response, start_time)
|
||||
|
||||
# Calculate costs
|
||||
return result, metadata
|
||||
|
||||
async def classify_batch(
    self,
    reviews: list[BatchReviewInput],
    profile: str = "standard",
) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
    """
    Classify a whole batch of reviews with one Anthropic request.

    The system prompt is sent with cache_control so it can be served from
    Anthropic's prompt cache on later calls.

    Returns ``([], {"error": "No reviews provided"})`` for an empty batch.
    Raises ValueError on an empty response, PartialBatchResult when malformed
    JSON still yielded some reviews, and re-raises the parse error when
    nothing could be recovered.
    """
    if not reviews:
        return [], {"error": "No reviews provided"}

    started_at = time.time()
    review_count = len(reviews)

    # User message listing every review in the batch
    batch_message = self._build_batch_user_prompt(reviews)

    # System prompt wrapped with cache_control so it is cacheable
    cached_system = self._build_cached_system(self.get_prompt(batch_mode=True))

    # Output budget grows with the batch, up to a fixed ceiling
    output_budget = min(16000, review_count * 500)

    response = await self.client.messages.create(
        model=self.model,
        max_tokens=output_budget,
        system=cached_system,
        messages=[{"role": "user", "content": batch_message}],
        temperature=self.config.llm_temperature,
    )

    # Text of the first content block, or nothing when there are no blocks
    content = response.content[0].text if response.content else ""
    if not content:
        raise ValueError("Empty response from Anthropic")

    metadata = self._calculate_anthropic_costs(response, started_at, batch_size=review_count)

    try:
        # Happy path: the whole document parses
        parsed = self._extract_json(content)
        return self._parse_batch_response(parsed, reviews), metadata
    except (json.JSONDecodeError, ValueError) as e:
        logger.warning(f"Full JSON parse failed: {e}, attempting partial recovery...")

        recovered, still_missing = self._extract_partial_batch_json(content, review_count)

        if not recovered:
            # Couldn't recover anything - re-raise original error
            raise

        # Some reviews were salvaged - hand them to the caller
        raise PartialBatchResult(
            message=f"Recovered {len(recovered)}/{len(reviews)} reviews from malformed JSON",
            partial_results=recovered,
            missing_indices=still_missing,
            metadata=metadata,
        )
|
||||
|
||||
def _build_cached_system(self, prompt: str) -> list[dict[str, Any]]:
|
||||
"""Build system content with cache_control for prompt caching."""
|
||||
return [
|
||||
{
|
||||
"type": "text",
|
||||
"text": prompt,
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
]
|
||||
|
||||
def _calculate_anthropic_costs(
|
||||
self,
|
||||
response: Any,
|
||||
start_time: float,
|
||||
batch_size: int = 1,
|
||||
) -> dict[str, Any]:
|
||||
"""Calculate costs from Anthropic response, accounting for cached tokens."""
|
||||
input_tokens = response.usage.input_tokens
|
||||
output_tokens = response.usage.output_tokens
|
||||
total_tokens = input_tokens + output_tokens
|
||||
|
||||
pricing = self.PRICING.get(self.model, {"input": 3.0, "output": 15.0})
|
||||
cost = (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
|
||||
# Anthropic returns cache info in usage
|
||||
cached_tokens = getattr(response.usage, "cache_read_input_tokens", 0) or 0
|
||||
cache_creation_tokens = getattr(response.usage, "cache_creation_input_tokens", 0) or 0
|
||||
|
||||
uncached_input = input_tokens - cached_tokens
|
||||
|
||||
pricing = self.PRICING.get(self.model, {"input": 3.0, "cached_input": 0.30, "output": 15.0})
|
||||
cost = (
|
||||
uncached_input * pricing["input"]
|
||||
+ cached_tokens * pricing.get("cached_input", pricing["input"] * 0.1)
|
||||
+ output_tokens * pricing["output"]
|
||||
) / 1_000_000
|
||||
|
||||
self.total_tokens_used += total_tokens
|
||||
self.total_cost_usd += cost
|
||||
self._cached_tokens += cached_tokens
|
||||
|
||||
metadata = {
|
||||
return {
|
||||
"model": self.model,
|
||||
"input_tokens": input_tokens,
|
||||
"cached_tokens": cached_tokens,
|
||||
"cache_creation_tokens": cache_creation_tokens,
|
||||
"output_tokens": output_tokens,
|
||||
"total_tokens": total_tokens,
|
||||
"cost_usd": cost,
|
||||
"latency_ms": int((time.time() - start_time) * 1000),
|
||||
"batch_size": batch_size,
|
||||
"tokens_per_review": total_tokens / batch_size if batch_size > 0 else 0,
|
||||
}
|
||||
|
||||
return result, metadata
|
||||
def _parse_batch_response(
|
||||
self,
|
||||
batch_result: dict[str, Any],
|
||||
original_reviews: list[BatchReviewInput],
|
||||
) -> list[LLMClassificationResponse]:
|
||||
"""Parse batch response into individual review results."""
|
||||
results: list[LLMClassificationResponse] = []
|
||||
|
||||
# Handle both formats: {"reviews": [...]} and direct list
|
||||
review_data = batch_result.get("reviews", [])
|
||||
if not review_data and isinstance(batch_result, list):
|
||||
review_data = batch_result
|
||||
|
||||
# Create a lookup by review_index
|
||||
results_by_index = {r.get("review_index", i): r for i, r in enumerate(review_data)}
|
||||
|
||||
for i, original in enumerate(original_reviews):
|
||||
if i in results_by_index:
|
||||
review_result = results_by_index[i]
|
||||
results.append({
|
||||
"spans": review_result.get("spans", []),
|
||||
"review_summary": review_result.get("review_summary", {
|
||||
"dominant_valence": "V0",
|
||||
"dominant_domain": "O",
|
||||
"span_count": len(review_result.get("spans", [])),
|
||||
"has_comparative": False,
|
||||
"has_entity": False,
|
||||
}),
|
||||
})
|
||||
else:
|
||||
logger.warning(f"Review index {i} missing from batch response, using fallback")
|
||||
results.append(create_fallback_response(original["text"]))
|
||||
|
||||
return results
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
@@ -607,7 +1157,6 @@ class AnthropicClient(LLMClientBase):
|
||||
self.total_tokens_used += input_tokens + output_tokens
|
||||
self.total_cost_usd += cost
|
||||
|
||||
# Extract JSON from response (handles code blocks)
|
||||
return self._extract_json_string(content)
|
||||
|
||||
def _extract_json_string(self, content: str) -> str:
|
||||
@@ -615,16 +1164,13 @@ class AnthropicClient(LLMClientBase):
|
||||
import re
|
||||
content = content.strip()
|
||||
|
||||
# If it starts with {, return as-is
|
||||
if content.startswith("{"):
|
||||
return content
|
||||
|
||||
# Try to find JSON in code blocks
|
||||
json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
|
||||
if json_match:
|
||||
return json_match.group(1)
|
||||
|
||||
# Try to find JSON object
|
||||
json_match = re.search(r"\{[\s\S]*\}", content)
|
||||
if json_match:
|
||||
return json_match.group(0)
|
||||
@@ -635,26 +1181,162 @@ class AnthropicClient(LLMClientBase):
|
||||
"""Extract JSON from response, handling markdown code blocks."""
|
||||
content = content.strip()
|
||||
|
||||
# Try direct parse first
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find JSON in code blocks
|
||||
import re
|
||||
|
||||
json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
|
||||
if json_match:
|
||||
return json.loads(json_match.group(1))
|
||||
|
||||
# Try to find JSON object
|
||||
json_match = re.search(r"\{[\s\S]*\}", content)
|
||||
if json_match:
|
||||
return json.loads(json_match.group(0))
|
||||
|
||||
raise ValueError(f"Could not extract JSON from response: {content[:200]}")
|
||||
|
||||
def _extract_partial_batch_json(
    self, content: str, expected_count: int
) -> tuple[list[dict[str, Any]], list[int]]:
    """
    Extract partial results from truncated/malformed batch JSON.

    Every ``"review_index": N`` marker in ``content`` is treated as a
    candidate review object. The object is decoded starting at the nearest
    opening brace before the marker, and it is kept only if it passes
    ``_validate_recovered_review``. The first valid object wins for each
    index.

    Args:
        content: Raw response text that failed to parse as a whole.
        expected_count: Number of reviews that were sent in the batch.

    Returns:
        Tuple of (successfully_parsed_reviews, missing_indices), where
        missing_indices lists every position in ``range(expected_count)``
        that has no recovered review.
    """
    import re

    # raw_decode parses exactly one JSON value starting at a given offset and
    # understands string escaping, so braces inside review text cannot
    # unbalance the extraction the way manual brace counting can.
    decoder = json.JSONDecoder()

    parsed_reviews: list[dict[str, Any]] = []
    found_indices: set[int] = set()

    for marker in re.finditer(r'"review_index"\s*:\s*(\d+)', content):
        if int(marker.group(1)) in found_indices:
            continue

        # Find the start of this review object
        start = content.rfind("{", 0, marker.start())
        if start == -1:
            continue

        try:
            obj, _end = decoder.raw_decode(content, start)
        except json.JSONDecodeError:
            # Truncated or malformed object - it stays in missing_indices
            continue

        # Apply the same completeness check to every recovered object so that
        # half-written reviews are reprocessed rather than silently accepted.
        if not isinstance(obj, dict) or not self._validate_recovered_review(obj):
            continue

        review_idx = obj["review_index"]
        if review_idx not in found_indices:
            parsed_reviews.append(obj)
            found_indices.add(review_idx)

    # Determine missing indices
    missing_indices = [i for i in range(expected_count) if i not in found_indices]

    logger.info(
        f"Partial JSON recovery: {len(parsed_reviews)}/{expected_count} reviews recovered, "
        f"{len(missing_indices)} missing"
    )

    return parsed_reviews, missing_indices
|
||||
|
||||
def _validate_recovered_review(self, obj: dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate a recovered review has all required fields with valid data.
|
||||
|
||||
Returns True only if the review is complete and usable.
|
||||
Rejects:
|
||||
- Missing review_index or spans
|
||||
- Empty spans array
|
||||
- Spans missing required fields (text, urt_primary, valence, intensity)
|
||||
- Empty field values
|
||||
"""
|
||||
# Check required top-level fields
|
||||
if "review_index" not in obj:
|
||||
return False
|
||||
if not isinstance(obj.get("review_index"), int):
|
||||
return False
|
||||
|
||||
if "spans" not in obj:
|
||||
return False
|
||||
if not isinstance(obj["spans"], list):
|
||||
return False
|
||||
if len(obj["spans"]) == 0:
|
||||
# Empty spans = no useful classification data
|
||||
return False
|
||||
|
||||
# Validate each span has required fields with non-empty values
|
||||
required_span_fields = ["text", "urt_primary", "valence", "intensity"]
|
||||
for span in obj["spans"]:
|
||||
if not isinstance(span, dict):
|
||||
return False
|
||||
for field in required_span_fields:
|
||||
if field not in span:
|
||||
return False
|
||||
if not span[field]: # Empty string or None
|
||||
return False
|
||||
|
||||
# review_summary is optional but if present should be a dict
|
||||
if "review_summary" in obj and not isinstance(obj["review_summary"], dict):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def close(self) -> None:
    """Shut down the Anthropic client by awaiting its ``close()``."""
    client = self.client
    await client.close()
|
||||
|
||||
@@ -0,0 +1,480 @@
|
||||
"""
|
||||
Dynamic prompt builder for URT classification.
|
||||
|
||||
Fetches taxonomy from database to build the system prompt,
|
||||
ensuring single source of truth and including examples.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Entity extraction rules for staff recognition
|
||||
ENTITY_EXTRACTION_RULES = """
|
||||
## ENTITY EXTRACTION (Staff Recognition)
|
||||
|
||||
When a span mentions a SPECIFIC PERSON by name, extract:
|
||||
- entity: The person's name exactly as written
|
||||
- entity_type: "staff" for employees, "customer" for other people mentioned
|
||||
|
||||
### EXTRACT (set entity + entity_type):
|
||||
- "Miglė was amazing" → entity: "Miglė", entity_type: "staff"
|
||||
- "Thank you Carlos!" → entity: "Carlos", entity_type: "staff"
|
||||
- "Adrian helped us" → entity: "Adrian", entity_type: "staff"
|
||||
- "Ačiū Artūrui" → entity: "Artūrui", entity_type: "staff"
|
||||
- "bartender Eivydas" → entity: "Eivydas", entity_type: "staff"
|
||||
- "our server Maria" → entity: "Maria", entity_type: "staff"
|
||||
|
||||
### DO NOT EXTRACT (keep entity: null):
|
||||
- "The bartender was rude" → no specific name, keep null
|
||||
- "Staff was friendly" → generic reference, keep null
|
||||
- "Service was great" → no person mentioned, keep null
|
||||
- "The manager helped" → role only, no name, keep null
|
||||
|
||||
### Name Recognition Tips:
|
||||
- Look for CAPITALIZED words that are NOT at sentence start
|
||||
- Common patterns: "[Name] was/is [adjective]", "thank [Name]", "[role] [Name]"
|
||||
- International names: Miglė, Eivydas, Žydrė, Artūras (Lithuanian), Carlos, María (Spanish), etc.
|
||||
- When a name appears near: bartender, waiter, server, staff, manager, helped, thank, amazing, great, rude
|
||||
|
||||
IMPORTANT: When in doubt, extract the name. Staff recognition is valuable - false positives are acceptable.
|
||||
"""
|
||||
|
||||
# Static parts of the prompt that don't change
|
||||
PROMPT_HEADER = """You are a review classification system using URT (Universal Review Taxonomy) v5.1.
|
||||
|
||||
Your task is to extract semantic spans from customer reviews and classify each span independently.
|
||||
|
||||
## SPAN EXTRACTION RULES
|
||||
|
||||
**CRITICAL: Use TOPIC-BASED splitting, NOT sentence-based splitting.**
|
||||
|
||||
A span = all consecutive text about the SAME topic/domain, regardless of sentence count.
|
||||
|
||||
### When to KEEP TOGETHER (same span):
|
||||
- Multiple sentences about the same topic: "The food was great. I loved the pasta. The sauce was perfect." → ONE span (all about Offering)
|
||||
- Cause and effect: "The wait was long because they were understaffed" → ONE span
|
||||
- Elaboration: "Staff was rude. They ignored us for 20 minutes." → ONE span (both about People)
|
||||
- Single-topic reviews: Even if 5 sentences, if all about food → ONE span
|
||||
|
||||
### When to SPLIT (separate spans):
|
||||
- Contrasting conjunctions that change topic: "Food was great BUT service was slow" → TWO spans
|
||||
- Domain change: food (O) → staff (P) → ambiance (E) = split at each change
|
||||
- Target change: "The waiter was nice but the manager was rude" → TWO spans (different people)
|
||||
|
||||
### Examples:
|
||||
- "Amazing food. Best burger ever. Fries were crispy too." → 1 span (all Offering, V+)
|
||||
- "Food was great but we waited an hour." → 2 spans (Offering V+, Journey V-)
|
||||
- "I've been coming here for years. Always consistent quality." → 1 span (Relationship)
|
||||
- "The staff are lovely and amazing with kids. More highchairs are definitely needed though." → 2 spans (People V+, Access V-)
|
||||
|
||||
**Guardrails**:
|
||||
- Prefer FEWER, LARGER spans over many small ones
|
||||
- Most reviews should have 1-3 spans, rarely more
|
||||
- Min 1 span per review
|
||||
- Spans must be non-overlapping
|
||||
|
||||
## CRITICAL CLASSIFICATION RULES (Common Mistakes to Avoid)
|
||||
|
||||
### RULE 1: Money/Price → ALWAYS use V codes (Value)
|
||||
Any mention of: price, cost, fee, charge, €, $, deposit, refund, expensive, cheap, affordable
|
||||
- ✅ "50€ extra" → V1.03 Hidden Costs
|
||||
- ✅ "good price" → V1.01 Price Level
|
||||
- ❌ NEVER use P codes for pricing (P is for People/staff behavior)
|
||||
|
||||
### RULE 2: Staff Behavior → ALWAYS use P codes (People)
|
||||
Any mention of: friendly, rude, helpful, patient, amable, nett, simpático, attentive
|
||||
- ✅ "staff was friendly" → P1.01 Warmth
|
||||
- ✅ "rude employee" → P1.02 Respect
|
||||
- ❌ NEVER use A codes for staff behavior (A is for Access/availability)
|
||||
|
||||
### RULE 3: Scam/Fraud/Deception → ALWAYS use R codes (Relationship)
|
||||
Any mention of: scam, estafa, fraud, lied, cheat, dishonest, robbery, Abzocker
|
||||
- ✅ "felt scammed" → R1.02 Ethics
|
||||
- ✅ "they lied" → R1.01 Honesty
|
||||
- ❌ NEVER use P or V codes for ethical issues
|
||||
|
||||
### RULE 4: Location/Finding → Use A codes (Access)
|
||||
Difficulty finding a place, shuttle, meeting point, confusing directions
|
||||
- ✅ "couldn't find shuttle" → A1.04 Wayfinding
|
||||
- ✅ "far from airport" → A4.01 Location
|
||||
- ❌ Don't confuse with J1.02 Punctuality (which is about being on time)
|
||||
|
||||
### RULE 5: Wait Time vs Punctuality
|
||||
- J1.01 Speed = how FAST service is ("waited 2 hours", "slow service")
|
||||
- J1.02 Punctuality = being ON TIME vs scheduled ("arrived late", "delayed")
|
||||
|
||||
"""
|
||||
|
||||
PROMPT_BATCH_OUTPUT_FORMAT = """
|
||||
## BATCH OUTPUT FORMAT
|
||||
|
||||
When given multiple reviews, return a JSON object with a "reviews" array.
|
||||
Each review in the array contains its own spans and summary.
|
||||
|
||||
{
|
||||
"reviews": [
|
||||
{
|
||||
"review_index": 0,
|
||||
"spans": [
|
||||
{
|
||||
"span_index": 0,
|
||||
"span_text": "exact text from this review",
|
||||
"span_start": 0,
|
||||
"span_end": 25,
|
||||
"urt_primary": "P1.01",
|
||||
"urt_secondary": [],
|
||||
"valence": "V+",
|
||||
"intensity": "I2",
|
||||
"specificity": "S2",
|
||||
"actionability": "A1",
|
||||
"temporal": "TC",
|
||||
"evidence": "ES",
|
||||
"comparative": "CR-N",
|
||||
"is_primary": true,
|
||||
"confidence": "high",
|
||||
"entity": "Maria",
|
||||
"entity_type": "staff",
|
||||
"usn": "URT:S:P1.01:+2:21TC.ES.N"
|
||||
}
|
||||
],
|
||||
"review_summary": {
|
||||
"dominant_valence": "V+",
|
||||
"dominant_domain": "P",
|
||||
"span_count": 1,
|
||||
"has_comparative": false,
|
||||
"has_entity": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"review_index": 1,
|
||||
"spans": [ ... ],
|
||||
"review_summary": { ... }
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
CRITICAL RULES FOR BATCH PROCESSING:
|
||||
1. Process each review INDEPENDENTLY - do not mix content between reviews
|
||||
2. review_index MUST match the input order (0, 1, 2, ...)
|
||||
3. span_start and span_end are relative to THAT review's text only
|
||||
4. If you see the same entity (e.g., staff name "Maria") in multiple reviews, use consistent spelling
|
||||
5. Output ALL reviews in the batch - never skip any
|
||||
6. Each review must have at least 1 span
|
||||
"""
|
||||
|
||||
PROMPT_SINGLE_OUTPUT_FORMAT = """
|
||||
## SINGLE REVIEW OUTPUT FORMAT
|
||||
|
||||
Return valid JSON matching this schema. No markdown, no explanations.
|
||||
|
||||
{
|
||||
"spans": [
|
||||
{
|
||||
"span_index": 0,
|
||||
"span_text": "exact text from review",
|
||||
"span_start": 0,
|
||||
"span_end": 25,
|
||||
"urt_primary": "O1.01",
|
||||
"urt_secondary": [],
|
||||
"valence": "V+",
|
||||
"intensity": "I2",
|
||||
"specificity": "S2",
|
||||
"actionability": "A1",
|
||||
"temporal": "TC",
|
||||
"evidence": "ES",
|
||||
"comparative": "CR-N",
|
||||
"is_primary": true,
|
||||
"confidence": "high",
|
||||
"entity": null,
|
||||
"entity_type": null,
|
||||
"relation_type": null,
|
||||
"related_span_index": null,
|
||||
"usn": "URT:S:O1.01:+2:21TC.ES.N"
|
||||
}
|
||||
],
|
||||
"review_summary": {
|
||||
"dominant_valence": "V+",
|
||||
"dominant_domain": "O",
|
||||
"span_count": 1,
|
||||
"has_comparative": false,
|
||||
"has_entity": false
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
PROMPT_DIMENSIONS = """
|
||||
## DIMENSION CODES
|
||||
|
||||
### Valence
|
||||
- V+ : Positive sentiment
|
||||
- V- : Negative sentiment
|
||||
- V0 : Neutral/factual
|
||||
- V± : Mixed within the span
|
||||
|
||||
### Intensity
|
||||
- I1 : Low ("okay", "fine", "decent")
|
||||
- I2 : Moderate ("good", "bad", "slow")
|
||||
- I3 : High ("amazing", "terrible", "unacceptable")
|
||||
|
||||
### Specificity
|
||||
- S1 : Vague ("it was bad")
|
||||
- S2 : Some detail ("the food was cold")
|
||||
- S3 : Precise ("waited 45 minutes for appetizers")
|
||||
|
||||
### Actionability
|
||||
- A1 : No clear action possible
|
||||
- A2 : Possible actions, unclear which
|
||||
- A3 : Clear, specific action ("train staff on X", "fix Y")
|
||||
|
||||
### Temporal
|
||||
- TC : Current visit (default when no markers)
|
||||
- TR : Recent pattern ("lately", "recently", "again")
|
||||
- TH : Historical ("for years", "always", "used to")
|
||||
- TF : Future ("won't return", "next time", "I expect")
|
||||
|
||||
### Evidence
|
||||
- ES : Stated explicitly in text (default)
|
||||
- EI : Inferred logically (not stated, but entailed)
|
||||
- EC : Contextual (depends on surrounding text)
|
||||
|
||||
### Comparative
|
||||
- CR-N : No comparison (default)
|
||||
- CR-B : Better than alternatives
|
||||
- CR-W : Worse than alternatives
|
||||
- CR-S : Same as alternatives
|
||||
|
||||
## PRIMARY SPAN SELECTION
|
||||
|
||||
Mark exactly ONE span as is_primary=true using this order:
|
||||
1. Highest intensity (I3 > I2 > I1)
|
||||
2. Tie-break: negative over positive (V- > V± > V0 > V+)
|
||||
3. Tie-break: earliest span_index
|
||||
|
||||
## USN (URT String Notation)
|
||||
|
||||
Generate a USN string for each span:
|
||||
```
|
||||
URT:S:{primary}[+{sec1}][+{sec2}]:{valence_sign}{intensity_num}:{S#}{A#}{temporal}.{evidence}.{CR_suffix}
|
||||
```
|
||||
|
||||
Examples:
|
||||
- `URT:S:J1.03:-2:22TC.ES.N` (J1.03, V-, I2, S2, A2, TC, ES, CR-N)
|
||||
- `URT:S:P1.01+O2.03:+3:33TR.ES.B` (P1.01 primary, O2.03 secondary, V+, I3, S3, A3, TR, ES, CR-B)
|
||||
|
||||
Valence encoding: + for V+, - for V-, 0 for V0, ± for V±
|
||||
CR suffix: N=CR-N, B=CR-B, W=CR-W, S=CR-S"""
|
||||
|
||||
# Domain-specific warnings to include.
# Keyed by domain code; when a domain has an entry, the text is appended to
# that domain's header line in the taxonomy section of the prompt.
DOMAIN_WARNINGS: dict[str, str] = {
    "V": "USE FOR ALL PRICE/COST/FEE/MONEY MENTIONS",
    "P": "USE FOR STAFF BEHAVIOR ONLY, NOT PRICING",
}
|
||||
|
||||
|
||||
class PromptBuilder:
    """
    Builds the classification prompt dynamically from database taxonomy.

    The taxonomy section is fetched once and shared by the single-review and
    batch prompts; each assembled prompt is cached separately. Whenever the
    taxonomy is rebuilt, both assembled prompts are dropped so neither mode
    can keep serving text built from the previous taxonomy.

    Usage:
        builder = PromptBuilder(db_pool)
        prompt = await builder.build()  # For single review
        prompt = await builder.build(batch_mode=True)  # For batch processing
    """

    def __init__(self, pool: asyncpg.Pool):
        self.pool = pool
        self._cached_prompt_single: str | None = None
        self._cached_prompt_batch: str | None = None
        self._cached_taxonomy: str | None = None

    async def build(self, force_refresh: bool = False, batch_mode: bool = False) -> str:
        """
        Build the complete system prompt from database taxonomy.

        Args:
            force_refresh: If True, re-fetch the taxonomy and rebuild even if cached
            batch_mode: If True, include batch output format

        Returns:
            Complete system prompt string
        """
        if not force_refresh:
            cached = self._cached_prompt_batch if batch_mode else self._cached_prompt_single
            if cached:
                return cached

        # Shared between single and batch prompts
        taxonomy = await self._ensure_taxonomy(force_refresh=force_refresh)

        output_format = PROMPT_BATCH_OUTPUT_FORMAT if batch_mode else PROMPT_SINGLE_OUTPUT_FORMAT
        prompt = (
            PROMPT_HEADER
            + taxonomy
            + ENTITY_EXTRACTION_RULES
            + PROMPT_DIMENSIONS
            + output_format
        )

        if batch_mode:
            self._cached_prompt_batch = prompt
        else:
            self._cached_prompt_single = prompt

        logger.info(f"Built {'batch' if batch_mode else 'single'} classification prompt")
        return prompt

    async def build_cacheable_parts(self) -> tuple[str, str]:
        """
        Build the prompt split into cacheable (static) and dynamic parts.

        For prompt caching, we want to separate:
        - Static part (taxonomy, rules) - can be cached
        - Dynamic part (output format) - varies by mode

        Returns:
            Tuple of (cacheable_prefix, suffix_for_batch)
        """
        taxonomy = await self._ensure_taxonomy()

        # Static cacheable prefix (same for all calls)
        cacheable_prefix = (
            PROMPT_HEADER
            + taxonomy
            + ENTITY_EXTRACTION_RULES
            + PROMPT_DIMENSIONS
        )
        return cacheable_prefix, PROMPT_BATCH_OUTPUT_FORMAT

    async def _ensure_taxonomy(self, force_refresh: bool = False) -> str:
        """Return the taxonomy section, fetching and rendering it if needed."""
        if self._cached_taxonomy and not force_refresh:
            return self._cached_taxonomy

        domains = await self._fetch_domains()
        subcodes = await self._fetch_subcodes()
        taxonomy = self._build_taxonomy_section(domains, subcodes)

        # Only touch the caches once the fetch and render have succeeded, so a
        # failed refresh leaves the previous (still usable) state in place.
        self._cached_taxonomy = taxonomy
        # Prompts assembled from the previous taxonomy are now stale in BOTH
        # modes, not just the one currently being built.
        self._cached_prompt_single = None
        self._cached_prompt_batch = None

        logger.info(f"Built taxonomy section with {len(subcodes)} subcodes")
        return taxonomy

    async def _fetch_domains(self) -> list[dict[str, Any]]:
        """Fetch domain definitions from database."""
        query = """
            SELECT code, name, description
            FROM pipeline.urt_domains
            ORDER BY code
        """
        rows = await self.pool.fetch(query)
        return [dict(row) for row in rows]

    async def _fetch_subcodes(self) -> list[dict[str, Any]]:
        """Fetch subcode definitions with examples from database."""
        query = """
            SELECT
                code,
                name,
                definition,
                positive_example,
                negative_example
            FROM pipeline.urt_subcodes
            ORDER BY code
        """
        rows = await self.pool.fetch(query)
        return [dict(row) for row in rows]

    def _build_taxonomy_section(
        self,
        domains: list[dict[str, Any]],
        subcodes: list[dict[str, Any]]
    ) -> str:
        """
        Render the taxonomy section of the prompt.

        Args:
            domains: Domain rows with "code" and "name" keys
            subcodes: Subcode rows with "code", "name", "definition" and
                optional "positive_example" / "negative_example" keys

        Returns:
            Markdown text: one header per domain followed by its subcodes
        """
        # Group subcodes by domain: the first character of a subcode's code
        # is matched against the domain code.
        subcodes_by_domain: dict[str, list[dict[str, Any]]] = {}
        for subcode in subcodes:
            subcodes_by_domain.setdefault(subcode["code"][0], []).append(subcode)

        lines = ["## URT TAXONOMY (Use EXACT codes from database)", ""]

        for domain in domains:
            code = domain["code"]
            name = domain["name"]
            domain_subcodes = subcodes_by_domain.get(code, [])

            # Domain header with warning if applicable
            header = f"### {code} - {name.upper()} ({len(domain_subcodes)} codes)"
            warning = DOMAIN_WARNINGS.get(code, "")
            if warning:
                header += f" ⚠️ {warning}"
            lines.append(header)

            # Add each subcode with definition and examples
            for sc in domain_subcodes:
                sc_name = sc["name"]
                # Fall back to the name when no definition is stored
                sc_def = sc["definition"] or sc_name
                pos_ex = sc.get("positive_example")
                neg_ex = sc.get("negative_example")

                # Main line: code, name, definition
                line = f"{sc['code']} {sc_name}: {sc_def}"

                # Add examples if available (helps LLM distinguish)
                if pos_ex and neg_ex:
                    line += f' [+"{pos_ex}" / -"{neg_ex}"]'
                elif pos_ex:
                    line += f' [+"{pos_ex}"]'
                elif neg_ex:
                    line += f' [-"{neg_ex}"]'

                lines.append(line)

            lines.append("")  # Blank line between domains

        return "\n".join(lines)

    def invalidate_cache(self) -> None:
        """Invalidate the cached prompts and taxonomy, forcing rebuild on next call."""
        self._cached_prompt_single = None
        self._cached_prompt_batch = None
        self._cached_taxonomy = None
|
||||
|
||||
|
||||
# Global prompt cache for when DB is not available.
# Starts as None and is filled on first use by get_static_fallback_prompt().
_static_prompt_cache: str | None = None
|
||||
|
||||
|
||||
async def build_prompt_from_db(pool: asyncpg.Pool) -> str:
    """
    Build the single-review system prompt straight from the database.

    A fresh PromptBuilder is created on every call, so nothing is cached
    between calls.

    Args:
        pool: Database connection pool

    Returns:
        Complete system prompt
    """
    return await PromptBuilder(pool).build()
|
||||
|
||||
|
||||
def get_static_fallback_prompt() -> str:
    """
    Return the hardcoded fallback prompt for when the database is unavailable.

    This should only be used in testing or when DB connection fails.
    The prompt is imported on first use and then kept in a module-level cache.
    """
    global _static_prompt_cache
    if _static_prompt_cache is not None:
        return _static_prompt_cache

    # Import the hardcoded version as fallback (deferred until first use)
    from reviewiq_pipeline.services.llm_client import SYSTEM_PROMPT

    _static_prompt_cache = SYSTEM_PROMPT
    return _static_prompt_cache
|
||||
@@ -0,0 +1,375 @@
|
||||
"""
|
||||
Language-agnostic review router for cost-optimized LLM classification.
|
||||
|
||||
Routes reviews to different processing paths based on structural signals only:
|
||||
- SKIP: Extremely low-value reviews (skip LLM entirely, assign generic code)
|
||||
- CHEAP_MODEL: Short, simple reviews (use Haiku for classification)
|
||||
- FULL_MODEL: Complex reviews (use Sonnet for full classification)
|
||||
|
||||
IMPORTANT: All routing decisions use ONLY language-agnostic signals:
|
||||
- Word count / character count (numeric)
|
||||
- Presence of numbers in text (pattern-based)
|
||||
- Sentence count (punctuation-based)
|
||||
- Emoji-only detection (pattern-based)
|
||||
- Star rating (numeric)
|
||||
|
||||
NO hardcoded word lists (like "great", "bueno", "gut") are used because:
|
||||
- Reviews span 7+ languages (Spanish, English, Dutch, German, Polish, Finnish, Danish, etc.)
|
||||
- Typographical errors (typos) are common
|
||||
- False negatives (skipping valuable reviews) are worse than false positives
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
import re
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from reviewiq_pipeline.contracts import ReviewToClassify
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RoutingTier(Enum):
    """Processing path a review is sent down."""

    # No LLM call; a generic URT code is assigned instead
    SKIP = "skip"
    # Fast/cheap model (Haiku)
    CHEAP_MODEL = "cheap"
    # Full model (Sonnet)
    FULL_MODEL = "full"
|
||||
|
||||
|
||||
@dataclass
class RoutingDecision:
    """Result of routing decision for a review."""

    # Tier the review was assigned to
    tier: RoutingTier
    # Short machine-readable reason, e.g. "contains_numbers" or "default"
    reason: str
    # The structural signals the decision was based on
    signals: dict[str, Any]
    # For SKIP tier, pre-assign the generic classification
    skip_classification: dict | None = None
|
||||
|
||||
|
||||
@dataclass
class RouterConfig:
    """Thresholds and switches that drive the review router's tier choice."""

    # --- SKIP tier (very conservative - prefer false positives) ---
    skip_max_words: int = 1
    skip_max_chars: int = 15
    # Only skip if rating is 1 or 5
    skip_require_extreme_rating: bool = True

    # --- CHEAP_MODEL tier ---
    cheap_max_words: int = 10
    cheap_max_chars: int = 100

    # --- Signals that force FULL_MODEL regardless of length ---
    full_model_if_has_numbers: bool = True
    full_model_if_multiple_sentences: bool = True
    full_model_min_sentences: int = 2
|
||||
|
||||
|
||||
class ReviewRouter:
    """
    Routes reviews to appropriate processing tier using language-agnostic signals.

    Design principles:
    - Conservative: Prefer false positives (processing simple reviews fully)
      over false negatives (skipping valuable reviews)
    - Language-agnostic: No word lists, only structural/numeric signals
    - Transparent: Every decision includes the signals used
    """

    # Pattern to detect numbers (dates, amounts, room numbers, etc.)
    NUMBER_PATTERN = re.compile(r'\d+')

    # Sentence-ending punctuation (language-agnostic): ASCII . ! ? plus the
    # ideographic full stop and the fullwidth ! and ?. Written as escapes so
    # the fullwidth forms survive Unicode normalization of this file.
    SENTENCE_END_PATTERN = re.compile("[.!?\u3002\uff01\uff1f]+")

    # Emoji pattern. Each block is listed explicitly: a single range from
    # U+24C2 to U+1F251 would also swallow CJK ideographs, kana and Hangul,
    # making reviews in those scripts look emoji-only.
    # NOTE(review): this was described as identical to TextProcessor's
    # pattern - check whether that copy has the same over-broad range.
    EMOJI_PATTERN = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
        "\U0001F1E0-\U0001F1FF"  # flags
        "\U00002600-\U000026FF"  # miscellaneous symbols
        "\U00002702-\U000027B0"  # dingbats
        "\U00002B00-\U00002BFF"  # stars, squares, arrows
        "\U000024C2"  # circled M
        "\U0001F200-\U0001F251"  # enclosed ideographic supplement
        "\U0000FE0F"  # emoji variation selector
        "]+",
        flags=re.UNICODE,
    )

    # Generic classification for skipped reviews
    GENERIC_POSITIVE = {
        "urt_primary": "V4.03",  # Overall Satisfaction - General
        "valence": "V+",
        "intensity": "I1",
        "confidence": "low",
        "skip_reason": "auto_routed_positive",
    }

    GENERIC_NEGATIVE = {
        "urt_primary": "V4.03",  # Overall Satisfaction - General
        "valence": "V-",
        "intensity": "I1",
        "confidence": "low",
        "skip_reason": "auto_routed_negative",
    }

    def __init__(self, config: RouterConfig | None = None):
        """
        Args:
            config: Routing thresholds; a default RouterConfig is used when omitted
        """
        self.config = config or RouterConfig()
        # Per-tier counters keyed by RoutingTier value
        self._stats = {
            "skip": 0,
            "cheap": 0,
            "full": 0,
        }

    def route(self, review: ReviewToClassify) -> RoutingDecision:
        """
        Determine the processing tier for a review.

        Args:
            review: Review to route; "text_normalized" is preferred over
                "text", and a missing or null "rating" is treated as 3

        Returns:
            RoutingDecision with tier, reason, and signals
        """
        text = review.get("text_normalized") or review.get("text") or ""

        # .get("rating", 3) would still hand back None for an explicit null,
        # which later breaks the numeric comparison on the SKIP path.
        rating = review.get("rating")
        if rating is None:
            rating = 3

        # Extract language-agnostic signals
        signals = self._extract_signals(text, rating)

        # Decision logic (conservative - start with FULL, demote only if safe)
        decision = self._make_decision(signals, rating)

        # Update stats
        self._stats[decision.tier.value] += 1

        return decision

    def route_batch(
        self,
        reviews: list[ReviewToClassify]
    ) -> dict[RoutingTier, list[ReviewToClassify]]:
        """
        Route a batch of reviews, grouping by tier.

        Each review is mutated in place: its decision is stored under the
        "_routing" key for downstream use.

        Args:
            reviews: List of reviews to route

        Returns:
            Dictionary mapping tiers to lists of reviews
        """
        result: dict[RoutingTier, list[ReviewToClassify]] = {
            RoutingTier.SKIP: [],
            RoutingTier.CHEAP_MODEL: [],
            RoutingTier.FULL_MODEL: [],
        }

        for review in reviews:
            decision = self.route(review)
            # Attach routing decision to review for downstream use
            review["_routing"] = decision
            result[decision.tier].append(review)

        logger.info(
            f"Routed {len(reviews)} reviews: "
            f"SKIP={len(result[RoutingTier.SKIP])}, "
            f"CHEAP={len(result[RoutingTier.CHEAP_MODEL])}, "
            f"FULL={len(result[RoutingTier.FULL_MODEL])}"
        )

        return result

    def _extract_signals(self, text: str, rating: int) -> dict[str, Any]:
        """
        Extract language-agnostic signals from review text.

        All signals are structural/numeric, never word-based.

        Args:
            text: Review text; empty or None yields all-zero signals
            rating: Star rating, echoed into the signals

        Returns:
            Dict with word_count, char_count, has_numbers, sentence_count,
            emoji_count (number of contiguous emoji runs), is_emoji_only,
            rating and is_extreme_rating (rating is 1 or 5)
        """
        # Every measurement below evaluates to 0/False on an empty string,
        # so no separate empty-text branch is needed.
        text = text or ""

        # Count sentences by punctuation; fragments that are blank after
        # splitting (e.g. after a trailing period) are ignored
        fragments = self.SENTENCE_END_PATTERN.split(text)
        sentence_count = sum(1 for fragment in fragments if fragment.strip())

        # findall returns one match per contiguous run of emoji
        emoji_count = len(self.EMOJI_PATTERN.findall(text))

        # Emoji-only: at least one emoji and nothing but whitespace left
        # once the emoji are removed
        remainder = self.EMOJI_PATTERN.sub("", text).strip()
        is_emoji_only = emoji_count > 0 and not remainder

        return {
            "word_count": len(text.split()),
            "char_count": len(text),
            # Numbers (dates, amounts, room numbers) often signal specific details
            "has_numbers": bool(self.NUMBER_PATTERN.search(text)),
            "sentence_count": sentence_count,
            "emoji_count": emoji_count,
            "is_emoji_only": is_emoji_only,
            "rating": rating,
            "is_extreme_rating": rating in (1, 5),
        }

    def _make_decision(
        self,
        signals: dict[str, Any],
        rating: int
    ) -> RoutingDecision:
        """
        Make routing decision based on signals.

        Decision order (conservative):
        1. Check for FULL_MODEL forcing signals first
        2. Check for SKIP eligibility (very strict)
        3. Check for CHEAP_MODEL eligibility
        4. Default to FULL_MODEL

        Args:
            signals: Output of _extract_signals
            rating: Star rating; 4 and above maps a skipped review to the
                generic positive classification, anything lower to negative

        Returns:
            The routing decision, carrying the signals it was based on
        """
        cfg = self.config

        # FULL_MODEL forcing conditions
        if cfg.full_model_if_has_numbers and signals["has_numbers"]:
            return RoutingDecision(
                tier=RoutingTier.FULL_MODEL,
                reason="contains_numbers",
                signals=signals,
            )

        if (cfg.full_model_if_multiple_sentences and
                signals["sentence_count"] >= cfg.full_model_min_sentences):
            return RoutingDecision(
                tier=RoutingTier.FULL_MODEL,
                reason="multiple_sentences",
                signals=signals,
            )

        # cheap_max_words doubles as the "too long for anything cheaper" cutoff
        if signals["word_count"] > cfg.cheap_max_words:
            return RoutingDecision(
                tier=RoutingTier.FULL_MODEL,
                reason="long_text",
                signals=signals,
            )

        # SKIP eligibility (very strict)
        skip_eligible = (
            signals["word_count"] <= cfg.skip_max_words and
            signals["char_count"] <= cfg.skip_max_chars and
            not signals["has_numbers"] and
            signals["sentence_count"] <= 1
        )

        if cfg.skip_require_extreme_rating:
            skip_eligible = skip_eligible and signals["is_extreme_rating"]

        if skip_eligible:
            # Determine generic classification based on rating; copy so
            # callers cannot mutate the class-level template
            if rating >= 4:
                skip_class = self.GENERIC_POSITIVE.copy()
            else:
                skip_class = self.GENERIC_NEGATIVE.copy()

            return RoutingDecision(
                tier=RoutingTier.SKIP,
                reason="trivial_review",
                signals=signals,
                skip_classification=skip_class,
            )

        # CHEAP_MODEL eligibility
        if (signals["word_count"] <= cfg.cheap_max_words and
                signals["char_count"] <= cfg.cheap_max_chars and
                signals["sentence_count"] <= 1):
            return RoutingDecision(
                tier=RoutingTier.CHEAP_MODEL,
                reason="short_simple_review",
                signals=signals,
            )

        # Default to FULL_MODEL
        return RoutingDecision(
            tier=RoutingTier.FULL_MODEL,
            reason="default",
            signals=signals,
        )

    def get_stats(self) -> dict[str, int]:
        """Get routing statistics as a copy, keyed by tier value."""
        return self._stats.copy()

    def reset_stats(self) -> None:
        """Reset routing statistics."""
        self._stats = {"skip": 0, "cheap": 0, "full": 0}
|
||||
|
||||
|
||||
def create_router(
    conservative: bool = True,
    skip_enabled: bool = True,
    cheap_model_enabled: bool = True,
) -> ReviewRouter:
    """
    Factory function to create a router with common configurations.

    Args:
        conservative: If True, use very strict thresholds (recommended)
        skip_enabled: If True, allow SKIP tier
        cheap_model_enabled: If True, allow CHEAP_MODEL tier

    Returns:
        Configured ReviewRouter instance
    """
    if conservative:
        # Very conservative - only skip 1-word reviews with extreme ratings
        config = RouterConfig(
            skip_max_words=1,
            skip_max_chars=15,
            skip_require_extreme_rating=True,
            cheap_max_words=10,
            cheap_max_chars=100,
            full_model_if_has_numbers=True,
            full_model_if_multiple_sentences=True,
            full_model_min_sentences=2,
        )
    else:
        # Less conservative - skip more, cheaper processing
        config = RouterConfig(
            skip_max_words=3,
            skip_max_chars=30,
            skip_require_extreme_rating=False,
            cheap_max_words=15,
            cheap_max_chars=150,
            full_model_if_has_numbers=True,
            full_model_if_multiple_sentences=True,
            full_model_min_sentences=3,
        )

    # Override if tiers disabled. The router admits a review to a tier when
    # its counts are <= the tier's limits, so a limit of 0 would still admit
    # empty text; a negative limit can never be satisfied.
    if not skip_enabled:
        config.skip_max_words = -1
        config.skip_max_chars = -1

    if not cheap_model_enabled:
        # Only the character limit is made unsatisfiable. cheap_max_words is
        # also the router's "long_text" cutoff, which is evaluated before the
        # SKIP check; lowering it would push every non-empty review to
        # FULL_MODEL and silently switch the SKIP tier off as well.
        config.cheap_max_chars = -1

    return ReviewRouter(config)
|
||||
@@ -205,10 +205,11 @@ class Stage1Normalizer:
|
||||
source="google",
|
||||
)
|
||||
|
||||
# Insert enriched review stub
|
||||
# Insert enriched review stub with job_id
|
||||
await self.review_repo.insert_enriched_review(
|
||||
normalized,
|
||||
raw_id,
|
||||
job_id=input_data.get("job_id"),
|
||||
)
|
||||
|
||||
return raw_id
|
||||
|
||||
@@ -4,15 +4,21 @@ Stage 2: LLM Classification
|
||||
Classify normalized reviews into URT codes with span-level extraction.
|
||||
|
||||
Responsibilities:
|
||||
- Call LLM for span extraction and classification
|
||||
- Call LLM for span extraction and classification (batched for efficiency)
|
||||
- Generate embeddings
|
||||
- Calculate trust scores
|
||||
- Select primary span
|
||||
- Write to reviews_enriched and review_spans tables
|
||||
|
||||
Efficiency Features:
|
||||
- Batch processing: Multiple reviews per LLM call (configurable batch_size)
|
||||
- Prompt caching: System prompt cached to reduce input token costs
|
||||
- Parallel execution: Multiple batches processed concurrently
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
@@ -27,7 +33,20 @@ from reviewiq_pipeline.contracts import (
|
||||
Stage2Output,
|
||||
Stage2Stats,
|
||||
)
|
||||
from reviewiq_pipeline.services.llm_client import LLMClient, create_fallback_response
|
||||
from reviewiq_pipeline.services.llm_client import (
|
||||
LLMClient,
|
||||
create_fallback_response,
|
||||
BatchReviewInput,
|
||||
BatchSizer,
|
||||
PartialBatchResult,
|
||||
)
|
||||
from reviewiq_pipeline.services.prompt_builder import PromptBuilder
|
||||
from reviewiq_pipeline.services.classification_validator import validate_classification
|
||||
from reviewiq_pipeline.services.review_router import (
|
||||
ReviewRouter,
|
||||
RoutingTier,
|
||||
create_router,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from reviewiq_pipeline.config import Config
|
||||
@@ -75,22 +94,100 @@ class Stage2Classifier:
|
||||
self.span_repo = span_repo
|
||||
self.embedding_service = embedding_service
|
||||
self._llm_client: LLMClientBase | None = None
|
||||
self._cheap_llm_client: LLMClientBase | None = None # For CHEAP tier
|
||||
self._prompt_builder: PromptBuilder | None = None
|
||||
self._batch_sizer: BatchSizer | None = None
|
||||
self._system_prompt_tokens: int = 0
|
||||
|
||||
# Initialize router if enabled
|
||||
self._router: ReviewRouter | None = None
|
||||
if config.router_enabled:
|
||||
self._router = create_router(
|
||||
conservative=config.router_conservative,
|
||||
skip_enabled=config.router_skip_enabled,
|
||||
cheap_model_enabled=config.router_cheap_model_enabled,
|
||||
)
|
||||
logger.info(
|
||||
f"Review router enabled: conservative={config.router_conservative}, "
|
||||
f"skip={config.router_skip_enabled}, cheap={config.router_cheap_model_enabled}"
|
||||
)
|
||||
|
||||
async def _get_llm_client(self) -> LLMClientBase:
    """
    Return the main LLM client, creating and configuring it on first use.

    On the first call this:
      1. creates the client from ``self.config``;
      2. if a database pool is available, builds the single-review and batch
         prompts with a PromptBuilder and installs them on the client (a
         failure is logged and the client keeps its existing prompt);
      3. estimates the system prompt size at roughly 4 characters per token;
      4. creates the BatchSizer used for dynamic batch sizing.

    Later calls return the cached client unchanged.
    """
    if self._llm_client is not None:
        return self._llm_client

    client = LLMClient.create(self.config)
    self._llm_client = client

    # Build prompt dynamically from database if available
    batch_prompt = None
    if self.db and self.db.pool:
        try:
            builder = PromptBuilder(self.db.pool)
            self._prompt_builder = builder
            # Build both single and batch prompts
            single_prompt = await builder.build(batch_mode=False)
            batch_prompt = await builder.build(batch_mode=True)
            client.set_prompt(single_prompt, batch_prompt)
            logger.info("Using dynamic prompts from database taxonomy (single + batch)")
        except Exception as e:
            # Best effort: the client simply keeps whatever prompt it had.
            logger.warning(f"Failed to build dynamic prompt, using static: {e}")

    # Estimate system prompt tokens for batch sizing (~4 chars per token)
    sizing_prompt = batch_prompt or client.get_prompt(batch_mode=True)
    self._system_prompt_tokens = len(sizing_prompt) // 4

    # Initialize batch sizer
    self._batch_sizer = BatchSizer(
        model=self.config.llm_model,
        system_prompt_tokens=self._system_prompt_tokens,
        target_utilization=self.config.classification_target_utilization,
    )
    logger.info(
        f"BatchSizer initialized: model={self.config.llm_model}, "
        f"system_prompt_tokens≈{self._system_prompt_tokens}, "
        f"target_utilization={self.config.classification_target_utilization:.0%}"
    )

    return client
|
||||
|
||||
async def _get_cheap_llm_client(self) -> LLMClientBase:
    """
    Return the LLM client used for CHEAP tier routing, creating it on first use.

    The client is created from a shallow copy of ``self.config`` whose
    ``llm_model`` is replaced by ``config.router_cheap_model``. If the main
    client already exists, its single and batch prompts are copied over.
    """
    if self._cheap_llm_client is not None:
        return self._cheap_llm_client

    from copy import copy

    # Work on a copy so the main config keeps its own model name.
    cheap_config = copy(self.config)
    cheap_config.llm_model = self.config.router_cheap_model

    cheap_client = LLMClient.create(cheap_config)
    self._cheap_llm_client = cheap_client

    # Use same prompts as main client
    primary = self._llm_client
    if primary:
        cheap_client.set_prompt(
            primary.get_prompt(batch_mode=False),
            primary.get_prompt(batch_mode=True),
        )

    logger.info(f"Cheap LLM client initialized with model: {self.config.router_cheap_model}")
    return cheap_client
|
||||
|
||||
async def close(self) -> None:
    """
    Close both LLM clients and drop the references to them.

    The cheap client is closed even when closing the main client raises, and
    each reference is cleared before its ``close()`` is awaited, so a failing
    client is never reused or closed a second time. An exception from the
    main client still propagates after the cheap client has been handled.
    """
    try:
        if self._llm_client:
            client, self._llm_client = self._llm_client, None
            await client.close()
    finally:
        # Runs on success and on failure, so the cheap client cannot leak.
        if self._cheap_llm_client:
            cheap_client, self._cheap_llm_client = self._cheap_llm_client, None
            await cheap_client.close()
|
||||
|
||||
async def process(self, input_data: Stage2Input) -> Stage2Output:
|
||||
"""
|
||||
Process reviews through classification stage.
|
||||
Process reviews through classification stage using batched LLM calls.
|
||||
|
||||
This method:
|
||||
1. Routes reviews to appropriate tier (if router enabled)
|
||||
2. Calculates optimal batch size based on context window and review lengths
|
||||
3. Splits reviews into batches dynamically
|
||||
4. Processes batches in parallel (with concurrency limit)
|
||||
5. Uses prompt caching to reduce costs on subsequent batches
|
||||
6. Adapts batch size based on actual token usage
|
||||
|
||||
Args:
|
||||
input_data: Stage 2 input with reviews and config
|
||||
@@ -99,65 +196,284 @@ class Stage2Classifier:
|
||||
Stage2Output with classified reviews and stats
|
||||
"""
|
||||
batch_id = str(uuid.uuid4())[:8]
|
||||
logger.info(
|
||||
f"Stage 2: Classifying {len(input_data['reviews'])} reviews "
|
||||
f"(batch {batch_id})"
|
||||
)
|
||||
|
||||
classified_reviews: list[ClassifiedReview] = []
|
||||
total_tokens = 0
|
||||
total_cost = 0.0
|
||||
total_spans = 0
|
||||
error_count = 0
|
||||
reviews = input_data["reviews"]
|
||||
max_concurrent = self.config.classification_max_concurrent
|
||||
fixed_batch_size = self.config.classification_batch_size # 0 = auto
|
||||
|
||||
llm_client = await self._get_llm_client()
|
||||
|
||||
for review in input_data["reviews"]:
|
||||
try:
|
||||
classified, metadata = await self._classify_review(
|
||||
review,
|
||||
input_data["config"]["profile"],
|
||||
llm_client,
|
||||
batch_id,
|
||||
)
|
||||
# Smart routing (if enabled)
|
||||
skip_classified: list[ClassifiedReview] = []
|
||||
reviews_to_process = reviews
|
||||
cheap_reviews: list[ReviewToClassify] = []
|
||||
full_reviews: list[ReviewToClassify] = []
|
||||
|
||||
if classified:
|
||||
classified_reviews.append(classified)
|
||||
total_spans += len(classified.get("spans", []))
|
||||
total_tokens += metadata.get("total_tokens", 0)
|
||||
total_cost += metadata.get("cost_usd", 0.0)
|
||||
if self._router:
|
||||
routed = self._router.route_batch(reviews)
|
||||
|
||||
# Persist to database if configured
|
||||
# Process SKIP tier immediately (no LLM)
|
||||
for review in routed[RoutingTier.SKIP]:
|
||||
routing = review.get("_routing")
|
||||
if routing and routing.skip_classification:
|
||||
classified = self._create_skip_classification(
|
||||
review,
|
||||
routing.skip_classification,
|
||||
batch_id,
|
||||
)
|
||||
skip_classified.append(classified)
|
||||
|
||||
# Persist if configured
|
||||
if self.review_repo and self.span_repo:
|
||||
await self._persist_classification(
|
||||
classified,
|
||||
review,
|
||||
classified, review, batch_id, input_data["config"]
|
||||
)
|
||||
|
||||
cheap_reviews = routed[RoutingTier.CHEAP_MODEL]
|
||||
full_reviews = routed[RoutingTier.FULL_MODEL]
|
||||
|
||||
router_stats = self._router.get_stats()
|
||||
logger.info(
|
||||
f"Router results: SKIP={len(routed[RoutingTier.SKIP])}, "
|
||||
f"CHEAP={len(cheap_reviews)}, FULL={len(full_reviews)}"
|
||||
)
|
||||
|
||||
# If no cheap model enabled, merge into full
|
||||
if not self.config.router_cheap_model_enabled:
|
||||
full_reviews = cheap_reviews + full_reviews
|
||||
cheap_reviews = []
|
||||
else:
|
||||
# No router - all reviews go to full model
|
||||
full_reviews = reviews
|
||||
|
||||
# Calculate optimal batch size dynamically (based on full_reviews)
|
||||
all_llm_reviews = full_reviews + cheap_reviews # Combined for batch sizing
|
||||
if all_llm_reviews:
|
||||
review_dicts = [{"text": r["text"]} for r in all_llm_reviews]
|
||||
batch_calc = self._batch_sizer.calculate_batch_size(
|
||||
reviews=review_dicts,
|
||||
fixed_size=fixed_batch_size if fixed_batch_size > 0 else None,
|
||||
)
|
||||
batch_size = batch_calc.batch_size
|
||||
logger.info(f"Batch sizing: {batch_calc.reasoning}")
|
||||
else:
|
||||
batch_size = fixed_batch_size or 25
|
||||
|
||||
llm_review_count = len(full_reviews) + len(cheap_reviews)
|
||||
logger.info(
|
||||
f"Stage 2: Classifying {len(reviews)} reviews "
|
||||
f"(batch_id={batch_id}, batch_size={batch_size}, max_concurrent={max_concurrent}, "
|
||||
f"skip={len(skip_classified)}, llm={llm_review_count})"
|
||||
)
|
||||
|
||||
# Split FULL tier reviews into batches
|
||||
full_batches = [
|
||||
full_reviews[i:i + batch_size]
|
||||
for i in range(0, len(full_reviews), batch_size)
|
||||
] if full_reviews else []
|
||||
|
||||
# Split CHEAP tier reviews into batches
|
||||
cheap_batches = [
|
||||
cheap_reviews[i:i + batch_size]
|
||||
for i in range(0, len(cheap_reviews), batch_size)
|
||||
] if cheap_reviews else []
|
||||
|
||||
logger.info(
|
||||
f"Split into {len(full_batches)} FULL batches + {len(cheap_batches)} CHEAP batches "
|
||||
f"({'unlimited' if max_concurrent == 0 else max_concurrent} concurrent)"
|
||||
)
|
||||
|
||||
# Process batches - unlimited concurrency by default (0 = no limit)
|
||||
semaphore = asyncio.Semaphore(max_concurrent) if max_concurrent > 0 else None
|
||||
total_tokens = 0
|
||||
total_cost = 0.0
|
||||
total_cached_tokens = 0
|
||||
classified_reviews: list[ClassifiedReview] = []
|
||||
error_count = 0
|
||||
|
||||
# Get cheap client if needed
|
||||
cheap_client = None
|
||||
if cheap_batches:
|
||||
cheap_client = await self._get_cheap_llm_client()
|
||||
|
||||
async def process_batch(
|
||||
batch_reviews: list[ReviewToClassify],
|
||||
batch_num: int,
|
||||
client: LLMClientBase,
|
||||
tier_label: str = "FULL",
|
||||
):
|
||||
"""Process a single batch of reviews."""
|
||||
|
||||
async def do_batch():
|
||||
nonlocal total_tokens, total_cost, total_cached_tokens, error_count
|
||||
try:
|
||||
batch_classified, batch_metadata = await self._classify_batch(
|
||||
batch_reviews,
|
||||
input_data["config"]["profile"],
|
||||
client,
|
||||
batch_id,
|
||||
input_data["config"],
|
||||
)
|
||||
|
||||
batch_tokens = batch_metadata.get("total_tokens", 0)
|
||||
batch_cost = batch_metadata.get("cost_usd", 0.0)
|
||||
batch_cached = batch_metadata.get("cached_tokens", 0)
|
||||
|
||||
total_tokens += batch_tokens
|
||||
total_cost += batch_cost
|
||||
total_cached_tokens += batch_cached
|
||||
|
||||
# Update batch sizer with actual token usage for adaptive sizing
|
||||
if self._batch_sizer:
|
||||
input_tokens = batch_metadata.get("input_tokens", 0)
|
||||
output_tokens = batch_metadata.get("output_tokens", 0)
|
||||
self._batch_sizer.update_from_response(
|
||||
batch_size=len(batch_reviews),
|
||||
input_tokens=input_tokens - self._system_prompt_tokens, # Exclude system prompt
|
||||
output_tokens=output_tokens,
|
||||
)
|
||||
|
||||
total_batches = len(full_batches) + len(cheap_batches)
|
||||
logger.info(
|
||||
f"[{tier_label}] Batch {batch_num}/{total_batches}: "
|
||||
f"{len(batch_classified)} reviews, "
|
||||
f"{batch_tokens:,} tokens ({batch_cached:,} cached), "
|
||||
f"${batch_cost:.4f}"
|
||||
)
|
||||
|
||||
return batch_classified
|
||||
|
||||
except PartialBatchResult as e:
|
||||
# Partial success - we recovered some reviews
|
||||
logger.info(
|
||||
f"Batch {batch_num} partial success: {len(e.partial_results)} recovered, "
|
||||
f"{len(e.missing_indices)} need reprocessing"
|
||||
)
|
||||
|
||||
# Process the recovered results
|
||||
partial_classified: list[ClassifiedReview] = []
|
||||
profile = input_data["config"]["profile"]
|
||||
|
||||
for partial_review in e.partial_results:
|
||||
idx = partial_review.get("review_index", -1)
|
||||
if 0 <= idx < len(batch_reviews):
|
||||
review = batch_reviews[idx]
|
||||
try:
|
||||
classified = self._process_llm_response(
|
||||
review,
|
||||
{
|
||||
"spans": partial_review.get("spans", []),
|
||||
"review_summary": partial_review.get("review_summary", {}),
|
||||
},
|
||||
profile,
|
||||
batch_id,
|
||||
is_fallback=False,
|
||||
)
|
||||
partial_classified.append(classified)
|
||||
|
||||
if self.review_repo and self.span_repo:
|
||||
await self._persist_classification(
|
||||
classified, review, batch_id, input_data["config"]
|
||||
)
|
||||
except Exception as pe:
|
||||
logger.warning(f"Error processing recovered review {idx}: {pe}")
|
||||
e.missing_indices.append(idx)
|
||||
|
||||
# Update cost tracking from partial metadata
|
||||
if e.metadata:
|
||||
total_tokens += e.metadata.get("total_tokens", 0)
|
||||
total_cost += e.metadata.get("cost_usd", 0.0)
|
||||
total_cached_tokens += e.metadata.get("cached_tokens", 0)
|
||||
|
||||
# Only fallback process the missing reviews
|
||||
if e.missing_indices:
|
||||
missing_reviews = [batch_reviews[i] for i in e.missing_indices if 0 <= i < len(batch_reviews)]
|
||||
error_count += len(missing_reviews)
|
||||
logger.info(f"Reprocessing {len(missing_reviews)} missing reviews individually")
|
||||
fallback_results = await self._fallback_individual_processing(
|
||||
missing_reviews,
|
||||
input_data["config"]["profile"],
|
||||
client, # Use same client as batch
|
||||
batch_id,
|
||||
input_data["config"],
|
||||
)
|
||||
partial_classified.extend(fallback_results)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error classifying review {review['review_id']}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
error_count += 1
|
||||
return partial_classified
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[{tier_label}] Batch {batch_num} failed: {e}", exc_info=True)
|
||||
error_count += len(batch_reviews)
|
||||
# Fallback: process individually
|
||||
return await self._fallback_individual_processing(
|
||||
batch_reviews,
|
||||
input_data["config"]["profile"],
|
||||
client, # Use same client as batch
|
||||
batch_id,
|
||||
input_data["config"],
|
||||
)
|
||||
|
||||
# Run with or without semaphore
|
||||
if semaphore:
|
||||
async with semaphore:
|
||||
return await do_batch()
|
||||
else:
|
||||
return await do_batch()
|
||||
|
||||
# Process all batches concurrently (both FULL and CHEAP tiers)
|
||||
all_batch_tasks = []
|
||||
|
||||
# FULL tier batches
|
||||
for i, batch in enumerate(full_batches):
|
||||
all_batch_tasks.append(
|
||||
process_batch(batch, i + 1, llm_client, "FULL")
|
||||
)
|
||||
|
||||
# CHEAP tier batches
|
||||
for i, batch in enumerate(cheap_batches):
|
||||
all_batch_tasks.append(
|
||||
process_batch(batch, len(full_batches) + i + 1, cheap_client, "CHEAP")
|
||||
)
|
||||
|
||||
batch_results = await asyncio.gather(*all_batch_tasks) if all_batch_tasks else []
|
||||
|
||||
# Flatten results from LLM processing
|
||||
for batch_result in batch_results:
|
||||
classified_reviews.extend(batch_result)
|
||||
|
||||
# Add skip-classified reviews (no LLM)
|
||||
classified_reviews.extend(skip_classified)
|
||||
|
||||
# Calculate stats
|
||||
total_spans = sum(len(r.get("spans", [])) for r in classified_reviews)
|
||||
avg_spans = total_spans / len(classified_reviews) if classified_reviews else 0
|
||||
|
||||
# Log final statistics
|
||||
skip_count = len(skip_classified)
|
||||
llm_count = len(classified_reviews) - skip_count
|
||||
logger.info(
|
||||
f"Stage 2 complete: {len(classified_reviews)} classified, "
|
||||
f"{error_count} errors, {total_spans} spans total"
|
||||
f"Stage 2 complete: {len(classified_reviews)} classified "
|
||||
f"(LLM={llm_count}, skipped={skip_count}), "
|
||||
f"{error_count} errors, {total_spans} spans total, "
|
||||
f"${total_cost:.4f} cost, {total_cached_tokens:,} cached tokens"
|
||||
)
|
||||
|
||||
if self._batch_sizer:
|
||||
stats = self._batch_sizer.get_stats_summary()
|
||||
logger.info(
|
||||
f"Batch sizing stats: "
|
||||
f"avg_input={stats['avg_input_tokens']} tokens/review, "
|
||||
f"avg_output={stats['avg_output_tokens']} tokens/review, "
|
||||
f"range=[{stats['min_review_tokens']}-{stats['max_review_tokens']}]"
|
||||
)
|
||||
|
||||
return Stage2Output(
|
||||
batch_id=batch_id,
|
||||
taxonomy_version=input_data["config"]["taxonomy_version"],
|
||||
model_version=self.config.llm_model,
|
||||
prompt_version="v1.0",
|
||||
prompt_version="v2.0-batched",
|
||||
reviews_classified=classified_reviews,
|
||||
stats=Stage2Stats(
|
||||
input_count=len(input_data["reviews"]),
|
||||
input_count=len(reviews),
|
||||
success_count=len(classified_reviews),
|
||||
error_count=error_count,
|
||||
total_spans=total_spans,
|
||||
@@ -167,42 +483,127 @@ class Stage2Classifier:
|
||||
),
|
||||
)
|
||||
|
||||
async def _classify_batch(
    self,
    reviews: list[ReviewToClassify],
    profile: str,
    llm_client: LLMClientBase,
    batch_id: str,
    config: dict[str, Any],
) -> tuple[list[ClassifiedReview], dict[str, Any]]:
    """
    Classify a batch of reviews in a single LLM call.

    Each review yields at most one entry in the result: the processed LLM
    response, or a fallback classification when processing that response
    fails. A persistence failure is logged and does not replace or duplicate
    the classification that was already produced.

    Args:
        reviews: List of reviews to classify
        profile: Classification profile
        llm_client: LLM client instance
        batch_id: Batch identifier
        config: Classification config

    Returns:
        Tuple of (list of classified reviews, metadata returned by the
        batch LLM call)

    Raises:
        Whatever ``llm_client.classify_batch`` raises; the batch call itself
        is not guarded here.
    """
    # Prepare batch input
    batch_input: list[BatchReviewInput] = [
        BatchReviewInput(
            review_id=r["review_id"],
            text=r["text"],
            rating=r["rating"],
        )
        for r in reviews
    ]

    # Call LLM for batch classification
    llm_responses, metadata = await llm_client.classify_batch(batch_input, profile)

    classified_reviews: list[ClassifiedReview] = []
    should_persist = bool(self.review_repo and self.span_repo)

    # Responses are paired with reviews by position.
    for review, llm_response in zip(reviews, llm_responses):
        try:
            classified = self._process_llm_response(
                review,
                llm_response,
                profile,
                batch_id,
                is_fallback=False,
            )
        except Exception as e:
            logger.warning(f"Error processing review {review['review_id']}: {e}")
            # Use fallback for this review
            fallback = create_fallback_response(review["text"])
            classified_reviews.append(
                self._process_llm_response(
                    review, fallback, profile, batch_id, is_fallback=True
                )
            )
            continue

        classified_reviews.append(classified)

        # Persist to database if configured. This has its own guard so a
        # database error cannot append a second (fallback) entry for a
        # review that was already classified successfully.
        if should_persist:
            try:
                await self._persist_classification(
                    classified,
                    review,
                    batch_id,
                    config,
                )
            except Exception as e:
                logger.warning(
                    f"Failed to persist classification for review "
                    f"{review['review_id']}: {e}"
                )

    return classified_reviews, metadata
|
||||
|
||||
async def _fallback_individual_processing(
    self,
    reviews: list[ReviewToClassify],
    profile: str,
    llm_client: LLMClientBase,
    batch_id: str,
    config: dict[str, Any],
) -> list[ClassifiedReview]:
    """
    Fallback to individual processing when batch fails.

    Every review is classified with its own ``_classify_review`` call. If
    that call raises, a fallback classification is used instead. Each review
    contributes at most one entry to the result; a persistence failure is
    logged and leaves the classification that was already produced in place.

    Args:
        reviews: Reviews to classify one by one
        profile: Classification profile
        llm_client: LLM client instance
        batch_id: Batch identifier
        config: Classification config passed through to persistence

    Returns:
        List of classified reviews
    """
    logger.warning(f"Falling back to individual processing for {len(reviews)} reviews")
    classified_reviews: list[ClassifiedReview] = []
    should_persist = bool(self.review_repo and self.span_repo)

    for review in reviews:
        try:
            classified, _ = await self._classify_review(
                review, profile, llm_client, batch_id
            )
        except Exception as e:
            logger.error(f"Individual classification failed for {review['review_id']}: {e}")
            # Use fallback
            fallback = create_fallback_response(review["text"])
            classified_reviews.append(
                self._process_llm_response(
                    review, fallback, profile, batch_id, is_fallback=True
                )
            )
            continue

        if not classified:
            continue
        classified_reviews.append(classified)

        # Persistence has its own guard so a database error cannot append a
        # second (fallback) entry for a review that was already classified.
        if should_persist:
            try:
                await self._persist_classification(
                    classified, review, batch_id, config
                )
            except Exception as e:
                logger.error(
                    f"Failed to persist classification for {review['review_id']}: {e}"
                )

    return classified_reviews
|
||||
|
||||
def _process_llm_response(
|
||||
self,
|
||||
review: ReviewToClassify,
|
||||
llm_response: LLMClassificationResponse,
|
||||
profile: str,
|
||||
batch_id: str,
|
||||
is_fallback: bool = False,
|
||||
) -> ClassifiedReview:
|
||||
"""
|
||||
Process an LLM response into a ClassifiedReview.
|
||||
|
||||
This is shared logic for both batch and individual processing.
|
||||
"""
|
||||
# Validate and fix response
|
||||
llm_response = self._validate_and_fix_response(llm_response, review["text"])
|
||||
|
||||
@@ -217,7 +618,10 @@ class Stage2Classifier:
|
||||
# Ensure exactly one primary span
|
||||
spans = self._ensure_primary_span(spans)
|
||||
|
||||
# Find the primary span for review-level classification
|
||||
# Post-LLM validation
|
||||
spans = self._validate_span_classifications(spans)
|
||||
|
||||
# Find primary span
|
||||
primary_span = next((s for s in spans if s.get("is_primary")), spans[0] if spans else None)
|
||||
|
||||
# Generate embedding
|
||||
@@ -247,10 +651,59 @@ class Stage2Classifier:
|
||||
embedding=embedding,
|
||||
spans=spans,
|
||||
classification_confidence={
|
||||
"overall": 0.8 if not metadata.get("fallback") else 0.3
|
||||
"overall": 0.8 if not is_fallback else 0.3
|
||||
},
|
||||
processing_time_ms=metadata.get("latency_ms", 0),
|
||||
), metadata
|
||||
processing_time_ms=0, # Set at batch level
|
||||
)
|
||||
|
||||
async def _classify_review(
|
||||
self,
|
||||
review: ReviewToClassify,
|
||||
profile: str,
|
||||
llm_client: LLMClientBase,
|
||||
batch_id: str,
|
||||
) -> tuple[ClassifiedReview | None, dict[str, Any]]:
|
||||
"""
|
||||
Classify a single review (used for fallback when batching fails).
|
||||
|
||||
Args:
|
||||
review: Review to classify
|
||||
profile: Classification profile
|
||||
llm_client: LLM client instance
|
||||
batch_id: Batch identifier
|
||||
|
||||
Returns:
|
||||
Tuple of (classified review, metadata)
|
||||
"""
|
||||
metadata: dict[str, Any] = {}
|
||||
is_fallback = False
|
||||
|
||||
# Call LLM for classification
|
||||
try:
|
||||
llm_response, llm_metadata = await llm_client.classify(
|
||||
review["text"],
|
||||
profile,
|
||||
)
|
||||
metadata.update(llm_metadata)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"LLM classification failed for {review['review_id']}, "
|
||||
f"using fallback: {e}"
|
||||
)
|
||||
llm_response = create_fallback_response(review["text"])
|
||||
metadata["fallback"] = True
|
||||
is_fallback = True
|
||||
|
||||
# Use shared processing logic
|
||||
classified = self._process_llm_response(
|
||||
review,
|
||||
llm_response,
|
||||
profile,
|
||||
batch_id,
|
||||
is_fallback=is_fallback,
|
||||
)
|
||||
|
||||
return classified, metadata
|
||||
|
||||
def _validate_and_fix_response(
|
||||
self,
|
||||
@@ -405,6 +858,45 @@ class Stage2Classifier:
|
||||
|
||||
return spans
|
||||
|
||||
def _validate_span_classifications(
|
||||
self,
|
||||
spans: list[ExtractedSpan],
|
||||
) -> list[ExtractedSpan]:
|
||||
"""
|
||||
Post-LLM validation to catch common misclassifications.
|
||||
|
||||
Uses keyword detection to identify obvious errors like:
|
||||
- Price mentions classified as P codes (should be V)
|
||||
- Staff behavior classified as A codes (should be P)
|
||||
- Scam mentions classified as P/V codes (should be R)
|
||||
|
||||
Args:
|
||||
spans: List of classified spans
|
||||
|
||||
Returns:
|
||||
List of spans with corrections applied
|
||||
"""
|
||||
corrections = 0
|
||||
for span in spans:
|
||||
correction = validate_classification(
|
||||
span.get("span_text", ""),
|
||||
span.get("urt_primary", "O1.01"),
|
||||
span.get("valence", "V0"),
|
||||
)
|
||||
if correction:
|
||||
original = span["urt_primary"]
|
||||
span["urt_primary"] = correction["suggested_urt"]
|
||||
corrections += 1
|
||||
logger.debug(
|
||||
f"Validation corrected {original} → {correction['suggested_urt']} "
|
||||
f"({correction['reason']})"
|
||||
)
|
||||
|
||||
if corrections:
|
||||
logger.info(f"Post-LLM validation corrected {corrections} spans")
|
||||
|
||||
return spans
|
||||
|
||||
def _calculate_trust_score(
|
||||
self,
|
||||
review: ReviewToClassify,
|
||||
@@ -467,6 +959,72 @@ class Stage2Classifier:
|
||||
quotes[code] = span["span_text"][:100]
|
||||
return quotes
|
||||
|
||||
def _create_skip_classification(
    self,
    review: ReviewToClassify,
    skip_classification: dict,
    batch_id: str,
) -> ClassifiedReview:
    """
    Build the ClassifiedReview for a SKIP tier review without calling the LLM.

    The whole review text becomes a single primary span carrying the code,
    valence and intensity supplied by the router (defaults: "V4.03", "V0",
    "I1"). Confidence is fixed low and the trust score is the configured
    floor.

    Args:
        review: Source review
        skip_classification: Pre-assigned classification from router
        batch_id: Batch identifier (not used by this method)

    Returns:
        ClassifiedReview with generic classification
    """
    urt_primary = skip_classification.get("urt_primary", "V4.03")
    valence = skip_classification.get("valence", "V0")
    intensity = skip_classification.get("intensity", "I1")

    # Span id is a truncated SHA-256 of the review id, span index 0 and the
    # first 50 characters of the text.
    digest = hashlib.sha256(
        f"{review['review_id']}:0:{review['text'][:50]}".encode()
    ).hexdigest()

    whole_review_span = ExtractedSpan(
        span_id=f"SPN-{digest[:16]}",
        span_index=0,
        span_text=review["text"],
        span_start=0,
        span_end=len(review["text"]),
        profile="lite",  # type: ignore
        urt_primary=urt_primary,
        urt_secondary=[],
        valence=valence,
        intensity=intensity,
        comparative="CR-N",
        confidence="low",
        # Second character of the valence / intensity codes feeds the USN.
        usn=f"URT:S:{urt_primary}:{valence[1]}{intensity[1]}:11TC.ES.N",
        is_primary=True,
    )

    # Generate embedding if available
    embedding: list[float] = (
        self.embedding_service.embed(review.get("text_normalized", review["text"]))
        if self.embedding_service
        else []
    )

    confidence_info = {
        "overall": 0.2,
        "skip_reason": skip_classification.get("skip_reason", "auto_routed"),
    }

    return ClassifiedReview(
        source=review["source"],
        review_id=review["review_id"],
        review_version=review["review_version"],
        urt_primary=urt_primary,
        urt_secondary=[],
        valence=valence,
        intensity=intensity,
        comparative="CR-N",
        staff_mentions=[],
        quotes={},
        trust_score=self.config.trust_score_floor,  # Minimum trust for skipped reviews
        embedding=embedding,
        spans=[whole_review_span],
        classification_confidence=confidence_info,
        processing_time_ms=0,
    )
|
||||
|
||||
def _generate_usn(self, span: LLMSpanResponse) -> str:
|
||||
"""
|
||||
Generate USN (URT String Notation) for a span.
|
||||
@@ -536,4 +1094,5 @@ class Stage2Classifier:
|
||||
batch_id,
|
||||
self.config.llm_model,
|
||||
config["taxonomy_version"],
|
||||
job_id=config.get("job_id"),
|
||||
)
|
||||
|
||||
@@ -69,6 +69,9 @@ class Stage3Router:
|
||||
"""
|
||||
logger.info(f"Stage 3: Routing {len(input_data['spans'])} spans")
|
||||
|
||||
# Get job_id from input (may be None)
|
||||
job_id = input_data.get("job_id")
|
||||
|
||||
routed_spans: list[RoutedSpan] = []
|
||||
issues_created: list[str] = []
|
||||
issues_updated: list[str] = []
|
||||
@@ -81,7 +84,7 @@ class Stage3Router:
|
||||
spans_skipped += 1
|
||||
continue
|
||||
|
||||
routed = await self._route_span(span)
|
||||
routed = await self._route_span(span, job_id=job_id)
|
||||
if routed:
|
||||
routed_spans.append(routed)
|
||||
|
||||
@@ -114,12 +117,13 @@ class Stage3Router:
|
||||
),
|
||||
)
|
||||
|
||||
async def _route_span(self, span: SpanToRoute) -> RoutedSpan | None:
|
||||
async def _route_span(self, span: SpanToRoute, job_id: str | None = None) -> RoutedSpan | None:
|
||||
"""
|
||||
Route a single span to an issue.
|
||||
|
||||
Args:
|
||||
span: Span to route
|
||||
job_id: Optional job ID to link issues to pipeline executions
|
||||
|
||||
Returns:
|
||||
RoutedSpan with routing info, or None if skipped
|
||||
@@ -149,6 +153,7 @@ class Stage3Router:
|
||||
entity=span.get("entity_normalized"),
|
||||
entity_normalized=span.get("entity_normalized"),
|
||||
taxonomy_version=self.config.taxonomy_version,
|
||||
job_id=job_id,
|
||||
)
|
||||
|
||||
routed = RoutedSpan(
|
||||
|
||||
@@ -194,25 +194,24 @@ class Stage4Aggregator:
|
||||
else:
|
||||
raise ValueError(f"Unknown bucket type: {bucket_type}")
|
||||
|
||||
def _get_period_date(self, target_date: date, bucket_type: str) -> str:
|
||||
"""Get the period date string for a bucket."""
|
||||
def _get_period_date(self, target_date: date, bucket_type: str) -> date:
|
||||
"""Get the period date for a bucket."""
|
||||
if bucket_type == "day":
|
||||
return target_date.isoformat()
|
||||
return target_date
|
||||
elif bucket_type == "week":
|
||||
# Week starts on Monday
|
||||
start = target_date - timedelta(days=target_date.weekday())
|
||||
return start.isoformat()
|
||||
return target_date - timedelta(days=target_date.weekday())
|
||||
elif bucket_type == "month":
|
||||
return target_date.replace(day=1).isoformat()
|
||||
return target_date.replace(day=1)
|
||||
else:
|
||||
return target_date.isoformat()
|
||||
return target_date
|
||||
|
||||
def _aggregate_by_code(
|
||||
self,
|
||||
span_data: list[dict[str, Any]],
|
||||
business_id: str,
|
||||
place_id: str,
|
||||
period_date: str,
|
||||
period_date: date,
|
||||
bucket_type: str,
|
||||
taxonomy_version: str,
|
||||
) -> list[FactRecord]:
|
||||
@@ -243,7 +242,7 @@ class Stage4Aggregator:
|
||||
span_data: list[dict[str, Any]],
|
||||
business_id: str,
|
||||
place_id: str,
|
||||
period_date: str,
|
||||
period_date: date,
|
||||
bucket_type: str,
|
||||
taxonomy_version: str,
|
||||
) -> list[FactRecord]:
|
||||
@@ -275,7 +274,7 @@ class Stage4Aggregator:
|
||||
span_data: list[dict[str, Any]],
|
||||
business_id: str,
|
||||
place_id: str,
|
||||
period_date: str,
|
||||
period_date: date,
|
||||
bucket_type: str,
|
||||
taxonomy_version: str,
|
||||
) -> FactRecord:
|
||||
@@ -296,7 +295,7 @@ class Stage4Aggregator:
|
||||
spans: list[dict[str, Any]],
|
||||
business_id: str,
|
||||
place_id: str,
|
||||
period_date: str,
|
||||
period_date: date,
|
||||
bucket_type: str,
|
||||
subject_type: str,
|
||||
subject_id: str,
|
||||
@@ -449,7 +448,7 @@ class Stage4Aggregator:
|
||||
self,
|
||||
business_id: str,
|
||||
place_id: str,
|
||||
period_date: str,
|
||||
period_date: date,
|
||||
bucket_type: str,
|
||||
subject_type: str,
|
||||
subject_id: str,
|
||||
|
||||
@@ -1,477 +0,0 @@
|
||||
"""
|
||||
Stage 4: Synthesize - Generate AI narratives and action plans.
|
||||
|
||||
This stage runs after classification and routing to produce:
|
||||
- Executive narrative (business-specific story)
|
||||
- Section insights (sentiment, category, timeline)
|
||||
- Action plan with prioritized recommendations
|
||||
- Timeline annotations for key events
|
||||
- Marketing angles from strengths
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import asyncpg
|
||||
|
||||
from reviewiq_pipeline.services.llm_client import LLMClientBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ActionItem:
    """One recommendation inside the synthesis action plan.

    The field set mirrors the ``action_plan`` entries requested from the LLM
    in the synthesis system prompt.
    """

    # Identifier of the action (the prompt's example is "action_1").
    id: str
    # Short headline for the action.
    title: str
    # Root cause taken from the reviews.
    why: str
    # Specific steps to take.
    what: str
    # Department or role responsible.
    who: str
    # Expected outcome.
    impact: str
    # Supporting quotes.
    evidence: list[str]
    # Estimated rating improvement, or None when not provided.
    estimated_rating_lift: float | None
    complexity: str  # 'quick' | 'medium' | 'complex'
    priority: str  # 'critical' | 'high' | 'medium' | 'low'
    # Time horizon label (the prompt suggests "This week|This month|This quarter").
    timeline: str
    # Subcode this action relates to (the prompt's example is "V1.03").
    related_subcode: str
|
||||
|
||||
|
||||
@dataclass
class TimelineAnnotation:
    """Marker describing a key event on the review timeline."""

    # Date of the event as a string (the prompt's example is "2024-01-15").
    date: str
    # Short label for the marker.
    label: str
    # What happened.
    description: str
    type: str  # 'positive' | 'negative' | 'neutral' | 'event'
|
||||
|
||||
|
||||
@dataclass
class Synthesis:
    """Full Stage 4 output: narratives, priorities, actions and annotations."""

    # Multi-paragraph story of the business situation.
    executive_narrative: str
    # Short explanations attached to the sentiment, category and timeline sections.
    sentiment_insight: str
    category_insight: str
    timeline_insight: str
    # Domain letter code needing the most attention, or None.
    priority_domain: str | None
    # Subcode that should be fixed first, or None.
    priority_issue: str | None
    # Prioritised recommendations.
    action_plan: list[ActionItem]
    # String-to-string mapping; the stage currently always stores it empty.
    issue_actions: dict[str, str]
    # Key events to mark on the timeline.
    timeline_annotations: list[TimelineAnnotation]
    # Ways to promote the business's strengths.
    marketing_angles: list[str]
    # Industry/competitor comparison, or None when unknown.
    competitor_context: str | None
    # ISO-8601 timestamp string of when the synthesis was produced.
    generated_at: str
|
||||
|
||||
|
||||
SYNTHESIS_SYSTEM_PROMPT = """You are an expert business analyst specializing in customer experience and review analysis.
|
||||
|
||||
Your task is to analyze classified review data and generate actionable business insights.
|
||||
|
||||
You will receive:
|
||||
1. Summary statistics (total reviews, rating, sentiment distribution)
|
||||
2. Top issues by category with example quotes
|
||||
3. Top strengths with example quotes
|
||||
4. Domain breakdown (what customers talk about most)
|
||||
|
||||
Generate a JSON response with these fields:
|
||||
|
||||
{
|
||||
"executive_narrative": "2-3 paragraph story explaining the business situation, key problems, and path forward. Be specific with numbers and examples.",
|
||||
|
||||
"sentiment_insight": "1-2 sentences explaining WHY sentiment is distributed this way. Connect to specific issues.",
|
||||
|
||||
"category_insight": "1-2 sentences about the pattern in categories. Which domain needs most attention and why?",
|
||||
|
||||
"timeline_insight": "1-2 sentences about trends if data shows changes over time.",
|
||||
|
||||
"priority_domain": "Single letter code (P/V/J/O/A/E/R) for the domain needing most attention, or null",
|
||||
|
||||
"priority_issue": "The subcode (e.g., 'V1.03') that should be fixed first, or null",
|
||||
|
||||
"action_plan": [
|
||||
{
|
||||
"id": "action_1",
|
||||
"title": "Clear action title",
|
||||
"why": "Root cause from the reviews",
|
||||
"what": "Specific steps to take",
|
||||
"who": "Department or role responsible",
|
||||
"impact": "Expected outcome",
|
||||
"evidence": ["Quote 1", "Quote 2"],
|
||||
"estimated_rating_lift": 0.3,
|
||||
"complexity": "quick|medium|complex",
|
||||
"priority": "critical|high|medium|low",
|
||||
"timeline": "This week|This month|This quarter",
|
||||
"related_subcode": "V1.03"
|
||||
}
|
||||
],
|
||||
|
||||
"timeline_annotations": [
|
||||
{
|
||||
"date": "2024-01-15",
|
||||
"label": "Short label",
|
||||
"description": "What happened",
|
||||
"type": "positive|negative|neutral|event"
|
||||
}
|
||||
],
|
||||
|
||||
"marketing_angles": [
|
||||
"Way to promote strength 1",
|
||||
"Way to promote strength 2"
|
||||
],
|
||||
|
||||
"competitor_context": "How this compares to industry/competitors, or null if unknown"
|
||||
}
|
||||
|
||||
Be specific, actionable, and business-focused. Use actual numbers and quotes from the data.
|
||||
Prioritize actions by impact and feasibility.
|
||||
"""
|
||||
|
||||
|
||||
class SynthesisStage:
    """
    Stage 4: Generate AI synthesis from classified review data.

    This stage:
    1. Aggregates classification results
    2. Identifies patterns and priorities
    3. Generates narrative insights via LLM
    4. Produces actionable recommendations
    """

    def __init__(self, pool: asyncpg.Pool, llm_client: LLMClientBase):
        """Keep the database pool and LLM client used by the stage."""
        self.pool = pool
        self.llm_client = llm_client

    async def run(self, job_id: str, execution_id: str) -> Synthesis:
        """
        Generate synthesis for a completed pipeline execution.

        Args:
            job_id: The scraping job ID
            execution_id: The pipeline execution ID

        Returns:
            Synthesis object with all generated insights
        """
        logger.info("Stage 4: Generating synthesis for job %s", job_id)

        # Gather data -> ask the LLM -> persist the result on the execution row.
        context = await self._gather_context(job_id)
        synthesis = await self._generate_synthesis(context)
        await self._store_synthesis(execution_id, synthesis)

        logger.info(
            "Stage 4: Synthesis complete - %s actions generated",
            len(synthesis.action_plan),
        )
        return synthesis

    async def _gather_context(self, job_id: str) -> dict[str, Any]:
        """Gather all context needed for synthesis.

        Returns a dict with keys ``business_name``, ``overview``,
        ``sentiment``, ``top_issues``, ``top_strengths`` and ``domains``;
        every record is converted to a plain dict.
        """
        # Overview stats. The average rating is computed in a scalar subquery:
        # averaging r.rating across the LEFT JOIN would count each review once
        # per span and skew the mean toward reviews with many spans.
        # NOTE(review): assumes one row per review in `reviews` - confirm.
        overview = await self.pool.fetchrow("""
            SELECT
                COUNT(DISTINCT r.review_id) as total_reviews,
                (SELECT AVG(rating) FROM reviews WHERE job_id = $1) as avg_rating,
                COUNT(s.span_id) as total_spans
            FROM reviews r
            LEFT JOIN pipeline.spans s ON s.source_review_id = r.review_id
            WHERE r.job_id = $1
        """, job_id)

        # Sentiment distribution
        sentiment = await self.pool.fetch("""
            SELECT
                valence,
                COUNT(*) as count,
                COUNT(DISTINCT source_review_id) as review_count
            FROM pipeline.spans
            WHERE job_id = $1 AND valence IS NOT NULL
            GROUP BY valence
            ORDER BY count DESC
        """, job_id)

        # Top issues (weaknesses)
        top_issues = await self.pool.fetch("""
            SELECT
                s.urt_primary as subcode,
                sc.name as subcode_name,
                sc.definition,
                d.code as domain,
                d.name as domain_name,
                COUNT(*) as span_count,
                COUNT(*) FILTER (WHERE s.valence = 'V-') as negative_count,
                ARRAY_AGG(s.span_text ORDER BY s.intensity DESC) FILTER (WHERE s.valence = 'V-') as example_quotes
            FROM pipeline.spans s
            JOIN pipeline.urt_subcodes sc ON sc.code = s.urt_primary
            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
            WHERE s.job_id = $1 AND s.valence = 'V-'
            GROUP BY s.urt_primary, sc.name, sc.definition, d.code, d.name
            ORDER BY negative_count DESC
            LIMIT 10
        """, job_id)

        # Top strengths
        top_strengths = await self.pool.fetch("""
            SELECT
                s.urt_primary as subcode,
                sc.name as subcode_name,
                sc.definition,
                d.code as domain,
                d.name as domain_name,
                COUNT(*) as span_count,
                COUNT(*) FILTER (WHERE s.valence = 'V+') as positive_count,
                ARRAY_AGG(s.span_text ORDER BY s.intensity DESC) FILTER (WHERE s.valence = 'V+') as example_quotes
            FROM pipeline.spans s
            JOIN pipeline.urt_subcodes sc ON sc.code = s.urt_primary
            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
            WHERE s.job_id = $1 AND s.valence = 'V+'
            GROUP BY s.urt_primary, sc.name, sc.definition, d.code, d.name
            ORDER BY positive_count DESC
            LIMIT 5
        """, job_id)

        # Domain distribution
        domains = await self.pool.fetch("""
            SELECT
                SUBSTRING(urt_primary, 1, 1) as domain,
                d.name as domain_name,
                COUNT(*) as total_count,
                COUNT(*) FILTER (WHERE valence = 'V+') as positive_count,
                COUNT(*) FILTER (WHERE valence = 'V-') as negative_count
            FROM pipeline.spans s
            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
            WHERE s.job_id = $1
            GROUP BY SUBSTRING(urt_primary, 1, 1), d.name
            ORDER BY total_count DESC
        """, job_id)

        # Business name if available
        business = await self.pool.fetchrow("""
            SELECT DISTINCT business_name
            FROM reviews
            WHERE job_id = $1 AND business_name IS NOT NULL
            LIMIT 1
        """, job_id)

        return {
            "business_name": business["business_name"] if business else "This business",
            "overview": dict(overview) if overview else {},
            "sentiment": [dict(r) for r in sentiment],
            "top_issues": [dict(r) for r in top_issues],
            "top_strengths": [dict(r) for r in top_strengths],
            "domains": [dict(r) for r in domains],
        }

    async def _generate_synthesis(self, context: dict[str, Any]) -> Synthesis:
        """Generate synthesis using LLM.

        Any failure (LLM call, JSON parsing, unexpected payload shape) is
        logged and answered with the fallback synthesis instead of raising.
        """
        user_prompt = self._build_user_prompt(context)

        try:
            response = await self.llm_client.generate(
                system_prompt=SYNTHESIS_SYSTEM_PROMPT,
                user_prompt=user_prompt,
                temperature=0.7,  # Allow some creativity
                max_tokens=4000,
            )
            # Models frequently wrap JSON in a markdown fence; strip it first.
            result = json.loads(self._extract_json(response))
            return self._parse_synthesis(result)
        except json.JSONDecodeError as e:
            logger.error("Failed to parse LLM response: %s", e)
            return self._create_fallback_synthesis()
        except Exception as e:
            # Deliberate best-effort boundary: keep the pipeline going, but
            # record the traceback so the failure is diagnosable.
            logger.exception("Synthesis generation failed: %s", e)
            return self._create_fallback_synthesis()

    def _build_user_prompt(self, context: dict[str, Any]) -> str:
        """Render the gathered context into the user prompt sent to the LLM."""
        overview = context['overview']
        return f"""Analyze this review data for {context['business_name']}:

## Overview
- Total Reviews: {overview.get('total_reviews', 0)}
- Average Rating: {overview.get('avg_rating', 'N/A')}
- Total Insights Extracted: {overview.get('total_spans', 0)}

## Sentiment Distribution
{self._format_sentiment(context['sentiment'])}

## Top Issues (Problems)
{self._format_issues(context['top_issues'])}

## Top Strengths
{self._format_strengths(context['top_strengths'])}

## Domain Breakdown
{self._format_domains(context['domains'])}

Generate a complete synthesis with actionable insights.
"""

    @staticmethod
    def _extract_json(response: Any) -> Any:
        """Strip a surrounding markdown code fence from a string LLM response.

        Non-string responses are returned unchanged for ``json.loads``.
        """
        if not isinstance(response, str):
            return response
        text = response.strip()
        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json) and the closing fence.
            text = text.split("\n", 1)[1] if "\n" in text else ""
            text = text.rsplit("```", 1)[0]
        return text.strip()

    def _parse_synthesis(self, result: Any) -> Synthesis:
        """Convert the decoded LLM JSON object into a ``Synthesis``.

        ``null`` lists are treated as empty and non-object list entries are
        skipped, so one malformed element no longer discards the whole result.

        Raises:
            ValueError: if the decoded payload is not a JSON object.
        """
        if not isinstance(result, dict):
            raise ValueError("LLM response is not a JSON object")

        actions = [
            ActionItem(
                id=a.get("id", f"action_{i}"),
                title=a.get("title", ""),
                why=a.get("why", ""),
                what=a.get("what", ""),
                who=a.get("who", ""),
                impact=a.get("impact", ""),
                evidence=a.get("evidence", []),
                estimated_rating_lift=a.get("estimated_rating_lift"),
                complexity=a.get("complexity", "medium"),
                priority=a.get("priority", "medium"),
                timeline=a.get("timeline", "This month"),
                related_subcode=a.get("related_subcode", ""),
            )
            for i, a in enumerate(result.get("action_plan") or [])
            if isinstance(a, dict)
        ]
        annotations = [
            TimelineAnnotation(
                date=t.get("date", ""),
                label=t.get("label", ""),
                description=t.get("description", ""),
                type=t.get("type", "neutral"),
            )
            for t in result.get("timeline_annotations") or []
            if isinstance(t, dict)
        ]

        return Synthesis(
            executive_narrative=result.get("executive_narrative", ""),
            sentiment_insight=result.get("sentiment_insight", ""),
            category_insight=result.get("category_insight", ""),
            timeline_insight=result.get("timeline_insight", ""),
            priority_domain=result.get("priority_domain"),
            priority_issue=result.get("priority_issue"),
            action_plan=actions,
            issue_actions={},  # Can be populated from action_plan
            timeline_annotations=annotations,
            marketing_angles=result.get("marketing_angles") or [],
            competitor_context=result.get("competitor_context"),
            # NOTE: datetime.utcnow() is deprecated since Python 3.12; kept so
            # the stored timestamp stays a naive ISO string.
            generated_at=datetime.utcnow().isoformat(),
        )

    def _format_sentiment(self, sentiment: list[dict]) -> str:
        """Format sentiment data for prompt."""
        labels = {"V+": "Positive", "V-": "Negative", "V0": "Neutral", "V±": "Mixed"}
        lines = []
        for s in sentiment:
            valence = s.get("valence", "Unknown")
            count = s.get("count", 0)
            reviews = s.get("review_count", 0)
            # Unknown valence codes are shown as-is.
            label = labels.get(valence, valence)
            lines.append(f"- {label}: {count} mentions ({reviews} reviews)")
        return "\n".join(lines) or "No sentiment data"

    @staticmethod
    def _format_quote(quote: str) -> str:
        """Render one example quote, truncated to 100 characters."""
        if len(quote) > 100:
            return f' - Example: "{quote[:100]}..."'
        return f' - Example: "{quote}"'

    def _format_ranked(
        self,
        rows: list[dict],
        limit: int,
        count_key: str,
        count_label: str,
    ) -> list[str]:
        """Render up to ``limit`` subcode rows, each with up to two quotes."""
        lines = []
        for rank, row in enumerate(rows[:limit], 1):
            subcode = row.get("subcode", "")
            name = row.get("subcode_name", "")
            domain = row.get("domain_name", "")
            count = row.get(count_key, 0)
            # The key can be present with a NULL/None array, so `.get(k, [])`
            # alone is not enough.
            quotes = (row.get("example_quotes") or [])[:2]

            lines.append(f"{rank}. [{subcode}] {name} ({domain})")
            lines.append(f" - {count} {count_label} mentions")
            lines.extend(self._format_quote(q) for q in quotes if q)
        return lines

    def _format_issues(self, issues: list[dict]) -> str:
        """Format issues for prompt (top 5)."""
        lines = self._format_ranked(issues, 5, "negative_count", "negative")
        return "\n".join(lines) or "No issues found"

    def _format_strengths(self, strengths: list[dict]) -> str:
        """Format strengths for prompt (top 3)."""
        lines = self._format_ranked(strengths, 3, "positive_count", "positive")
        return "\n".join(lines) or "No strengths found"

    def _format_domains(self, domains: list[dict]) -> str:
        """Format domain distribution for prompt."""
        lines = []
        for d in domains:
            domain = d.get("domain", "")
            name = d.get("domain_name", "")
            total = d.get("total_count", 0)
            positive = d.get("positive_count", 0)
            negative = d.get("negative_count", 0)
            lines.append(f"- {domain} ({name}): {total} total ({positive} positive, {negative} negative)")
        return "\n".join(lines) or "No domain data"

    def _create_fallback_synthesis(self) -> Synthesis:
        """Create a minimal synthesis when LLM fails."""
        return Synthesis(
            executive_narrative="Unable to generate detailed analysis. Please review the data manually.",
            sentiment_insight="",
            category_insight="",
            timeline_insight="",
            priority_domain=None,
            priority_issue=None,
            action_plan=[],
            issue_actions={},
            timeline_annotations=[],
            marketing_angles=[],
            competitor_context=None,
            generated_at=datetime.utcnow().isoformat(),
        )

    async def _store_synthesis(self, execution_id: str, synthesis: Synthesis) -> None:
        """Store synthesis as JSON on the matching ``pipeline.executions`` row."""
        # Local import: the module only imports `dataclass` and `field`.
        from dataclasses import asdict

        # asdict() walks the nested dataclasses and yields the same keys, in
        # field order, as a hand-written mapping of every attribute.
        status = await self.pool.execute("""
            UPDATE pipeline.executions
            SET
                synthesis = $2,
                updated_at = NOW()
            WHERE execution_id = $1
        """, execution_id, json.dumps(asdict(synthesis)))

        # asyncpg returns the command status tag; "UPDATE 0" means the
        # execution row was not found and nothing was stored.
        if status == "UPDATE 0":
            logger.warning(
                "Synthesis not stored: no execution row for %s", execution_id
            )
|
||||
486
packages/reviewiq-pipeline/validate_router.py
Normal file
486
packages/reviewiq-pipeline/validate_router.py
Normal file
@@ -0,0 +1,486 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Validate router decisions against real reviews with minimal LLM cost.
|
||||
|
||||
This script:
|
||||
1. Loads real reviews from database
|
||||
2. Routes them through the router
|
||||
3. Cherry-picks samples from each tier for validation
|
||||
4. Optionally runs LLM on small samples to validate decisions
|
||||
|
||||
Usage:
|
||||
# Dry run - just show routing decisions, no LLM calls
|
||||
python validate_router.py <job_id> --dry-run
|
||||
|
||||
# Validate with LLM (costs ~$0.05-0.10)
|
||||
python validate_router.py <job_id> --validate
|
||||
|
||||
# Custom sample sizes
|
||||
python validate_router.py <job_id> --validate --skip-samples=3 --cheap-samples=5 --full-samples=3
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger("validate_router")
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of checking one review's routing decision."""

    # --- Review and routing decision ---
    review_id: str
    text: str
    rating: int
    # Tier the router chose, with its reason and signal values.
    routed_tier: str
    routing_reason: str
    routing_signals: dict

    # --- LLM results (only set when validation ran) ---
    llm_urt: str | None = None
    llm_valence: str | None = None
    llm_span_count: int | None = None
    llm_cost: float | None = None

    # --- Verdict ---
    # None until a verdict has been set.
    routing_correct: bool | None = None
    notes: str = ""
|
||||
|
||||
|
||||
async def load_reviews_from_db(job_id: str, database_url: str) -> list[dict]:
    """Fetch a job's non-empty reviews and shape them as router input dicts.

    Opens a dedicated asyncpg connection, reads ``pipeline.reviews_enriched``
    ordered by ``id`` and always closes the connection afterwards.
    """
    import asyncpg

    def to_review(row) -> dict:
        # The query already filters NULL/empty text; `or ""` is a safety net.
        body = row["text"] or ""
        return {
            "review_id": row["review_id"],
            "text": body,
            "text_normalized": body.lower().strip(),
            "rating": row["rating"],
            "business_id": row["business_id"],
            "place_id": row["place_id"],
            # Fixed placeholder values attached to every review.
            "source": "google",
            "review_version": 1,
            "review_time": "2024-01-01T00:00:00Z",
        }

    conn = await asyncpg.connect(database_url)
    try:
        # Get reviews with text from pipeline schema
        rows = await conn.fetch("""
            SELECT
                re.review_id,
                re.text,
                re.rating,
                re.business_id,
                re.place_id
            FROM pipeline.reviews_enriched re
            WHERE re.job_id = $1::uuid
              AND re.text IS NOT NULL
              AND re.text != ''
            ORDER BY re.id
        """, job_id)

        reviews = [to_review(row) for row in rows]
        logger.info(f"Loaded {len(reviews)} reviews from job {job_id}")
        return reviews
    finally:
        await conn.close()
|
||||
|
||||
|
||||
def route_reviews(reviews: list[dict], conservative: bool = True) -> dict[str, list[dict]]:
    """Route reviews and return them grouped by tier.

    Args:
        reviews: Review dicts to hand to the router's ``route_batch``.
        conservative: Forwarded to ``create_router``; defaults to ``True``,
            the value that was previously hard-coded.

    Returns:
        Dict with keys ``"skip"``, ``"cheap"`` and ``"full"`` holding the
        reviews routed to the SKIP, CHEAP_MODEL and FULL_MODEL tiers.
    """
    # Imported lazily so the script can be loaded without the pipeline package.
    from reviewiq_pipeline.services.review_router import RoutingTier, create_router

    router = create_router(conservative=conservative)
    routed = router.route_batch(reviews)

    return {
        "skip": routed[RoutingTier.SKIP],
        "cheap": routed[RoutingTier.CHEAP_MODEL],
        "full": routed[RoutingTier.FULL_MODEL],
    }
|
||||
|
||||
|
||||
def select_diverse_samples(
    reviews: list[dict],
    tier: str,
    n_samples: int,
) -> list[dict]:
    """
    Select diverse samples from a tier for validation.

    Pass 1 takes the first review seen for each distinct
    ``(routing reason, rating)`` pair, in input order. Pass 2 fills any
    remaining slots with reviews not yet picked (including reviews that carry
    no ``_routing`` entry), again in input order.

    Args:
        reviews: Routed review dicts; ``_routing`` (if present) must expose
            a ``reason`` attribute.
        tier: Tier name. Currently unused; kept for interface compatibility.
        n_samples: Maximum number of samples to return.

    Returns:
        At most ``n_samples`` reviews; ``[]`` for empty input or
        ``n_samples <= 0``.
    """
    if not reviews or n_samples <= 0:
        return []

    samples: list[dict] = []
    picked_ids: set[int] = set()  # identity-based, avoids O(n*k) dict compares
    seen_keys: set[tuple] = set()

    # First pass: one representative per (reason, rating) combination.
    for review in reviews:
        if len(samples) >= n_samples:
            break
        routing = review.get("_routing")
        if not routing:
            continue
        key = (routing.reason, review["rating"])
        if key in seen_keys:
            continue
        seen_keys.add(key)
        samples.append(review)
        picked_ids.add(id(review))

    # Second pass: fill remaining slots with anything not already chosen.
    for review in reviews:
        if len(samples) >= n_samples:
            break
        if id(review) not in picked_ids:
            samples.append(review)
            picked_ids.add(id(review))

    return samples
|
||||
|
||||
|
||||
def print_routing_summary(routed: dict[str, list[dict]]):
    """Print per-tier review counts, percentages and a reason breakdown."""
    banner = "=" * 70
    grand_total = sum(len(bucket) for bucket in routed.values())

    print("\n" + banner)
    print("ROUTING SUMMARY")
    print(banner)

    for tier_name, bucket in routed.items():
        # Guard against an empty run so the percentage never divides by zero.
        share = len(bucket) / grand_total * 100 if grand_total > 0 else 0
        print(f"\n{tier_name.upper()} TIER: {len(bucket)} reviews ({share:.1f}%)")

        # Tally routing reasons; reviews without a decision are ignored.
        reason_counts = {}
        for entry in bucket:
            decision = entry.get("_routing")
            if not decision:
                continue
            reason_counts[decision.reason] = reason_counts.get(decision.reason, 0) + 1

        # Most frequent reason first.
        ranked = sorted(reason_counts.items(), key=lambda pair: -pair[1])
        for reason, count in ranked:
            print(f" - {reason}: {count}")
|
||||
|
||||
|
||||
def print_samples(samples: list[dict], tier: str):
    """Print each sampled review with its rating, text preview and routing info."""
    banner = "=" * 70
    print(f"\n{banner}")
    print(f"{tier.upper()} TIER SAMPLES ({len(samples)} reviews)")
    print(banner)

    for position, review in enumerate(samples, 1):
        decision = review.get("_routing")
        # Reviews without a routing decision fall back to placeholders.
        if decision:
            reason = decision.reason
            signals = decision.signals
        else:
            reason = 'N/A'
            signals = {}

        body = review['text']
        # Preview is capped at 100 characters, with an ellipsis when cut.
        suffix = '...' if len(body) > 100 else ''
        stars = '⭐' * review['rating']

        print(f"\n[{position}] Review ID: {review['review_id']}")
        print(f" Rating: {stars}")
        print(f" Text: \"{body[:100]}{suffix}\"")
        print(f" Routing: {reason}")
        print(f" Signals: words={signals.get('word_count', '?')}, "
              f"chars={signals.get('char_count', '?')}, "
              f"numbers={signals.get('has_numbers', '?')}, "
              f"sentences={signals.get('sentence_count', '?')}")
|
||||
|
||||
|
||||
async def validate_with_llm(
    samples: list[dict],
    tier: str,
    config: Any,
) -> list[ValidationResult]:
    """
    Run LLM classification on samples to validate routing decisions.

    Args:
        samples: Routed review dicts (``review_id``, ``text``, ``rating`` and
            optionally ``_routing``).
        tier: Tier the samples were routed to: ``"skip"``, ``"cheap"`` or
            ``"full"``.
        config: Pipeline configuration passed to ``LLMClient.create``.

    Returns:
        One ``ValidationResult`` per sample that has an LLM response.
        ``routing_correct`` is ``None`` when no LLM result could be recovered
        for a sample, so classification failures are not reported as routing
        mistakes.
    """
    from reviewiq_pipeline.services.llm_client import LLMClient, BatchReviewInput, PartialBatchResult

    results = []

    if not samples:
        return results

    client = LLMClient.create(config)

    try:
        batch_input = [
            BatchReviewInput(
                review_id=r["review_id"],
                text=r["text"],
                rating=r["rating"],
            )
            for r in samples
        ]

        logger.info(f"Running LLM on {len(samples)} {tier} tier samples...")

        llm_responses = []
        metadata = {}

        try:
            llm_responses, metadata = await client.classify_batch(batch_input, "standard")
        except PartialBatchResult as e:
            logger.warning(f"Partial result for {tier} tier: {len(e.partial_results)} recovered")
            metadata = e.metadata or {}
            # Rebuild exactly one response per sample, in sample order.
            llm_responses = _align_partial_results(e.partial_results, len(samples))

        cost = (metadata or {}).get("cost_usd", 0) or 0
        logger.info(f"LLM cost for {tier} tier: ${cost:.4f}")

        # The batch cost is spread evenly over the samples.
        cost_per_review = cost / len(samples)

        for review, llm_response in zip(samples, llm_responses):
            routing = review.get("_routing")
            signals = routing.signals if routing else {}

            spans = llm_response.get("spans") or []
            # Prefer the span flagged primary, else the first span, else nothing.
            primary_span = next((s for s in spans if s.get("is_primary")), spans[0] if spans else {})

            urt = primary_span.get("urt_primary", "N/A")
            valence = primary_span.get("valence", "N/A")

            if llm_response.get("_error"):
                # No LLM output for this review: leave the verdict open.
                routing_correct = None
                notes = f"No LLM result ({llm_response['_error']})"
            else:
                routing_correct, notes = _judge_routing(tier, spans, primary_span, urt)

            results.append(ValidationResult(
                review_id=review["review_id"],
                text=review["text"],
                rating=review["rating"],
                routed_tier=tier,
                routing_reason=routing.reason if routing else "N/A",
                routing_signals=signals,
                llm_urt=urt,
                llm_valence=valence,
                llm_span_count=len(spans),
                llm_cost=cost_per_review,
                routing_correct=routing_correct,
                notes=notes,
            ))

    finally:
        await client.close()

    return results


def _align_partial_results(partial_results: list[dict], n_samples: int) -> list[dict]:
    """Return one response per sample index, padding gaps with error stubs.

    The first partial result for an index wins; out-of-range and duplicate
    indices are dropped so the list always lines up with the samples.
    """
    by_index = {}
    for partial in partial_results:
        idx = partial.get("review_index", -1)
        if 0 <= idx < n_samples and idx not in by_index:
            by_index[idx] = {
                "spans": partial.get("spans", []),
                "review_summary": partial.get("review_summary", {}),
                "_index": idx,
            }

    return [
        by_index.get(i, {
            "spans": [],
            "review_summary": {},
            "_index": i,
            "_error": "partial_recovery_failed",
        })
        for i in range(n_samples)
    ]


def _judge_routing(tier: str, spans: list[dict], primary_span: dict, urt: str):
    """Return ``(routing_correct, notes)`` for one LLM-classified review."""
    if tier == "skip":
        if not spans:
            # The LLM extracted nothing, so there was nothing to lose by skipping.
            return True, "Correctly skipped (no spans extracted)"
        # SKIP is correct if LLM gives generic code (V4.03) or single low-info span
        is_generic = urt in ("V4.03", "V4.01", "V4.02", "O1.01")
        is_simple = len(spans) == 1 and primary_span.get("intensity") == "I1"
        if is_generic or is_simple:
            return True, "Correctly skipped (generic/simple)"
        return False, f"LLM found specific content: {urt}"

    if tier == "cheap":
        # CHEAP is correct if classification is straightforward
        # (single domain, no complex causal chains)
        if len(spans) <= 2:
            return True, "Simple enough for cheap model"
        return False, f"Complex: {len(spans)} spans found"

    if tier == "full":
        # FULL is correct if there's meaningful content
        has_content = len(spans) >= 1 and urt not in ("V4.03", "O1.01")
        if has_content:
            return True, f"Correctly sent to full: {len(spans)} spans, {urt}"
        return False, "Could have been cheaper"

    # Unknown tier: no verdict.
    return None, ""
|
||||
|
||||
|
||||
def print_validation_results(results: list[ValidationResult], tier: str):
    """Print the accuracy line and one detail entry per validation result.

    Prints nothing for an empty list. Results whose ``routing_correct`` is
    ``None`` (no verdict) are marked with a question mark and summarised on an
    "Undetermined" line instead of being shown as failures; they still count
    toward the accuracy denominator.
    """
    if not results:
        return

    print(f"\n{'=' * 70}")
    print(f"{tier.upper()} TIER VALIDATION RESULTS")
    print("=" * 70)

    correct = sum(1 for r in results if r.routing_correct)
    undetermined = sum(1 for r in results if r.routing_correct is None)
    total = len(results)
    accuracy = correct / total * 100 if total > 0 else 0

    print(f"\nAccuracy: {correct}/{total} ({accuracy:.1f}%)")
    if undetermined:
        print(f"Undetermined: {undetermined}")

    for r in results:
        if r.routing_correct is None:
            status = "❓"
        else:
            status = "✅" if r.routing_correct else "❌"
        # Only add an ellipsis when the preview was actually truncated.
        suffix = "..." if len(r.text) > 60 else ""
        print(f"\n{status} [{r.review_id}] \"{r.text[:60]}{suffix}\"")
        print(f" Rating: {r.rating}, Routed: {r.routed_tier} ({r.routing_reason})")
        print(f" LLM: URT={r.llm_urt}, Valence={r.llm_valence}, Spans={r.llm_span_count}")
        print(f" Notes: {r.notes}")
|
||||
|
||||
|
||||
async def main():
|
||||
parser = argparse.ArgumentParser(description="Validate router decisions")
|
||||
parser.add_argument("job_id", help="Job ID to analyze")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show routing only, no LLM")
|
||||
parser.add_argument("--validate", action="store_true", help="Run LLM validation")
|
||||
parser.add_argument("--skip-samples", type=int, default=3, help="SKIP tier samples")
|
||||
parser.add_argument("--cheap-samples", type=int, default=5, help="CHEAP tier samples")
|
||||
parser.add_argument("--full-samples", type=int, default=3, help="FULL tier samples")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Database URL
|
||||
database_url = os.environ.get(
|
||||
"DATABASE_URL",
|
||||
"postgresql://scraper:scraper123@localhost:5437/scraper"
|
||||
)
|
||||
|
||||
# Load reviews
|
||||
reviews = await load_reviews_from_db(args.job_id, database_url)
|
||||
if not reviews:
|
||||
print("No reviews found for job")
|
||||
return
|
||||
|
||||
# Route reviews
|
||||
routed = route_reviews(reviews)
|
||||
|
||||
# Print summary
|
||||
print_routing_summary(routed)
|
||||
|
||||
# Select samples
|
||||
skip_samples = select_diverse_samples(routed["skip"], "skip", args.skip_samples)
|
||||
cheap_samples = select_diverse_samples(routed["cheap"], "cheap", args.cheap_samples)
|
||||
full_samples = select_diverse_samples(routed["full"], "full", args.full_samples)
|
||||
|
||||
# Print samples
|
||||
print_samples(skip_samples, "skip")
|
||||
print_samples(cheap_samples, "cheap")
|
||||
print_samples(full_samples, "full")
|
||||
|
||||
# Estimate cost
|
||||
total_samples = len(skip_samples) + len(cheap_samples) + len(full_samples)
|
||||
estimated_cost = total_samples * 0.003 # ~$0.003 per review with Sonnet
|
||||
print(f"\n{'=' * 70}")
|
||||
print(f"VALIDATION COST ESTIMATE: ~${estimated_cost:.3f} for {total_samples} samples")
|
||||
print("=" * 70)
|
||||
|
||||
if args.dry_run:
|
||||
print("\n[DRY RUN] No LLM calls made. Use --validate to run validation.")
|
||||
return
|
||||
|
||||
if not args.validate:
|
||||
print("\nUse --validate to run LLM validation on these samples.")
|
||||
return
|
||||
|
||||
# Run validation
|
||||
from reviewiq_pipeline.config import Config
|
||||
|
||||
config = Config(
|
||||
database_url=database_url,
|
||||
llm_provider="anthropic",
|
||||
llm_model="claude-sonnet-4-5-20250929",
|
||||
anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY",
|
||||
"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"),
|
||||
)
|
||||
|
||||
all_results = []
|
||||
total_cost = 0
|
||||
|
||||
# Validate each tier
|
||||
for tier, samples in [("skip", skip_samples), ("cheap", cheap_samples), ("full", full_samples)]:
|
||||
if samples:
|
||||
results = await validate_with_llm(samples, tier, config)
|
||||
all_results.extend(results)
|
||||
total_cost += sum(r.llm_cost or 0 for r in results)
|
||||
print_validation_results(results, tier)
|
||||
|
||||
# Print summary
|
||||
print(f"\n{'=' * 70}")
|
||||
print("VALIDATION SUMMARY")
|
||||
print("=" * 70)
|
||||
|
||||
for tier in ["skip", "cheap", "full"]:
|
||||
tier_results = [r for r in all_results if r.routed_tier == tier]
|
||||
if tier_results:
|
||||
correct = sum(1 for r in tier_results if r.routing_correct)
|
||||
total = len(tier_results)
|
||||
print(f"{tier.upper()}: {correct}/{total} correct ({correct/total*100:.0f}%)")
|
||||
|
||||
overall_correct = sum(1 for r in all_results if r.routing_correct)
|
||||
overall_total = len(all_results)
|
||||
print(f"\nOVERALL: {overall_correct}/{overall_total} correct ({overall_correct/overall_total*100:.0f}%)")
|
||||
print(f"TOTAL COST: ${total_cost:.4f}")
|
||||
|
||||
# Recommendations
|
||||
print(f"\n{'=' * 70}")
|
||||
print("RECOMMENDATIONS")
|
||||
print("=" * 70)
|
||||
|
||||
skip_errors = [r for r in all_results if r.routed_tier == "skip" and not r.routing_correct]
|
||||
if skip_errors:
|
||||
print("\n⚠️ SKIP tier false negatives found:")
|
||||
for r in skip_errors:
|
||||
print(f" - \"{r.text[:50]}...\" → {r.llm_urt}")
|
||||
print(" Consider tightening SKIP criteria")
|
||||
else:
|
||||
print("\n✅ SKIP tier looks safe")
|
||||
|
||||
cheap_errors = [r for r in all_results if r.routed_tier == "cheap" and not r.routing_correct]
|
||||
if cheap_errors:
|
||||
print("\n⚠️ CHEAP tier may miss complexity:")
|
||||
for r in cheap_errors:
|
||||
print(f" - \"{r.text[:50]}...\" → {r.llm_span_count} spans")
|
||||
else:
|
||||
print("\n✅ CHEAP tier thresholds look good")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: only when this file is executed directly (not
    # imported), start an event loop and run the async main() to completion.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user