From 44d017b3f76ba008f272bd8098cd622cc41c297a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Sat, 24 Jan 2026 12:31:16 +0000 Subject: [PATCH] Finalize ReviewIQ Architecture v3.1.2 (production-ready) Three final fixes applied: 1. issue_spans versioning: Added source + review_version columns with FK to reviews_enriched(source, review_id, review_version). Spans now correctly reference the exact review version. 2. Competitor business_id rule: Clarified that competitor reviews use customer's business_id + competitor's place_id (not NULL). Keeps facts and joins working without special-case logic. 3. Trust-weighted facts: Clarified trust_weighted_* columns are reserved but not populated in v3.1. Trust scoring applies to issue priority only. Aggregation deferred to v3.2. Status: Production-grade architecture specification. Co-Authored-By: Claude Opus 4.5 --- .artifacts/ReviewIQ-Architecture-v3.1.md | 78 ++++++++++++++++++------ 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/.artifacts/ReviewIQ-Architecture-v3.1.md b/.artifacts/ReviewIQ-Architecture-v3.1.md index 9782940..0f0f9e3 100644 --- a/.artifacts/ReviewIQ-Architecture-v3.1.md +++ b/.artifacts/ReviewIQ-Architecture-v3.1.md @@ -1,6 +1,6 @@ # ReviewIQ: Review Intelligence Pipeline -**Version**: 3.1.1 +**Version**: 3.1.2 **Status**: Architecture Specification (Reviewed) **Date**: 2026-01-24 @@ -339,11 +339,15 @@ ALTER TABLE issues ADD CONSTRAINT fk_issues_location FOREIGN KEY (business_id, place_id) REFERENCES locations(business_id, place_id); --- Issue spans (link table: issue ↔ review) +-- Issue spans (link table: issue ↔ review, versioned) CREATE TABLE issue_spans ( id SERIAL PRIMARY KEY, issue_id TEXT NOT NULL REFERENCES issues(issue_id) ON DELETE CASCADE, - review_id TEXT NOT NULL, -- Matches reviews_enriched (source, review_id, version) + + -- Full review reference (versioned) + source TEXT NOT NULL DEFAULT 'google', + 
review_id TEXT NOT NULL, + review_version INT NOT NULL, -- Span metadata is_primary_match BOOLEAN DEFAULT TRUE, -- Primary vs secondary code match @@ -355,11 +359,17 @@ CREATE TABLE issue_spans ( created_at TIMESTAMP DEFAULT NOW(), - UNIQUE(issue_id, review_id) + UNIQUE(issue_id, source, review_id, review_version) ); +-- FK to versioned review +ALTER TABLE issue_spans + ADD CONSTRAINT fk_span_review + FOREIGN KEY (source, review_id, review_version) + REFERENCES reviews_enriched(source, review_id, review_version); + CREATE INDEX idx_spans_issue ON issue_spans(issue_id); -CREATE INDEX idx_spans_review ON issue_spans(review_id); +CREATE INDEX idx_spans_review ON issue_spans(source, review_id, review_version); CREATE INDEX idx_spans_issue_time ON issue_spans(issue_id, review_time DESC); -- Issue events (audit log) @@ -427,7 +437,7 @@ CREATE TABLE fact_timeseries ( cr_worse INT DEFAULT 0, cr_same INT DEFAULT 0, - -- Trust-weighted variants + -- Trust-weighted variants (DEFERRED to v3.2 - columns reserved, not populated) trust_weighted_strength FLOAT, trust_weighted_negative FLOAT, @@ -463,6 +473,11 @@ CREATE INDEX idx_facts_all_locations ON fact_timeseries(business_id, period_date | `domain` | ⚡ Derived | Rollup from urt_code at query time | | `issue` | 🔜 Optional | Recommended for issue timelines (v3.2) | +**v3.1 Trust Score Usage**: +- `trust_score` is applied to **issue priority scoring** and **filtering** (see §4.2) +- `trust_weighted_strength` / `trust_weighted_negative` columns are **reserved but not populated** in v3.1 +- Trust-weighted fact aggregation (`SUM(trust_score * intensity_weight)`) deferred to v3.2 + ### 2.5 Sub-Patterns (Persistent Clustering Results) ```sql @@ -771,12 +786,18 @@ async def aggregate_to_issue(review: dict) -> Optional[str]: async def add_span_to_issue(issue_id: str, review: dict): """Add review span to issue and update counters.""" - # Insert span (with denormalized review_time for timeline queries) + # Insert span (versioned, 
with denormalized review_time for timeline queries) await db.execute(""" - INSERT INTO issue_spans (issue_id, review_id, is_primary_match, intensity, review_time) - VALUES (%s, %s, %s, %s, %s) - ON CONFLICT (issue_id, review_id) DO NOTHING - """, [issue_id, review['review_id'], True, review['intensity'], review['review_time']]) + INSERT INTO issue_spans ( + issue_id, source, review_id, review_version, + is_primary_match, intensity, review_time + ) + VALUES (%s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (issue_id, source, review_id, review_version) DO NOTHING + """, [ + issue_id, review['source'], review['review_id'], review['review_version'], + True, review['intensity'], review['review_time'] + ]) # Update issue counters await db.execute(""" @@ -811,8 +832,9 @@ async def recalculate_priority(issue_id: str): i.*, (SELECT AVG(re.trust_score) FROM issue_spans s - JOIN reviews_enriched re ON s.review_id = re.review_id - WHERE s.issue_id = i.issue_id AND re.is_latest = TRUE) as avg_trust + JOIN reviews_enriched re ON (s.source, s.review_id, s.review_version) + = (re.source, re.review_id, re.review_version) + WHERE s.issue_id = i.issue_id) as avg_trust FROM issues i WHERE i.issue_id = %s """, [issue_id]) @@ -869,6 +891,7 @@ async def get_issue_reviews(issue_id: str, return await db.query(f""" SELECT re.review_id, + re.review_version, re.text, s.review_time, re.rating, @@ -882,7 +905,8 @@ async def get_issue_reviews(issue_id: str, s.weight, l.display_name as location_name FROM issue_spans s - JOIN reviews_enriched re ON s.review_id = re.review_id AND re.is_latest = TRUE + JOIN reviews_enriched re ON (s.source, s.review_id, s.review_version) + = (re.source, re.review_id, re.review_version) JOIN locations l ON (re.business_id, re.place_id) = (l.business_id, l.place_id) WHERE s.issue_id = %s ORDER BY {order_clause} @@ -1098,7 +1122,16 @@ async def get_timeline(business_id: str, ### 6.1 Competitor Setup (Clean Model) -Competitors are tracked in the `competitors` table only. 
They are **not** injected into `locations` with fake business_ids. Competitor reviews are stored in `reviews_raw` and `reviews_enriched` with `business_id = NULL` or a dedicated `__competitors__` partition. +Competitors are tracked in the `competitors` table only. They are **not** injected into `locations` with fake business_ids. + +**Competitor Review Storage Rule**: Competitor reviews are stored with the **customer's business_id** and the **competitor's place_id**. This keeps all queries and facts working without NULL semantics. The `competitors` table distinguishes "own" vs "competitor" place_ids: + +``` +reviews_enriched.business_id = <customer_business_id> +reviews_enriched.place_id = <competitor_place_id> +``` + +The customer's own locations are in `locations(business_id, place_id)`. Competitor place_ids are **not** added to `locations` — they're identified via `competitors.competitor_place_id`. ```python async def setup_competitor(business_id: str, competitor_place_id: str, @@ -1124,9 +1157,9 @@ async def pull_competitor_reviews(business_id: str): """, [business_id]) for comp in competitors: - # Store competitor reviews with special business_id marker + # Store competitor reviews with customer's business_id + competitor's place_id await pull_reviews_for_competitor( - business_id=business_id, + business_id=business_id, # Customer's business_id (NOT NULL) place_id=comp['competitor_place_id'] ) ``` @@ -1322,8 +1355,8 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | Field | Value | |-------|-------| -| **Document** | ReviewIQ Architecture v3.1.1 | -| **Status** | Specification Complete (Reviewed) | +| **Document** | ReviewIQ Architecture v3.1.2 | +| **Status** | Specification Complete (Production-Ready) | | **Date** | 2026-01-24 | | **Dependencies** | URT Specification v5.1, Issue Lifecycle Framework C1 | | **Source** | Google Reviews only | @@ -1336,8 +1369,9 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | v3.0 | Issue lifecycle, strength scores, timeline charts | | v3.1 | 
Relational refactor: issue_spans, fact_timeseries, raw/enriched split, multi-location, competitors, trust scoring | | v3.1.1 | **Reviewed**: Versioned enriched PK, tenant-scoped locations, 'ALL' sentinel, competitor cleanup, fixed get_timeline params, clarified issue key scope | +| v3.1.2 | **Final**: Versioned issue_spans FK, competitor business_id rule, trust-weighted facts deferred | -### Fixes Applied (v3.1.1) +### Fixes Applied (v3.1.1 → v3.1.2) | Issue | Fix | |-------|-----| @@ -1349,6 +1383,9 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | get_timeline param ordering bug | Fixed: params built in correct order | | Issue entity fields but no extraction | Clarified: v3.1 key is `(business_id, place_id, primary_subcode)` only; entity fields reserved for v3.2 | | Missing indexes | Added `idx_spans_issue_time`, FK to locations | +| **issue_spans.review_id underspecified** | Added `source`, `review_version` columns + FK to versioned review | +| **Competitor business_id = NULL breaks joins** | Rule: competitor reviews use customer's `business_id` + competitor's `place_id` | +| **trust_weighted_* columns implied populated** | Clarified: columns reserved but not populated in v3.1; deferred to v3.2 | ### Deferred to v3.2+ @@ -1359,6 +1396,7 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | intent_signals extraction | Needs action playbooks | | stability_score tracking | Premature for v1 | | issue facts in fact_timeseries | Optional performance optimization | +| Trust-weighted fact aggregation | Columns reserved; `SUM(trust_score * intensity_weight)` deferred | | KPI integration | Placeholder only in v3.1 | ---