From 9515dd2d4242bedcf81886134260df0aa8957723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Sat, 24 Jan 2026 12:34:35 +0000 Subject: [PATCH] Polish ReviewIQ v3.1.2: tenant-scoping and FK integrity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final fixes for production-ready spec: 1. locations.location_type: Added 'owned'|'competitor' flag. Competitors now inserted into locations (preserves FK integrity). 2. Competitor fact query: Added business_id filter to prevent cross-tenant contamination when same competitor tracked by multiple customers. 3. issue_events versioning: Added source + review_version columns for complete review reference in audit log. 4. Enrichment tenant-scoping: business_id now passed from ingest job (not looked up). Validates place_id exists under tenant. 5. Footer: Fixed version string v3.1.1 → v3.1.2. Status: Ship-ready specification. Co-Authored-By: Claude Opus 4.5 --- .artifacts/ReviewIQ-Architecture-v3.1.md | 77 ++++++++++++++++++------ 1 file changed, 60 insertions(+), 17 deletions(-) diff --git a/.artifacts/ReviewIQ-Architecture-v3.1.md b/.artifacts/ReviewIQ-Architecture-v3.1.md index 0f0f9e3..1cb9e6c 100644 --- a/.artifacts/ReviewIQ-Architecture-v3.1.md +++ b/.artifacts/ReviewIQ-Architecture-v3.1.md @@ -114,9 +114,12 @@ ReviewIQ v3.1 transforms Google Reviews into actionable business intelligence th ```sql -- Business locations (multi-tenant: same place_id can exist for multiple businesses) +-- Includes both owned locations and tracked competitor locations CREATE TABLE locations ( business_id TEXT NOT NULL, -- Internal business identifier place_id TEXT NOT NULL, -- Google Place ID + location_type TEXT NOT NULL DEFAULT 'owned' + CHECK (location_type IN ('owned', 'competitor')), display_name TEXT NOT NULL, address TEXT, city TEXT, @@ -131,6 +134,8 @@ CREATE TABLE locations ( ); CREATE INDEX idx_locations_place ON 
locations(place_id); +CREATE INDEX idx_locations_owned ON locations(business_id) + WHERE location_type = 'owned'; -- URT code reference CREATE TABLE urt_codes ( @@ -383,13 +388,19 @@ CREATE TABLE issue_events ( actor TEXT, -- User or 'system' notes TEXT, - review_id TEXT, -- Triggering review if applicable + + -- Triggering review reference (versioned) + source TEXT DEFAULT 'google', + review_id TEXT, + review_version INT, metadata JSONB, -- Additional context created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX idx_events_issue ON issue_events(issue_id, created_at DESC); +CREATE INDEX idx_events_review ON issue_events(source, review_id, review_version) + WHERE review_id IS NOT NULL; ``` ### 2.4 Unified Analytics Spine @@ -580,8 +591,14 @@ async def store_raw_review(place_id: str, review: dict) -> int: ### 3.2 Enrichment Pipeline ```python -async def enrich_review(raw_id: int) -> dict: - """Full enrichment: normalize → classify → embed → trust score.""" +async def enrich_review(raw_id: int, business_id: str) -> dict: + """ + Full enrichment: normalize → classify → embed → trust score. + + Args: + raw_id: ID from reviews_raw + business_id: Tenant context (passed from ingest job, not looked up) + """ raw = await db.query_one( "SELECT * FROM reviews_raw WHERE id = %s", [raw_id] @@ -590,11 +607,13 @@ async def enrich_review(raw_id: int) -> dict: # 1. Normalize text = normalize_text(raw['review_text']) - # 2. Map to business + # 2. Validate place_id exists under this tenant (owned or competitor) location = await db.query_one( - "SELECT business_id FROM locations WHERE place_id = %s", - [raw['place_id']] + "SELECT display_name, location_type FROM locations WHERE business_id = %s AND place_id = %s", + [business_id, raw['place_id']] ) + if not location: + raise ValueError(f"place_id {raw['place_id']} not registered for business {business_id}") # 3. 
Parallel: LLM classify + embed classify_task = asyncio.create_task(classify_review_llm(text)) @@ -623,7 +642,7 @@ async def enrich_review(raw_id: int) -> dict: 'review_version': raw['review_version'], 'is_latest': True, 'raw_id': raw_id, - 'business_id': location['business_id'], + 'business_id': business_id, # Passed from ingest job (tenant context) 'place_id': raw['place_id'], 'text': raw['review_text'], 'text_normalized': text, @@ -814,7 +833,12 @@ async def add_span_to_issue(issue_id: str, review: dict): """, [issue_id, issue_id, issue_id]) await recalculate_priority(issue_id) - await log_issue_event(issue_id, 'span_added', review_id=review['review_id']) + await log_issue_event( + issue_id, 'span_added', + source=review['source'], + review_id=review['review_id'], + review_version=review['review_version'] + ) ``` ### 4.2 Priority Scoring (Trust-Weighted) @@ -1122,22 +1146,35 @@ async def get_timeline(business_id: str, ### 6.1 Competitor Setup (Clean Model) -Competitors are tracked in the `competitors` table only. They are **not** injected into `locations` with fake business_ids. +Competitors are tracked in both `competitors` (relationship metadata) and `locations` (with `location_type='competitor'`). This preserves FK integrity and enables consistent joins for display names/timezones. -**Competitor Review Storage Rule**: Competitor reviews are stored with the **customer's business_id** and the **competitor's place_id**. This keeps all queries and facts working without NULL semantics. The `competitors` table distinguishes "own" vs "competitor" place_ids: +**Competitor Review Storage Rule**: Competitor reviews are stored with the **customer's business_id** and the **competitor's place_id**: ``` reviews_enriched.business_id = <customer's business_id> reviews_enriched.place_id = <competitor's place_id> ``` -The customer's own locations are in `locations(business_id, place_id)`. Competitor place_ids are **not** added to `locations` — they're identified via `competitors.competitor_place_id`. 
+The `locations.location_type` column distinguishes ownership: +- `'owned'` — customer's own locations +- `'competitor'` — tracked competitor locations + +This keeps all queries and FK constraints working without NULL semantics or special-case logic. ```python async def setup_competitor(business_id: str, competitor_place_id: str, competitor_name: str, relationship: str = 'direct'): """Register a competitor for tracking.""" + # 1. Add to locations with location_type='competitor' (enables FK + joins) + await db.execute(""" + INSERT INTO locations (business_id, place_id, location_type, display_name) + VALUES (%s, %s, 'competitor', %s) + ON CONFLICT (business_id, place_id) DO UPDATE SET + display_name = EXCLUDED.display_name + """, [business_id, competitor_place_id, competitor_name]) + + # 2. Track relationship metadata in competitors table await db.execute(""" INSERT INTO competitors (business_id, competitor_place_id, competitor_name, relationship) VALUES (%s, %s, %s, %s) @@ -1197,18 +1234,19 @@ async def get_competitor_comparison(business_id: str, code: str, } for comp in competitors: - # Query competitor's facts (stored with their place_id) + # Query competitor's facts (tenant-scoped: business_id + place_id) comp_metrics = await db.query_one(""" SELECT SUM(negative_strength) as negative_strength, SUM(review_count) as review_count, AVG(avg_rating) as avg_rating FROM fact_timeseries - WHERE place_id = %s + WHERE business_id = %s + AND place_id = %s AND subject_type = 'urt_code' AND subject_id = %s AND period_date BETWEEN %s AND %s - """, [comp['competitor_place_id'], code, start, end]) + """, [business_id, comp['competitor_place_id'], code, start, end]) comparison['competitors'].append({ 'name': comp['competitor_name'], @@ -1369,7 +1407,7 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | v3.0 | Issue lifecycle, strength scores, timeline charts | | v3.1 | Relational refactor: issue_spans, fact_timeseries, raw/enriched split, multi-location, competitors, 
trust scoring | | v3.1.1 | **Reviewed**: Versioned enriched PK, tenant-scoped locations, 'ALL' sentinel, competitor cleanup, fixed get_timeline params, clarified issue key scope | -| v3.1.2 | **Final**: Versioned issue_spans FK, competitor business_id rule, trust-weighted facts deferred | +| v3.1.2 | **Final**: Versioned issue_spans FK, competitor business_id rule, trust-weighted facts deferred, location_type flag, tenant-scoped enrichment | ### Fixes Applied (v3.1.1 → v3.1.2) @@ -1378,7 +1416,7 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | reviews_enriched PK wrong for edits | PK = `(source, review_id, review_version)` + `is_latest` flag | | raw_id ambiguous under versioning | raw_id references specific raw version | | locations.place_id prevents multi-tenant | PK = `(business_id, place_id)` (tenant-scoped) | -| Competitor fake business_id pattern | Removed; competitors table is separate, no injection into locations | +| Competitor fake business_id pattern | Competitors inserted into `locations` with `location_type='competitor'` | | fact_timeseries.place_id NOT NULL blocks rollups | `place_id='ALL'` sentinel for all-locations | | get_timeline param ordering bug | Fixed: params built in correct order | | Issue entity fields but no extraction | Clarified: v3.1 key is `(business_id, place_id, primary_subcode)` only; entity fields reserved for v3.2 | @@ -1386,6 +1424,11 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; | **issue_spans.review_id underspecified** | Added `source`, `review_version` columns + FK to versioned review | | **Competitor business_id = NULL breaks joins** | Rule: competitor reviews use customer's `business_id` + competitor's `place_id` | | **trust_weighted_* columns implied populated** | Clarified: columns reserved but not populated in v3.1; deferred to v3.2 | +| **Footer version string** | Fixed: v3.1.1 → v3.1.2 | +| **Competitor fact query missing tenant scope** | Added `business_id` filter to competitor 
comparison query | +| **reviews_enriched FK conflicts with competitor rule** | Added `location_type` column to `locations`; competitors inserted with `'competitor'` type | +| **issue_events.review_id not versioned** | Added `source`, `review_version` columns to issue_events | +| **Enrichment lookup breaks multi-tenant** | `business_id` now passed from ingest job; validated against locations | ### Deferred to v3.2+ @@ -1401,4 +1444,4 @@ WHERE r.subject_type = 'overall' AND r.subject_id = 'all'; --- -*End of ReviewIQ Architecture v3.1.1* +*End of ReviewIQ Architecture v3.1.2*