Initial commit - WhyRating Engine (Google Reviews Scraper)

This commit is contained in:
Alejandro Gutiérrez
2026-02-02 18:19:00 +00:00
parent 0543a08242
commit 2206ddeff2
136 changed files with 51138 additions and 855 deletions

View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python3
"""
Guarded L1 Config Fixer - V2 (Threshold-based, Sector-scoped)
Only applies fixes when:
1. Evidence is from sector-scoped validation
2. Frequency exceeds threshold (default 3%)
3. Changes are logged with version bump
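Usage:
    python fix_l1_configs_v2.py --apply                  # Apply fixes from validation
    python fix_l1_configs_v2.py --dry-run                # Show what would change
    python fix_l1_configs_v2.py --apply --sector SECTOR  # Restrict the run to one sector
    python fix_l1_configs_v2.py --show-changelog         # Print the recorded change history
    python fix_l1_configs_v2.py --revert SECTOR          # Revert to previous version (not yet implemented)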
"""
import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Directory holding the per-sector L1 primitive configs:
# <parent of this script's directory>/data/primitive_configs/l1
CONFIGS_DIR = Path(__file__).parent.parent.joinpath("data", "primitive_configs", "l1")

# JSON history of applied config changes, stored alongside the configs.
CHANGELOG_FILE = CONFIGS_DIR.joinpath("CHANGELOG.json")

# Minimum share of a sector's spans (in percent) a primitive needs before it
# is auto-enabled; entries below this value are skipped.
ENABLE_THRESHOLD_PCT = 3.0
# Fixes derived from sector-scoped validation (validate_l1_configs_v2.py
# output). These are the ONLY fixes that should be applied.
#
# Per-sector layout:
#   "evidence"      - free-text description of the data backing the fixes
#   "enable"        - (primitive, pct_of_sector_spans, reason) tuples
#   "add_weight"    - (primitive, weight, reason) tuples
#   "remove_weight" - (primitive, reason) tuples
SECTOR_SCOPED_FIXES: dict[str, dict[str, Any]] = {
    "ENTERTAINMENT": {
        "evidence": "2,320 spans from Go Karts + Soho Club",
        "enable": [
            ("TASTE", 4.3, "Entertainment venues have concessions/food service"),
        ],
        "add_weight": [
            ("CRAFT", 1.3, "13.4% frequency but unweighted"),
        ],
        "remove_weight": [],
    },
    "FOOD_DINING": {
        "evidence": "61 spans from Fika cafe",
        "enable": [
            ("COMFORT", 9.8, "Seating/atmosphere comfort matters for cafes"),
        ],
        "add_weight": [
            ("AVAILABILITY", 1.2, "16.4% frequency but unweighted"),
        ],
        # Intentionally empty: with only 61 spans, zero-appearance primitives
        # may be false negatives. Weights are kept and flagged for review
        # once more data is available.
        "remove_weight": [],
    },
    "AUTOMOTIVE": {
        "evidence": "1,201 spans from ClickRent car rental",
        # Nothing exceeds the 3% threshold.
        "enable": [],
        "add_weight": [],
        # Intentionally empty: CONDITION, HONESTY, PROMISES and RECOVERY all
        # have 0 appearances, but that may be specific to rental vs repair,
        # so their weights are kept for now.
        "remove_weight": [],
    },
}
def load_changelog() -> list[dict]:
    """Load the changelog entries from ``CHANGELOG_FILE``.

    Returns:
        The parsed JSON content of the changelog file, or an empty list when
        the file does not exist yet.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    # Open directly instead of exists() + open(): the two-step check can race
    # with a concurrent delete. The file is always decoded as UTF-8 so the
    # result does not depend on the OS locale.
    try:
        with open(CHANGELOG_FILE, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return []
def save_changelog(entries: list[dict]) -> None:
    """Write the changelog entries to ``CHANGELOG_FILE`` as indented JSON.

    The parent directory is created if it is missing, and the file ends with
    a trailing newline.

    Args:
        entries: Changelog records to persist; must be JSON-serializable.

    Raises:
        TypeError: If an entry cannot be serialized. The existing changelog
            file is left untouched in that case.
    """
    # Serialize completely before touching the file: opening with "w"
    # truncates immediately, so a serialization error in the middle of a
    # streaming dump would wipe out the existing history.
    payload = json.dumps(entries, indent=2) + "\n"
    CHANGELOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(CHANGELOG_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def load_config(sector_code: str) -> dict[str, Any] | None:
"""Load a sector config."""
config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
if not config_path.exists():
return None
with open(config_path) as f:
return json.load(f)
def save_config(sector_code: str, config: dict[str, Any]) -> None:
    """Write a sector config to ``<sector>_config.json`` in ``CONFIGS_DIR``.

    The config is stored as indented JSON with a trailing newline. The
    directory is created if it is missing.

    Args:
        sector_code: Sector identifier; lower-cased to build the file name.
        config: Config mapping to persist; must be JSON-serializable.

    Raises:
        TypeError: If the config cannot be serialized. An existing config
            file is left untouched in that case.
    """
    config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
    # Serialize completely before opening the file: "w" truncates on open, so
    # a serialization error during a streaming dump would leave a corrupt,
    # half-written config behind.
    payload = json.dumps(config, indent=2) + "\n"
    config_path.parent.mkdir(parents=True, exist_ok=True)
    with open(config_path, "w", encoding="utf-8") as f:
        f.write(payload)
def _bump_minor_version(version: Any) -> str:
    """Return *version* with its minor component incremented ("1.0" -> "1.1")."""
    major, _, minor = str(version).partition(".")
    try:
        # A bare major version such as "2" is treated as "2.0".
        return f"{major}.{int(minor or 0) + 1}"
    except ValueError:
        raise ValueError(
            f"Unsupported config_version {version!r}; expected MAJOR.MINOR"
        ) from None


def apply_fixes(sector_code: str, fixes: dict, dry_run: bool = False) -> list[str]:
    """Apply one sector's fixes to its config and log them in the changelog.

    Args:
        sector_code: Sector whose config is loaded and updated.
        fixes: Mapping with optional keys ``"evidence"`` (str), ``"enable"``
            (``(primitive, pct, reason)`` tuples), ``"add_weight"``
            (``(primitive, weight, reason)`` tuples) and ``"remove_weight"``
            (``(primitive, reason)`` tuples).
        dry_run: When true, only report what would change; nothing is
            written.

    Returns:
        Human-readable messages, one per change or skipped entry. If nothing
        was modified the list ends with a "No changes needed" message; after
        a real (non-dry-run) update it ends with the version transition.

    Raises:
        ValueError: If the stored ``config_version`` is not of the form
            ``MAJOR.MINOR`` (or a bare ``MAJOR``).
    """
    config = load_config(sector_code)
    # Compare against None explicitly: an existing but empty config ({}) is
    # still a config and must not be reported as missing.
    if config is None:
        return [f"❌ Config not found for {sector_code}"]

    enabled = set(config.get("enabled", []))
    disabled = set(config.get("disabled", []))
    weights = dict(config.get("weights", {}))
    changes = []
    # Tracks whether the config actually differs. SKIP notes are reported
    # but must not trigger a version bump or a changelog entry on their own.
    modified = False
    evidence = fixes.get("evidence", "unknown")

    # Enable primitives
    for prim, pct, reason in fixes.get("enable", []):
        if pct < ENABLE_THRESHOLD_PCT:
            changes.append(f"⚠️ SKIP {prim}: {pct:.1f}% below {ENABLE_THRESHOLD_PCT}% threshold")
            continue
        if prim in disabled:
            disabled.remove(prim)
            enabled.add(prim)
            changes.append(f"✓ ENABLE {prim}: {pct:.1f}% in sector data ({reason})")
            modified = True
        elif prim not in enabled:
            enabled.add(prim)
            changes.append(f"✓ ADD {prim}: {pct:.1f}% in sector data ({reason})")
            modified = True

    # Add weights (existing weights are never overwritten)
    for prim, weight, reason in fixes.get("add_weight", []):
        if prim not in weights:
            weights[prim] = weight
            changes.append(f"⚖️ WEIGHT {prim}: {weight}x ({reason})")
            modified = True

    # Remove weights
    for prim, reason in fixes.get("remove_weight", []):
        if prim in weights:
            del weights[prim]
            changes.append(f"⚖️ UNWEIGHT {prim}: ({reason})")
            modified = True

    if not modified:
        return [*changes, "✓ No changes needed"]
    if dry_run:
        return changes

    # Validate and bump the version before mutating or writing anything.
    old_version = config.get("config_version", "1.0")
    new_version = _bump_minor_version(old_version)
    # One timestamp shared by the config and its changelog entry so the two
    # records can be matched exactly.
    timestamp = datetime.now(timezone.utc).isoformat()

    config["enabled"] = sorted(enabled)
    config["disabled"] = sorted(disabled)
    config["weights"] = dict(sorted(weights.items()))
    config["config_version"] = new_version
    config["config_updated_at"] = timestamp
    save_config(sector_code, config)

    # Log to changelog
    changelog = load_changelog()
    changelog.append({
        "sector": sector_code,
        "version": new_version,
        "previous_version": old_version,
        "timestamp": timestamp,
        "evidence": evidence,
        "changes": list(changes),
    })
    save_changelog(changelog)

    changes.append(f"📝 Version: {old_version} → {new_version}")
    return changes
def revert_config(sector_code: str, to_version: str | None = None) -> list[str]:
    """Handle a revert request for a sector config (currently a stub).

    Args:
        sector_code: Sector whose changelog history is looked up.
        to_version: Accepted for the future revert feature; not used yet.

    Returns:
        A single-message list: an error when the changelog holds no entries
        for the sector, otherwise a notice that revert is not implemented.
    """
    history = list(
        filter(lambda entry: entry["sector"] == sector_code, load_changelog())
    )
    if history:
        # TODO: Implement actual revert by storing full config snapshots
        return ["⚠️ Revert not yet implemented - manual restore required"]
    return [f"❌ No changelog entries for {sector_code}"]
def main():
    """Command-line entry point for the guarded L1 config fixer.

    Modes, checked in this order:
      * ``--show-changelog``: print the changelog as JSON.
      * ``--revert SECTOR``: print the result of the revert request.
      * ``--apply`` / ``--dry-run``: run the sector-scoped fixes for every
        sector in ``SECTOR_SCOPED_FIXES`` (or only ``--sector``) and print
        the resulting messages; ``--dry-run`` takes precedence.
    With none of these flags the help text is printed.
    """
    cli = argparse.ArgumentParser(description="Guarded L1 config fixer")
    flag_specs = (
        ("--apply", {"action": "store_true", "help": "Apply sector-scoped fixes"}),
        ("--dry-run", {"action": "store_true", "help": "Show what would change"}),
        ("--revert", {"metavar": "SECTOR", "help": "Revert sector to previous version"}),
        ("--sector", {"help": "Apply to specific sector only"}),
        ("--show-changelog", {"action": "store_true", "help": "Show changelog"}),
    )
    for flag, spec in flag_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    if opts.show_changelog:
        print(json.dumps(load_changelog(), indent=2))
        return

    if opts.revert:
        for message in revert_config(opts.revert.upper()):
            print(message)
        return

    # No actionable flag given: fall back to the usage text.
    if not (opts.apply or opts.dry_run):
        cli.print_help()
        return

    rule = "=" * 60
    mode = "DRY RUN" if opts.dry_run else "APPLYING FIXES"
    print(rule)
    print(f"L1 CONFIG FIXER V2 - {mode}")
    print(f"Threshold: {ENABLE_THRESHOLD_PCT}%")
    print(rule)

    if opts.sector:
        targets = [opts.sector.upper()]
    else:
        targets = SECTOR_SCOPED_FIXES.keys()

    for sector in targets:
        if sector in SECTOR_SCOPED_FIXES:
            print(f"\n📁 {sector}")
            print(f" Evidence: {SECTOR_SCOPED_FIXES[sector]['evidence']}")
            messages = apply_fixes(
                sector, SECTOR_SCOPED_FIXES[sector], dry_run=opts.dry_run
            )
            for message in messages:
                print(f" {message}")
        else:
            print(f"\n⚠️ {sector}: No sector-scoped fixes defined")

    print(f"\n{rule}")
    if opts.dry_run:
        print("DRY RUN - No changes applied")
    else:
        print("Fixes applied - see CHANGELOG.json for history")
    print(rule)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()