Initial commit - WhyRating Engine (Google Reviews Scraper)

This commit is contained in:
Alejandro Gutiérrez
2026-02-02 18:19:00 +00:00
parent 0543a08242
commit 2206ddeff2
136 changed files with 51138 additions and 855 deletions

View File

@@ -354,7 +354,11 @@ class DatabaseManager:
callback_status,
callback_attempts,
scraper_version,
scraper_variant
scraper_variant,
business_name,
business_category,
business_address,
business_rating
FROM jobs
WHERE job_id = $1
""", job_id)
@@ -575,6 +579,69 @@ class DatabaseManager:
log.debug(f"Updated session fingerprint for job {job_id}")
async def update_job_metadata(
    self,
    job_id: UUID,
    metadata_updates: Dict[str, Any]
):
    """
    Merge additional fields into the JSONB ``metadata`` column of one job.

    The merge is a top-level JSONB concatenation (``||``): every key in
    ``metadata_updates`` is added to the stored object, replacing a
    same-named key if one is already there; keys that are not mentioned are
    left as they are. A NULL ``metadata`` column is treated as an empty
    object. ``updated_at`` is set to ``NOW()`` by the same statement.

    Args:
        job_id: Job UUID
        metadata_updates: Fields to add or replace, serialized with
            ``json.dumps`` before being sent. Keys used by the scraper:
            - bot_detected: True if sort button was hidden (bot detection)
            - initial_sort_used: Sort order used for scraping
            - sort_orders_attempted: List of all sort orders tried
            - multi_sort: Multi-sort completion info
    """
    merge_sql = """
        UPDATE jobs
        SET
            metadata = COALESCE(metadata, '{}'::jsonb) || $2::jsonb,
            updated_at = NOW()
        WHERE job_id = $1
    """

    async with self.pool.acquire() as connection:
        # The statement casts $2 to jsonb, so the dict travels as JSON text.
        serialized_updates = json.dumps(metadata_updates)
        await connection.execute(merge_sql, job_id, serialized_updates)

        log.debug(f"Updated job metadata for job {job_id}: {list(metadata_updates.keys())}")
async def update_business_info(
    self,
    job_id: UUID,
    business_name: Optional[str] = None,
    business_category: Optional[str] = None,
    business_address: Optional[str] = None,
    business_rating: Optional[float] = None
):
    """
    Write the dedicated business-info columns of a job row.

    Unlike the JSONB ``metadata`` blob, these are plain columns so they can
    be queried directly. Each column is assigned through ``COALESCE``, so an
    argument left as ``None`` keeps whatever value is already stored; as a
    consequence this method cannot reset a column back to NULL.
    ``updated_at`` is set to ``NOW()`` on every call, including calls where
    all four values are ``None``.

    Args:
        job_id: Job UUID
        business_name: Business name from Google Maps
        business_category: Business category (e.g., "Restaurant", "Toy store")
        business_address: Full address from Google Maps
        business_rating: Aggregate rating at time of scrape (e.g., 4.5)
    """
    update_sql = """
        UPDATE jobs
        SET
            business_name = COALESCE($2, business_name),
            business_category = COALESCE($3, business_category),
            business_address = COALESCE($4, business_address),
            business_rating = COALESCE($5, business_rating),
            updated_at = NOW()
        WHERE job_id = $1
    """
    # Positional order must line up with placeholders $2..$5 above.
    column_values = (
        business_name,
        business_category,
        business_address,
        business_rating,
    )

    async with self.pool.acquire() as connection:
        await connection.execute(update_sql, job_id, *column_values)

        log.debug(f"Updated business info for job {job_id}: name={business_name}, category={business_category}")
async def mark_job_partial(
self,
job_id: UUID,
@@ -674,7 +741,11 @@ class DatabaseManager:
callback_status,
callback_attempts,
scraper_version,
scraper_variant
scraper_variant,
business_name,
business_category,
business_address,
business_rating
FROM jobs
{where_clause}
ORDER BY created_at DESC