Initial commit - WhyRating Engine (Google Reviews Scraper)

This commit is contained in:
Alejandro Gutiérrez
2026-02-02 18:19:00 +00:00
parent 0543a08242
commit 2206ddeff2
136 changed files with 51138 additions and 855 deletions

View File

@@ -0,0 +1,120 @@
-- ltree supplies the label-path column type used by gbp_categories.path.
CREATE EXTENSION IF NOT EXISTS ltree;

-- Category tree: one row per category node. The node's position in the
-- hierarchy is stored as an ltree path; parent_id is a nullable
-- self-reference to another row of the same table.
CREATE TABLE IF NOT EXISTS gbp_categories (
    id             SERIAL    PRIMARY KEY,
    name           TEXT      NOT NULL,
    slug           TEXT      NOT NULL,
    path           ltree     NOT NULL UNIQUE,              -- at most one row per path
    level          INTEGER   NOT NULL DEFAULT 1,
    parent_id      INTEGER   REFERENCES gbp_categories (id),
    category_count INTEGER   DEFAULT 0,
    created_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Enable trigram extension for fuzzy search.
-- This has to run before the gin_trgm_ops index below is created: the
-- operator class is provided by pg_trgm, and CREATE INDEX fails with
-- "operator class does not exist" on a database where it is not installed yet.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Indexes for fast hierarchical queries
-- GiST on path serves the ltree ancestor/descendant operators (@>, <@).
CREATE INDEX IF NOT EXISTS idx_gbp_categories_path ON gbp_categories USING GIST (path);
-- NOTE(review): the UNIQUE constraint on path already maintains a btree index
-- on that column, so this index looks redundant — confirm before dropping it.
CREATE INDEX IF NOT EXISTS idx_gbp_categories_path_btree ON gbp_categories USING BTREE (path);
CREATE INDEX IF NOT EXISTS idx_gbp_categories_name ON gbp_categories (name);
CREATE INDEX IF NOT EXISTS idx_gbp_categories_slug ON gbp_categories (slug);
CREATE INDEX IF NOT EXISTS idx_gbp_categories_level ON gbp_categories (level);
CREATE INDEX IF NOT EXISTS idx_gbp_categories_parent ON gbp_categories (parent_id);

-- Trigram index on name for fuzzy / substring matching (LIKE, ILIKE and the
-- pg_trgm similarity operators). This is not a tsvector full-text index.
CREATE INDEX IF NOT EXISTS idx_gbp_categories_name_trgm ON gbp_categories USING GIN (name gin_trgm_ops);
-- Trigger function: overwrites updated_at on the row being written with
-- CURRENT_TIMESTAMP (the start time of the current transaction) and returns
-- the modified row. Because it changes NEW, it only has an effect when
-- attached as a BEFORE ... FOR EACH ROW trigger.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $func$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$func$;
-- Refresh gbp_categories.updated_at on every row update.
-- The trigger is dropped first so the script can be re-run without a
-- "trigger already exists" error.
DROP TRIGGER IF EXISTS update_gbp_categories_updated_at ON gbp_categories;

CREATE TRIGGER update_gbp_categories_updated_at
    BEFORE UPDATE ON gbp_categories
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- Helper function: list every descendant of a category (the whole subtree,
-- not only the direct children).
--
-- Parameters:
--   parent_path  ltree path of the category whose subtree is wanted.
-- Returns:
--   (id, name, slug, path, level) for each row whose path lies below
--   parent_path, sorted by path. The row at parent_path itself is excluded.
CREATE OR REPLACE FUNCTION get_category_children(parent_path ltree)
RETURNS TABLE (
    id    INT,
    name  TEXT,
    slug  TEXT,
    path  ltree,
    level INT
)
LANGUAGE plpgsql
AS $body$
BEGIN
    RETURN QUERY
        SELECT
            node.id,
            node.name,
            node.slug,
            node.path,
            node.level
        FROM gbp_categories AS node
        WHERE parent_path @> node.path       -- parent_path is an ancestor of, or equal to, the row
          AND node.path <> parent_path       -- drop the "equal to" case
        ORDER BY node.path;
END;
$body$;
-- Helper function: list every ancestor of a category.
--
-- Parameters:
--   category_path  ltree path of the category whose ancestors are wanted.
-- Returns:
--   (id, name, slug, path, level) for each row whose path is a proper
--   prefix of category_path, sorted by level. The row at category_path itself
--   is excluded.
CREATE OR REPLACE FUNCTION get_category_ancestors(category_path ltree)
RETURNS TABLE (
    id    INT,
    name  TEXT,
    slug  TEXT,
    path  ltree,
    level INT
)
LANGUAGE plpgsql
AS $body$
BEGIN
    RETURN QUERY
        SELECT
            node.id,
            node.name,
            node.slug,
            node.path,
            node.level
        FROM gbp_categories AS node
        WHERE node.path @> category_path     -- the row is an ancestor of, or equal to, category_path
          AND node.path <> category_path     -- drop the "equal to" case
        ORDER BY node.level;
END;
$body$;
-- Helper function: Search categories by name (fuzzy)
--
-- A row matches when its name contains search_term as a case-insensitive
-- substring, or when the pg_trgm similarity between name and search_term
-- exceeds 0.3.
--
-- Parameters:
--   search_term  text to look for; treated as literal text, so %, _ and \
--                in the term match only themselves.
--   limit_count  maximum number of rows returned (default 20).
-- Returns:
--   (id, name, path, level, similarity) ordered by similarity descending,
--   then level, name and id. A NULL search_term yields no rows.
CREATE OR REPLACE FUNCTION search_categories(search_term TEXT, limit_count INT DEFAULT 20)
RETURNS TABLE (
    id INT,
    name TEXT,
    path ltree,
    level INT,
    similarity REAL
) AS $$
DECLARE
    -- search_term with the LIKE metacharacters backslash-escaped. Backslash is
    -- escaped first so the escapes added for % and _ are not doubled.
    like_literal TEXT := replace(replace(replace(search_term, '\', '\\'), '%', '\%'), '_', '\_');
BEGIN
    RETURN QUERY
    SELECT c.id, c.name, c.path, c.level,
           similarity(c.name, search_term) AS sim
    FROM gbp_categories c
    WHERE c.name ILIKE '%' || like_literal || '%'
       OR similarity(c.name, search_term) > 0.3
    -- c.id is the final tie-break so that LIMIT cuts the result
    -- deterministically when similarity, level and name are all equal.
    ORDER BY sim DESC, c.level, c.name, c.id
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
-- View: row counts for the category tree, one output row per distinct level.
-- "count" is the number of categories at that level. Because rows are grouped
-- by level, each of sectors / business_types / sub_categories /
-- leaf_categories is non-zero only on the row for its own level (1, 2, 3, 4
-- respectively), where it equals "count".
CREATE OR REPLACE VIEW category_tree_stats AS
SELECT
    c.level,
    COUNT(*)                                AS count,
    COUNT(CASE WHEN c.level = 1 THEN 1 END) AS sectors,
    COUNT(CASE WHEN c.level = 2 THEN 1 END) AS business_types,
    COUNT(CASE WHEN c.level = 3 THEN 1 END) AS sub_categories,
    COUNT(CASE WHEN c.level = 4 THEN 1 END) AS leaf_categories
FROM gbp_categories AS c
GROUP BY c.level
ORDER BY c.level;
-- Catalog descriptions for the table and its two hierarchy columns. COMMENT ON
-- stores the text with the object's metadata and replaces any earlier comment.
COMMENT ON TABLE gbp_categories IS 'Google Business Profile categories organized in a 4-level hierarchy using ltree';
COMMENT ON COLUMN gbp_categories.path IS 'Hierarchical path using ltree (e.g., Food_Dining.Restaurants.By_Cuisine.Afghan_restaurant)';
COMMENT ON COLUMN gbp_categories.level IS '1=Sector, 2=Business Type, 3=Sub-category, 4=Specific Category';