Initial commit - WhyRating Engine (Google Reviews Scraper)
This commit is contained in:
120
db/init/01_create_categories.sql
Normal file
120
db/init/01_create_categories.sql
Normal file
@@ -0,0 +1,120 @@
|
||||
-- ltree supplies the label-path column type used by gbp_categories.path.
CREATE EXTENSION IF NOT EXISTS ltree;

-- Category tree: one row per category, positioned in the hierarchy by its
-- ltree path and (optionally) linked to its parent row via parent_id.
-- Constraint names are written out explicitly; they are the names PostgreSQL
-- assigns by default to these constraints.
CREATE TABLE IF NOT EXISTS gbp_categories (
    id             SERIAL,
    name           TEXT      NOT NULL,
    slug           TEXT      NOT NULL,
    path           ltree     NOT NULL,
    level          INTEGER   NOT NULL DEFAULT 1,
    parent_id      INTEGER,
    category_count INTEGER   DEFAULT 0,
    created_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT gbp_categories_pkey
        PRIMARY KEY (id),
    CONSTRAINT gbp_categories_parent_id_fkey
        FOREIGN KEY (parent_id) REFERENCES gbp_categories (id),
    CONSTRAINT gbp_categories_path_key
        UNIQUE (path)
);
|
||||
|
||||
-- Lookup indexes on gbp_categories.

-- GiST index on path, for the ltree ancestor/descendant operators (<@, @>).
CREATE INDEX IF NOT EXISTS idx_gbp_categories_path
    ON gbp_categories USING GIST (path);

-- NOTE(review): the table's UNIQUE(path) constraint is already backed by a
-- btree index on path, so this index looks redundant — confirm nothing refers
-- to it by name before dropping it.
CREATE INDEX IF NOT EXISTS idx_gbp_categories_path_btree
    ON gbp_categories USING BTREE (path);

-- Single-column indexes for equality lookups and filtering.
CREATE INDEX IF NOT EXISTS idx_gbp_categories_name
    ON gbp_categories (name);

CREATE INDEX IF NOT EXISTS idx_gbp_categories_slug
    ON gbp_categories (slug);

CREATE INDEX IF NOT EXISTS idx_gbp_categories_level
    ON gbp_categories (level);

CREATE INDEX IF NOT EXISTS idx_gbp_categories_parent
    ON gbp_categories (parent_id);
|
||||
|
||||
-- Trigram index for fuzzy / substring matching on category names.
-- pg_trgm must be installed BEFORE the index is created: the gin_trgm_ops
-- operator class is provided by that extension, so creating the index first
-- fails on a fresh database.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

CREATE INDEX IF NOT EXISTS idx_gbp_categories_name_trgm
    ON gbp_categories USING GIN (name gin_trgm_ops);
|
||||
|
||||
-- Trigger function: overwrites updated_at on the row being written with
-- CURRENT_TIMESTAMP and passes the modified row on. Intended for row-level
-- BEFORE triggers on tables that have an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $body$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$body$;
|
||||
|
||||
-- Refresh gbp_categories.updated_at on every row update.
-- The trigger is dropped first so the script can be re-run safely.
DROP TRIGGER IF EXISTS update_gbp_categories_updated_at ON gbp_categories;

CREATE TRIGGER update_gbp_categories_updated_at
    BEFORE UPDATE
    ON gbp_categories
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
|
||||
|
||||
-- Helper function: list everything below a category.
-- Returns every row whose path is a descendant of parent_path at any depth
-- (not only direct children); the row at parent_path itself is excluded.
-- Rows come back ordered by path.
CREATE OR REPLACE FUNCTION get_category_children(parent_path ltree)
RETURNS TABLE (
    id    INT,
    name  TEXT,
    slug  TEXT,
    path  ltree,
    level INT
)
LANGUAGE plpgsql
AS $body$
BEGIN
    RETURN QUERY
        SELECT
            node.id,
            node.name,
            node.slug,
            node.path,
            node.level
        FROM gbp_categories AS node
        WHERE node.path <@ parent_path      -- descendant-or-self of parent_path
          AND node.path <> parent_path      -- ...minus the node itself
        ORDER BY node.path;
END;
$body$;
|
||||
|
||||
-- Helper function: list everything above a category.
-- Returns every row whose path is an ancestor of category_path; the row at
-- category_path itself is excluded. Rows come back ordered by level.
CREATE OR REPLACE FUNCTION get_category_ancestors(category_path ltree)
RETURNS TABLE (
    id    INT,
    name  TEXT,
    slug  TEXT,
    path  ltree,
    level INT
)
LANGUAGE plpgsql
AS $body$
BEGIN
    RETURN QUERY
        SELECT
            anc.id,
            anc.name,
            anc.slug,
            anc.path,
            anc.level
        FROM gbp_categories AS anc
        WHERE anc.path @> category_path     -- ancestor-or-self of category_path
          AND anc.path <> category_path     -- ...minus the node itself
        ORDER BY anc.level;
END;
$body$;
|
||||
|
||||
-- Helper function: search categories by name (substring + fuzzy).
--
-- Parameters:
--   search_term  text to look for; treated as literal text, so '%' and '_'
--                in it match themselves rather than acting as LIKE wildcards.
--   limit_count  maximum number of rows returned (default 20).
--
-- Returns one row per matching category with its trigram similarity to
-- search_term. A category matches when its name contains search_term
-- (case-insensitive) or its trigram similarity to search_term exceeds 0.3.
-- Rows are ordered by similarity (best first), then level, name and path;
-- path is the final key so that ties are ordered deterministically.
CREATE OR REPLACE FUNCTION search_categories(search_term TEXT, limit_count INT DEFAULT 20)
RETURNS TABLE (
    id INT,
    name TEXT,
    path ltree,
    level INT,
    similarity REAL
) AS $$
DECLARE
    -- Substring pattern with LIKE metacharacters neutralised. '!' is used as
    -- the escape character (see ESCAPE below) so no backslash literals are
    -- needed; the escape character itself is doubled first.
    like_pattern TEXT := '%'
        || replace(replace(replace(search_term, '!', '!!'), '%', '!%'), '_', '!_')
        || '%';
BEGIN
    RETURN QUERY
    SELECT c.id, c.name, c.path, c.level,
           similarity(c.name, search_term) AS sim
    FROM gbp_categories c
    WHERE c.name ILIKE like_pattern ESCAPE '!'
       OR similarity(c.name, search_term) > 0.3
    ORDER BY sim DESC, c.level, c.name, c.path
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- View: row counts per hierarchy level.
-- One output row per distinct level. Because rows are grouped by level, each
-- of the per-level columns (sectors .. leaf_categories) is non-zero only on
-- the row for its own level and 0 on every other row.
CREATE OR REPLACE VIEW category_tree_stats AS
SELECT
    c.level,
    COUNT(*)                            AS count,
    COUNT(*) FILTER (WHERE c.level = 1) AS sectors,
    COUNT(*) FILTER (WHERE c.level = 2) AS business_types,
    COUNT(*) FILTER (WHERE c.level = 3) AS sub_categories,
    COUNT(*) FILTER (WHERE c.level = 4) AS leaf_categories
FROM gbp_categories AS c
GROUP BY c.level
ORDER BY c.level;
|
||||
|
||||
-- Catalog documentation for the table and its hierarchy columns.
COMMENT ON TABLE gbp_categories
    IS 'Google Business Profile categories organized in a 4-level hierarchy using ltree';

COMMENT ON COLUMN gbp_categories.path
    IS 'Hierarchical path using ltree (e.g., Food_Dining.Restaurants.By_Cuisine.Afghan_restaurant)';

COMMENT ON COLUMN gbp_categories.level
    IS '1=Sector, 2=Business Type, 3=Sub-category, 4=Specific Category';
|
||||
Reference in New Issue
Block a user