Added pytest + some tests.
Added AWS S3 Support (optional, for cloud image storage)
This commit is contained in:
54
tests/README.md
Normal file
54
tests/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Tests
|
||||
|
||||
This directory contains pytest tests for the Google Reviews Scraper.
|
||||
|
||||
## Running Tests
|
||||
|
||||
1. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Run all tests:
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
3. Run specific test files:
|
||||
```bash
|
||||
pytest tests/test_mongodb_connection.py
|
||||
pytest tests/test_s3_connection.py
|
||||
```
|
||||
|
||||
4. Run with verbose output:
|
||||
```bash
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
## Test Coverage
|
||||
|
||||
### MongoDB Connection Tests (`test_mongodb_connection.py`)
|
||||
- Tests MongoDB connection when enabled in config
|
||||
- Validates MongoDB configuration parameters
|
||||
- Tests basic database operations (insert/find/delete)
|
||||
- Skips tests when MongoDB is disabled
|
||||
|
||||
### S3 Connection Tests (`test_s3_connection.py`)
|
||||
- Tests S3 connection when enabled in config
|
||||
- Validates S3 configuration parameters
|
||||
- Tests file upload/download operations
|
||||
- Tests S3Handler class initialization
|
||||
- Skips tests when S3 is disabled
|
||||
|
||||
## Configuration
|
||||
|
||||
Tests use the main `config.yaml` file in the project root. Make sure your configuration is properly set up:
|
||||
|
||||
- For MongoDB tests: Ensure `use_mongodb: true` and valid MongoDB credentials
|
||||
- For S3 tests: Ensure `use_s3: true` and valid AWS credentials
|
||||
|
||||
## Test Results
|
||||
|
||||
- Tests will be skipped if the corresponding service (MongoDB/S3) is disabled in config
|
||||
- Failed connection tests indicate configuration or service availability issues
|
||||
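- When a service is enabled, properly configured and accessible, its connection and validation tests should pass; the "disabled state" check for that service is reported as skipped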
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Tests package
|
||||
39
tests/conftest.py
Normal file
39
tests/conftest.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""
|
||||
Test configuration and fixtures for Google Reviews Scraper tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@pytest.fixture
def config():
    """Load the project configuration from ``config.yaml``.

    The file is looked up in the parent of the directory that contains this
    module.

    Returns:
        The parsed YAML content, or an empty dict when the file is empty, so
        that fixtures depending on this one can always call ``.get`` on it.

    Raises:
        FileNotFoundError: If ``config.yaml`` does not exist.
    """
    config_path = Path(__file__).parent.parent / "config.yaml"
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load yields None for an empty document; normalise that to {}.
        return yaml.safe_load(f) or {}
|
||||
|
||||
|
||||
@pytest.fixture
def mongodb_config(config):
    """Return the ``mongodb`` section of the configuration.

    Returns:
        The section's mapping, or an empty dict when the key is missing or is
        present with a null/empty value (e.g. a bare ``mongodb:`` line), so
        callers can safely use ``in`` and ``.get`` on the result.
    """
    # ``.get("mongodb", {})`` would still return None for an explicit null.
    return config.get("mongodb") or {}
|
||||
|
||||
|
||||
@pytest.fixture
def s3_config(config):
    """Return the ``s3`` section of the configuration.

    Returns:
        The section's mapping, or an empty dict when the key is missing or is
        present with a null/empty value (e.g. a bare ``s3:`` line), so callers
        can safely use ``in`` and ``.get`` on the result.
    """
    # ``.get("s3", {})`` would still return None for an explicit null.
    return config.get("s3") or {}
|
||||
|
||||
|
||||
@pytest.fixture
def use_mongodb(config):
    """Return the ``use_mongodb`` setting, defaulting to ``False`` when unset."""
    mongodb_enabled = config.get("use_mongodb", False)
    return mongodb_enabled
|
||||
|
||||
|
||||
@pytest.fixture
def use_s3(config):
    """Return the ``use_s3`` setting, defaulting to ``False`` when unset."""
    s3_enabled = config.get("use_s3", False)
    return s3_enabled
|
||||
90
tests/test_mongodb_connection.py
Normal file
90
tests/test_mongodb_connection.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Test MongoDB connection functionality.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pymongo import MongoClient
|
||||
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
||||
|
||||
|
||||
class TestMongoDBConnection:
    """Connection, configuration and round-trip checks for MongoDB.

    Each test consults the ``use_mongodb`` fixture and skips itself when the
    state it targets does not apply.
    """

    def test_mongodb_connection_when_enabled(self, use_mongodb, mongodb_config):
        """Ping the server and round-trip a throwaway document.

        Skipped when MongoDB is disabled. Fails when MongoDB is enabled but
        the ``mongodb`` section or its ``uri`` is missing, when the server
        cannot be reached, or when the insert/find/delete round trip does not
        behave as expected.
        """
        if not use_mongodb:
            pytest.skip("MongoDB is disabled in configuration")

        if not mongodb_config:
            pytest.fail("MongoDB is enabled but no MongoDB configuration found")

        uri = mongodb_config.get("uri")
        if not uri:
            pytest.fail("MongoDB URI not found in configuration")

        import uuid  # local import: only this test needs it

        # A unique _id per run: a document left over from an interrupted run
        # can no longer make insert_one fail with a duplicate-key error.
        probe_id = f"test_connection_{uuid.uuid4().hex}"
        probe_filter = {"_id": probe_id}

        client = None
        collection = None
        probe_inserted = False
        try:
            # Short server-selection timeout so an unreachable server fails fast.
            client = MongoClient(uri, serverSelectionTimeoutMS=5000)

            # Connectivity check.
            client.admin.command('ping')

            database_name = mongodb_config.get("database", "reviews")
            collection_name = mongodb_config.get("collection", "google_reviews")
            collection = client[database_name][collection_name]

            # Insert a dummy document and read it back.
            collection.insert_one({"_id": probe_id, "test": True})
            probe_inserted = True

            found_doc = collection.find_one(probe_filter)
            assert found_doc is not None
            assert found_doc["test"] is True

            # Remove it again and confirm it is gone.
            collection.delete_one(probe_filter)
            probe_inserted = False

            assert collection.find_one(probe_filter) is None

        # ServerSelectionTimeoutError derives from ConnectionFailure, so it
        # has to be handled first or its branch is never reached.
        except ServerSelectionTimeoutError as e:
            pytest.fail(f"MongoDB server selection timeout: {e}")
        except ConnectionFailure as e:
            pytest.fail(f"Failed to connect to MongoDB: {e}")
        except AssertionError:
            # Let pytest report assertion failures with their own details
            # instead of relabelling them as unexpected errors.
            raise
        except Exception as e:
            pytest.fail(f"Unexpected error testing MongoDB: {e}")
        finally:
            if client is not None:
                try:
                    if probe_inserted:
                        # The test failed between insert and delete; do not
                        # leave the probe document in the collection.
                        collection.delete_one(probe_filter)
                except ConnectionFailure:
                    # Best-effort cleanup: the primary failure is already
                    # being reported, so do not replace it with this one.
                    pass
                finally:
                    client.close()

    def test_mongodb_config_validation(self, use_mongodb, mongodb_config):
        """Check that the MongoDB settings are present and well-formed.

        Skipped when MongoDB is disabled. Requires ``uri``, ``database`` and
        ``collection`` keys, a ``mongodb://`` or ``mongodb+srv://`` URI, and
        non-blank database and collection names.
        """
        if not use_mongodb:
            pytest.skip("MongoDB is disabled in configuration")

        # Required configuration fields.
        assert "uri" in mongodb_config, "MongoDB URI is required"
        assert "database" in mongodb_config, "MongoDB database name is required"
        assert "collection" in mongodb_config, "MongoDB collection name is required"

        # URI scheme; the isinstance guard turns a null/non-string value into
        # a normal assertion failure instead of an AttributeError.
        uri = mongodb_config["uri"]
        assert isinstance(uri, str) and uri.startswith(
            ("mongodb://", "mongodb+srv://")
        ), "Invalid MongoDB URI format"

        # Names must not be blank.
        assert mongodb_config["database"].strip(), "Database name cannot be empty"
        assert mongodb_config["collection"].strip(), "Collection name cannot be empty"

    def test_mongodb_skipped_when_disabled(self, use_mongodb):
        """Placeholder for the disabled state.

        Skipped when MongoDB is enabled; otherwise passes unconditionally.
        """
        if use_mongodb:
            pytest.skip("MongoDB is enabled, this test is for disabled state")

        # Reaching this point means MongoDB is disabled in the configuration.
        assert True
|
||||
202
tests/test_s3_connection.py
Normal file
202
tests/test_s3_connection.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
Test S3 connection functionality.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError, NoCredentialsError
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
class TestS3Connection:
    """Connection, configuration and round-trip checks for S3.

    Each test that talks to S3 consults the ``use_s3`` fixture and skips
    itself when S3 is disabled in the configuration.
    """

    @staticmethod
    def _create_client(s3_config):
        """Build a boto3 S3 client from the ``s3`` configuration section.

        Explicit credentials are passed only when both the access key id and
        the secret key are configured; otherwise boto3 resolves credentials
        on its own.
        """
        session_kwargs = {"region_name": s3_config.get("region_name", "us-east-1")}

        aws_access_key_id = s3_config.get("aws_access_key_id")
        aws_secret_access_key = s3_config.get("aws_secret_access_key")
        if aws_access_key_id and aws_secret_access_key:
            session_kwargs.update({
                "aws_access_key_id": aws_access_key_id,
                "aws_secret_access_key": aws_secret_access_key,
            })

        return boto3.client("s3", **session_kwargs)

    def test_s3_connection_when_enabled(self, use_s3, s3_config):
        """Check that the configured bucket can be reached.

        Skipped when S3 is disabled. Fails when the ``s3`` section or its
        ``bucket_name`` is missing, when credentials cannot be found, or when
        ``head_bucket`` reports an error.
        """
        if not use_s3:
            pytest.skip("S3 is disabled in configuration")

        if not s3_config:
            pytest.fail("S3 is enabled but no S3 configuration found")

        bucket_name = s3_config.get("bucket_name")
        if not bucket_name:
            pytest.fail("S3 bucket name not found in configuration")

        try:
            s3_client = self._create_client(s3_config)

            # Bucket access check.
            s3_client.head_bucket(Bucket=bucket_name)

        except NoCredentialsError:
            pytest.fail("AWS credentials not found. Check your configuration or environment.")
        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code', '')
            if error_code == '404':
                pytest.fail(f"S3 bucket '{bucket_name}' not found")
            elif error_code == '403':
                pytest.fail(f"Access denied to S3 bucket '{bucket_name}'. Check your credentials and permissions.")
            else:
                pytest.fail(f"S3 client error: {e}")
        except Exception as e:
            pytest.fail(f"Unexpected error testing S3 connection: {e}")

    def test_s3_upload_download_when_enabled(self, use_s3, s3_config):
        """Upload a small file, download it again and compare the contents.

        Skipped when S3 is disabled. The object is written under
        ``<prefix><reviews_folder>/test_file.txt`` and is deleted again even
        when a later step fails.
        """
        if not use_s3:
            pytest.skip("S3 is disabled in configuration")

        if not s3_config:
            pytest.fail("S3 is enabled but no S3 configuration found")

        bucket_name = s3_config.get("bucket_name")
        if not bucket_name:
            pytest.fail("S3 bucket name not found in configuration")

        prefix = s3_config.get("prefix", "reviews/").rstrip("/") + "/"
        reviews_folder = s3_config.get("reviews_folder", "reviews/").strip("/")

        test_content = b"This is a test file for S3 upload"
        test_key = f"{prefix}{reviews_folder}/test_file.txt"

        try:
            s3_client = self._create_client(s3_config)

            # A temporary directory removes both local files on exit, no
            # matter which step failed.
            with tempfile.TemporaryDirectory() as tmp_dir:
                upload_path = Path(tmp_dir) / "upload.txt"
                download_path = Path(tmp_dir) / "download.txt"
                upload_path.write_bytes(test_content)

                s3_client.upload_file(
                    str(upload_path),
                    bucket_name,
                    test_key,
                    ExtraArgs={'ACL': 'public-read'}
                )

                try:
                    # The object must exist before it is downloaded.
                    s3_client.head_object(Bucket=bucket_name, Key=test_key)
                    s3_client.download_file(bucket_name, test_key, str(download_path))
                    downloaded_content = download_path.read_bytes()
                finally:
                    # Always remove the uploaded object so a failed run does
                    # not leave it in the bucket.
                    s3_client.delete_object(Bucket=bucket_name, Key=test_key)

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code', '')
            if error_code == '403':
                pytest.fail("Access denied during S3 operations. Check your permissions.")
            else:
                pytest.fail(f"S3 operation failed: {e}")
        except Exception as e:
            pytest.fail(f"Unexpected error during S3 test: {e}")

        # Asserted outside the try block so a mismatch is reported by pytest
        # as an assertion failure rather than as an unexpected error.
        assert downloaded_content == test_content, "Downloaded content doesn't match uploaded content"

    def test_s3_config_validation(self, use_s3, s3_config):
        """Check that the S3 settings are present and well-formed.

        Skipped when S3 is disabled. Requires a non-blank ``bucket_name`` and
        a non-blank ``region_name`` (default ``us-east-1``). A ``prefix``
        without a trailing ``/`` only produces a warning.
        """
        if not use_s3:
            pytest.skip("S3 is disabled in configuration")

        # Required configuration fields.
        assert "bucket_name" in s3_config, "S3 bucket_name is required"
        assert s3_config["bucket_name"].strip(), "S3 bucket_name cannot be empty"

        # Optional fields fall back to defaults.
        region_name = s3_config.get("region_name", "us-east-1")
        assert region_name.strip(), "S3 region_name cannot be empty"

        prefix = s3_config.get("prefix", "")
        if prefix and not prefix.endswith("/"):
            import warnings  # local import: only this branch needs it

            # Not an error, so warn instead of failing the test.
            warnings.warn(f"S3 prefix {prefix!r} should end with '/'")

    def test_s3_skipped_when_disabled(self, use_s3):
        """Placeholder for the disabled state.

        Skipped when S3 is enabled; otherwise passes unconditionally.
        """
        if use_s3:
            pytest.skip("S3 is enabled, this test is for disabled state")

        # Reaching this point means S3 is disabled in the configuration.
        assert True

    def test_s3_handler_initialization(self, config):
        """Check that ``S3Handler`` picks up the S3 settings from ``config``.

        Expectations:
        - ``use_s3`` true but no ``bucket_name``: the handler must report
          itself as disabled.
        - otherwise ``handler.enabled`` must equal ``use_s3`` and, when
          enabled, ``handler.bucket_name`` must equal the configured bucket.

        NOTE(review): the "disabled when bucket_name is missing" rule is taken
        from this test's original assertion message; confirm against
        ``modules/s3_handler.py``.
        """
        import sys

        # Make the project root importable once, without growing sys.path on
        # every run.
        project_root = str(Path(__file__).parent.parent)
        if project_root not in sys.path:
            sys.path.append(project_root)

        try:
            from modules.s3_handler import S3Handler

            s3_handler = S3Handler(config)
        except ImportError:
            pytest.fail("Could not import S3Handler class")
        except Exception as e:
            pytest.fail(f"Error testing S3Handler initialization: {e}")

        expected_enabled = config.get("use_s3", False)
        bucket_name = (config.get("s3") or {}).get("bucket_name", "")

        if expected_enabled and not bucket_name:
            # Checked before the equality assertion below: the two
            # expectations contradict each other in this case, so testing the
            # equality first would make this case impossible to pass.
            assert not s3_handler.enabled, "S3Handler should be disabled when bucket_name is missing"
            return

        assert s3_handler.enabled == expected_enabled, "S3Handler enabled state should match config use_s3 setting"

        if expected_enabled:
            assert s3_handler.bucket_name == bucket_name, "S3Handler should load bucket name from config"
|
||||
Reference in New Issue
Block a user