Added pytest + some tests.

Added AWS S3 Support (optional, for cloud image storage)
This commit is contained in:
George Khananaev
2025-06-03 00:12:11 +07:00
parent 84399dfbe8
commit 50aaa9ce26
9 changed files with 755 additions and 5 deletions

54
tests/README.md Normal file
View File

@@ -0,0 +1,54 @@
# Tests
This directory contains pytest tests for the Google Reviews Scraper.
## Running Tests
1. Install dependencies:
```bash
pip install -r requirements.txt
```
2. Run all tests:
```bash
pytest tests/
```
3. Run specific test files:
```bash
pytest tests/test_mongodb_connection.py
pytest tests/test_s3_connection.py
```
4. Run with verbose output:
```bash
pytest tests/ -v
```
## Test Coverage
### MongoDB Connection Tests (`test_mongodb_connection.py`)
- Tests MongoDB connection when enabled in config
- Validates MongoDB configuration parameters
- Tests basic database operations (insert/find/delete) using a temporary document written to the configured collection
- Skips tests when MongoDB is disabled
### S3 Connection Tests (`test_s3_connection.py`)
- Tests S3 connection when enabled in config
- Validates S3 configuration parameters
- Tests file upload/download operations using a temporary object in the configured bucket (deleted after the check)
- Tests S3Handler class initialization
- Skips tests when S3 is disabled
## Configuration
Tests use the main `config.yaml` file in the project root. Make sure your configuration is properly set up:
- For MongoDB tests: Ensure `use_mongodb: true` and valid MongoDB credentials
- For S3 tests: Ensure `use_s3: true` and valid AWS credentials
## Test Results
- Tests will be skipped if the corresponding service (MongoDB/S3) is disabled in config
- Failed connection tests indicate configuration or service availability issues
- All tests should pass when services are properly configured and accessible

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Tests package

39
tests/conftest.py Normal file
View File

@@ -0,0 +1,39 @@
"""
Test configuration and fixtures for Google Reviews Scraper tests.
"""
import pytest
import yaml
from pathlib import Path
@pytest.fixture
def config():
    """Load the project configuration from ``config.yaml``.

    The file is looked up in the parent of the directory containing this
    module.

    Returns:
        The parsed YAML content, or an empty dict when the file contains
        no document.
    """
    config_path = Path(__file__).parent.parent / "config.yaml"
    # Explicit encoding keeps parsing independent of the platform locale.
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; fall back to {} so
        # callers can always use .get() on the result.
        return yaml.safe_load(f) or {}
@pytest.fixture
def mongodb_config(config):
    """Return the ``mongodb`` section of the configuration.

    Returns:
        The section's value, or an empty dict when the key is absent or
        its value is empty/null (e.g. a bare ``mongodb:`` line in YAML).
    """
    # `or {}` also covers a key that is present with a null value, which
    # a plain .get(key, {}) default would pass through as None.
    return config.get("mongodb") or {}
@pytest.fixture
def s3_config(config):
    """Return the ``s3`` section of the configuration.

    Returns:
        The section's value, or an empty dict when the key is absent or
        its value is empty/null (e.g. a bare ``s3:`` line in YAML).
    """
    # `or {}` also covers a key that is present with a null value, which
    # a plain .get(key, {}) default would pass through as None.
    return config.get("s3") or {}
@pytest.fixture
def use_mongodb(config):
    """Return the ``use_mongodb`` flag from the configuration.

    Falls back to ``False`` when the key is not present.
    """
    mongodb_flag = config.get("use_mongodb", False)
    return mongodb_flag
@pytest.fixture
def use_s3(config):
    """Return the ``use_s3`` flag from the configuration.

    Falls back to ``False`` when the key is not present.
    """
    s3_flag = config.get("use_s3", False)
    return s3_flag

View File

@@ -0,0 +1,90 @@
"""
Test MongoDB connection functionality.
"""
import pytest
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
class TestMongoDBConnection:
    """Test MongoDB connection and basic operations.

    Each test receives the ``use_mongodb`` / ``mongodb_config`` fixtures and
    skips itself when it does not apply to the current configuration.
    """

    def test_mongodb_connection_when_enabled(self, use_mongodb, mongodb_config):
        """Ping the server, then insert, read back and delete a probe document.

        Skips when MongoDB is disabled; fails when it is enabled but the
        configuration section or its ``uri`` is missing.
        """
        import uuid

        if not use_mongodb:
            pytest.skip("MongoDB is disabled in configuration")
        if not mongodb_config:
            pytest.fail("MongoDB is enabled but no MongoDB configuration found")
        uri = mongodb_config.get("uri")
        if not uri:
            pytest.fail("MongoDB URI not found in configuration")

        # A unique _id avoids duplicate-key errors from concurrent runs or
        # from a probe document left behind by an earlier interrupted run.
        probe_id = f"test_connection_{uuid.uuid4().hex}"
        client = None
        try:
            # Create MongoDB client with shorter timeout for testing
            client = MongoClient(uri, serverSelectionTimeoutMS=5000)
            # Test connection by pinging the server
            client.admin.command("ping")

            database_name = mongodb_config.get("database", "reviews")
            collection_name = mongodb_config.get("collection", "google_reviews")
            collection = client[database_name][collection_name]

            collection.insert_one({"_id": probe_id, "test": True})
            try:
                found_doc = collection.find_one({"_id": probe_id})
                assert found_doc is not None
                assert found_doc["test"] is True
            finally:
                # Always remove the probe document, even if a check failed.
                collection.delete_one({"_id": probe_id})

            # Verify document was deleted
            assert collection.find_one({"_id": probe_id}) is None
        except AssertionError:
            # Let pytest report assertion failures with full introspection
            # instead of folding them into the generic handler below.
            raise
        except ServerSelectionTimeoutError as e:
            # Must precede ConnectionFailure: it is a subclass of it, so the
            # reverse order would make this branch unreachable.
            pytest.fail(f"MongoDB server selection timeout: {e}")
        except ConnectionFailure as e:
            pytest.fail(f"Failed to connect to MongoDB: {e}")
        except Exception as e:
            pytest.fail(f"Unexpected error testing MongoDB: {e}")
        finally:
            if client is not None:
                client.close()

    def test_mongodb_config_validation(self, use_mongodb, mongodb_config):
        """Check that the required MongoDB settings are present and well-formed."""
        if not use_mongodb:
            pytest.skip("MongoDB is disabled in configuration")

        # Check required configuration fields
        assert "uri" in mongodb_config, "MongoDB URI is required"
        assert "database" in mongodb_config, "MongoDB database name is required"
        assert "collection" in mongodb_config, "MongoDB collection name is required"

        # Validate URI format; the isinstance check turns a null/non-string
        # value into an assertion failure rather than an AttributeError.
        uri = mongodb_config["uri"]
        assert isinstance(uri, str) and uri.startswith(
            ("mongodb://", "mongodb+srv://")
        ), "Invalid MongoDB URI format"

        # Validate names are not empty
        assert mongodb_config["database"].strip(), "Database name cannot be empty"
        assert mongodb_config["collection"].strip(), "Collection name cannot be empty"

    def test_mongodb_skipped_when_disabled(self, use_mongodb):
        """Pass when MongoDB is disabled; skip when it is enabled."""
        if use_mongodb:
            pytest.skip("MongoDB is enabled, this test is for disabled state")
        # Reaching this point means the flag is falsy, i.e. MongoDB is disabled.
        assert not use_mongodb

202
tests/test_s3_connection.py Normal file
View File

@@ -0,0 +1,202 @@
"""
Test S3 connection functionality.
"""
import pytest
import boto3
from botocore.exceptions import ClientError, NoCredentialsError
from pathlib import Path
import tempfile
import os
class TestS3Connection:
    """Test S3 connection and basic operations.

    Each test receives the ``use_s3`` / ``s3_config`` / ``config`` fixtures
    and skips itself when it does not apply to the current configuration.
    """

    @staticmethod
    def _create_s3_client(s3_config):
        """Build a boto3 S3 client from the ``s3`` configuration section.

        Explicit credentials are passed only when both the access key id and
        the secret are configured; otherwise boto3 resolves credentials on
        its own.
        """
        client_kwargs = {"region_name": s3_config.get("region_name", "us-east-1")}
        access_key_id = s3_config.get("aws_access_key_id")
        secret_access_key = s3_config.get("aws_secret_access_key")
        if access_key_id and secret_access_key:
            client_kwargs["aws_access_key_id"] = access_key_id
            client_kwargs["aws_secret_access_key"] = secret_access_key
        return boto3.client("s3", **client_kwargs)

    def test_s3_connection_when_enabled(self, use_s3, s3_config):
        """Check that the configured bucket is reachable via ``head_bucket``."""
        if not use_s3:
            pytest.skip("S3 is disabled in configuration")
        if not s3_config:
            pytest.fail("S3 is enabled but no S3 configuration found")

        # Validate required configuration
        bucket_name = s3_config.get("bucket_name")
        if not bucket_name:
            pytest.fail("S3 bucket name not found in configuration")

        try:
            s3_client = self._create_s3_client(s3_config)
            # Test bucket access by checking if bucket exists
            s3_client.head_bucket(Bucket=bucket_name)
        except NoCredentialsError:
            pytest.fail("AWS credentials not found. Check your configuration or environment.")
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code", "")
            if error_code == "404":
                pytest.fail(f"S3 bucket '{bucket_name}' not found")
            elif error_code == "403":
                pytest.fail(
                    f"Access denied to S3 bucket '{bucket_name}'. "
                    "Check your credentials and permissions."
                )
            else:
                pytest.fail(f"S3 client error: {e}")
        except Exception as e:
            pytest.fail(f"Unexpected error testing S3 connection: {e}")

    def test_s3_upload_download_when_enabled(self, use_s3, s3_config):
        """Upload a small file, download it again and compare the contents.

        The uploaded object is deleted once it has been created, whether or
        not the later steps succeed, and local scratch files live in a
        temporary directory that is removed on exit.
        """
        if not use_s3:
            pytest.skip("S3 is disabled in configuration")
        if not s3_config:
            pytest.fail("S3 is enabled but no S3 configuration found")
        bucket_name = s3_config.get("bucket_name")
        if not bucket_name:
            pytest.fail("S3 bucket name not found in configuration")

        prefix = s3_config.get("prefix", "reviews/").rstrip("/") + "/"
        reviews_folder = s3_config.get("reviews_folder", "reviews/").strip("/")
        # Test with reviews folder structure
        test_key = f"{prefix}{reviews_folder}/test_file.txt"
        test_content = b"This is a test file for S3 upload"

        # Both scratch paths are known before any S3 call, so cleanup never
        # depends on how far the test progressed.
        with tempfile.TemporaryDirectory() as scratch_dir:
            upload_path = os.path.join(scratch_dir, "upload.txt")
            download_path = os.path.join(scratch_dir, "download.txt")
            with open(upload_path, "wb") as upload_file:
                upload_file.write(test_content)

            try:
                s3_client = self._create_s3_client(s3_config)
                # NOTE(review): the public-read ACL is kept from the original
                # test; buckets with ACLs disabled reject it. Confirm it
                # matches what the application's uploads use.
                s3_client.upload_file(
                    upload_path,
                    bucket_name,
                    test_key,
                    ExtraArgs={"ACL": "public-read"},
                )
                try:
                    # Test that file exists in S3
                    s3_client.head_object(Bucket=bucket_name, Key=test_key)
                    s3_client.download_file(bucket_name, test_key, download_path)
                finally:
                    # Remove the object even when head/download failed.
                    s3_client.delete_object(Bucket=bucket_name, Key=test_key)
            except ClientError as e:
                error_code = e.response.get("Error", {}).get("Code", "")
                if error_code == "403":
                    pytest.fail("Access denied during S3 operations. Check your permissions.")
                else:
                    pytest.fail(f"S3 operation failed: {e}")
            except Exception as e:
                pytest.fail(f"Unexpected error during S3 test: {e}")

            with open(download_path, "rb") as downloaded_file:
                downloaded_content = downloaded_file.read()

        # Asserted outside the try block so pytest reports it as a plain
        # assertion failure rather than an "unexpected error".
        assert downloaded_content == test_content, "Downloaded content doesn't match uploaded content"

    def test_s3_config_validation(self, use_s3, s3_config):
        """Check that the required S3 settings are present and non-empty."""
        import warnings

        if not use_s3:
            pytest.skip("S3 is disabled in configuration")

        # Check required configuration fields
        assert "bucket_name" in s3_config, "S3 bucket_name is required"
        assert s3_config["bucket_name"].strip(), "S3 bucket_name cannot be empty"

        # Check optional fields have reasonable defaults
        region_name = s3_config.get("region_name", "us-east-1")
        assert region_name.strip(), "S3 region_name cannot be empty"

        # A prefix without a trailing slash is not an error, only worth a
        # warning in the test report.
        prefix = s3_config.get("prefix", "")
        if prefix and not prefix.endswith("/"):
            warnings.warn(f"S3 prefix '{prefix}' should end with '/'", UserWarning)

    def test_s3_skipped_when_disabled(self, use_s3):
        """Pass when S3 is disabled; skip when it is enabled."""
        if use_s3:
            pytest.skip("S3 is enabled, this test is for disabled state")
        # Reaching this point means the flag is falsy, i.e. S3 is disabled.
        assert not use_s3

    def test_s3_handler_initialization(self, config):
        """Check that ``S3Handler`` picks up ``use_s3`` and the bucket name.

        When S3 is enabled but no bucket name is configured, the handler is
        expected to report itself as disabled; otherwise its ``enabled``
        attribute must equal the ``use_s3`` setting.
        """
        import sys

        # Make the project root importable without adding duplicate entries.
        project_root = str(Path(__file__).parent.parent)
        if project_root not in sys.path:
            sys.path.append(project_root)

        try:
            from modules.s3_handler import S3Handler
        except ImportError as e:
            pytest.fail(f"Could not import S3Handler class: {e}")

        try:
            s3_handler = S3Handler(config)
        except Exception as e:
            pytest.fail(f"Error testing S3Handler initialization: {e}")

        expected_enabled = config.get("use_s3", False)
        bucket_name = (config.get("s3") or {}).get("bucket_name", "")

        if expected_enabled and not bucket_name:
            # Checked on its own: combining it with the equality check below
            # would demand enabled to be both True and False at once.
            assert not s3_handler.enabled, "S3Handler should be disabled when bucket_name is missing"
            return

        assert s3_handler.enabled == expected_enabled, (
            "S3Handler enabled state should match config use_s3 setting"
        )
        if expected_enabled:
            assert s3_handler.bucket_name == bucket_name, "S3Handler should load bucket name from config"