Fixed the English scraper

This commit is contained in:
George Khananaev
2025-06-02 23:22:19 +07:00
parent cbc4bfe72d
commit c4fa7ecd93

View File

@@ -585,6 +585,7 @@ class GoogleReviewsScraper:
# Get button text and attributes for verification # Get button text and attributes for verification
button_text = element.text.strip() if element.text else "" button_text = element.text.strip() if element.text else ""
button_aria = element.get_attribute("aria-label") or "" button_aria = element.get_attribute("aria-label") or ""
button_class = element.get_attribute("class") or ""
# Skip buttons that are clearly not sort buttons # Skip buttons that are clearly not sort buttons
negative_keywords = ["back", "next", "previous", "close", "cancel", "חזרה", "סגור", "ปิด"] negative_keywords = ["back", "next", "previous", "close", "cancel", "חזרה", "סגור", "ปิด"]
@@ -592,11 +593,24 @@ class GoogleReviewsScraper:
for keyword in negative_keywords): for keyword in negative_keywords):
continue continue
# Found a potential sort button # Positive detection for sort buttons
sort_button = element sort_keywords = ["sort", "Sort", "SORT", "סידור", "เรียง", "排序", "trier", "ordenar", "sortieren"]
log.info(f"Found sort button with selector: {selector}") has_sort_keyword = any(keyword in button_text or keyword in button_aria
log.info(f"Button text: '{button_text}', aria-label: '{button_aria}'") for keyword in sort_keywords)
break
# Check for common sort button classes
has_sort_class = "HQzyZ" in button_class or "sort" in button_class.lower()
# Check for aria attributes that indicate a dropdown
has_dropdown_attrs = (element.get_attribute("aria-haspopup") == "true" or
element.get_attribute("aria-expanded") is not None)
if has_sort_keyword or has_sort_class or has_dropdown_attrs:
# Found a potential sort button
sort_button = element
log.info(f"Found sort button with selector: {selector}")
log.info(f"Button text: '{button_text}', aria-label: '{button_aria}'")
break
except Exception as e: except Exception as e:
log.debug(f"Error checking element: {e}") log.debug(f"Error checking element: {e}")
continue continue
@@ -648,6 +662,32 @@ class GoogleReviewsScraper:
except: except:
continue continue
# Final fallback: look for any button in the reviews area that might open a dropdown
if not sort_button:
try:
# Look specifically in the reviews container area
reviews_container = driver.find_elements(By.CSS_SELECTOR, 'div.m6QErb, div.DxyBCb')
for container in reviews_container:
try:
# Find all buttons in this container
buttons = container.find_elements(By.TAG_NAME, 'button')
for button in buttons:
try:
if (button.is_displayed() and button.is_enabled() and
(button.get_attribute("aria-haspopup") == "true" or
"dropdown" in (button.get_attribute("class") or "").lower())):
sort_button = button
log.info("Found potential sort button via fallback dropdown detection")
break
except:
continue
if sort_button:
break
except:
continue
except Exception as e:
log.debug(f"Error in fallback sort button detection: {e}")
# Final check - do we have a sort button? # Final check - do we have a sort button?
if not sort_button: if not sort_button:
log.warning("No sort button found with any method - keeping default sort order") log.warning("No sort button found with any method - keeping default sort order")