From 877f9e046ce1b15ec549230e458c9a018d85d78d Mon Sep 17 00:00:00 2001
From: Eddie
Date: Mon, 6 Oct 2025 16:20:43 +0100
Subject: [PATCH] Code to Review Customer Ticket

---
 scrapeops_scrapy/core/api.py   | 18 +++++++++++++++++-
 scrapeops_scrapy/core/model.py | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/scrapeops_scrapy/core/api.py b/scrapeops_scrapy/core/api.py
index 30ff58b..2e45b75 100644
--- a/scrapeops_scrapy/core/api.py
+++ b/scrapeops_scrapy/core/api.py
@@ -198,10 +198,13 @@ def get(url, proxy=None, check=True):
 
     @staticmethod
     def post(url, body=None, files=None, proxy=None):
+        import logging
+        logger = logging.getLogger(__name__)
+
         proxies = None
         if ProxyNormalizer.unknown_proxy_scheme(proxy) is not True:
             proxies = {ProxyNormalizer.get_proxy_scheme(proxy): proxy}
-        for _ in range(SOPSRequest.RETRY_LIMIT):
+        for attempt in range(SOPSRequest.RETRY_LIMIT):
             try:
                 response = requests.post(url, json=body, timeout=SOPSRequest.TIMEOUT, files=files, proxies=proxies, headers={'api_key': SOPSRequest.API_KEY})
                 if response.status_code == 401:
@@ -212,13 +215,26 @@ def post(url, body=None, files=None, proxy=None):
                 else:
                     time.sleep(3)
                     raise ScrapeOpsAPIResponseError
+            except (TypeError, ValueError) as json_error:
+                # These are JSON serialization errors - don't retry, log and return immediately
+                logger.error(
+                    "ScrapeOps: Unable to send monitoring data due to non-serializable settings. "
+                    "Some of your Scrapy settings contain functions or other objects that cannot be "
+                    "converted to JSON format. To resolve this, add the problematic setting names "
+                    "to SCRAPEOPS_SETTINGS_EXCLUSION_LIST in your settings.py file. "
+                    f"Error details: {json_error}"
+                )
+                error = json_error
+                return None, str(error)  # Don't retry on serialization errors
             except requests.exceptions.ConnectionError as e:
+                logger.warning(f"ScrapeOps: Connection error (attempt {attempt+1}): {e}")
                 error = e
                 continue
             except ScrapeOpsAPIResponseError as e:
                 error = e
                 continue
             except Exception as e:
+                logger.error(f"ScrapeOps: Unexpected error (attempt {attempt+1}): {e}")
                 error = e
                 continue
         return None, str(error)
diff --git a/scrapeops_scrapy/core/model.py b/scrapeops_scrapy/core/model.py
index 2089589..98c0a6e 100644
--- a/scrapeops_scrapy/core/model.py
+++ b/scrapeops_scrapy/core/model.py
@@ -216,14 +216,43 @@ def check_spider_attributes(self, spider):
 
 
     def get_settings(self, spider):
+        import logging
+        logger = logging.getLogger(__name__)
+
         default_scrapy_settings = default_settings.__dict__
         full_settings = spider.settings.copy_to_dict()
         self.spider_settings = {}
+        non_serializable_settings = []
+
         for key, value in full_settings.items():
             if key not in default_scrapy_settings and self.include_setting(key):
-                self.spider_settings[key] = value
+                if self._is_serializable(value):
+                    self.spider_settings[key] = value
+                else:
+                    non_serializable_settings.append(key)
             elif default_scrapy_settings.get(key) != value and self.include_setting(key):
-                self.spider_settings[key] = value
+                if self._is_serializable(value):
+                    self.spider_settings[key] = value
+                else:
+                    non_serializable_settings.append(key)
+
+        # Log warning about non-serializable settings
+        if non_serializable_settings:
+            logger.warning(
+                f"ScrapeOps: Excluded {len(non_serializable_settings)} non-serializable settings from monitoring: "
+                f"{', '.join(non_serializable_settings)}. "
+                f"These settings contain functions or other objects that cannot be sent to ScrapeOps. "
+                f"To suppress this warning, add these setting names to SCRAPEOPS_SETTINGS_EXCLUSION_LIST in your settings.py file."
+            )
+
+    def _is_serializable(self, value):
+        """Check if a value can be JSON serialized."""
+        import json
+        try:
+            json.dumps(value)
+            return True
+        except (TypeError, ValueError):
+            return False
 
     def include_setting(self, key):
         exclusion_terms = ['API_KEY', 'APIKEY', 'SECRET_KEY', 'SECRETKEY', 'PASSWORD', 'CONNECTION_STRING']