"""Scrapy spider that fetches chart bars for one symbol from the charts API."""

from datetime import date
import os
from urllib.parse import urlencode

import scrapy


class ChartSpider(scrapy.Spider):
    """Request chart bars for a symbol and date range and yield one item per bar.

    Spider arguments (all optional, passed to ``__init__``):

    * ``symbol``   -- ticker symbol; surrounding whitespace is stripped and the
      value is upper-cased.
    * ``interval`` -- bar interval string sent to the API as-is (stripped).
    * ``start`` / ``end`` -- ISO dates bounding the request.

    If the ``CHART_API_TOKEN`` environment variable is set, it is sent as a
    bearer token in the ``Authorization`` header.
    """

    name = "chart"
    allowed_domains = ["data.example.net"]
    api_url = "https://data.example.net/v1/charts"

    def __init__(
        self,
        symbol="MSFT",
        interval="1d",
        start="2026-04-01",
        end="2026-04-03",
        *args,
        **kwargs,
    ):
        """Normalize and store the spider arguments.

        Raises:
            ValueError: if ``start`` or ``end`` is not a valid ISO date.
        """
        super().__init__(*args, **kwargs)
        self.symbol = symbol.strip().upper()
        self.interval = interval.strip()
        self.start_date = self._parse_day(start, "start")
        self.end_date = self._parse_day(end, "end")

    def _parse_day(self, value, label):
        """Validate ``value`` as an ISO date and return it as an ISO string.

        Args:
            value: the date to validate; it is converted with ``str()`` first.
            label: argument name used in the error message.

        Raises:
            ValueError: if ``value`` cannot be parsed by ``date.fromisoformat``.
        """
        try:
            return date.fromisoformat(str(value)).isoformat()
        except ValueError as exc:
            raise ValueError(f"{label} must use YYYY-MM-DD") from exc

    def _headers(self):
        """Return the request headers, adding bearer auth when a token is set."""
        headers = {"Accept": "application/json"}
        api_token = os.getenv("CHART_API_TOKEN")
        if api_token:
            headers["Authorization"] = f"Bearer {api_token}"
        return headers

    async def start(self):
        """Yield the single API request built from the spider arguments."""
        params = urlencode(
            {
                "symbol": self.symbol,
                "interval": self.interval,
                "start": self.start_date,
                "end": self.end_date,
            }
        )
        yield scrapy.Request(
            url=f"{self.api_url}?{params}",
            headers=self._headers(),
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one item per bar found in the JSON response.

        The payload is expected to be a JSON object with a ``bars`` list. When
        it is not, an error is logged and nothing is yielded. Entries in
        ``bars`` that are not dicts are skipped. Fields missing from a bar are
        emitted as ``None``.

        Errors raised by ``response.json()`` itself are not caught here and
        propagate to Scrapy.
        """
        payload = response.json()

        # A top-level JSON value that is not an object (array, string, null...)
        # has no ``.get``; treat it the same as an object without ``bars``
        # instead of failing with AttributeError.
        bars = payload.get("bars") if isinstance(payload, dict) else None
        if not isinstance(bars, list):
            self.logger.error("Missing bars list")
            return

        # Prefer the symbol echoed by the API; fall back to the requested one.
        symbol = payload.get("symbol") or self.symbol
        for bar in bars:
            if not isinstance(bar, dict):
                continue
            yield {
                "symbol": symbol,
                "date": bar.get("date"),
                "open": bar.get("open"),
                "high": bar.get("high"),
                "low": bar.get("low"),
                "close": bar.get("close"),
                "volume": bar.get("volume"),
            }