import scrapy


class FeedSpider(scrapy.Spider):
    """Crawl the example.net JSON feed API, following its cursor pagination.

    The seed request is recorded as a curl command so the exact headers
    (Accept, Referer) observed in the browser are replayed verbatim.
    """

    name = "feed"
    allowed_domains = ["api.example.net"]

    # Captured curl invocation for the first page; parsed by Request.from_curl.
    curl_command = "curl 'https://api.example.net/feed?cursor=0' -H 'Accept: application/json' -H 'Referer: https://www.example.net/feed/'"

    # Per-spider throttling: autothrottle adapts between 0.25s and 10s,
    # with a 0.25s floor between requests.
    custom_settings = {
        "AUTOTHROTTLE_ENABLED": True,
        "AUTOTHROTTLE_START_DELAY": 0.25,
        "AUTOTHROTTLE_MAX_DELAY": 10.0,
        "DOWNLOAD_DELAY": 0.25,
    }

    async def start(self):
        """Yield the seed request reconstructed from the recorded curl command."""
        yield scrapy.Request.from_curl(self.curl_command, callback=self.parse_feed)

    def parse_feed(self, response):
        """Emit one item per feed entry, then follow the pagination cursor.

        Stops when the payload carries no ``next_cursor`` (explicit ``is not
        None`` check, so a cursor of 0 would still be followed).
        """
        data = response.json()

        for record in data.get("items", []):
            yield {"id": record.get("id"), "title": record.get("title")}

        cursor = data.get("next_cursor")
        if cursor is not None:
            # Clone the current request (keeping its headers) and swap only
            # the URL, so every page is fetched with the same curl headers.
            yield response.request.replace(
                url=f"https://api.example.net/feed?cursor={cursor}",
                callback=self.parse_feed,
            )