import json
from json import JSONDecodeError

import scrapy


class JsonldProductSpider(scrapy.Spider):
    """Extract ``Product`` records from JSON-LD blocks embedded in a page.

    Each ``<script type="application/ld+json">`` element of the response is
    parsed as JSON, and one item is yielded for every JSON-LD object whose
    ``@type`` is (or includes) ``"Product"``.
    """

    name = "jsonld_product"
    start_urls = ["https://shop.example.com/products/starter-plan"]

    def parse(self, response):
        """Yield one item dict per JSON-LD ``Product`` object in *response*.

        Args:
            response: The downloaded page. Only ``response.css(...)`` and
                ``response.url`` are used.

        Yields:
            dict: Keys ``name``, ``sku``, ``price``, ``currency``, ``url`` and
            ``jsonld_type``. Values absent from the markup are ``None``.
        """
        scripts = response.css(
            'script[type="application/ld+json"]::text'
        ).getall()
        for raw in scripts:
            for obj in self.iter_jsonld_objects(raw):
                if not self.is_target_type(obj, "Product"):
                    continue
                # "offers" may be a single object or an array of objects;
                # calling .get() on it directly crashes on the array form.
                offer = self._first_offer(obj.get("offers"))
                yield {
                    "name": obj.get("name"),
                    "sku": obj.get("sku"),
                    "price": offer.get("price"),
                    "currency": offer.get("priceCurrency"),
                    "url": response.url,
                    "jsonld_type": obj.get("@type"),
                }

    @staticmethod
    def _first_offer(offers):
        """Return the first offer dict found in *offers*, or ``{}`` if none.

        Accepts a single offer dict, a list of offers, or anything else
        (``None``, a string, ...), for which an empty dict is returned.
        """
        if isinstance(offers, dict):
            return offers
        if isinstance(offers, list):
            return next(
                (item for item in offers if isinstance(item, dict)), {}
            )
        return {}

    def iter_jsonld_objects(self, raw):
        """Yield every JSON-LD object (dict) contained in the script text.

        Args:
            raw: Text content of one JSON-LD ``<script>`` element.

        Yields:
            dict: For each top-level document (the payload itself, or each
            element of a top-level array): the dict members of its
            ``@graph`` list when it has one, otherwise the document itself.
            Non-dict entries are skipped. Nothing is yielded when *raw* is
            not valid JSON.
        """
        try:
            data = json.loads(raw.strip())
        except JSONDecodeError:
            # Best effort: a malformed block must not abort the whole page.
            return

        documents = data if isinstance(data, list) else [data]
        for document in documents:
            if not isinstance(document, dict):
                continue
            graph = document.get("@graph")
            if isinstance(graph, list):
                # A graph container carries its nodes in "@graph"; yield the
                # nodes rather than the container so they can be type-checked.
                yield from (node for node in graph if isinstance(node, dict))
            else:
                yield document

    def is_target_type(self, obj, target):
        """Return whether *obj*'s ``@type`` equals or contains *target*.

        Args:
            obj: A JSON-LD object (dict).
            target: The type name to look for, e.g. ``"Product"``.

        Returns:
            bool: ``True`` when ``@type`` is the string *target* or a list
            containing it; ``False`` otherwise, including when ``@type`` is
            missing or of another kind.
        """
        value = obj.get("@type")
        if isinstance(value, str):
            return value == target
        if isinstance(value, list):
            return target in value
        return False