import csv
import io

import scrapy


class ScrapeCsvSpider(scrapy.Spider):
    """Scrape a remote CSV file and emit each row as an item.

    ``start_urls`` holds the location of the CSV file; :meth:`parse` turns
    a response for it into one plain ``dict`` per non-empty data row.
    """

    name = "scrape-csv"
    start_urls = ["http://files.example.net:8000/data/products.csv"]

    def parse(self, response):
        """Parse the CSV response and yield a dictionary for each row.

        The dialect is sniffed from the first 2048 characters of the body,
        falling back to ``csv.excel`` when sniffing fails. The first CSV
        row provides the column names (``csv.DictReader`` default).

        Args:
            response: Response object whose ``text`` attribute holds the
                decoded CSV document.

        Yields:
            dict: Stripped column name mapped to the stripped cell value.
            Non-string values are passed through unchanged. Rows whose
            cleaned values are all empty are skipped.
        """
        # Remove any leading U+FEFF so a byte-order mark does not become
        # part of the first column name.
        text = response.text.lstrip("\ufeff")

        try:
            dialect = csv.Sniffer().sniff(text[:2048])
        except csv.Error:
            # Sniffing failed (e.g. empty or ambiguous sample): assume
            # the standard comma-separated dialect.
            dialect = csv.excel

        reader = csv.DictReader(io.StringIO(text), dialect=dialect)
        for row in reader:
            # DictReader collects surplus fields under the key ``None``;
            # those have no column name and are dropped.
            clean_row = {
                key.strip(): value.strip() if isinstance(value, str) else value
                for key, value in row.items()
                if key is not None
            }

            # Check emptiness AFTER cleaning, so whitespace-only rows and
            # rows with data only in unnamed surplus columns are skipped
            # instead of being emitted as all-empty items.
            if not any(clean_row.values()):
                continue

            yield clean_row