import scrapy


def join_text(values):
    """Join text fragments into one space-separated string.

    Each fragment is stripped of surrounding whitespace, and fragments that
    are empty after stripping are dropped.
    """
    stripped = (value.strip() for value in values)
    return " ".join(text for text in stripped if text)


def _first_text(selector, query):
    """Return the first match of CSS *query* on *selector*, stripped.

    Returns "" when the query matches nothing.
    """
    return selector.css(query).get(default="").strip()


class JobsSpider(scrapy.Spider):
    """Crawl the paginated job listing and scrape each job's detail page."""

    name = "jobs"
    allowed_domains = ["careers.example.com"]
    start_urls = ["https://careers.example.com/jobs/"]

    def parse(self, response):
        """Parse a listing page.

        Yields one request per ``article.job`` card that has an ``a.detail``
        link (handled by :meth:`parse_job`), followed by a request for the
        ``a.next`` page, if present, handled by this method again.
        """
        for card in response.css("article.job"):
            href = card.css("a.detail::attr(href)").get()
            if not href:
                # Cards without a detail link cannot be followed.
                continue
            # The location shown on the card travels with the request so the
            # detail callback can use it when the detail page lacks one.
            yield response.follow(
                href,
                callback=self.parse_job,
                cb_kwargs={
                    "card_location": _first_text(card, "p.location::text"),
                },
            )

        next_href = response.css("a.next::attr(href)").get()
        if next_href:
            yield response.follow(next_href, callback=self.parse)

    def parse_job(self, response, card_location):
        """Parse a job detail page and yield a single item dict.

        Args:
            response: The detail page response.
            card_location: Location text taken from the listing card; used
                when the detail page yields no usable location text.

        Yields:
            A dict with the keys ``title``, ``team``, ``location``,
            ``employment_type``, ``description`` and ``url``.
        """
        # Fall back to the card's location not only when ``p.location`` is
        # missing, but also when its first text node is empty or whitespace
        # (``get(default=...)`` alone only covers the "no match" case).
        location = _first_text(response, "p.location::text")
        if not location:
            location = (card_location or "").strip()

        yield {
            "title": _first_text(response, "h1::text"),
            "team": _first_text(response, "p.team::text"),
            "location": location,
            "employment_type": _first_text(
                response, "p.employment-type::text"
            ),
            "description": join_text(
                response.css("div.job-description p::text").getall()
            ),
            "url": response.url,
        }