# Source code for city_scrapers_core.spiders.tribe

import json
from datetime import datetime
from typing import Dict, Iterable, List

import scrapy

from city_scrapers_core.constants import NOT_CLASSIFIED
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider


class EventsCalendarSpider(CityScrapersSpider):
    """Subclass of CityScrapersSpider that may be useful for WordPress sites with the
    Events Calendar plugin.

    Three additional things need to be implemented when subclassing:

    1. a ``categories`` dict
    2. ``_parse_location()``
    3. ``_parse_links()``
    """

    @property
    def categories(self) -> Dict:
        """Mapping from classification constant to the category slugs it covers.

        categories dict should be of the following format::

            categories = {
                BOARD: ["category-1", "category-2"],
                ...
            }

        :raises NotImplementedError: until the subclass provides the mapping
        """
        raise NotImplementedError("Must assign categories field")

    def _parse_location(self, item: Dict) -> Dict:
        """Build the location for one event; must be implemented by the subclass.

        :param item: a single entry of the ``"events"`` list in the JSON response
        :raises NotImplementedError: until overridden
        """
        raise NotImplementedError("Must implement _parse_location")

    def _parse_links(self, item: Dict) -> List[Dict]:
        """Build the links for one event; must be implemented by the subclass.

        :param item: a single entry of the ``"events"`` list in the JSON response
        :raises NotImplementedError: until overridden
        """
        raise NotImplementedError("Must implement _parse_links")

    def parse(self, response: scrapy.http.Response) -> Iterable[scrapy.Request]:
        """Parse one page of JSON results into meetings and follow pagination.

        Events whose classification resolves to ``NOT_CLASSIFIED`` are skipped.
        NOTE(review): besides the follow-up request this generator also yields
        ``Meeting`` items, so the return annotation is narrower than reality.

        :param response: response whose text is a JSON object with an ``"events"``
            list and, optionally, a ``"next_rest_url"`` for the next page
        :return: generator of ``Meeting`` items, followed by at most one request
            for the next page
        """
        res = json.loads(response.text)
        for item in res["events"]:
            classification = self._parse_classification(item)
            if classification == NOT_CLASSIFIED:
                continue
            meeting = Meeting(
                title=item["title"],
                description=item["description"],
                # Reuse the value computed for the filter above rather than
                # classifying the same event a second time.
                classification=classification,
                start=self._parse_start(item["start_date_details"]),
                end=self._parse_end(item["end_date_details"]),
                all_day=item["all_day"],
                time_notes="",
                location=self._parse_location(item),
                links=self._parse_links(item),
                source=self._parse_source(item),
            )
            # Status and id are computed from the assembled meeting, so they are
            # assigned only after every other field is in place.
            meeting["status"] = self._get_status(meeting)
            meeting["id"] = self._get_id(meeting)
            yield meeting

        # Check the value, not just the key: an empty or null URL must end
        # pagination instead of being handed to ``response.follow``.
        next_url = res.get("next_rest_url")
        if next_url:
            yield response.follow(next_url, callback=self.parse)

    def _parse_classification(self, item: Dict) -> str:
        """Parse classification from categories dict, which needs to be specified in
        the subclass.

        :param item: event dict; its ``"categories"`` entry is a list of dicts that
            each carry a ``"slug"``
        :return: the first classification whose slug list contains one of the
            event's category slugs, or ``NOT_CLASSIFIED`` when nothing matches or
            the event has no categories
        """
        # Read the property once instead of on every inner-loop iteration.
        categories = self.categories
        for category in item.get("categories") or []:
            slug = category["slug"]
            for classification, slugs in categories.items():
                if slug in slugs:
                    return classification
        return NOT_CLASSIFIED

    @staticmethod
    def _parse_datetime(details: Dict) -> datetime:
        """Build a naive datetime from a ``*_date_details`` dict.

        :param details: dict with ``year``, ``month``, ``day``, ``hour``,
            ``minutes`` and ``seconds`` entries, each convertible with ``int()``
        :return: naive datetime assembled from those six components
        """
        parts = ("year", "month", "day", "hour", "minutes", "seconds")
        return datetime(*(int(details[part]) for part in parts))

    def _parse_start(self, item: Dict) -> datetime:
        """Return the start of the event as a naive datetime.

        :param item: the event's ``"start_date_details"`` dict
        """
        return self._parse_datetime(item)

    def _parse_end(self, item: Dict) -> datetime:
        """Return the end of the event as a naive datetime.

        :param item: the event's ``"end_date_details"`` dict
        """
        return self._parse_datetime(item)

    def _parse_source(self, item: Dict) -> str:
        """Pulls specific meeting URL if available, otherwise defaults to the general
        page.

        :param item: event dict, optionally carrying a ``"url"`` entry
        :return: the event's own URL, or the first of ``self.start_urls`` when the
            URL is missing or empty
        """
        return item.get("url") or self.start_urls[0]