# Source code for city_scrapers_core.spiders.tribe

import json
from datetime import datetime
from typing import Dict, Iterable, List, Union

import scrapy

from city_scrapers_core.constants import NOT_CLASSIFIED
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider


class EventsCalendarSpider(CityScrapersSpider):
    """Subclass of CityScrapersSpider that may be useful
    for WordPress sites with the Events Calendar plugin.

    Three additional things need to be implemented when subclassing:
        1. a categories dict
        2. _parse_location()
        3. _parse_links()
    """

    @property
    def categories(self) -> Dict:
        """Mapping of classification constants to lists of category slugs.

        categories dict should be of the following format:
        categories = {
            BOARD: ["category-1", "category-2"],
            ..
        }

        Raises NotImplementedError until a subclass assigns the field.
        """
        raise NotImplementedError("Must assign categories field")

    def _parse_location(self, item: Dict) -> Dict:
        """Build the meeting location from an event dict.

        Must be implemented by the subclass.
        """
        raise NotImplementedError("Must implement _parse_location")

    def _parse_links(self, item: Dict) -> List[Dict]:
        """Build the list of meeting links from an event dict.

        Must be implemented by the subclass.
        """
        raise NotImplementedError("Must implement _parse_links")

    def parse(
        self, response: scrapy.http.Response
    ) -> Iterable[Union[Meeting, scrapy.Request]]:
        """Parse one page of the JSON events feed.

        Yields a Meeting for every event whose categories map to a known
        classification, then a follow-up request for the next page when the
        payload carries a non-empty "next_rest_url".
        """
        res = json.loads(response.text)
        # A payload without an "events" key is treated as an empty page.
        for item in res.get("events", []):
            classification = self._parse_classification(item)
            # Events that match none of the configured categories are skipped.
            if classification == NOT_CLASSIFIED:
                continue
            meeting = Meeting(
                title=item["title"],
                description=item["description"],
                # Reuse the value computed above instead of classifying twice.
                classification=classification,
                start=self._parse_start(item["start_date_details"]),
                end=self._parse_end(item["end_date_details"]),
                all_day=item["all_day"],
                time_notes="",
                location=self._parse_location(item),
                links=self._parse_links(item),
                source=self._parse_source(item),
            )

            # _get_status and _get_id are not defined in this class;
            # presumably inherited from CityScrapersSpider.
            meeting["status"] = self._get_status(meeting)
            meeting["id"] = self._get_id(meeting)

            yield meeting

        # Only paginate when the URL is actually set: a key that is present
        # but null or empty must not be passed to response.follow().
        next_url = res.get("next_rest_url")
        if next_url:
            yield response.follow(next_url, callback=self.parse)

    def _parse_classification(self, item: Dict) -> str:
        """Parse classification from categories dict,
        which needs to be specified in the subclass.

        Returns the first classification whose slug list contains the slug of
        one of the event's categories, or NOT_CLASSIFIED when nothing matches
        or the event has no categories.
        """
        event_categories = item.get("categories") or []
        if not event_categories:
            return NOT_CLASSIFIED
        # Read the subclass-provided mapping once rather than on every pass
        # of the nested loops.
        known_categories = self.categories
        for category in event_categories:
            for classification, slugs in known_categories.items():
                if category["slug"] in slugs:
                    return classification
        return NOT_CLASSIFIED

    @staticmethod
    def _parse_datetime(details: Dict) -> datetime:
        """Build a naive datetime from a date-details dict.

        The dict must provide "year", "month", "day", "hour", "minutes" and
        "seconds" values that int() can convert.
        """
        return datetime(
            int(details["year"]),
            int(details["month"]),
            int(details["day"]),
            int(details["hour"]),
            int(details["minutes"]),
            int(details["seconds"]),
        )

    def _parse_start(self, item: Dict) -> datetime:
        """Parse the start datetime from the event's start date details."""
        return self._parse_datetime(item)

    def _parse_end(self, item: Dict) -> datetime:
        """Parse the end datetime from the event's end date details."""
        return self._parse_datetime(item)

    def _parse_source(self, item: Dict) -> str:
        """Pulls specific meeting URL if available,
        otherwise defaults to the general page."""
        return item.get("url") or self.start_urls[0]