Source code for city_scrapers_core.extensions.azure_storage

from scrapy.extensions.feedexport import BlockingFeedStorage


class AzureBlobFeedStorage(BlockingFeedStorage):
    """
    Subclass of :class:`scrapy.extensions.feedexport.BlockingFeedStorage` for
    writing scraper results to Azure Blob Storage.

    :param uri: Azure Blob Storage URL including an account name, credentials,
                container, and filename
    """

    def __init__(self, uri: str):
        from azure.storage.blob import ContainerClient

        # Parse a URI of the form:
        #   azure://<account name>:<account key>@<container>/<path/to/blob>
        container = uri.split("@")[1].split("/")[0]
        filename = "/".join(uri.split("@")[1].split("/")[1::])
        account_name, account_key = uri[8::].split("@")[0].split(":")

        self.account_name = account_name
        self.account_key = account_key
        self.container = container
        self.filename = filename

        self.container_client = ContainerClient(
            f"{self.account_name}.blob.core.windows.net",
            self.container,
            credential=self.account_key,
        )

    def _store_in_thread(self, file):
        # Rewind the buffered feed file and upload it, replacing any existing blob
        file.seek(0)
        self.container_client.upload_blob(self.filename, file, overwrite=True)
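
As a usage sketch (not part of this module), the backend could be registered as a Scrapy feed storage for an "azure" URI scheme in a project's settings. The account name, account key, container, and output path below are placeholders, and the exact feed settings (FEEDS versus the older FEED_URI/FEED_FORMAT pair) depend on the Scrapy version in use.

# settings.py -- a minimal sketch, assuming AzureBlobFeedStorage is importable
# at the path shown; the credentials and container below are hypothetical.
FEED_STORAGES = {
    "azure": "city_scrapers_core.extensions.azure_storage.AzureBlobFeedStorage",
}

# The storage URI matches the format parsed in __init__ above:
#   azure://<account name>:<account key>@<container>/<path/to/blob>
FEEDS = {
    "azure://myaccount:myaccountkey@scraper-results/%(name)s.json": {
        "format": "json",
    },
}

With this configuration, Scrapy buffers each spider's feed output and hands the finished file to AzureBlobFeedStorage, which uploads it to the named container once the crawl completes.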