Source code for intergov.processors.obj_spider

import random
import time

from intergov.conf import env, env_s3_config, env_queue_config
from intergov.domain.jurisdiction import Jurisdiction
from intergov.repos.object_lake import ObjectLakeRepo
from intergov.repos.object_retrieval import ObjectRetrievalRepo
from intergov.repos.object_acl import ObjectACLRepo
from intergov.use_cases import RetrieveAndStoreForeignDocumentsUseCase

from intergov.loggers import logging

logger = logging.getLogger('obj_spider')


class ObjectSpider(object):
    """
    Iterate over the RetrieveAndStoreForeignDocumentsUseCase.

    Building an instance wires up three repositories (object lake,
    object retrieval and object ACL) from environment-driven configuration
    and hands them to the use case. The instance is its own iterator:
    every step runs the use case once and yields its result.

    The iterator never raises ``StopIteration``, so a ``for`` loop over an
    ``ObjectSpider`` runs until the process is stopped. A step that failed
    yields ``None`` instead of propagating the error.
    """

    def __init__(self):
        # Order matters: repos are built from the confs, and the use case
        # is built from the repos.
        self._prepare_repos_confs()
        self._prepare_repos()
        self._prepare_use_case()

    def _prepare_repos_confs(self):
        """Load the configuration of every repository into ``self.repo_conf``."""
        self.repo_conf = {
            'object_lake': env_s3_config('PROC_OBJ_SPIDER_OBJ_LAKE'),
            'object_retrieval': env_queue_config('PROC_OBJ_SPIDER_OBJ_RETRIEVAL'),
            'object_acl': env_s3_config('PROC_OBJ_SPIDER_OBJ_ACL'),
        }

    def _prepare_repos(self):
        """Instantiate the repositories from ``self.repo_conf`` into ``self.repos``."""
        # The keys double as keyword-argument names of the use case
        # constructor (see ``_prepare_use_case``), so they must not change.
        self.repos = {
            'object_lake_repo': ObjectLakeRepo(self.repo_conf['object_lake']),
            'object_retrieval_repo': ObjectRetrievalRepo(self.repo_conf['object_retrieval']),
            'object_acl_repo': ObjectACLRepo(self.repo_conf['object_acl']),
        }

    def _prepare_use_case(self):
        """Create the use case for the configured jurisdiction ('AU' by default)."""
        self.use_case = RetrieveAndStoreForeignDocumentsUseCase(
            jurisdiction=Jurisdiction(env("IGL_JURISDICTION", default='AU')),
            **self.repos
        )

    def __iter__(self):
        """Log the start of the spider and return ``self`` as the iterator."""
        logger.info("Starting the Object Spider")
        return self

    def __next__(self):
        """
        Run the use case once.

        Returns whatever ``use_case.execute()`` returns, or ``None`` when it
        raised; the exception is logged with its traceback and swallowed so
        that a single failing step does not stop the loop.
        """
        try:
            result = self.use_case.execute()
        except Exception as e:
            # Deliberate best-effort boundary: log and keep iterating.
            logger.exception(e)
            result = None
        return result
if __name__ == '__main__':  # pragma: no cover
    # To start it manually, from the base dir:
    # PYTHONPATH="`pwd`" python intergov/processors/obj_spider/__init__.py
    for result in ObjectSpider():
        # A None result comes from a step that produced nothing or failed;
        # pause for a random 1-5 seconds before polling again.
        if result is None:
            time.sleep(random.randint(1, 5))