# Source code for qetch.content

# Copyright (c) 2018 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>

import datetime
from typing import (Any, List, Dict,)

from .extractors._common import (BaseExtractor,)

import furl


class Content(object):
    """ The resulting content instance yielded by extractors.

    Every attribute is exposed through a validating property. A setter only
    stores the given value when it passes that property's type/emptiness
    check; any other value is silently ignored and the previously stored
    value (or the property's default) is kept.
    """

    def __init__(
        self, uid: str, source: str, fragments: List[str],
        extractor: BaseExtractor, extension: str=None,
        title: str=None, description: str=None, quality: float=0.0,
        uploaded_by: str=None, uploaded_date: datetime.datetime=None,
        metadata: Dict[str, Any]=None
    ):
        """ Initializes the Content instance.

        Args:
            uid (str): The unique id for the discovered content.
            source (str): The source url given to the extractor.
            fragments (list[str]): A list of urls which represent the raw
                content.
            extractor (BaseExtractor): The extractor which discovered the
                content.
            extension (str, optional): The extension for the resulting file.
            title (str, optional): A title for the content.
            description (str, optional): A description for the content.
            quality (float, optional): A level of quality for the content in
                relation to other content with the same source.
            uploaded_by (str, optional): A string of the uploader's name.
            uploaded_date (datetime.datetime, optional): A datetime instance
                for when the content was uploaded.
            metadata (dict[str,....], optional): Any additional metadata about
                the discovered content. Defaults to a new empty dictionary
                for each instance.
        """

        # Each assignment goes through the matching property setter, so a
        # value failing validation leaves that property at its default.
        self.uid = uid
        self.source = source
        self.fragments = fragments
        self.extractor = extractor
        self.extension = extension
        self.title = title
        self.description = description
        self.quality = quality
        self.uploaded_by = uploaded_by
        self.uploaded_date = uploaded_date
        # A fresh dict per instance: the setter keeps a reference to whatever
        # it is given, so a shared ``{}`` default would leak between instances.
        self.metadata = {} if metadata is None else metadata

    def __repr__(self):
        """ Returns a string representation of the content.

        Returns:
            str: The string representation of the instance.
        """

        return (
            f'<{self.__class__.__name__} ({self.quality}) '
            f'"{self.uid}.{self.extension}">'
        )

    @property
    def uid(self) -> str:
        """ The unique id of the discovered content.

        Assigning anything other than a non-empty string is ignored.

        Returns:
            str: The unique id of the discovered content, or None if no valid
                value has been assigned.
        """

        if not hasattr(self, '_uid'):
            self._uid = None
        return self._uid

    @uid.setter
    def uid(self, uid: str):
        if isinstance(uid, str) and len(uid) > 0:
            self._uid = uid

    @property
    def source(self) -> furl.furl:
        """ The given source url from where the content came from.

        Assigning a non-empty string stores it wrapped in a ``furl.furl``
        instance; any other value is ignored.

        Returns:
            furl.furl: The given source url from where the content came from,
                or None if no valid value has been assigned.
        """

        if not hasattr(self, '_source'):
            self._source = None
        return self._source

    @source.setter
    def source(self, url: str):
        if isinstance(url, str) and len(url) > 0:
            self._source = furl.furl(url=url)

    @property
    def fragments(self) -> List[str]:
        """ A list of urls which represent the raw content.

        Assigning anything other than a non-empty list made up entirely of
        strings is ignored.

        Returns:
            list[str]: A list of urls which represent the raw content, or an
                empty list if no valid value has been assigned.
        """

        if not hasattr(self, '_fragments'):
            self._fragments = []
        return self._fragments

    @fragments.setter
    def fragments(self, fragments: List[str]):
        if isinstance(fragments, list) and len(fragments) > 0 and \
                all(isinstance(fragment, str) for fragment in fragments):
            self._fragments = fragments

    @property
    def extractor(self) -> BaseExtractor:
        """ The extractor which discovered the content.

        Assigning anything other than a ``BaseExtractor`` instance is ignored.

        Returns:
            BaseExtractor: The extractor which discovered the content, or
                None if no valid value has been assigned.
        """

        if not hasattr(self, '_extractor'):
            self._extractor = None
        return self._extractor

    @extractor.setter
    def extractor(self, extractor: BaseExtractor):
        if isinstance(extractor, BaseExtractor):
            self._extractor = extractor

    @property
    def extension(self) -> str:
        """ The extension of the resulting content.

        Assigning anything other than a non-empty string is ignored.

        Returns:
            str: The extension for the resulting content, or None if no valid
                value has been assigned.
        """

        if not hasattr(self, '_extension'):
            self._extension = None
        return self._extension

    @extension.setter
    def extension(self, extension: str):
        if isinstance(extension, str) and len(extension) > 0:
            self._extension = extension

    @property
    def title(self) -> str:
        """ The title of the content.

        Assigning anything other than a non-empty string is ignored.

        Returns:
            str: The title of the content, or None if no valid value has been
                assigned.
        """

        if not hasattr(self, '_title'):
            self._title = None
        return self._title

    @title.setter
    def title(self, title: str):
        if isinstance(title, str) and len(title) > 0:
            self._title = title

    @property
    def description(self) -> str:
        """ The description of the content.

        Assigning anything other than a non-empty string is ignored.

        Returns:
            str: The description of the content, or None if no valid value
                has been assigned.
        """

        if not hasattr(self, '_description'):
            self._description = None
        return self._description

    @description.setter
    def description(self, description: str):
        if isinstance(description, str) and len(description) > 0:
            self._description = description

    @property
    def quality(self) -> float:
        """ The contextual quality for the current content.

        Assigning anything other than a float within ``0.0 <= quality <= 1.0``
        is ignored; note that integers are not accepted.

        Returns:
            float: The contextual quality for the current content, or 0.0 if
                no valid value has been assigned.
        """

        if not hasattr(self, '_quality'):
            self._quality = 0.0
        return self._quality

    @quality.setter
    def quality(self, quality: float):
        if isinstance(quality, float) and 0.0 <= quality <= 1.0:
            self._quality = quality

    @property
    def uploaded_by(self) -> str:
        """ A string of the uploader's name.

        Assigning anything other than a non-empty string is ignored.

        Returns:
            str: A string of the uploader's name, or None if no valid value
                has been assigned.
        """

        if not hasattr(self, '_uploaded_by'):
            self._uploaded_by = None
        return self._uploaded_by

    @uploaded_by.setter
    def uploaded_by(self, uploaded_by: str):
        if isinstance(uploaded_by, str) and len(uploaded_by) > 0:
            self._uploaded_by = uploaded_by

    @property
    def uploaded_date(self) -> datetime.datetime:
        """ The datetime the content was uploaded.

        Assigning anything other than a ``datetime.datetime`` instance is
        ignored.

        Returns:
            datetime.datetime: The datetime the content was uploaded, or None
                if no valid value has been assigned.
        """

        if not hasattr(self, '_uploaded_date'):
            self._uploaded_date = None
        return self._uploaded_date

    @uploaded_date.setter
    def uploaded_date(self, uploaded_date: datetime.datetime):
        if isinstance(uploaded_date, datetime.datetime):
            self._uploaded_date = uploaded_date

    @property
    def metadata(self) -> Dict[str, Any]:
        """ Any metadata for the current content.

        Assigning anything other than a dictionary is ignored. The given
        dictionary is stored by reference, not copied.

        Returns:
            dict[str,....]: Any metadata for the current content, or an empty
                dictionary if no valid value has been assigned.
        """

        if not hasattr(self, '_metadata'):
            self._metadata = {}
        return self._metadata

    @metadata.setter
    def metadata(self, metadata: Dict[str, Any]):
        if isinstance(metadata, dict):
            self._metadata = metadata

    def get_size(self) -> int:
        """ Returns the sum of the length of the fragments.

        Sends one HEAD request per fragment through the extractor's session
        and adds up the integer values of the ``Content-Length`` response
        headers. With no fragments, no requests are made and 0 is returned.

        Returns:
            int: The sum of the length of the fragments.

        Raises:
            AttributeError: If no valid extractor has been assigned (the
                extractor is None) while fragments are present.
        """

        # NOTE(review): a response without a ``Content-Length`` header makes
        # the subscript below fail (presumably with a KeyError -- confirm
        # against the session's header mapping type).
        return sum(
            int(self.extractor.session.head(
                fragment
            ).headers['Content-Length'])
            for fragment in self.fragments
        )
# Source code for qetch.content
#
# Table Of Contents
#
# About
#
# Useful Links
#
# Author's Projects
#
# Related Topics