Source code for qetch
# Copyright (c) 2018 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>
import inspect
from . import (extractors, downloaders,)
from .content import (Content,)
IGNORED_EXTRACTORS = (
extractors._common.BaseExtractor,
extractors.GenericExtractor,
)
IGNORED_DOWNLOADERS = (
downloaders._common.BaseDownloader,
)
[docs]def get_extractor(
url: str, init: bool=False,
*args, **kwargs
) -> extractors._common.BaseExtractor:
""" Gets the first extractor that can handle a given url.
Args:
url (str): The url that needs to be extracted
init (bool, optional): If True initializes the class, otherwise returns
the class
Returns:
extractors._common.BaseExtractor: The extractor that can
handle the url.
Examples:
Basic usage...
>>> import qetch
>>> extractor = qetch.get_extractor(GFYCAT_URL, init=True)
>>> print(extractor)
<GfycatExtractor "gfycat">
"""
for (extractor_name, extractor_class,) in inspect.getmembers(
extractors,
predicate=inspect.isclass
):
if extractor_class not in IGNORED_EXTRACTORS:
if extractor_class.can_handle(url):
return (
extractor_class
if not init else
extractor_class(*args, **kwargs)
)
# if no extractor can handle, just return GenericExtractor
return (
extractors.GenericExtractor
if not init else
extractor_class.GenericExtractor(*args, **kwargs)
)
[docs]def get_downloader(
content: Content, init: bool=False,
*args, **kwargs
) -> downloaders._common.BaseDownloader:
""" Gets the first downloader that can handle a given content.
Args:
content (Content): The content that needs to be downloaded
init (bool, optional): If True initializes the class, otherwise
returns the class
Returns:
downloaders._common.BaseDownloader: The downloader that can handle the
content.
Examples:
Basic usage...
>>> import qetch
>>> content = next(qetch.get_extractor(GFYCAT_URL, init=True)
... .extract(GFYCAT_URL))[0]
>>> downloader = qetch.get_downloader(content, init=True)
>>> print(downloader)
<HTTPDownloader at 0xABCDEF1234567890>
"""
for (downloader_name, downloader_class,) in inspect.getmembers(
downloaders,
predicate=inspect.isclass
):
if downloader_class not in IGNORED_DOWNLOADERS:
if downloader_class.can_handle(content):
return (
downloader_class
if not init else
downloader_class(*args, **kwargs)
)