mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 05:08:28 +03:00
More manifests, base modules and rename from archiver to extractor.
This commit is contained in:
@@ -1,17 +1,12 @@
|
||||
import os
|
||||
import mimetypes
|
||||
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core.context import ArchivingContext
|
||||
from auto_archiver.archivers.archiver import Archiver
|
||||
from auto_archiver.base_modules.extractor import Extractor
|
||||
from auto_archiver.core.metadata import Metadata, Media
|
||||
from .dropin import GenericDropin, InfoExtractor
|
||||
|
||||
class Bluesky(GenericDropin):
|
||||
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Archiver, url: str) -> Metadata:
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
||||
result = Metadata()
|
||||
result.set_url(url)
|
||||
result.set_title(post["record"]["text"])
|
||||
@@ -42,7 +37,7 @@ class Bluesky(GenericDropin):
|
||||
|
||||
|
||||
|
||||
def _download_bsky_embeds(self, post: dict, archiver: Archiver) -> list[Media]:
|
||||
def _download_bsky_embeds(self, post: dict, archiver: Extractor) -> list[Media]:
|
||||
"""
|
||||
Iterates over image(s) or video in a Bluesky post and downloads them
|
||||
"""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from auto_archiver.core.metadata import Metadata
|
||||
from auto_archiver.archivers.archiver import Archiver
|
||||
from auto_archiver.base_modules.extractor import Extractor
|
||||
|
||||
class GenericDropin:
|
||||
"""Base class for dropins for the generic extractor.
|
||||
@@ -30,7 +30,7 @@ class GenericDropin:
|
||||
raise NotImplementedError("This method should be implemented in the subclass")
|
||||
|
||||
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Archiver, url: str) -> Metadata:
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
||||
"""
|
||||
This method should create a Metadata object from the post data.
|
||||
"""
|
||||
|
||||
@@ -5,10 +5,10 @@ from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.archivers.archiver import Archiver
|
||||
from auto_archiver.base_modules.extractor import Extractor
|
||||
from ...core import Metadata, Media, ArchivingContext
|
||||
|
||||
class GenericExtractor(Archiver):
|
||||
class GenericExtractor(Extractor):
|
||||
name = "youtubedl_archiver" #left as is for backwards compat
|
||||
_dropins = {}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ from typing import Type
|
||||
|
||||
from auto_archiver.utils import traverse_obj
|
||||
from auto_archiver.core.metadata import Metadata, Media
|
||||
from auto_archiver.archivers.archiver import Archiver
|
||||
from auto_archiver.base_modules.extractor import Extractor
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
from dateutil.parser import parse as parse_dt
|
||||
@@ -19,7 +19,7 @@ class Truth(GenericDropin):
|
||||
def skip_ytdlp_download(self, url, ie_instance: Type[InfoExtractor]) -> bool:
|
||||
return True
|
||||
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Archiver, url: str) -> Metadata:
|
||||
def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
||||
"""
|
||||
Creates metadata from a truth social post
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from slugify import slugify
|
||||
|
||||
from auto_archiver.core.metadata import Metadata, Media
|
||||
from auto_archiver.utils import UrlUtil
|
||||
from auto_archiver.archivers.archiver import Archiver
|
||||
from auto_archiver.base_modules.extractor import Extractor
|
||||
|
||||
from .dropin import GenericDropin, InfoExtractor
|
||||
|
||||
@@ -32,7 +32,7 @@ class Twitter(GenericDropin):
|
||||
twid = ie_instance._match_valid_url(url).group('id')
|
||||
return ie_instance._extract_status(twid=twid)
|
||||
|
||||
def create_metadata(self, tweet: dict, ie_instance: InfoExtractor, archiver: Archiver, url: str) -> Metadata:
|
||||
def create_metadata(self, tweet: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
||||
result = Metadata()
|
||||
try:
|
||||
if not tweet.get("user") or not tweet.get("created_at"):
|
||||
|
||||
Reference in New Issue
Block a user