From a0c4a828257fadcf9f44a139f0d581e9b93d6064 Mon Sep 17 00:00:00 2001 From: Patrick Robertson Date: Wed, 12 Feb 2025 11:32:13 +0000 Subject: [PATCH] Improved docstrings for base modules --- src/auto_archiver/core/database.py | 10 ++++++++++ src/auto_archiver/core/enricher.py | 15 ++++++++++++--- src/auto_archiver/core/extractor.py | 14 ++++++++++++-- src/auto_archiver/core/feeder.py | 18 +++++++++++++++++- src/auto_archiver/core/formatter.py | 17 ++++++++++++++++- src/auto_archiver/core/storage.py | 22 ++++++++++++++++++++-- 6 files changed, 87 insertions(+), 9 deletions(-) diff --git a/src/auto_archiver/core/database.py b/src/auto_archiver/core/database.py index 0eb5d81..a6e76e5 100644 --- a/src/auto_archiver/core/database.py +++ b/src/auto_archiver/core/database.py @@ -1,3 +1,8 @@ +""" +Database module for the auto-archiver that defines the interface for implementing database modules +in the media archiving framework. +""" + from __future__ import annotations from abc import abstractmethod from typing import Union @@ -5,6 +10,11 @@ from typing import Union from auto_archiver.core import Metadata, BaseModule class Database(BaseModule): + """ + Base class for implementing database modules in the media archiving framework. + + Subclasses must implement the `fetch` and `done` methods to define platform-specific behavior. + """ def started(self, item: Metadata) -> None: """signals the DB that the given item archival has started""" diff --git a/src/auto_archiver/core/enricher.py b/src/auto_archiver/core/enricher.py index 0e50fa9..45e75d7 100644 --- a/src/auto_archiver/core/enricher.py +++ b/src/auto_archiver/core/enricher.py @@ -1,5 +1,5 @@ """ -Enrichers are modular components that enhance archived content by adding +Base module for Enrichers – modular components that enhance archived content by adding context, metadata, or additional processing. These add additional information to the context, such as screenshots, hashes, and metadata. 
@@ -13,7 +13,16 @@ from abc import abstractmethod from auto_archiver.core import Metadata, BaseModule class Enricher(BaseModule): - """Base classes and utilities for enrichers in the Auto-Archiver system.""" + """Base classes and utilities for enrichers in the Auto-Archiver system. + + Enricher modules must implement the `enrich` method to define their behavior. + """ @abstractmethod - def enrich(self, to_enrich: Metadata) -> None: pass + def enrich(self, to_enrich: Metadata) -> None: + """ + Enriches a Metadata object with additional information or context. + + Takes the metadata object to enrich as an argument and modifies it in place, returning None. + """ + pass diff --git a/src/auto_archiver/core/extractor.py b/src/auto_archiver/core/extractor.py index 794c06c..3b05ef7 100644 --- a/src/auto_archiver/core/extractor.py +++ b/src/auto_archiver/core/extractor.py @@ -29,14 +29,24 @@ class Extractor(BaseModule): valid_url: re.Pattern = None def cleanup(self) -> None: - # called when extractors are done, or upon errors, cleanup any resources + """ + Called when extractors are done, or upon errors, to clean up any resources. + """ pass def sanitize_url(self, url: str) -> str: - # used to clean unnecessary URL parameters OR unfurl redirect links + """ + Used to clean unnecessary URL parameters OR unfurl redirect links + """ return url def match_link(self, url: str) -> re.Match: + """ + Returns a match object if the given URL matches the valid_url pattern or None if not. + + Normally used in the `suitable` method to check if the URL is supported by this extractor. + + """ return self.valid_url.match(url) def suitable(self, url: str) -> bool: diff --git a/src/auto_archiver/core/feeder.py b/src/auto_archiver/core/feeder.py index 352cfd9..e8302e6 100644 --- a/src/auto_archiver/core/feeder.py +++ b/src/auto_archiver/core/feeder.py @@ -1,3 +1,7 @@ +""" +The feeder base module defines the interface for implementing feeders in the media archiving framework. 
+""" + from __future__ import annotations from abc import abstractmethod from auto_archiver.core import Metadata @@ -5,5 +9,17 @@ from auto_archiver.core import BaseModule class Feeder(BaseModule): + """ + Base class for implementing feeders in the media archiving framework. + + Subclasses must implement the `__iter__` method to define platform-specific behavior. + """ + @abstractmethod - def __iter__(self) -> Metadata: return None \ No newline at end of file + def __iter__(self) -> Metadata: + """ + Returns an iterator (use `yield`) over the items to be archived. + + These should be instances of Metadata, typically created with Metadata().set_url(url). + """ + return None \ No newline at end of file diff --git a/src/auto_archiver/core/formatter.py b/src/auto_archiver/core/formatter.py index cf27cb3..3bfc250 100644 --- a/src/auto_archiver/core/formatter.py +++ b/src/auto_archiver/core/formatter.py @@ -1,9 +1,24 @@ +""" +Base module for formatters – modular components that format metadata into media objects for storage. + +The most commonly used formatter is the HTML formatter, which takes metadata and formats it into an HTML file for storage. +""" + from __future__ import annotations from abc import abstractmethod from auto_archiver.core import Metadata, Media, BaseModule class Formatter(BaseModule): + """ + Base class for implementing formatters in the media archiving framework. + + Subclasses must implement the `format` method to define their behavior. + """ @abstractmethod - def format(self, item: Metadata) -> Media: return None \ No newline at end of file + def format(self, item: Metadata) -> Media: + """ + Formats a Metadata object into a user-viewable format (e.g. HTML) and stores it if needed. 
+ """ + return None \ No newline at end of file diff --git a/src/auto_archiver/core/storage.py b/src/auto_archiver/core/storage.py index 5dfa39d..15d4705 100644 --- a/src/auto_archiver/core/storage.py +++ b/src/auto_archiver/core/storage.py @@ -1,3 +1,7 @@ +""" +Base module for Storage modules – modular components that store media objects in various locations. +""" + from __future__ import annotations from abc import abstractmethod from typing import IO @@ -12,6 +16,12 @@ from auto_archiver.core import Media, BaseModule, Metadata from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher from auto_archiver.core.module import get_module class Storage(BaseModule): + + """ + Base class for implementing storage modules in the media archiving framework. + + Subclasses must implement the `get_cdn_url` and `uploadf` methods to define their behavior. + """ def store(self, media: Media, url: str, metadata: Metadata=None) -> None: if media.is_stored(in_storage=self): @@ -22,10 +32,18 @@ class Storage(BaseModule): media.add_url(self.get_cdn_url(media)) @abstractmethod - def get_cdn_url(self, media: Media) -> str: pass + def get_cdn_url(self, media: Media) -> str: + """ + Returns the URL of the media object stored in the CDN. + """ + pass @abstractmethod - def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass + def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: + """ + Uploads (or saves) a file to the storage service/location. + """ + pass def upload(self, media: Media, **kwargs) -> bool: logger.debug(f'[{self.__class__.__name__}] storing file {media.filename} with key {media.key}')