Compare commits

...

3 Commits

Author SHA1 Message Date
Miguel Sozinho Ramalho
a786d4bb0e chooses most complete result from api (#116) 2023-12-13 11:26:46 +00:00
Miguel Sozinho Ramalho
128d4136e3 fixes empty api search results (#115) 2023-12-13 10:51:25 +00:00
Miguel Sozinho Ramalho
98fb574d89 fixing older db entries formats (#114) 2023-12-12 22:47:54 +00:00
4 changed files with 24 additions and 8 deletions

View File

@@ -65,7 +65,9 @@ class Media:
@property  # getter .mimetype
def mimetype(self) -> str:
    """Return the MIME type guessed from ``self.filename``.

    The guess comes from ``mimetypes.guess_type`` and is cached in
    ``self._mimetype`` so later reads reuse it.

    Returns:
        The cached or freshly guessed MIME type, or ``""`` when the type
        cannot be guessed or when there is no filename (a warning is logged
        in the latter case).
    """
    # Degrade gracefully instead of asserting: an assert ahead of this guard
    # raises first and makes the fallback unreachable, and asserts vanish
    # entirely under ``python -O``.
    if not self.filename:
        logger.warning(f"cannot get mimetype from media without filename: {self}")
        return ""
    if not self._mimetype:
        # guess_type returns (type, encoding); only the type is kept.
        self._mimetype = mimetypes.guess_type(self.filename)[0]
    return self._mimetype or ""

View File

@@ -165,3 +165,16 @@ class Metadata:
def __str__(self) -> str:
return self.__repr__()
@staticmethod
def choose_most_complete(results: List[Metadata]) -> Metadata:
    """Pick the most complete entry from ``results``.

    Entries are ranked by how many media items they hold, with the number
    of metadata entries as tie-breaker. When several entries rank equally
    the earliest one in the list wins.

    Returns:
        ``None`` for an empty list, the sole entry for a one-item list,
        otherwise the highest-ranked entry.
    """
    if len(results) == 0:
        return None
    if len(results) == 1:
        # Returned untouched, without inspecting its media/metadata.
        return results[0]

    def completeness(candidate):
        # Tuples compare lexicographically: media count first, then metadata.
        return (len(candidate.media), len(candidate.metadata))

    # max() yields the first of several maximal items, which matches a
    # left-to-right scan that only replaces on a strictly greater rank.
    return max(results, key=completeness)

View File

@@ -35,16 +35,17 @@ class AAApiDb(Database):
""" query the database for the existence of this item"""
if not self.allow_rearchive: return
params = {"url": item.get_url(), "limit": 1}
params = {"url": item.get_url(), "limit": 15}
headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
response = requests.get(os.path.join(self.api_endpoint, "tasks/search-url"), params=params, headers=headers)
if response.status_code == 200:
logger.success(f"API returned a previously archived instance: {response.json()}")
# TODO: can we do better than just returning the first result?
return Metadata.from_dict(response.json()[0]["result"])
logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
if len(response.json()):
logger.success(f"API returned {len(response.json())} previously archived instance(s)")
fetched_metadata = [Metadata.from_dict(r["result"]) for r in response.json()]
return Metadata.choose_most_complete(fetched_metadata)
else:
logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
return False

View File

@@ -3,7 +3,7 @@ _MAJOR = "0"
# Minor component of the version string.
_MINOR = "7"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "5"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""