Bump version to v0.4.5 for release

Merge pull request #72 from milesmcc/patch-1
Fix hash enricher for flatfile output (closes #71)
2026-06-08 11:28:28 +03:00 · 2023-03-16 15:05:42 +00:00 · 2023-03-16 15:04:55 +00:00 · 2023-03-14 13:37:54 -07:00 · 2023-03-10 11:34:29 +00:00 · 2023-02-27 10:30:06 +01:00
36 changed files with 710 additions and 1968 deletions
--- a/.github/workflows/docker-publish.yaml
+++ b/.github/workflows/docker-publish.yaml
@@ -0,0 +1,48 @@
+name: Docker  # builds the repository's Docker image and pushes it to a registry
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+on:
+  release:
+    types: [published]  # run when a release is published
+  push:
+    branches: [ "dockerize" ]  # also run on pushes to the dockerize branch
+    tags: [ "v*.*.*" ]  # and on pushed tags shaped like v0.4.5
+
+env:
+  # NOTE(review): no step below reads REGISTRY; login and push target Docker Hub - confirm intent
+  REGISTRY: ghcr.io
+  # github.repository as <account>/<repo>; NOTE(review): IMAGE_NAME is not read by any step below
+  IMAGE_NAME: ${{ github.repository }}
+
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v3
+      
+      - name: Log in to Docker Hub
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9  # pinned to a commit SHA
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}  # credentials come from repository secrets
+          password: ${{ secrets.DOCKER_PASSWORD }}
+      
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta  # referenced as steps.meta by the build step below
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38  # pinned to a commit SHA
+        with:
+          images: bellingcat/auto-archiver  # image name the generated tags and labels apply to
+      
+      - name: Build and push Docker image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc  # pinned to a commit SHA
+        with:
+          context: .  # build context is the workspace root
+          push: true  # push the built image rather than only building it
+          tags: ${{ steps.meta.outputs.tags }}  # outputs of the metadata step (id: meta)
+          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/python-publish.yaml
+++ b/.github/workflows/python-publish.yaml
@@ -6,28 +6,26 @@
 # separate terms of service, privacy policy, and support
 # documentation.

-name: Upload Python Package
+name: Pypi

 on:
  release:
    types: [published]
-  
  push:
-    branches:
-      - dockerize
-    tags:
-      - 'v*.*.*'
+    branches: [ "dockerize" ]
+    tags: [ "v*.*.*" ]

 permissions:
  contents: read

 jobs:
  deploy:
-
+    name: Publish python package
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
+
    - name: Set up Python 3.10
      uses: actions/setup-python@v4
      with:
@@ -35,12 +33,21 @@ jobs:

    - name: Install dependencies
      run: |
-        python -m pip install --upgrade pip
-        pip install build
-    - name: Build package
-      run: python -m build
-    - name: Publish package
+        python -m pip install --upgrade --upgrade-strategy=eager pip setuptools wheel twine pipenv
+        python -m pip install -e . --upgrade
+        python -m pipenv install --dev --python 3.10
+      env:
+        PIPENV_DEFAULT_PYTHON_VERSION: "3.10"
+
+    - name: Build wheels
+      run: |
+        python -m pipenv run python setup.py sdist bdist_wheel
+
+    - name: Publish a Python distribution to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
-        password: ${{ secrets.PYPI_API_TOKEN }}
+        verbose: true
+        skip-existing: true  # kebab-case input name; the snake_case spelling is deprecated by the action
+        password: ${{ secrets.PYPI_API_TOKEN }}  # API token paired with the __token__ user above
+        packages-dir: dist/  # kebab-case input name; directory the "Build wheels" step writes to
--- a/2
+++ b/2
@@ -21,7 +21,7 @@ RUN pip install --upgrade pip && \
 # TODO: avoid copying unnecessary files, including .git
 COPY Pipfile Pipfile.lock ./
 RUN pipenv install --python=3.10 --system --deploy
-ENV IS_DOCKER=1
+# ENV IS_DOCKER=1
 # doing this at the end helps during development, builds are quick
 COPY ./src/ . 

--- a/4
+++ b/4
@@ -14,7 +14,6 @@ loguru = "*"
 ffmpeg-python = "*"
 selenium = "*"
 snscrape = "*"
-yt-dlp = "*"
 telethon = "*"
 google-api-python-client = "*"
 google-auth-httplib2 = "*"
@@ -23,13 +22,14 @@ oauth2client = "*"
 python-slugify = "*"
 pyyaml = "*"
 dateparser = "*"
-vk-url-scraper = "*"
 python-twitter-v2 = "*"
 instaloader = "*"
 tqdm = "*"
 jinja2 = "*"
 cryptography = "==38.0.4"
 dataclasses-json = "*"
+yt-dlp = ">=2023.2.17"
+vk-url-scraper = "*"

 [requires]
 python_version = "3.9"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "e2f5d017d9bc9eef90cced189b6e3017d740c35d204962479417109a4deeb7f4"
+            "sha256": "7176a6666639452dbf30939fa095ff23518aee6da7d9561de0f12ba0aceed527"
        },
        "pipfile-spec": 6,
        "requires": {
@@ -57,19 +57,19 @@
        },
        "boto3": {
            "hashes": [
-                "sha256:3a1ffeecfe6e61d414617294b822b008e604ccfd83434c483f429a2922db314d",
-                "sha256:ebea98f3054b467caf6c8aead9f0ef78395a78bce78b04db12fde452c02b3734"
+                "sha256:17f0d782487275cac12676a61b3f1a4900954cc454c842b8551ca47a3dcd59b4",
+                "sha256:bf808f7433629650128ab577a9d4a0f4daf072d9f2f3a907b9d567a6952d9154"
            ],
            "index": "pypi",
-            "version": "==1.26.66"
+            "version": "==1.26.77"
        },
        "botocore": {
            "hashes": [
-                "sha256:4d1ac019e677cc39e615f9d473fa658ea22a8d906c1c562f9406b5d0cd854cbd",
-                "sha256:772da07d2a49a9d2dc8d23e060e88eb72881e58074be7c813aa946ecdbd0e5b5"
+                "sha256:9d94a02f2584b52c65fb3cb309fb1b29d6d0c36d69062722b0275c1c382c44c9",
+                "sha256:d8aa7bffe2422de282b2d02945b7b45d5fecf00f67b65eebb0b1fa3de1abc6d0"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==1.29.66"
+            "version": "==1.29.77"
        },
        "brotli": {
            "hashes": [
@@ -176,11 +176,11 @@
        },
        "certifi": {
            "hashes": [
-                "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d",
-                "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"
+                "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3",
+                "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"
            ],
            "markers": "python_version >= '3.6'",
-            "version": "==2022.6.15"
+            "version": "==2022.12.7"
        },
        "cffi": {
            "hashes": [
@@ -253,11 +253,97 @@
        },
        "charset-normalizer": {
            "hashes": [
-                "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
-                "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"
+                "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b",
+                "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42",
+                "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d",
+                "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b",
+                "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a",
+                "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59",
+                "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154",
+                "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1",
+                "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c",
+                "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a",
+                "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d",
+                "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6",
+                "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b",
+                "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b",
+                "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783",
+                "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5",
+                "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918",
+                "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555",
+                "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639",
+                "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786",
+                "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e",
+                "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed",
+                "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820",
+                "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8",
+                "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3",
+                "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541",
+                "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14",
+                "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be",
+                "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e",
+                "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76",
+                "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b",
+                "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c",
+                "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b",
+                "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3",
+                "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc",
+                "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6",
+                "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59",
+                "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4",
+                "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d",
+                "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d",
+                "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3",
+                "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a",
+                "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea",
+                "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6",
+                "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e",
+                "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603",
+                "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24",
+                "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a",
+                "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58",
+                "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678",
+                "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a",
+                "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c",
+                "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6",
+                "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18",
+                "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174",
+                "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317",
+                "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f",
+                "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc",
+                "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837",
+                "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41",
+                "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c",
+                "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579",
+                "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753",
+                "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8",
+                "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291",
+                "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087",
+                "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866",
+                "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3",
+                "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d",
+                "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1",
+                "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca",
+                "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e",
+                "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db",
+                "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72",
+                "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d",
+                "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc",
+                "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539",
+                "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d",
+                "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af",
+                "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b",
+                "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602",
+                "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f",
+                "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478",
+                "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c",
+                "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e",
+                "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479",
+                "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7",
+                "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"
            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==2.0.12"
+            "markers": "python_version >= '3.6'",
+            "version": "==3.0.1"
        },
        "click": {
            "hashes": [
@@ -348,11 +434,11 @@
        },
        "flask": {
            "hashes": [
-                "sha256:642c450d19c4ad482f96729bd2a8f6d32554aa1e231f4f6b4e7e5264b16cca2b",
-                "sha256:b9c46cc36662a7949f34b52d8ec7bb59c0d74ba08ba6cb9ce9adc1d8676d9526"
+                "sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d",
+                "sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==2.2.2"
+            "version": "==2.2.3"
        },
        "future": {
            "hashes": [
@@ -371,19 +457,19 @@
        },
        "google-api-python-client": {
            "hashes": [
-                "sha256:42a44e9adfca6bb27540ce52348aa1d3b81e214bcc53d454a76ebfbe8eee1483",
-                "sha256:f18e9dbb365f0485194a8daf5d60da2cff6a80ce2c9a694efc2b279922cb3dd0"
+                "sha256:577c0aeae1eb3c754eacb9122d369d67609fef759bc6a4fa16cafeab4f30019b",
+                "sha256:b9b6dc5f139892310093ba75d0df4c78f48655078953c923957dab1ec86129e7"
            ],
            "index": "pypi",
-            "version": "==2.77.0"
+            "version": "==2.79.0"
        },
        "google-auth": {
            "hashes": [
-                "sha256:5045648c821fb72384cdc0e82cc326df195f113a33049d9b62b74589243d2acc",
-                "sha256:ed7057a101af1146f0554a769930ac9de506aeca4fd5af6543ebe791851a9fbd"
+                "sha256:5fd170986bce6bfd7bb5c845c4b8362edb1e0cba901e062196e83f8bb5d5d32c",
+                "sha256:75d76ea857df65938e1f71dcbcd7d0cd48e3f80b34b8870ba229c9292081f7ef"
            ],
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
-            "version": "==2.16.0"
+            "version": "==2.16.1"
        },
        "google-auth-httplib2": {
            "hashes": [
@@ -435,18 +521,18 @@
        },
        "idna": {
            "hashes": [
-                "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
-                "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
+                "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
+                "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
            ],
            "markers": "python_version >= '3.5'",
-            "version": "==3.3"
+            "version": "==3.4"
        },
        "instaloader": {
            "hashes": [
-                "sha256:ba925a87e2c305a3d24173d1bb0457d5a7e2e77dbac7206eeeb46f9104ecb08e"
+                "sha256:16040c170fb5230c1981a47e1990261e3c0ecffe0417be95fa265632244e7c01"
            ],
            "index": "pypi",
-            "version": "==4.9.5"
+            "version": "==4.9.6"
        },
        "itsdangerous": {
            "hashes": [
@@ -565,11 +651,11 @@
        },
        "markdown-it-py": {
            "hashes": [
-                "sha256:93de681e5c021a432c63147656fe21790bc01231e0cd2da73626f1aa3ac0fe27",
-                "sha256:cf7e59fed14b5ae17c0006eff14a2d9a00ed5f3a846148153899a0224e2c07da"
+                "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30",
+                "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==2.1.0"
+            "version": "==2.2.0"
        },
        "markupsafe": {
            "hashes": [
@@ -700,23 +786,22 @@
        },
        "protobuf": {
            "hashes": [
-                "sha256:1f22ac0ca65bb70a876060d96d914dae09ac98d114294f77584b0d2644fa9c30",
-                "sha256:237216c3326d46808a9f7c26fd1bd4b20015fb6867dc5d263a493ef9a539293b",
-                "sha256:27f4d15021da6d2b706ddc3860fac0a5ddaba34ab679dc182b60a8bb4e1121cc",
-                "sha256:299ea899484ee6f44604deb71f424234f654606b983cb496ea2a53e3c63ab791",
-                "sha256:3d164928ff0727d97022957c2b849250ca0e64777ee31efd7d6de2e07c494717",
-                "sha256:6ab80df09e3208f742c98443b6166bcb70d65f52cfeb67357d52032ea1ae9bec",
-                "sha256:78a28c9fa223998472886c77042e9b9afb6fe4242bd2a2a5aced88e3f4422aa7",
-                "sha256:7cd532c4566d0e6feafecc1059d04c7915aec8e182d1cf7adee8b24ef1e2e6ab",
-                "sha256:89f9149e4a0169cddfc44c74f230d7743002e3aa0b9472d8c28f0388102fc4c2",
-                "sha256:a53fd3f03e578553623272dc46ac2f189de23862e68565e83dde203d41b76fc5",
-                "sha256:b135410244ebe777db80298297a97fbb4c862c881b4403b71bac9d4107d61fd1",
-                "sha256:b98d0148f84e3a3c569e19f52103ca1feacdac0d2df8d6533cf983d1fda28462",
-                "sha256:d1736130bce8cf131ac7957fa26880ca19227d4ad68b4888b3be0dea1f95df97",
-                "sha256:f45460f9ee70a0ec1b6694c6e4e348ad2019275680bd68a1d9314b8c7e01e574"
+                "sha256:1669cb7524221a8e2d9008d0842453dbefdd0fcdd64d67672f657244867635fb",
+                "sha256:29288813aacaa302afa2381db1d6e0482165737b0afdf2811df5fa99185c457b",
+                "sha256:47d31bdf58222dd296976aa1646c68c6ee80b96d22e0a3c336c9174e253fd35e",
+                "sha256:652d8dfece122a24d98eebfef30e31e455d300efa41999d1182e015984ac5930",
+                "sha256:7c535d126e7dcc714105ab20b418c4fedbd28f8b8afc42b7350b1e317bbbcc71",
+                "sha256:86c3d20428b007537ba6792b475c0853bba7f66b1f60e610d913b77d94b486e4",
+                "sha256:a33a273d21852f911b8bda47f39f4383fe7c061eb1814db2c76c9875c89c2491",
+                "sha256:ab4d043865dd04e6b09386981fe8f80b39a1e46139fb4a3c206229d6b9f36ff6",
+                "sha256:b2fea9dc8e3c0f32c38124790ef16cba2ee0628fe2022a52e435e1117bfef9b1",
+                "sha256:c27f371f0159feb70e6ea52ed7e768b3f3a4c5676c1900a7e51a24740381650e",
+                "sha256:c3325803095fb4c2a48649c321d2fbde59f8fbfcb9bfc7a86df27d112831c571",
+                "sha256:e474b63bab0a2ea32a7b26a4d8eec59e33e709321e5e16fb66e766b61b82a95e",
+                "sha256:e894e9ae603e963f0842498c4cd5d39c6a60f0d7e4c103df50ee939564298658"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==4.21.12"
+            "version": "==4.22.0"
        },
        "pyaes": {
            "hashes": [
@@ -838,14 +923,6 @@
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
            "version": "==2.8.2"
        },
-        "python-dotenv": {
-            "hashes": [
-                "sha256:b7e3b04a59693c42c36f9ab1cc2acc46fa5df8c78e178fc33a8d4cd05c8d498f",
-                "sha256:d92a187be61fe482e4fd675b6d52200e7be63a12b724abbf931a40ce4fa92938"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==0.20.0"
-        },
        "python-slugify": {
            "hashes": [
                "sha256:51f217508df20a6c166c7821683384b998560adcf8f19a6c2ca8b460528ccd9c",
@@ -1019,11 +1096,11 @@
        },
        "requests": {
            "hashes": [
-                "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f",
-                "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"
+                "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa",
+                "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"
            ],
            "markers": "python_version >= '3.7' and python_version < '4'",
-            "version": "==2.28.0"
+            "version": "==2.28.2"
        },
        "requests-oauthlib": {
            "hashes": [
@@ -1067,11 +1144,11 @@
        },
        "selenium": {
            "hashes": [
-                "sha256:20f28ee4ea9b273b4112a7df5276ebb3052f79ff6eff42a564db6143e5926683",
-                "sha256:fee36724d6cf0b18c73781bb8ec7be4a35ab1e2564e64e64e64da75e50e052af"
+                "sha256:bd04eb41395605d9b2b65fe587f3fed21431da75512985c52772529e5e210c60",
+                "sha256:c48372905bffcc3b24bd55ab4683a07ee5e1f30fe918c59558ea5ee44cedf6c3"
            ],
            "index": "pypi",
-            "version": "==4.8.0"
+            "version": "==4.8.2"
        },
        "six": {
            "hashes": [
@@ -1106,11 +1183,11 @@
        },
        "soupsieve": {
            "hashes": [
-                "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
-                "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
+                "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955",
+                "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"
            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==2.3.2.post1"
+            "markers": "python_version >= '3.7'",
+            "version": "==2.4"
        },
        "telethon": {
            "hashes": [
@@ -1160,11 +1237,11 @@
        },
        "typing-extensions": {
            "hashes": [
-                "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa",
-                "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"
+                "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb",
+                "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==4.4.0"
+            "version": "==4.5.0"
        },
        "typing-inspect": {
            "hashes": [
@@ -1198,27 +1275,30 @@
            "version": "==4.1.1"
        },
        "urllib3": {
-            "hashes": [
-                "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
-                "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
+            "extras": [
+                "socks"
            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
-            "version": "==1.26.9"
+            "hashes": [
+                "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72",
+                "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
+            "version": "==1.26.14"
        },
        "vk-api": {
            "hashes": [
-                "sha256:11c731e214ebc7fa911db81efb021f97587493a5402b992f24748fe1cd9d7afc",
-                "sha256:d0ae766fa93a40d47c5da045d94201721bf766dbde122a1d2253516b35c5edf3"
+                "sha256:c71021506449afe5b9bbb1c4acb0d86b35a007ddc21678478e46fbbeabd1f3ef",
+                "sha256:c7741e40bc05980c91ed94c84542e1e7e7370e101b5eaa74222958d4130fe3c2"
            ],
-            "version": "==11.9.8"
+            "version": "==11.9.9"
        },
        "vk-url-scraper": {
            "hashes": [
-                "sha256:1cd6daad89a1f920902cb68c5952c5ab5e80ba2bf4a8c3657c781b5b0f9d406b",
-                "sha256:d430de947575e321cedceecfdf198b8bd14db3026038b924547e8b1c7c6a09ed"
+                "sha256:5a32fb5419f7bb8bd35de8548948fe27a06f857a4d086c87e142bf07aabc3fd7",
+                "sha256:a87c5aa7c1570c3aa87031e78c2052105e3681f57503fd4cb56470c3ab6106d6"
            ],
            "index": "pypi",
-            "version": "==0.3.10"
+            "version": "==0.3.15"
        },
        "websockets": {
            "hashes": [
@@ -1297,11 +1377,11 @@
        },
        "werkzeug": {
            "hashes": [
-                "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f",
-                "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5"
+                "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe",
+                "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"
            ],
            "markers": "python_version >= '3.7'",
-            "version": "==2.2.2"
+            "version": "==2.2.3"
        },
        "wsproto": {
            "hashes": [
@@ -1313,11 +1393,11 @@
        },
        "yt-dlp": {
            "hashes": [
-                "sha256:0e7b81fc6ac8d1b7d3fffa79f9044ca4163784422582c9a3593305da2a69ec02",
-                "sha256:d7d1f81d230756f094b4d9ee59b37b2c13b2e63ff5fb72cda53625edb072cdae"
+                "sha256:3b2df037c80922f0f83f63ee2f9253496b4a8668c0fe8d2a836ba9040f853b07",
+                "sha256:9af92de5effc193bdb51216d9ebf28874d96180d202fae752b0d9f2a63380f3a"
            ],
            "index": "pypi",
-            "version": "==2022.7.18"
+            "version": "==2023.2.17"
        }
    },
    "develop": {
--- a/README.md
+++ b/README.md
@@ -1,238 +1,208 @@
-# Auto Archiver
+<h1 align="center">Auto Archiver</h1>
+
+[![PyPI version](https://badge.fury.io/py/auto-archiver.svg)](https://badge.fury.io/py/auto-archiver)
+[![Docker Image Version (latest by date)](https://img.shields.io/docker/v/bellingcat/auto-archiver?label=version&logo=docker)](https://hub.docker.com/r/bellingcat/auto-archiver)
+<!-- ![Docker Pulls](https://img.shields.io/docker/pulls/bellingcat/auto-archiver) -->
+<!-- [![PyPI download month](https://img.shields.io/pypi/dm/auto-archiver.svg)](https://pypi.python.org/pypi/auto-archiver/) -->
+<!-- [![Documentation Status](https://readthedocs.org/projects/vk-url-scraper/badge/?version=latest)](https://vk-url-scraper.readthedocs.io/en/latest/?badge=latest) -->
+
+
 Read the [article about Auto Archiver on bellingcat.com](https://www.bellingcat.com/resources/2022/09/22/preserve-vital-online-content-with-bellingcats-auto-archiver-tool/).


 Python tool to automatically archive social media posts, videos, and images from a Google Sheets, the console, and more. Uses different archivers depending on the platform, and can save content to local storage, S3 bucket (Digital Ocean Spaces, AWS, ...), and Google Drive. If using Google Sheets as the source for links, it will be updated with information about the archived content. It can be run manually or on an automated basis.

-There are 3 ways to use the auto-archiver
-1. (simplest) via docker `docker ... TODO`
-2. (pypi) `pip install auto-archiver`
-3. (legacy) clone and manually install from repo (see legacy [tutorial video](https://youtu.be/VfAhcuV2tLQ))
+There are 3 ways to use the auto-archiver:
+1. (easiest installation) via docker
+2. (local python install) `pip install auto-archiver`
+3. (legacy/development) clone and manually install from repo (see legacy [tutorial video](https://youtu.be/VfAhcuV2tLQ))
+
+But **you always need a configuration/orchestration file**, which is where you'll configure where/what/how to archive. Make sure you read [orchestration](#orchestration).


+## How to run the auto-archiver

-### Examples
+### Option 1 - docker
+
+<details><summary><code>Docker instructions</code></summary>
+
+[![dockeri.co](https://dockerico.blankenship.io/image/bellingcat/auto-archiver)](https://hub.docker.com/r/bellingcat/auto-archiver)
+
+Docker works like a virtual machine running inside your computer: it isolates everything and makes installation simple. Since it is an isolated environment, when you need to pass it your orchestration file or get downloaded media out of docker, you will need to connect folders on your machine with folders inside docker using the `-v` volume flag.


+1. install [docker](https://docs.docker.com/get-docker/)
+2. pull the auto-archiver docker [image](https://hub.docker.com/r/bellingcat/auto-archiver) with `docker pull bellingcat/auto-archiver`
+3. run the docker image locally in a container: `docker run --rm -v $PWD/secrets:/app/secrets -v $PWD/local_archive:/app/local_archive bellingcat/auto-archiver -m auto_archiver  --config secrets/orchestration.yaml` breaking this command down:
+   1. `docker run` tells docker to start a new container (an instance of the image)
+   2. `--rm` makes sure this container is removed after execution (less garbage locally)
+   3. `-v $PWD/secrets:/app/secrets` - your secrets folder
+      1. `-v` is a volume flag which means a folder that you have on your computer will be connected to a folder inside the docker container
+      2. `$PWD/secrets` points to a `secrets/` folder in your current working directory (where your console points to), we use this folder as a best practice to hold all the secrets/tokens/passwords/... you use
+      3. `/app/secrets` points to the path inside the docker container where this folder can be found
+   4.  `-v $PWD/local_archive:/app/local_archive` - (optional) if you use local_storage
+       1.  `-v` same as above, this is a volume instruction
+       2.  `$PWD/local_archive` is a folder `local_archive/` in case you want to archive locally and have the files accessible outside docker
+       3.  `/app/local_archive` is a folder inside docker that you can reference in your orchestration.yaml file

-# Requirement configurations
-# Running with docker
-# Running without docker
+</details>
+
+### Option 2 - python package
+
+<details><summary><code>Python package instructions</code></summary>
+
+1. make sure you have python 3.8 or higher installed
+2. install the package `pip/pipenv/conda install auto-archiver`
+3. test it's installed with `auto-archiver --help`
+4. run it with your orchestration file and pass any flags you want in the command line `auto-archiver --config secrets/orchestration.yaml`
+   1. this example assumes your orchestration file is inside a `secrets/` folder, which we advise
+
+</details>


+### Option 3 - local installation
+This can also be used for development.

-### Setup checklist
-Use this to make sure you help making sure you did all the required steps:
-* [ ] you have a `/secrets` folder with all your configuration files including
-  * [ ] a configuration file eg: `config.yaml` pointing to the correct location of other files
-  * [ ] you have a `service_account.json` 
-  * [ ] (optional for telegram) a `anon.session` which appears after the 1st run to avoid logging into the 
-  * [ ] (optional for VK) a `vk_config.v2.json`
-  * [ ] (optional for using GoogleDrive storage) `gd-token.json`
-  * [ ] (optional for instagram) `instaloader.session` file which appears after the 1st run and login in telegram
-  * [ ] (optional for browsertrix) `profile.tar.gz` file
-
-### Private telegram channels
-* Cannot use bot token
-* Should have one with bot token, one without
-* Setup join all private invite links at the start
-* 
-
-## Setup
-### Always required
-1. [A Google Service account is necessary for use with `gspread`.](https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account) Credentials for this account should be stored in `service_account.json`, in the same directory as the script.
-2. A configuration file, see [Configuration file](#configuration-file).
-
-### With docker image
-[Docker](https://www.docker.com/) is like a virtual machine program that isolates all the installation dependencies needed for the auto-archiver and it should be the only thing you need to install.
-
-<!-- TODO add further instructions for docker -->
-
-### Without docker
-Check this [tutorial video](https://youtu.be/VfAhcuV2tLQ) for setup without the docker image.
-
-If you are using `pipenv` (recommended), `pipenv install` is sufficient to install Python prerequisites.
-
-You need to install the following requirements on your machine:
-1. [A Google Service account is necessary for use with `gspread`.](https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account) Credentials for this account should be stored in `service_account.json`, in the same directory as the script.
-2. [ffmpeg](https://www.ffmpeg.org/) must also be installed locally for this tool to work. 
-3. [firefox](https://www.mozilla.org/en-US/firefox/new/) and [geckodriver](https://github.com/mozilla/geckodriver/releases) on a path folder like `/usr/local/bin`. 
-4. [fonts-noto](https://fonts.google.com/noto) to deal with multiple unicode characters during selenium/geckodriver's screenshots: `sudo apt install fonts-noto -y`. 
-5. Internet Archive credentials can be retrieved from https://archive.org/account/s3.php.
-6. If you would like to take archival [WACZ](https://specs.webrecorder.net/wacz/1.1.1/) snapshots using [browsertrix-crawler](https://github.com/webrecorder/browsertrix-crawler) in addition to screenshots you will need to install [Docker](https://www.docker.com/). 
-   1. To improve the websites browsertrix can archive you can also create a custom profile by running `docker run -p 9222:9222 -p 9223:9223 -v $PWD/browsertrix/crawls/profiles:/crawls/profiles/ -it webrecorder/browsertrix-crawler create-login-profile --interactive --url "https://youtube.com"`, going to [http://localhost:9223/](http://localhost:9223/) and accepting the cookies prompt on youtube, and then navigating to other websites and logging in as per your needs, so as to access more publicly blocked content, and then specifying the created `profile.tar.gz` in your config file under `execution.browsertrix.profile`. 
-
-### Configuration file
-Configuration is done via a config.yaml file (see [example.config.yaml](example.config.yaml)) and some properties of that file can be overwritten via command line arguments. Make a copy of that file and rename it to your liking eg. `config-test.yaml` . Here is the current result from running the `python auto_archive.py --help`:
-
-<details><summary><code>python auto_archive.py --help</code></summary>
+<details><summary><code>Legacy instructions, only use if docker/package is not an option</code></summary>


+Install the following locally:
+1. [ffmpeg](https://www.ffmpeg.org/) must also be installed locally for this tool to work. 
+2. [firefox](https://www.mozilla.org/en-US/firefox/new/) and [geckodriver](https://github.com/mozilla/geckodriver/releases) on a path folder like `/usr/local/bin`. 
+3. [fonts-noto](https://fonts.google.com/noto) to deal with multiple unicode characters during selenium/geckodriver's screenshots: `sudo apt install fonts-noto -y`. 

-```js
-usage: auto_archive.py [-h] [--config CONFIG] [--storage {s3,local,gd}] [--sheet SHEET] [--header HEADER] [--check-if-exists] [--save-logs] [--s3-private] [--col-url URL] [--col-status STATUS] [--col-folder FOLDER]
-                       [--col-archive ARCHIVE] [--col-date DATE] [--col-thumbnail THUMBNAIL] [--col-thumbnail_index THUMBNAIL_INDEX] [--col-timestamp TIMESTAMP] [--col-title TITLE] [--col-duration DURATION]
-                       [--col-screenshot SCREENSHOT] [--col-hash HASH]
+Clone and run:
+1. `git clone https://github.com/bellingcat/auto-archiver`
+2. `pipenv install`
+3. `pipenv run python -m src.auto_archiver --config secrets/orchestration.yaml`

-Automatically archive social media posts, videos, and images from a Google Sheets document. 
-The command line arguments will always override the configurations in the provided YAML config file (--config), only some high-level options
-are allowed via the command line and the YAML configuration file is the preferred method. The sheet must have the "url" and "status" for the archiver to work.
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --config CONFIG       the filename of the YAML configuration file (defaults to 'config.yaml')
-  --storage {s3,local,gd}
-                        which storage to use [execution.storage in config.yaml]
-  --sheet SHEET         the name of the google sheets document [execution.sheet in config.yaml]
-  --header HEADER       1-based index for the header row [execution.header in config.yaml]
-  --check-if-exists     when possible checks if the URL has been archived before and does not archive the same URL twice [exceution.check_if_exists]
-  --save-logs           creates or appends execution logs to files logs/LEVEL.log [exceution.save_logs]
-  --s3-private          Store content without public access permission (only for storage=s3) [secrets.s3.private in config.yaml]
-  --col-url URL         the name of the column to READ url FROM (default='link')
-  --col-status STATUS   the name of the column to FILL WITH status (default='archive status')
-  --col-folder FOLDER   the name of the column to READ folder FROM (default='destination folder')
-  --col-archive ARCHIVE
-                        the name of the column to FILL WITH archive (default='archive location')
-  --col-date DATE       the name of the column to FILL WITH date (default='archive date')
-  --col-thumbnail THUMBNAIL
-                        the name of the column to FILL WITH thumbnail (default='thumbnail')
-  --col-thumbnail_index THUMBNAIL_INDEX
-                        the name of the column to FILL WITH thumbnail_index (default='thumbnail index')
-  --col-timestamp TIMESTAMP
-                        the name of the column to FILL WITH timestamp (default='upload timestamp')
-  --col-title TITLE     the name of the column to FILL WITH title (default='upload title')
-  --col-duration DURATION
-                        the name of the column to FILL WITH duration (default='duration')
-  --col-screenshot SCREENSHOT
-                        the name of the column to FILL WITH screenshot (default='screenshot')
-  --col-hash HASH       the name of the column to FILL WITH hash (default='hash')
-```

 </details><br/>

-#### Example invocations
-All the configurations can be specified in the YAML config file, but sometimes it is useful to override only some of those like the sheet that we are running the archival on, here are some examples (possibly prepended by `pipenv run`):
+# Orchestration
+The archiver work is orchestrated by the following workflow (we call each a **step**): 
+1. **Feeder** gets the links (from a spreadsheet, from the console, ...)
+2. **Archiver** tries to archive the link (twitter, youtube, ...)
+3. **Enricher** adds more info to the content (hashes, thumbnails, ...)
+4. **Formatter** creates a report from all the archived content (HTML, PDF, ...)
+5. **Database** knows what's been archived and also stores the archive result (spreadsheet, CSV, or just the console)
+
+To see all the available steps (archivers, storages, databases, ...), check the [example.orchestration.yaml](example.orchestration.yaml).
+
+The great thing is you configure all the workflow in your `orchestration.yaml` file which we advise you put into a `secrets/` folder and don't share it with others because it will contain passwords and other secrets. 
+
+The structure of the orchestration file is split into 2 parts: `steps` (what **steps** to use) and `configurations` (how those steps should behave), here's a simplification:
+```yaml
+# orchestration.yaml content
+steps:
+  feeder: gsheet_feeder
+  archivers: # order matters
+    - youtubedl_archiver
+  enrichers:
+    - thumbnail_enricher
+  formatter: html_formatter
+  storages:
+    - local_storage
+  databases:
+    - gsheet_db
+
+configurations:
+  gsheet_feeder:
+    sheet: "your google sheet name"
+    header: 2 # row with header for your sheet
+  # ... configurations for the other steps here ...
+```
+
+All the `configurations` in the `orchestration.yaml` file (you can name it differently but need to pass it in the `--config FILENAME` argument) can be seen in the console by using the `--help` flag. They can also be overwritten, for example if you are using the `cli_feeder` to archive from the command line and want to provide the URLs you should do:

 ```bash
-# all the configurations come from config.yaml
-python auto_archive.py
+auto-archiver --config orchestration.yaml --cli_feeder.urls="url1,url2,url3"
+```

-# all the configurations come from config.yaml,
-# checks if URL is not archived twice and saves logs to logs/ folder
-python auto_archive.py --check-if-exists --save_logs
+Here's the complete workflow that the auto-archiver goes through:
+```mermaid
+graph TD
+    s((start)) --> F(fa:fa-table Feeder)
+    F -->|get and clean URL| D1{fa:fa-database Database}
+    D1 -->|is already archived| e((end))
+    D1 -->|not yet archived| a(fa:fa-download Archivers)
+    a -->|got media| E(fa:fa-chart-line Enrichers)
+    E --> S[fa:fa-box-archive Storages]
+    E --> Fo(fa:fa-code Formatter)
+    Fo --> S
+    Fo -->|update database| D2(fa:fa-database Database)
+    D2 --> e
+```

-# all the configurations come from my_config.yaml
-python auto_archive.py --config my_config.yaml
+## Orchestration checklist
+Use this checklist to make sure you did all the required steps:
+* [ ] you have a `/secrets` folder with all your configuration files including
+  * [ ] an orchestration file eg: `orchestration.yaml` pointing to the correct location of other files
+  * [ ] (optional if you use GoogleSheets) you have a `service_account.json` (see [how-to](https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account))
+  * [ ] (optional for telegram) an `anon.session` which appears after the 1st run where you login to telegram
+    * if you use private channels you need to add `channel_invites` and set `join_channels=true` at least once
+  * [ ] (optional for VK) a `vk_config.v2.json`
+  * [ ] (optional for using GoogleDrive storage) `gd-token.json` (see [help script](scripts/create_update_gdrive_oauth_token.py))
+  * [ ] (optional for instagram) `instaloader.session` file which appears after the 1st run and login in instagram
+  * [ ] (optional for browsertrix) `profile.tar.gz` file

-# reads the configurations but saves archived content to google drive instead
-python auto_archive.py --config my_config.yaml --storage gd
+#### Example invocations
+These assume you've installed with pipenv, see docker section above for how to run through docker

-# uses the configurations but for another google docs sheet 
+```bash
+# all the configurations come from ./orchestration.yaml
+auto-archiver
+# all the configurations come from ./secrets/orchestration.yaml
+auto-archiver --config secrets/orchestration.yaml
+# uses the same configurations but for another google docs sheet 
 # with a header on row 2 and with some different column names
-python auto_archive.py --config my_config.yaml --sheet="use it on another sheets doc" --header=2 --col-link="put urls here"
-
-# all the configurations come from config.yaml and specifies that s3 files should be private
-python auto_archive.py --s3-private
+# notice that columns is a dictionary so you need to pass it as JSON and it will override only the values provided
+auto-archiver --config orchestration.yaml --gsheet_feeder.sheet="use it on another sheets doc" --gsheet_feeder.header=2 --gsheet_feeder.columns='{"url": "link"}'
+# all the configurations come from orchestration.yaml and specifies that s3 files should be private
+auto-archiver --s3_storage.private=1
 ```

 ### Extra notes on configuration
 #### Google Drive
 To use Google Drive storage you need the id of the shared folder in the `config.yaml` file which must be shared with the service account eg `autoarchiverservice@auto-archiver-111111.iam.gserviceaccount.com` and then you can use `--storage=gd`

-#### Telethon (Telegrams API Library)
+#### Telethon + Instagram with telegram bot
 The first time you run, you will be prompted to do a authentication with the phone number associated, alternatively you can put your `anon.session` in the root.


-## Running
-The `--sheet name` property (or `execution.sheet` in the YAML file) is the name of the Google Sheet to check for URLs. 
+## Running on Google Sheets Feeder (gsheet_feeder)
+The `--gsheet_feeder.sheet` property is the name of the Google Sheet to check for URLs.
 This sheet must have been shared with the Google Service account used by `gspread`. 
-This sheet must also have specific columns (case-insensitive) in the `header` row (see `COLUMN_NAMES` in [gworksheet.py](utils/gworksheet.py)), only the `link` and `status` columns are mandatory:
-* `Link` (required): the location of the media to be archived. This is the only column that should be supplied with data initially
-* `Archive status` (required): the status of the auto archiver script. Any row with text in this column will be skipped automatically.
-* `Destination folder`: (optional) by default files are saved to a folder called `name-of-sheets-document/name-of-sheets-tab/` using this option you can organize documents into folder from the sheet. 
-* `Archive location`: the location of the archived version. For files that were not able to be auto archived, this can be manually updated.
-* `Archive date`: the date that the auto archiver script ran for this file
-* `Upload timestamp`: the timestamp extracted from the video. (For YouTube, this unfortunately does not currently include the time)
-* `Upload title`: the "title" of the video from the original source
-* `Hash`: a hash of the first video or image found
-* `Screenshot`: a screenshot taken with from a browser view of opening the page
-* in case of videos
-  * `Duration`: duration in seconds
-  * `Thumbnail`: an image thumbnail of the video (resize row height to make this more visible)
-  * `Thumbnail index`: a link to a page that shows many thumbnails for the video, useful for quickly seeing video content
-
+This sheet must also have specific columns (case-insensitive) in the `header` row - see [Gsheet.configs](src/auto_archiver/utils/gsheet.py) for all their names.

 For example, for use with this spreadsheet:

 ![A screenshot of a Google Spreadsheet with column headers defined as above, and several Youtube and Twitter URLs in the "Media URL" column](docs/demo-before.png)

-```pipenv run python auto_archive.py --sheet archiver-test```
-
 When the auto archiver starts running, it updates the "Archive status" column.
-
 ![A screenshot of a Google Spreadsheet with column headers defined as above, and several Youtube and Twitter URLs in the "Media URL" column. The auto archiver has added "archive in progress" to one of the status columns.](docs/demo-progress.png)
-
 The links are downloaded and archived, and the spreadsheet is updated to the following:
-
 ![A screenshot of a Google Spreadsheet with videos archived and metadata added per the description of the columns above.](docs/demo-after.png)
+Note that the first row is skipped, as it is assumed to be a header row (`--gsheet_feeder.header=1` and you can change it if you use more rows above). Rows with an empty URL column, or a non-empty archive column are also skipped. All sheets in the document will be checked.

-Note that the first row is skipped, as it is assumed to be a header row (`--header=1` and you can change it if you use more rows above). Rows with an empty URL column, or a non-empty archive column are also skipped. All sheets in the document will be checked.

-## Automating
+---
+## Development
+Use `python -m src.auto_archiver --config secrets/orchestration.yaml` to run from the local development environment.

-The auto-archiver can be run automatically via cron. An example crontab entry that runs the archiver every minute is as follows.
-
-```* * * * * python auto_archive.py --sheet archiver-test```
-
-With this configuration, the archiver should archive and store all media added to the Google Sheet every 60 seconds. Of course, additional logging information, etc. might be required.
-
-# auto_auto_archiver
-
-To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) You can simply use your default config as for `auto_archiver.py` but use `--sheet` to specify the name of the sheet that lists the names of sheets to archive.It must be shared with the same service account.
-
-![A screenshot of a Google Spreadsheet configured to show instructional text and a list of sheet names to check with auto-archiver.](docs/auto-auto.png)
-
-# Docker development
-* working with docker locally:
+#### Docker development
+working with docker locally:
  * `docker build . -t auto-archiver` to build a local image
  * `docker run --rm -v $PWD/secrets:/app/secrets aa --config secrets/config.yaml`
    * to use local archive, also create a volume `-v` for it by adding `-v $PWD/local_archive:/app/local_archive`
-* release to docker hub
+
+
+release to docker hub
  * `docker image tag auto-archiver bellingcat/auto-archiver:latest`
-  * `docker push bellingcat/auto-archiver` (validate [here]())
-
-# Code structure
-Code is split into functional concepts:
-1. [Archivers](archivers/) - receive a URL that they try to archive
-2. [Storages](storages/) - they deal with where the archived files go
-3. [Utilities](utils/)
-   1. [GWorksheet](utils/gworksheet.py) - facilitates some of the reading/writing tasks for a Google Worksheet
-
-### Current Archivers
-Archivers are tested in a meaningful order with Wayback Machine being the failsafe, that can easily be changed in the code. 
-
-> Note: We have 2 Twitter Archivers (`TwitterArchiver`, `TwitterApiArchiver`) because one requires Twitter API V2 credentials and has better results and the other does not rely on official APIs and misses out on some content. 
-
-https://mermaid.js.org/syntax/flowchart.html
-```mermaid
-graph TD
-    A(Archiver) -->|parent of| B(TelethonArchiver)
-    A -->|parent of| C(TiktokArchiver)
-    A -->|parent of| D(YoutubeDLArchiver)
-    A -->|parent of| D(InstagramArchiver)
-    A -->|parent of| E(TelegramArchiver)
-    A -->|parent of| F(TwitterArchiver)
-    A -->|parent of| G(VkArchiver)
-    A -->|parent of| H(WaybackArchiver)
-    F -->|parent of| I(TwitterApiArchiver)
-```
-### Current Storages
-```mermaid
-graph TD
-    A(BaseStorage) -->|parent of| B(S3Storage)
-    A(BaseStorage) -->|parent of| C(LocalStorage)
-    A(BaseStorage) -->|parent of| D(GoogleDriveStorage)
-```
-
-
+  * `docker push bellingcat/auto-archiver`

+#### RELEASE
+* update version in [version.py](src/auto_archiver/version.py)
+* run `bash ./scripts/release.sh` and confirm
+* package is automatically updated in pypi
+* docker image is automatically pushed to dockerhub
--- a/example.config.yaml
+++ b/example.config.yaml
@@ -1,143 +0,0 @@
---
-secrets:
-  # needed if you use storage=s3
-  s3:
-    # contains S3 info on region, bucket, key and secret
-    region: reg1
-    bucket: my-bucket
-    key: "s3 API key"
-    secret: "s3 API secret"
-    # use region format like such
-    endpoint_url: "https://{region}.digitaloceanspaces.com"
-    # endpoint_url: "https://s3.{region}.amazonaws.com"
-    #use bucket, region, and key (key is the archived file path generated when executing) format like such as:
-    cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
-    # if private:true S3 urls will not be readable online
-    private: false
-    # with 'random' you can generate a random UUID for the URL instead of a predictable path, useful to still have public but unlisted files, alternative is 'default' or not omitted from config
-    key_path: random
-
-  # needed if you use storage=gd
-  google_drive:
-    # To authenticate with google you have two options (1. service account OR 2. OAuth token)
-
-    # 1. service account - storage space will count towards the developer account
-    # filename can be the same or different file from google_sheets.service_account, defaults to "service_account.json"
-    # service_account: "service_account.json"
-
-    # 2. OAuth token  - storage space will count towards the owner of the GDrive folder
-    # (only 1. or 2. - if both specified then this 2. takes precedence)
-    # needs write access on the server so refresh flow works
-    # To get the token, run the file `create_update_test_oauth_token.py`
-    # you can edit that file if you want a different token filename, default is "gd-token.json"
-    oauth_token_filename: "gd-token.json"
-
-    root_folder_id: copy XXXX from https://drive.google.com/drive/folders/XXXX
-
-  # needed if you use storage=local
-  local:
-    # local path to save files in
-    save_to: "./local_archive"
-
-  wayback:
-    # to get credentials visit https://archive.org/account/s3.php
-    key: your API key
-    secret: your API secret
-
-  telegram:
-    # to get credentials see: https://telegra.ph/How-to-get-Telegram-APP-ID--API-HASH-05-27
-    api_id: your API key, see
-    api_hash: your API hash
-    # optional, but allows access to more content such as large videos, talk to @botfather
-    bot_token: your bot-token
-    # optional, defaults to ./anon, records the telegram login session for future usage
-    session_file: "secrets/anon"
-
-  # twitter configuration - API V2 only
-  # if you don't provide credentials the less-effective unofficial TwitterArchiver will be used instead
-  twitter:
-    # either bearer_token only
-    bearer_token: ""
-    # OR all of the below
-    consumer_key: ""
-    consumer_secret: ""
-    access_token: ""
-    access_secret: ""
-
-  # vkontakte (vk.com) credentials
-  vk:
-    username: "phone number or email"
-    password: "password"
-    # optional, defaults to ./vk_config.v2.json, records VK login session for future usage
-    session_file: "secrets/vk_config.v2.json"
-
-  # instagram  credentials
-  instagram:
-    username: "username"
-    password: "password"
-    session_file: "instaloader.session" # <- default value
-
-  google_sheets:
-    # local filename: defaults to service_account.json, see https://gspread.readthedocs.io/en/latest/oauth2.html#for-bots-using-service-account
-    service_account: "service_account.json"
-
-  facebook:
-    # optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'
-    cookie: ""
-execution:
-  # can be overwritten with CMD --sheet=
-  sheet: your-sheet-name
-
-  # block or allow worksheets by name, instead of defaulting to checking all worksheets in a Spreadsheet
-  # worksheet_allow and worksheet_block can be single values or lists
-  # if worksheet_allow is specified, worksheet_block is ignored
-  # worksheet_allow:
-  #   - Sheet1
-  #   - "Sheet 2"
-  # worksheet_block: BlockedSheet
-
-  # which row of your tabs contains the header, can be overwritten with CMD --header=
-  header: 1
-  # which storage to use, can be overwritten with CMD --storage=
-  storage: s3
-  # defaults to false, when true will try to avoid duplicate URL archives
-  check_if_exists: true
-
-  # choose a hash algorithm (either SHA-256 or SHA3-512, defaults to SHA-256)
-  # hash_algorithm: SHA-256
-
-  # optional configurations for the selenium browser that takes screenshots, these are the defaults
-  selenium:
-    # values under 10s might mean screenshots fail to grab screenshot
-    timeout_seconds: 120
-    window_width: 1400
-    window_height: 2000
-
-  # optional browsertrix configuration (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)
-  # browsertrix will capture a WACZ archive of the page which can then be seen as the original on replaywebpage
-  browsertrix:
-    enabled: true # defaults to false
-    profile: "./browsertrix/crawls/profile.tar.gz"
-    timeout_seconds: 120 # defaults to 90s
-  # puts execution logs into /logs folder, defaults to false
-  save_logs: true
-  # custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"
-  # url and status are the only columns required to be present in the google sheet
-  column_names:
-    url: link
-    status: archive status
-    archive: archive location
-    # use this column to override default location data
-    folder: folder
-    date: archive date
-    thumbnail: thumbnail
-    thumbnail_index: thumbnail index
-    timestamp: upload timestamp
-    title: upload title
-    duration: duration
-    screenshot: screenshot
-    hash: hash
-    wacz: wacz
-    # if you want the replaypage to work, make sure to allow CORS on your bucket, see https://replayweb.page/docs/embedding#cors-restrictions
-    replaywebpage: replaywebpage
-
--- a/example.orchestration.yaml
+++ b/example.orchestration.yaml
@@ -0,0 +1,123 @@
+steps:
+  # only 1 feeder allowed
+  feeder: gsheet_feeder # defaults to cli_feeder
+  archivers: # order matters, uncomment to activate
+    # - vk_archiver
+    # - telethon_archiver
+    # - telegram_archiver
+    # - twitter_archiver
+    # - twitter_api_archiver
+    # - instagram_tbot_archiver
+    # - instagram_archiver
+    # - tiktok_archiver
+    - youtubedl_archiver
+    - wayback_archiver_enricher
+  enrichers:
+    - hash_enricher
+    # - screenshot_enricher
+    # - thumbnail_enricher
+    # - wayback_archiver_enricher
+    # - wacz_enricher
+    
+  formatter: html_formatter # defaults to mute_formatter
+  storages:
+    - local_storage
+    # - s3_storage
+    # - gdrive_storage
+  databases:
+    - console_db
+    # - csv_db
+    # - gsheet_db
+    # - mongo_db
+
+configurations:
+  gsheet_feeder:
+    sheet: "your sheet name"
+    header: 1
+    service_account: "secrets/service_account.json"
+    # allow_worksheets: "only parse this worksheet"
+    # block_worksheets: "blocked sheet 1,blocked sheet 2"
+    use_sheet_names_in_stored_paths: false
+    columns:
+      url: link
+      status: archive status
+      folder: destination folder
+      archive: archive location
+      date: archive date
+      thumbnail: thumbnail
+      thumbnail_index: thumbnail index
+      timestamp: upload timestamp
+      title: upload title
+      text: textual content
+      duration: duration
+      screenshot: screenshot
+      hash: hash
+      wacz: wacz
+      replaywebpage: replaywebpage
+  instagram_tbot_archiver:
+    api_id: "TELEGRAM_BOT_API_ID"
+    api_hash: "TELEGRAM_BOT_API_HASH"
+    # session_file: "secrets/anon"
+  telethon_archiver:
+    api_id: "TELEGRAM_BOT_API_ID"
+    api_hash: "TELEGRAM_BOT_API_HASH"
+    # session_file: "secrets/anon"
+    join_channels: false
+    channel_invites: # if you want to archive from private channels
+      - invite: https://t.me/+123456789
+        id: 0000000001
+      - invite: https://t.me/+123456788
+        id: 0000000002
+
+  twitter_api_archiver:
+    # either bearer_token only
+    bearer_token: "TWITTER_BEARER_TOKEN"
+    # OR all of the below
+    # consumer_key: ""
+    # consumer_secret: ""
+    # access_token: ""
+    # access_secret: ""
+  instagram_archiver:
+    username: "INSTAGRAM_USERNAME"
+    password: "INSTAGRAM_PASSWORD"
+    # session_file: "secrets/instaloader.session"
+
+  vk_archiver:
+    username: "or phone number"
+    password: "vk pass"
+    session_file: "secrets/vk_config.v2.json"
+
+  screenshot_enricher:
+    width: 1280
+    height: 2300
+  wayback_archiver_enricher:
+    timeout: 10
+    key: "wayback key"
+    secret: "wayback secret"
+  hash_enricher:
+    algorithm: "SHA3-512" # can also be SHA-256
+  wacz_enricher:
+    profile: secrets/profile.tar.gz
+  local_storage:
+    save_to: "./local_archive"
+    save_absolute: true
+    filename_generator: static
+    path_generator: flat
+  s3_storage:
+    bucket: your-bucket-name
+    region: reg1
+    key: S3_KEY
+    secret: S3_SECRET
+    endpoint_url: "https://{region}.digitaloceanspaces.com"
+    cdn_url: "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"
+    # if private:true S3 urls will not be readable online
+    private: false
+    # with 'random' you can generate a random UUID for the URL instead of a predictable path, useful to still have public but unlisted files; the alternative is 'default', or omitting key_path from the config
+    key_path: random
+
+  gdrive_storage:
+    path_generator: url
+    filename_generator: random
+    root_folder_id: folder_id_from_url
+    oauth_token: secrets/gd-token.json # needs to be generated with scripts/create_update_gdrive_oauth_token.py
+    service_account: "secrets/service_account.json"
--- a/orchestration.example.yaml
+++ b/orchestration.example.yaml
@@ -1,82 +0,0 @@
-steps:
-  # only 1 feeder allowed
-  # a feeder could be in an "infinite loop" for example: gsheets_infinite feeder which holds-> this could be an easy logic addiction by modifying for each to while not feeder.done() if it becomes necessary
-  feeder: gsheet_feeder # default -> only expects URL from CLI
-  archivers: # order matters
-    - telethon
-    # - tiktok
-    # - twitter
-    # - instagram
-    # - webarchive # this way it runs as a failsafe only
-  # enrichers:
-  #   - screenshot
-    # - wacz
-    # - webarchive # this way it runs for every case, webarchive extends archiver and enrichment
-    # - thumbnails
-  formatters:
-    - HTMLFormater
-    - PdfFormater
-  storages:
-    - local_storage
-    - s3
-  databases:
-    - gsheets_db
-    - mongo_db
-
-
-
-configurations:
-  global:
-    - save_logs: False
-  gsheet_feeder:
-    sheet: my-auto-archiver
-    header: 2 # defaults to 1 in GSheetsFeeder
-    service_account: "secrets/service_account.json"
-    # allow_worksheets: "allowed"
-    # block_worksheets: "blocked1,blocked2"
-    columns:
-        'url': 'link'
-        'status': 'archive status'
-        'folder': 'destination folder'
-        'archive': 'archive location'
-        'date': 'archive date'
-        'thumbnail': 'thumbnail'
-        'thumbnail_index': 'thumbnail index'
-        'timestamp': 'upload timestamp'
-        'title': 'upload title'
-        'duration': 'duration'
-        'screenshot': 'screenshot'
-        'hash': 'hash'
-        'wacz': 'wacz'
-        'replaywebpage': 'replaywebpage'
-  telethon:
-    api_id: "1234567"
-    api_hash: "examplehash"
-    session_file: "secrets/anon"
-    channel_invites:
-      - invite: https://t.me/+XXXXXXXXXXXXXX
-        id: 1000000000
-      - invite: https://t.me/joinchat/XXXXXXXXXXXXXX
-        id: 1000000001
-
-  tiktok:
-    api_keys:
-      - username: 1
-        password: 2
-      - username: 3
-        password: 4
-    username: "abc"
-    password: "123"
-    token: "here"
-  screenshot:
-    width: 1280
-    height: 4600
-  wacz:
-    profile: secrets/profile.tar.gz
-  webarchive:
-    api_key: "12345"
-  s3: 
-    - bucket: 123
-    - region: "nyc3"
-    - cdn: "{region}{bucket}"
-
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -3,7 +3,7 @@

 set -e

-TAG=$(python -c 'from src.auto_archiver.version import VERSION; print("v" + VERSION)')
+TAG=$(python -c 'from src.auto_archiver.version import __version__; print("v" + __version__)')

 read -p "Creating new release for $TAG. Do you want to continue? [Y/n] " prompt

--- a/setup.cfg
+++ b/setup.cfg
@@ -1,32 +1,36 @@
 [metadata]
 name = auto_archiver
-version = 2.0.0
+version = attr: auto_archiver.version.__version__
 author = Bellingcat
 author_email = tech@bellingcat.com
 description = Easily archive online media content
-long_description = file: README.md, LICENSE
+long_description = file: README.md
+long_description_content_type = text/markdown
 keywords = archive, oosi, osint, scraping
 license = MIT
 classifiers =
-	Intended Audience :: Developers,
-	Intended Audience :: Science/Research,
-	License :: OSI Approved :: MIT License,
-	Programming Language :: Python :: 3,
+	Intended Audience :: Developers
+	Intended Audience :: Science/Research
+	License :: OSI Approved :: MIT License
+	Programming Language :: Python :: 3
+project_urls = 
+	Source Code = https://github.com/bellingcat/auto-archiver
+	Bug Tracker = https://github.com/bellingcat/auto-archiver/issues
+	Bellingcat = https://www.bellingcat.com
+platforms = any

 [options]
 setup_requires =
    setuptools-pipfile
 zip_safe = False
-include_package_data = True
 package_dir=
    =src
 packages=find:
 find_packages=true
 python_requires = >=3.8

-# [options.package_data]
-# * = *.txt, *.rst
-# hello = *.msg
+[options.package_data]
+* = *.html

 [options.entry_points]
 console_scripts =
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,4 @@
+from setuptools import setup
+
+if __name__ == "__main__":
+    setup()
--- a/src/Pipfile
+++ b/src/Pipfile
@@ -1,39 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-gspread = "*"
-boto3 = "*"
-argparse = "*"
-beautifulsoup4 = "*"
-tiktok-downloader = "*"
-bs4 = "*"
-loguru = "*"
-ffmpeg-python = "*"
-selenium = "*"
-snscrape = "*"
-yt-dlp = "*"
-telethon = "*"
-google-api-python-client = "*"
-google-auth-httplib2 = "*"
-google-auth-oauthlib = "*"
-oauth2client = "*"
-python-slugify = "*"
-pyyaml = "*"
-dateparser = "*"
-vk-url-scraper = "*"
-python-twitter-v2 = "*"
-instaloader = "*"
-tqdm = "*"
-jinja2 = "*"
-cryptography = "==38.0.4"
-dataclasses-json = "*"
-
-[requires]
-python_version = "3.9"
-
-[dev-packages]
-autopep8 = "*"
-setuptools-pipfile = "*"
--- a/src/Pipfile.lock
+++ b/src/Pipfile.lock
--- a/src/auto_archiver/init.py
+++ b/src/auto_archiver/init.py
@@ -4,4 +4,4 @@ from . import archivers, databases, enrichers, feeders, formatters, storages, ut
 from .core.orchestrator import ArchivingOrchestrator
 from .core.config import Config
 # making accessible directly
-from .core.metadata import Metadata
+from .core.metadata import Metadata
--- a/src/auto_archiver/archivers/init.py
+++ b/src/auto_archiver/archivers/init.py
@@ -3,6 +3,7 @@ from .telethon_archiver import TelethonArchiver
 from .twitter_archiver import TwitterArchiver
 from .twitter_api_archiver import TwitterApiArchiver
 from .instagram_archiver import InstagramArchiver
+from .instagram_tbot_archiver import InstagramTbotArchiver
 from .tiktok_archiver import TiktokArchiver
 from .telegram_archiver import TelegramArchiver
 from .vk_archiver import VkArchiver
--- a/src/auto_archiver/archivers/instagram_tbot_archiver.py
+++ b/src/auto_archiver/archivers/instagram_tbot_archiver.py
@@ -0,0 +1,88 @@
+
+from telethon.sync import TelegramClient
+from loguru import logger
+import time, os
+from sqlite3 import OperationalError
+from . import Archiver
+from ..core import Metadata, Media
+
+
+class InstagramTbotArchiver(Archiver):
+    """
+    calls a telegram bot to fetch instagram posts/stories... and gets available media from it
+    https://github.com/adw0rd/instagrapi
+    https://t.me/instagram_load_bot
+    """
+    name = "instagram_tbot_archiver"
+
+    def __init__(self, config: dict) -> None:
+        super().__init__(config)
+        self.assert_valid_string("api_id")
+        self.assert_valid_string("api_hash")
+        self.timeout = int(self.timeout)
+        try:
+            self.client = TelegramClient(self.session_file, self.api_id, self.api_hash)
+        except OperationalError as e:
+            # NOTE(review): self.client stays unset on this path, so setup()/download() will fail later
+            logger.error(f"Unable to access the {self.session_file} session, please make sure you don't use the same session file here and in telethon_archiver. if you do then disable at least one of the archivers for the 1st time you setup telethon session: {e}")
+
+    @staticmethod
+    def configs() -> dict:
+        return {
+            "api_id": {"default": None, "help": "telegram API_ID value, go to https://my.telegram.org/apps"},
+            "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"},
+            "session_file": {"default": "secrets/anon-insta", "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value."},
+            "timeout": {"default": 15, "help": "timeout to fetch the instagram content in seconds."},
+        }
+
+    def setup(self) -> None:
+        # opening the client once verifies that the telegram login/session works
+        logger.info(f"SETUP {self.name} checking login...")
+        with self.client.start():
+            logger.success(f"SETUP {self.name} login works.")
+
+    def download(self, item: Metadata) -> Metadata:
+        """
+        sends the URL to the instagram_load_bot telegram chat and collects the media/text it replies with,
+        returns False for non-instagram URLs, for links the bot rejects, or when the bot sent nothing back
+        """
+        url = item.get_url()
+        if "instagram.com" not in url: return False
+
+        result = Metadata()
+        tmp_dir = item.get_tmp_dir()
+        with self.client.start():
+            chat = self.client.get_entity("instagram_load_bot")
+            since_id = self.client.send_message(entity=chat, message=url).id
+
+            attempts = 0
+            seen_media = []
+            message = ""
+            time.sleep(4)
+            # media is added before text by the bot so it can be used as a stop-logic mechanism
+            while attempts < self.timeout and (not message or not len(seen_media)):
+                attempts += 1
+                time.sleep(1)
+                for post in self.client.iter_messages(chat, min_id=since_id):
+                    since_id = max(since_id, post.id)
+                    if post.media and post.id not in seen_media:
+                        filename_dest = os.path.join(tmp_dir, f'{chat.id}_{post.id}')
+                        media = self.client.download_media(post.media, filename_dest)
+                        if media:
+                            result.add_media(Media(media))
+                            seen_media.append(post.id)
+                    if post.message: message += post.message
+
+            if "You must enter a URL to a post" in message:
+                logger.debug(f"invalid link {url=} for {self.name}: {message}")
+                return False
+
+            if not message and not seen_media:
+                # nothing came back before the timeout: do not flag an empty result as a success
+                logger.debug(f"no reply from bot for {url=} in {self.name}")
+                return False
+
+            if message:
+                result.set_content(message).set_title(message[:128])
+
+            return result.success("insta-via-bot")
--- a/src/auto_archiver/archivers/telethon_archiver.py
+++ b/src/auto_archiver/archivers/telethon_archiver.py
@@ -114,7 +114,7 @@ class TelethonArchiver(Archiver):
        with self.client.start():
        # with self.client.start(bot_token=self.bot_token):
            try:
-                post = self.client.get_messages(chat,   ids=post_id)
+                post = self.client.get_messages(chat, ids=post_id)
            except ValueError as e:
                logger.error(f"Could not fetch telegram {url} possibly it's private: {e}")
                return False
--- a/src/auto_archiver/archivers/twitter_archiver.py
+++ b/src/auto_archiver/archivers/twitter_archiver.py
@@ -37,7 +37,7 @@ class TwitterArchiver(Archiver):
        return self.link_clean_pattern.sub("\\1", url)

    def is_rearchivable(self, url: str) -> bool:
-        # Twitter posts are static
+        # Twitter posts are static (for now)
        return False

    def download(self, item: Metadata) -> Metadata:
@@ -86,7 +86,7 @@ class TwitterArchiver(Archiver):
            media.filename = self.download_from_url(media.get("src"), f'{slugify(url)}_{i}{ext}', item)
            result.add_media(media)

-        return result.success("twitter")
+        return result.success("twitter-snscrape")

    def download_alternative(self, item: Metadata, url: str, tweet_id: str) -> Metadata:
        """
--- a/src/auto_archiver/archivers/youtubedl_archiver.py
+++ b/src/auto_archiver/archivers/youtubedl_archiver.py
@@ -6,7 +6,7 @@ from ..core import Metadata, Media


 class YoutubeDLArchiver(Archiver):
-    name = "youtubedl_enricher"
+    name = "youtubedl_archiver"

    def __init__(self, config: dict) -> None:
        super().__init__(config)
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -51,7 +51,7 @@ class Config:
                epilog="Check the code at https://github.com/bellingcat/auto-archiver"
            )

-            parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='config.yaml')
+            parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'orchestration.yaml\')', default='orchestration.yaml')

        for configurable in self.configurable_parents:
            child: Step
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -63,6 +63,9 @@ class Metadata:
    def is_success(self) -> bool:
        return "success" in self.status

+    def is_empty(self) -> bool:
+        return not self.is_success() and len(self.media) == 0 and len(self.get_clean_metadata()) <= 2  # url, processed_at
+
    @property  # getter .netloc
    def netloc(self) -> str:
        return urlparse(self.get_url()).netloc
@@ -122,7 +125,7 @@ class Metadata:
        for m in self.media:
            if m.get("id") == id: return m
        return default
-    
+
    def get_first_image(self, default=None) -> Media:
        for m in self.media:
            if "image" in m.mimetype: return m
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -31,7 +31,6 @@ class ArchivingOrchestrator:
            self.feed_item(item)

    def feed_item(self, item: Metadata) -> Metadata:
-        print("ARCHIVING", item)
        try:
            with tempfile.TemporaryDirectory(dir="./") as tmp_dir:
                item.set_tmp_dir(tmp_dir)
@@ -124,6 +123,9 @@ class ArchivingOrchestrator:
                s.store(final_media, result)
            result.set_final_media(final_media)

+        if result.is_empty():
+            result.status = "nothing archived"
+
        # signal completion to databases (DBs, Google Sheets, CSV, ...)
        for d in self.databases: d.done(result)

--- a/src/auto_archiver/databases/gsheet_db.py
+++ b/src/auto_archiver/databases/gsheet_db.py
@@ -2,10 +2,8 @@ from typing import Union, Tuple
 import datetime
 from urllib.parse import quote

-# from metadata import Metadata
 from loguru import logger

-# from . import Enricher
 from . import Database
 from ..core import Metadata
 from ..core import Media
@@ -61,13 +59,13 @@ class GsheetsDb(Database):
        cell_updates.append((row, 'status', item.status))

        media: Media = item.get_final_media()
-
-        batch_if_valid('archive', "\n".join(media.urls))
+        if hasattr(media, "urls"):
+            batch_if_valid('archive', "\n".join(media.urls))
        batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
        batch_if_valid('title', item.get_title())
        batch_if_valid('text', item.get("content", "")[:500])
        batch_if_valid('timestamp', item.get_timestamp())
-        if (screenshot := item.get_media_by_id("screenshot")):
+        if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
            batch_if_valid('screenshot', "\n".join(screenshot.urls))

        if (thumbnail := item.get_first_image("thumbnail")):
--- a/src/auto_archiver/enrichers/hash_enricher.py
+++ b/src/auto_archiver/enrichers/hash_enricher.py
@@ -16,11 +16,13 @@ class HashEnricher(Enricher):
        super().__init__(config)
        algo_choices = self.configs()["algorithm"]["choices"]
        assert self.algorithm in algo_choices, f"Invalid hash algorithm selected, must be one of {algo_choices} (you selected {self.algorithm})."
+        self.chunksize = int(self.chunksize)

    @staticmethod
    def configs() -> dict:
        return {
-            "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]}
+            "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]},
+            "chunksize": {"default": 1.6e7, "help": "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB"},
        }

    def enrich(self, to_enrich: Metadata) -> None:
@@ -28,12 +30,19 @@ class HashEnricher(Enricher):
        logger.debug(f"calculating media hashes for {url=} (using {self.algorithm})")

        for i, m in enumerate(to_enrich.media):
-            with open(m.filename, "rb") as f:
-                bytes = f.read()  # read entire file as bytes
-                hash = None
-                if self.algorithm == "SHA-256":
-                    hash = hashlib.sha256(bytes)
-                elif self.algorithm == "SHA3-512":
-                    hash = hashlib.sha3_512(bytes)
-                else: continue
-                to_enrich.media[i].set("hash", f"{self.algorithm}:{hash.hexdigest()}")
+            if len(hd := self.calculate_hash(m.filename)):
+                to_enrich.media[i].set("hash", f"{self.algorithm}:{hd}")
+
+    def calculate_hash(self, filename):
+        """
+        Returns the hex digest of `filename` for the configured algorithm,
+        or "" when the algorithm is not one of the supported choices.
+        """
+        factories = {"SHA-256": hashlib.sha256, "SHA3-512": hashlib.sha3_512}
+        if self.algorithm not in factories: return ""
+        hasher = factories[self.algorithm]()
+        with open(filename, "rb") as f:
+            # chunked reads keep large files from being loaded into RAM at once
+            for chunk in iter(lambda: f.read(self.chunksize), b""):
+                hasher.update(chunk)
+        return hasher.hexdigest()
--- a/src/auto_archiver/enrichers/screenshot_enricher.py
+++ b/src/auto_archiver/enrichers/screenshot_enricher.py
@@ -3,7 +3,7 @@ import time, uuid, os
 from selenium.common.exceptions import TimeoutException

 from . import Enricher
-from ..utils import Webdriver
+from ..utils import Webdriver, UrlUtil
 from ..core import Media, Metadata

 class ScreenshotEnricher(Enricher):
@@ -14,16 +14,21 @@ class ScreenshotEnricher(Enricher):
        return {
            "width": {"default": 1280, "help": "width of the screenshots"},
            "height": {"default": 720, "help": "height of the screenshots"},
-            "timeout": {"default": 60, "help": "timeout for taking the screenshot"}
+            "timeout": {"default": 60, "help": "timeout for taking the screenshot"},
+            "sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"}
        }

    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
+        if UrlUtil.is_auth_wall(url):
+            logger.debug(f"[SKIP] SCREENSHOT since url is behind AUTH WALL: {url=}")
+            return
+
        logger.debug(f"Enriching screenshot for {url=}")
        with Webdriver(self.width, self.height, self.timeout, 'facebook.com' in url) as driver:
            try:
                driver.get(url)
-                time.sleep(2)
+                time.sleep(int(self.sleep_before_screenshot))
                screenshot_file = os.path.join(to_enrich.get_tmp_dir(), f"screenshot_{str(uuid.uuid4())[0:8]}.png")
                driver.save_screenshot(screenshot_file)
                to_enrich.add_media(Media(filename=screenshot_file), id="screenshot")
@@ -31,4 +36,3 @@ class ScreenshotEnricher(Enricher):
                logger.info("TimeoutException loading page for screenshot")
            except Exception as e:
                logger.error(f"Got error while loading webdriver for screenshot enricher: {e}")
-        # return None
--- a/src/auto_archiver/enrichers/wacz_enricher.py
+++ b/src/auto_archiver/enrichers/wacz_enricher.py
@@ -3,6 +3,7 @@ from loguru import logger

 from ..core import Media, Metadata
 from . import Enricher
+from ..utils import UrlUtil


 class WaczEnricher(Enricher):
@@ -20,11 +21,17 @@ class WaczEnricher(Enricher):
        return {
            "profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."},
            "timeout": {"default": 90, "help": "timeout for WACZ generation in seconds"},
+            "ignore_auth_wall": {"default": True, "help": "skip URL if it is behind authentication wall, set to False if you have browsertrix profile configured for private content."},
        }

    def enrich(self, to_enrich: Metadata) -> bool:
        # TODO: figure out support for browsertrix in docker
        url = to_enrich.get_url()
+
+        if self.ignore_auth_wall and UrlUtil.is_auth_wall(url):  # honour the ignore_auth_wall config instead of always skipping
+            logger.debug(f"[SKIP] WACZ since url is behind AUTH WALL: {url=}")
+            return
+
        logger.debug(f"generating WACZ for {url=}")
        collection = str(uuid.uuid4())[0:8]
        browsertrix_home = os.path.abspath(to_enrich.get_tmp_dir())
--- a/src/auto_archiver/enrichers/wayback_enricher.py
+++ b/src/auto_archiver/enrichers/wayback_enricher.py
@@ -1,8 +1,10 @@
 from loguru import logger
 import time, requests

+
 from . import Enricher
 from ..archivers import Archiver
+from ..utils import UrlUtil
 from ..core import Metadata

 class WaybackArchiverEnricher(Enricher, Archiver):
@@ -33,6 +35,10 @@ class WaybackArchiverEnricher(Enricher, Archiver):

    def enrich(self, to_enrich: Metadata) -> bool:
        url = to_enrich.get_url()
+        if UrlUtil.is_auth_wall(url):
+            logger.debug(f"[SKIP] WAYBACK since url is behind AUTH WALL: {url=}")
+            return
+
        logger.debug(f"calling wayback for {url=}")

        if to_enrich.get("wayback"):
--- a/src/auto_archiver/formatters/html_formatter.py
+++ b/src/auto_archiver/formatters/html_formatter.py
@@ -3,7 +3,9 @@ from dataclasses import dataclass
 import mimetypes, uuid, os, pathlib
 from jinja2 import Environment, FileSystemLoader
 from urllib.parse import quote
+from loguru import logger

+from ..version import __version__
 from ..core import Metadata, Media
 from . import Formatter

@@ -25,16 +27,21 @@ class HtmlFormatter(Formatter):
    @staticmethod
    def configs() -> dict:
        return {
-            "detect_thumbnails": {"default": True, "help": "if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'"},
-
+            "detect_thumbnails": {"default": True, "help": "if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'"}
        }

    def format(self, item: Metadata) -> Media:
+        url = item.get_url()
+        if item.is_empty():
+            logger.debug(f"[SKIP] FORMAT there is no media or metadata to format: {url=}")
+            return
+
        content = self.template.render(
-            url=item.get_url(),
+            url=url,
            title=item.get_title(),
            media=item.media,
-            metadata=item.get_clean_metadata()
+            metadata=item.get_clean_metadata(),
+            version=__version__
        )
        html_path = os.path.join(item.get_tmp_dir(), f"formatted{str(uuid.uuid4())}.html")
        with open(html_path, mode="w", encoding="utf-8") as outf:
--- a/src/auto_archiver/formatters/templates/init.py
+++ b/src/auto_archiver/formatters/templates/init.py
--- a/src/auto_archiver/formatters/templates/html_template.html
+++ b/src/auto_archiver/formatters/templates/html_template.html
@@ -162,7 +162,7 @@
        {% endfor %}
    </table>

-    <p style="text-align:center;">Made with <a href="https://github.com/bellingcat/auto-archiver">bellingcat/auto-archiver</a></p>
+    <p style="text-align:center;">Made with <a href="https://github.com/bellingcat/auto-archiver">bellingcat/auto-archiver</a> v{{ version }}</p>
 </body>
 <script defer>
    // notification logic
--- a/src/auto_archiver/storages/storage.py
+++ b/src/auto_archiver/storages/storage.py
@@ -5,6 +5,7 @@ import hashlib
 from typing import IO, Any

 from ..core import Media, Metadata, Step
+from ..enrichers import HashEnricher
 from loguru import logger
 import os, uuid
 from slugify import slugify
@@ -64,18 +65,18 @@ class Storage(Step):
        filename, ext = os.path.splitext(media.filename)

        # path_generator logic
-        if self.path_generator == "flat": 
+        if self.path_generator == "flat":
            path = ""
-            filename = slugify(filename) # in case it comes with os.sep
+            filename = slugify(filename)  # in case it comes with os.sep
        elif self.path_generator == "url": path = slugify(item.get_url())
        elif self.path_generator == "random":
            path = item.get("random_path", str(uuid.uuid4())[:16], True)

        # filename_generator logic
        if self.filename_generator == "random": filename = str(uuid.uuid4())[:16]
-        elif self.filename_generator == "static": 
-            with open(media.filename, "rb") as f:
-                bytes = f.read()  # read entire file as bytes
-            filename = hashlib.sha256(bytes).hexdigest()[:24]
+        elif self.filename_generator == "static":
+            he = HashEnricher({"hash_enricher": {"algorithm": "SHA-256", "chunksize": 1.6e7}})
+            hd = he.calculate_hash(media.filename)
+            filename = hd[:24]

-        media.key = os.path.join(folder, path, f"{filename}{ext}")
+        media.key = os.path.join(folder, path, f"{filename}{ext}")
--- a/src/auto_archiver/utils/init.py
+++ b/src/auto_archiver/utils/init.py
@@ -2,4 +2,5 @@
 from .gworksheet import GWorksheet
 from .misc import *
 from .webdriver import Webdriver
-from .gsheet import Gsheets
+from .gsheet import Gsheets
+from .url import UrlUtil
--- a/src/auto_archiver/utils/gworksheet.py
+++ b/src/auto_archiver/utils/gworksheet.py
@@ -40,11 +40,11 @@ class GWorksheet:

    def _col_index(self, col: str):
        self._check_col_exists(col)
-        return self.headers.index(self.columns[col])
+        return self.headers.index(self.columns[col].lower())

    def col_exists(self, col: str):
        self._check_col_exists(col)
-        return self.columns[col] in self.headers
+        return self.columns[col].lower() in self.headers

    def count_rows(self):
        return len(self.values)
--- a/src/auto_archiver/utils/url.py
+++ b/src/auto_archiver/utils/url.py
@@ -0,0 +1,23 @@
+import re
+
+class UrlUtil:
+    """helpers to classify URLs before running steps that need public access"""
+    # private telegram links look like t.me/c/<chat>/<post id>
+    telegram_private = re.compile(r"https?:\/\/t\.me(\/c)\/(.+)\/(\d+)")
+    # instagram URLs are treated as auth-walled; matches with or without the www. prefix
+    is_instagram = re.compile(r"https?:\/\/(www\.)?instagram\.com")
+    is_istagram = is_instagram  # misspelled original name, kept for backwards compatibility
+
+    @staticmethod
+    def clean(url): return url
+
+    @staticmethod
+    def is_auth_wall(url):
+        """
+        checks if URL is behind an authentication wall meaning steps like wayback, wacz, ... may not work
+        """
+        if UrlUtil.telegram_private.match(url): return True
+        if UrlUtil.is_instagram.match(url): return True
+
+        return False
+
--- a/src/auto_archiver/version.py
+++ b/src/auto_archiver/version.py
@@ -1,12 +1,12 @@

 _MAJOR = "0"
-_MINOR = "2"
+_MINOR = "4"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "10"
+_PATCH = "5"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""

 VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
-VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
+__version__ = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
Author	SHA1	Message	Date
msramalho	39818e648a	Bump version to v0.4.5 for release	2023-03-16 15:05:42 +00:00
Miguel Sozinho Ramalho	2bbf534d67	Merge pull request #72 from milesmcc/patch-1 Fix hash enricher for flatfile output (closes #71)	2023-03-16 15:04:55 +00:00
R. Miles McCain	6be7536fad	Fix hash enricher for flatfile output (closes #71 )	2023-03-14 13:37:54 -07:00
msramalho	0654e8c5c6	hash calculation in chunks to avoid exhausting RAM	2023-03-10 11:34:29 +00:00
msramalho	0e3c427371	Bump version to v0.4.3 for release	2023-02-27 10:30:06 +01:00
msramalho	7497bc08c0	Bump version to v0.4.2 for release	2023-02-23 17:14:29 +01:00
msramalho	49863768fe	vk updates	2023-02-22 18:35:15 +01:00
msramalho	7b9483bbf9	yt-dlp update	2023-02-22 18:28:20 +01:00
msramalho	cd81cae559	auth wall for WACZ	2023-02-20 16:08:45 +00:00
msramalho	23894fad51	normalize columns	2023-02-20 16:08:35 +00:00
msramalho	876988b587	detect invalid url messages instagram bot	2023-02-20 12:22:52 +00:00
msramalho	f95293b84b	support for multiple media instagram	2023-02-20 11:25:02 +00:00
msramalho	2fbcbe4e8b	double session issues	2023-02-20 11:11:39 +00:00
msramalho	d1e4574c6c	readme updates	2023-02-17 16:30:50 +00:00
msramalho	d347b26d37	updating example config	2023-02-17 16:26:23 +00:00
msramalho	1970fa3c82	new instagram archiver via telegram bot	2023-02-17 16:15:25 +00:00
msramalho	aa5430451e	instagram archiver via telegram bot	2023-02-17 15:46:29 +00:00
msramalho	f35875a94c	name fix	2023-02-17 15:46:05 +00:00
msramalho	5505255ea3	url auth wall detect	2023-02-17 15:45:58 +00:00
msramalho	da17b3f68a	name fix	2023-02-17 15:45:35 +00:00
msramalho	d6dbdec6ac	example	2023-02-09 12:32:55 +00:00
msramalho	224ebe7ee8	links	2023-02-08 22:27:56 +00:00
msramalho	54a1bc2172	update readme	2023-02-08 22:26:24 +00:00
msramalho	77948207d1	update	2023-02-08 22:24:40 +00:00
msramalho	60552ae0ea	update readme	2023-02-08 22:23:25 +00:00
msramalho	f255271ecb	update README	2023-02-08 22:17:22 +00:00
msramalho	db45e0980e	Bump version to v0.3.0 for release	2023-02-08 22:13:46 +00:00
msramalho	2a7ece5dcc	cleanups and docs	2023-02-08 22:13:19 +00:00
msramalho	d14adf0242	Bump version to v0.2.24 for release	2023-02-08 11:22:53 +00:00
msramalho	75459d2880	docker	2023-02-08 11:22:38 +00:00
msramalho	94406bda7a	Bump version to v0.2.23 for release	2023-02-08 10:42:12 +00:00
msramalho	6244f35cff	Bump version to v0.2.22 for release	2023-02-08 09:50:36 +00:00
msramalho	adb3a7332f	version	2023-02-08 09:49:48 +00:00
msramalho	0d903fa196	Bump version to v0.2.21 for release	2023-02-08 09:42:26 +00:00
msramalho	e5f3e56968	skip existing	2023-02-08 09:37:50 +00:00
msramalho	57e7023f64	Bump version to v0.2.20 for release	2023-02-08 09:27:53 +00:00
msramalho	be9e4b2032	Bump version to v0.2.19 for release	2023-02-08 00:02:55 +00:00
msramalho	59603d1136	Bump version to v0.2.18 for release	2023-02-07 23:59:45 +00:00
msramalho	db32b2db0d	token name	2023-02-07 23:59:33 +00:00
msramalho	d31b3dda52	Bump version to v0.2.17 for release	2023-02-07 23:56:42 +00:00
msramalho	fa593ee9e2	Bump version to v0.2.16 for release	2023-02-07 23:49:12 +00:00
msramalho	9d2f14d3a1	Bump version to v0.2.15 for release	2023-02-07 23:44:04 +00:00
msramalho	f81ff14faa	license to publish	2023-02-07 23:43:50 +00:00
msramalho	5ed38ffaab	clean readme	2023-02-07 23:37:53 +00:00
msramalho	3a70036e71	Bump version to v0.2.13 for release	2023-02-07 23:31:56 +00:00
msramalho	58b6bcef87	3.10	2023-02-07 23:31:48 +00:00
msramalho	4060f3dfb2	Bump version to v0.2.12 for release	2023-02-07 23:27:44 +00:00
msramalho	bf3f433785	pipenv	2023-02-07 23:27:37 +00:00
msramalho	8a419d34d5	Bump version to v0.2.11 for release	2023-02-07 23:24:51 +00:00
msramalho	8bbe7e2057	back to setup	2023-02-07 23:24:44 +00:00