diff --git a/.gitignore b/.gitignore index 4b7e9ce..8501c5d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ config-*.json config.yaml config-*.yaml logs/* -local_archive/ \ No newline at end of file +local_archive/ +vk_config*.json \ No newline at end of file diff --git a/Pipfile b/Pipfile index fedfd51..1b55d86 100644 --- a/Pipfile +++ b/Pipfile @@ -22,6 +22,8 @@ google-auth-oauthlib = "*" oauth2client = "*" python-slugify = "*" pyyaml = "*" +vk-api = "*" +dateparser = "*" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index 9f1a12b..0b911f3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "602a05a8fa475181c24714ab57188a417fdfddf373a7dab4fa0ba0fcb7ce8d0a" + "sha256": "d06498403429a8fffcd6d049b314872c0095abee7fb9c6ffd3ba3d7b0c31c8cd" }, "pipfile-spec": 6, "requires": { @@ -50,19 +50,19 @@ }, "boto3": { "hashes": [ - "sha256:28ab0947c49a6fb2409004d4a10b2828aec231cb95ca1d800cb1411e191cc201", - "sha256:833e67edfb73f2cc22ff27a1c33728686dc90a9e81ba2551f9462ea2d1b04f41" + "sha256:0821212ff521cb934801b1f655cef3c0e976775324b1018f1751700d0f42dbb4", + "sha256:87d34861727699c795bf8d65703f2435e75f12879bdd483e08b35b7c5510e8c8" ], "index": "pypi", - "version": "==1.24.8" + "version": "==1.24.9" }, "botocore": { "hashes": [ - "sha256:ad92702930d6cb7b587fc2f619672feb74d5218f8de387a28c2905820db79027", - "sha256:db6667b8dfd175d16187653942cd91dd1f0cf36adc0ea9d7a0805ba4d2a3321f" + "sha256:5669b982b0583e73daef1fe0a4df311055e6287326f857dbb1dcc2de1d8412ad", + "sha256:7a7588b0170e571317496ac4104803329d5bc792bc008e8a757ffd440f1b6fa6" ], "markers": "python_version >= '3.7'", - "version": "==1.27.8" + "version": "==1.27.9" }, "brotli": { "hashes": [ @@ -152,7 +152,7 @@ "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7", "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==2022.5.18.1" }, "cffi": { @@ -267,6 +267,14 @@ ], "version": "==37.0.2" }, + "dateparser": { + "hashes": [ + "sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9", + "sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628" + ], + "index": "pypi", + "version": "==1.1.1" + }, "ffmpeg-python": { "hashes": [ "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", @@ -303,7 +311,7 @@ "sha256:958024c6aa3460b08f35741231076a4dd9a4c819a6a39d44da9627febe8b28f0", "sha256:ce1daa49644b50398093d2a9ad886501aa845e2602af70c3001b9f402a9d7359" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==2.8.1" }, "google-api-python-client": { @@ -316,11 +324,11 @@ }, "google-auth": { "hashes": [ - "sha256:8a954960f852d5f19e6af14dd8e75c20159609e85d8db37e4013cc8c3824a7e1", - "sha256:df549a1433108801b11bdcc0e312eaf0d5f0500db42f0523e4d65c78722e8475" + "sha256:819b70140d05501739e1387291d39f0de3b4dff3b00ae4aff8e7a05369957f89", + "sha256:9b1da39ab8731c3061f36fefde9f8bb902dbee9eb28e3a67e8cfa7dc1be76227" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==2.7.0" + "version": "==2.8.0" }, "google-auth-httplib2": { "hashes": [ @@ -343,7 +351,7 @@ "sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c", "sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==1.56.2" }, "gspread": { @@ -359,7 +367,7 @@ "sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06", "sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.13.0" }, "httplib2": { @@ -554,7 +562,7 @@ "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2", "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==3.2.0" }, "outcome": { @@ -682,7 +690,7 @@ "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb", "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==2.12.0" }, "pyopenssl": { @@ -724,6 +732,21 @@ "index": "pypi", "version": "==6.1.2" }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "version": "==2022.1" + }, + "pytz-deprecation-shim": { + "hashes": [ + "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", + "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==0.1.0.post0" + }, "pyyaml": { "hashes": [ "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", @@ -763,7 +786,88 @@ "index": "pypi", "version": "==6.0" }, + "regex": { + "hashes": [ + "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14", + "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9", + "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204", + "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f", + "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737", + "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b", + "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3", + "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4", + "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac", + "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f", + "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29", + "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772", + "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1", + "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863", + "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66", + "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed", + "sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47", + "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f", + "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f", + "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008", + "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d", + "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571", + "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0", + "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a", + "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3", + "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7", + "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447", + "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493", + "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4", + "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede", + "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640", + "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd", + "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c", + "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee", + "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30", + "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b", + "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec", + "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1", + "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e", + "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8", + "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9", + "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231", + "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7", + "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729", + "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960", + "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056", + "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357", + "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7", + "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3", + "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7", + "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573", + "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0", + "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178", + "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f", + "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834", + "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c", + "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015", + "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0", + "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57", + "sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635", + "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07", + "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2", + "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1", + "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b", + "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2", + "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5", + "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b", + "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86", + "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5", + "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93", + "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0", + "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f", + "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d", + "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==2022.3.2" + }, "requests": { + "extras": [], "hashes": [ "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f", "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b" @@ -799,7 +903,7 @@ "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17", "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb" ], - "markers": "python_version >= '3.6' and python_version < '4'", + "markers": "python_version < '4' and python_full_version >= '3.6.0'", "version": "==4.8" }, "s3transfer": { @@ -853,7 +957,7 @@ "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==2.3.2.post1" }, "telethon": { @@ -902,12 +1006,28 @@ "markers": "python_version >= '3.5'", "version": "==0.9.2" }, + "tzdata": { + "hashes": [ + "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9", + "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==2022.1" + }, + "tzlocal": { + "hashes": [ + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==4.2" + }, "uritemplate": { "hashes": [ "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==4.1.1" }, "urllib3": { @@ -922,6 +1042,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.26.9" }, + "vk-api": { + "hashes": [ + "sha256:11c731e214ebc7fa911db81efb021f97587493a5402b992f24748fe1cd9d7afc", + "sha256:d0ae766fa93a40d47c5da045d94201721bf766dbde122a1d2253516b35c5edf3" + ], + "index": "pypi", + "version": "==11.9.8" + }, "websockets": { "hashes": [ "sha256:07cdc0a5b2549bcfbadb585ad8471ebdc7bdf91e32e34ae3889001c1c106a6af", diff --git a/README.md b/README.md index 39204a2..e60774c 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ With this configuration, the archiver should archive and store all media added t # auto_auto_archiver -To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) This script takes one command line argument, with `--sheet`, the name of the sheet. It must be shared with the same service account. +To make it easier to set up new auto-archiver sheets, the auto-auto-archiver will look at a particular sheet and run the auto-archiver on every sheet name in column A, starting from row 11. (It starts here to support instructional text in the first rows of the sheet, as shown below.) You can simply use your default config as for `auto_archiver.py` but use `--sheet` to specify the name of the sheet that lists the names of sheets to archive.It must be shared with the same service account.  @@ -152,15 +152,16 @@ Code is split into functional concepts: 1. [GWorksheet](utils/gworksheet.py) - facilitates some of the reading/writing tasks for a Google Worksheet ### Current Archivers -Archivers are tested in a meaningful order with Wayback Machine being the default, that can easily be changed in the code. +Archivers are tested in a meaningful order with Wayback Machine being the failsafe, that can easily be changed in the code. ```mermaid graph TD - A(Archiver) -->|parent of| B(YoutubeDLArchiver) - A -->|parent of| C(TikTokArchiver) - A -->|parent of| D(TwitterArchiver) + A(Archiver) -->|parent of| B(TelethonArchiver) + A -->|parent of| C(TiktokArchiver) + A -->|parent of| D(YoutubeDLArchiver) A -->|parent of| E(TelegramArchiver) - A -->|parent of| F(TelethonArchiver) - A -->|parent of| G(WaybackArchiver) + A -->|parent of| F(TwitterArchiver) + A -->|parent of| G(VkArchiver) + A -->|parent of| H(WaybackArchiver) ``` ### Current Storages ```mermaid diff --git a/archivers/__init__.py b/archivers/__init__.py index 40fbb4b..33700d1 100644 --- a/archivers/__init__.py +++ b/archivers/__init__.py @@ -5,4 +5,5 @@ from .telethon_archiver import TelethonArchiver from .tiktok_archiver import TiktokArchiver from .wayback_archiver import WaybackArchiver from .youtubedl_archiver import YoutubeDLArchiver -from .twitter_archiver import TwitterArchiver \ No newline at end of file +from .twitter_archiver import TwitterArchiver +from .vk_archiver import VkArchiver \ No newline at end of file diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py index 18e4c1b..a8b0413 100644 --- a/archivers/base_archiver.py +++ b/archivers/base_archiver.py @@ -1,4 +1,4 @@ -import os, datetime, shutil, hashlib, time, requests, re +import os, datetime, shutil, hashlib, time, requests, re, mimetypes from dataclasses import dataclass from abc import ABC, abstractmethod from urllib.parse import urlparse @@ -58,7 +58,13 @@ class Archiver(ABC):
{object}"
page += f"