From d4825196f13e8037cbd9005fa4d630d17f42c0ef Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 10 Jan 2023 00:22:16 +0000 Subject: [PATCH] html template working with jinja templates --- Pipfile | 2 + Pipfile.lock | 299 ++++++++++---------- src/configs/v2config.py | 9 +- src/formatters/__init__.py | 2 + src/formatters/formatter.py | 21 ++ src/formatters/html_formatter.py | 36 +++ src/formatters/templates/html_template.html | 101 +++++++ src/media.py | 9 +- src/metadata.py | 26 +- src/orchestrator.py | 27 +- src/steps/gsheet.py | 1 + 11 files changed, 369 insertions(+), 164 deletions(-) create mode 100644 src/formatters/__init__.py create mode 100644 src/formatters/formatter.py create mode 100644 src/formatters/html_formatter.py create mode 100644 src/formatters/templates/html_template.html diff --git a/Pipfile b/Pipfile index 2095f2b..d79388d 100644 --- a/Pipfile +++ b/Pipfile @@ -27,6 +27,8 @@ vk-url-scraper = "*" python-twitter-v2 = "*" instaloader = "*" tqdm = "*" +jinja2 = "*" +cryptography = "==38.0.4" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index 5bfeba7..83e2607 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "60b8f39d7a466e194c98a3fb6a03f74f03b108f5fac4cce8657c5ffdf6a02962" + "sha256": "bcc36e9ecdf6d383a1010629484eec271699ac23b40be045d9a9669b4c9fac8c" }, "pipfile-spec": 6, "requires": { @@ -34,11 +34,11 @@ }, "attrs": { "hashes": [ - "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6", - "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c" + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" ], - "markers": "python_version >= '3.5'", - "version": "==22.1.0" + "markers": "python_version >= '3.6'", + "version": "==22.2.0" }, "authlib": { "hashes": [ @@ -57,19 +57,19 @@ }, "boto3": { "hashes": [ - "sha256:53badfc5f145b8a3f9117512b41bc5a64db1cce1b549061d8edba68909e63fdf", - "sha256:548081a0f8854bb2eea1e368ab29945478105f56989546f653c75528dcb07d88" + "sha256:96055651f7be882175aa334ad46528e1ad79fb8ca33fa9c3998cc1d985b34eab", + "sha256:e24d65c31780c208768ebcd152d8a0181591c9c8e7d971e23f318d7f41910ba1" ], "index": "pypi", - "version": "==1.26.28" + "version": "==1.26.46" }, "botocore": { "hashes": [ - "sha256:982732e7ed65cb6ed11ea3ce0e32dff2bcd465836c32376154f0802aa0a112c7", - "sha256:f0b8bb976e368dea20a960b47169e31fc0828feb6f0b9f59f1e5be8d08919b10" + "sha256:78bf25933e35eb6354a9e80fe156f86dce4d346a92afe364dfce25c17ab0639f", + "sha256:dbac2fde265f13beb9191ec3ff63b90b515e9ed63875edc3afbd72c5f585e48b" ], "markers": "python_version >= '3.7'", - "version": "==1.29.28" + "version": "==1.29.46" }, "brotli": { "hashes": [ @@ -168,11 +168,11 @@ }, "cachetools": { "hashes": [ - "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757", - "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db" + "sha256:5991bc0e08a1319bb618d3195ca5b6bc76646a49c21d55962977197b301cc1fe", + "sha256:8462eebf3a6c15d25430a8c27c56ac61340b2ecf60c9ce57afc2b97e450e47da" ], "markers": "python_version ~= '3.7'", - "version": "==5.2.0" + "version": "==5.2.1" }, "certifi": { "hashes": [ @@ -269,10 +269,10 @@ }, "cloudscraper": { "hashes": [ - "sha256:5f0cde23774270e8a092de68e0fbd68e17854c767fc2d4042a91bda9e4816871", - "sha256:ec30da6cee60d0a95e898d9b3aaf09291a0d8b6cf751e86c6f3420b699a00091" + "sha256:2776c70f3661c028e59fd306ac2b104882c9b3cb3f798086251e00fc2d72c3a2", + "sha256:3b9753724616ac4d811e7922ddc9dba9b4419749ebaa35b0ba503d442522df2e" ], - "version": "==1.2.66" + "version": "==1.2.67" }, "commonmark": { "hashes": [ @@ -310,7 +310,7 @@ "sha256:ca57eb3ddaccd1112c18fc80abe41db443cc2e9dcb1917078e02dfa010a4f353", "sha256:ce127dd0a6a0811c251a6cddd014d292728484e530d80e872ad9806cfb1c5b3c" ], - "markers": "python_version >= '3.6'", + "index": "pypi", "version": "==38.0.4" }, "dataclasses-json": { @@ -323,19 +323,19 @@ }, "dateparser": { "hashes": [ - "sha256:4431159799b63d8acec5d7d844c5e06edf3d1b0eb2bda6d4cac87134ddddd01c", - "sha256:73ec6e44a133c54076ecf9f9dc0fbe3dd4831f154f977ff06f53114d57c5425e" + "sha256:107f3cc87a60770e10d111349adc1504224a6b60753a47a64b0ec842ab85b5a9", + "sha256:ceb159f1b4a9df54ed6209e91298097deafde476037f8611b4cb2b1cb8b31c58" ], "index": "pypi", - "version": "==1.1.4" + "version": "==1.1.5" }, "exceptiongroup": { "hashes": [ - "sha256:542adf9dea4055530d6e1279602fa5cb11dab2395fa650b8674eaec35fc4a828", - "sha256:bd14967b79cd9bdb54d97323216f8fdf533e278df937aa2a90089e7d6e06e5ec" + "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", + "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" ], "markers": "python_version < '3.11'", - "version": "==1.0.4" + "version": "==1.1.0" }, "ffmpeg-python": { "hashes": [ @@ -347,11 +347,11 @@ }, "filelock": { "hashes": [ - "sha256:7565f628ea56bfcd8e54e42bdc55da899c85c1abfe1b5bcfd147e9188cebb3b2", - "sha256:8df285554452285f79c035efb0c861eb33a4bcfa5b7a137016e32e6a90f9792c" + "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de", + "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d" ], "markers": "python_version >= '3.7'", - "version": "==3.8.2" + "version": "==3.9.0" }, "flask": { "hashes": [ @@ -378,19 +378,19 @@ }, "google-api-python-client": { "hashes": [ - "sha256:03624a28b5ba94f3c3d44761081f5dbf8cabaa20c5c3a96c046457c5713efb9b", - "sha256:bc2447a7479006d98927fb20faa74d892d3758ff68e99b621367632bc42b8af8" + "sha256:9412ad3445518fa9d24d02c673a70b07c9d124990f44763cdf4f5304ca5b4d08", + "sha256:a4ea351db2bb2a9b1a7e96d8fa8de0fcbc31d9e237b724f4a07b243c2d63e9a4" ], "index": "pypi", - "version": "==2.69.0" + "version": "==2.71.0" }, "google-auth": { "hashes": [ - "sha256:6897b93556d8d807ad70701bb89f000183aea366ca7ed94680828b37437a4994", - "sha256:72f12a6cfc968d754d7bdab369c5c5c16032106e52d32c6dfd8484e4c01a6d1f" + "sha256:5045648c821fb72384cdc0e82cc326df195f113a33049d9b62b74589243d2acc", + "sha256:ed7057a101af1146f0554a769930ac9de506aeca4fd5af6543ebe791851a9fbd" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==2.15.0" + "version": "==2.16.0" }, "google-auth-httplib2": { "hashes": [ @@ -410,11 +410,11 @@ }, "googleapis-common-protos": { "hashes": [ - "sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46", - "sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c" + "sha256:c727251ec025947d545184ba17e3578840fc3a24a0516a020479edab660457df", + "sha256:ca3befcd4580dab6ad49356b46bf165bb68ff4b32389f028f1abd7c10ab9519a" ], "markers": "python_version >= '3.7'", - "version": "==1.57.0" + "version": "==1.58.0" }, "gspread": { "hashes": [ @@ -468,7 +468,7 @@ "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" ], - "markers": "python_version >= '3.7'", + "index": "pypi", "version": "==3.1.2" }, "jmespath": { @@ -489,79 +489,86 @@ }, "lxml": { "hashes": [ - "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318", - "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c", - "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b", - "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000", - "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73", - "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d", - "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb", - "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8", - "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2", - "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345", - "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94", - "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e", - "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b", - "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc", - "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a", - "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9", - "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc", - "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387", - "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb", - "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7", - "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4", - "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97", - "sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67", - "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627", - "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7", - "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd", - "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3", - "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7", - "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130", - "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b", - "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036", - "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785", - "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca", - "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91", - "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc", - "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536", - "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391", - "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3", - "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d", - "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21", - "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3", - "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d", - "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29", - "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715", - "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed", - "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25", - "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c", - "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785", - "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837", - "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4", - "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b", - "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2", - "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067", - "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448", - "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d", - "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2", - "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc", - "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c", - "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5", - "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84", - "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8", - "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf", - "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7", - "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e", - "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb", - "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b", - "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3", - "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad", - "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8", - "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f" + "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7", + "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726", + "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03", + "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140", + "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a", + "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05", + "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03", + "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419", + "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4", + "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e", + "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67", + "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50", + "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894", + "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf", + "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947", + "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1", + "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd", + "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3", + "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92", + "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3", + "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457", + "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74", + "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf", + "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1", + "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4", + "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975", + "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5", + "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe", + "sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7", + "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1", + "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2", + "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409", + "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f", + "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f", + "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5", + "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24", + "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e", + "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4", + "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a", + "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c", + "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de", + "sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f", + "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b", + "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5", + "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7", + "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a", + "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c", + "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9", + "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e", + "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab", + "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941", + "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5", + "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45", + "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7", + "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892", + "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746", + "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c", + "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53", + "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe", + "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184", + "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38", + "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df", + "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9", + "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b", + "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2", + "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0", + "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda", + "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b", + "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5", + "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380", + "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33", + "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8", + "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1", + "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889", + "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9", + "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f", + "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==4.9.1" + "version": "==4.9.2" }, "markupsafe": { "hashes": [ @@ -665,31 +672,31 @@ }, "packaging": { "hashes": [ - "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3", - "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3" + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" ], "markers": "python_version >= '3.7'", - "version": "==22.0" + "version": "==23.0" }, "protobuf": { "hashes": [ - "sha256:25266bf373ee06d5d66f9eb1ec9d434b243dccce5c32faf151054cfa6f9dcbf1", - "sha256:260e346927fd4e6fbb49ab545137b19610c24a1d853dc5f29ddf777ab1987211", - "sha256:2c6a4d13732d9b094db31b3841986c38b17ac61a3fe05ee26a779d94c4c3fb43", - "sha256:4922e3320ed70e81f05060822da36923d09fd9e04e17f411f2d8d8d0070f9f5c", - "sha256:4b75c947289a2e9c1f37d21c593f1ef6fb4fed33977dfb2ac84f799eb29a8ff4", - "sha256:4d01ef83517c181d60ea1c6d0b2f644be250ade740d6554a2f5a021b1ad622e3", - "sha256:553e35c0878f6855e55f01a14561e6bce6df79b6636a5acf83b9d9ac7eab7922", - "sha256:85ccb4753ee21de7dc81a7a68a051f25dbe133ffa01a639ac998427d0b223387", - "sha256:a5a14b907a191319e7a58b38c583bbf50deb21e002f723a912c5e4f6969a778e", - "sha256:a944dc9550baae276afc7dc8193191d4c2ad660270a1e5ed5a71539817ebe2e2", - "sha256:bab4b21a986ded225b9392c07ce21c35d790951f51e1ebfd32e4d443b05c3726", - "sha256:c3b9e329b4c247dc3ba5c50f60915a84e08278eb6d9e3fa674d0d04ff816bfd7", - "sha256:d91a47c77b33580024b0271b65bb820c4e0264c25eb49151ad01e691de8fa0b6", - "sha256:efb16b16fd3eef25357f84d516062753014b76279ce4e0ec4880badd2fba7370" + "sha256:1f22ac0ca65bb70a876060d96d914dae09ac98d114294f77584b0d2644fa9c30", + "sha256:237216c3326d46808a9f7c26fd1bd4b20015fb6867dc5d263a493ef9a539293b", + "sha256:27f4d15021da6d2b706ddc3860fac0a5ddaba34ab679dc182b60a8bb4e1121cc", + "sha256:299ea899484ee6f44604deb71f424234f654606b983cb496ea2a53e3c63ab791", + "sha256:3d164928ff0727d97022957c2b849250ca0e64777ee31efd7d6de2e07c494717", + "sha256:6ab80df09e3208f742c98443b6166bcb70d65f52cfeb67357d52032ea1ae9bec", + "sha256:78a28c9fa223998472886c77042e9b9afb6fe4242bd2a2a5aced88e3f4422aa7", + "sha256:7cd532c4566d0e6feafecc1059d04c7915aec8e182d1cf7adee8b24ef1e2e6ab", + "sha256:89f9149e4a0169cddfc44c74f230d7743002e3aa0b9472d8c28f0388102fc4c2", + "sha256:a53fd3f03e578553623272dc46ac2f189de23862e68565e83dde203d41b76fc5", + "sha256:b135410244ebe777db80298297a97fbb4c862c881b4403b71bac9d4107d61fd1", + "sha256:b98d0148f84e3a3c569e19f52103ca1feacdac0d2df8d6533cf983d1fda28462", + "sha256:d1736130bce8cf131ac7957fa26880ca19227d4ad68b4888b3be0dea1f95df97", + "sha256:f45460f9ee70a0ec1b6694c6e4e348ad2019275680bd68a1d9314b8c7e01e574" ], "markers": "python_version >= '3.7'", - "version": "==4.21.11" + "version": "==4.21.12" }, "pyaes": { "hashes": [ @@ -774,11 +781,11 @@ }, "pygments": { "hashes": [ - "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1", - "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42" + "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297", + "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717" ], "markers": "python_version >= '3.6'", - "version": "==2.13.0" + "version": "==2.14.0" }, "pyparsing": { "hashes": [ @@ -822,18 +829,18 @@ }, "python-twitter-v2": { "hashes": [ - "sha256:18c14853da8b499775a11a3f5e1d0692a7017fa41eca91ac5afa73f35b935a90", - "sha256:fbe582ae7c6b33f6055b97e23dd106874e6650091d257fe67bfd024b96ebf8d6" + "sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537", + "sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2" ], "index": "pypi", - "version": "==0.8.0" + "version": "==0.8.1" }, "pytz": { "hashes": [ - "sha256:222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427", - "sha256:e89512406b793ca39f5971bc999cc538ce125c0e51c27941bef4568b460095e2" + "sha256:7ccfae7b4b2c067464a6733c6261673fdb8fd1be905460396b97a073e9fa683a", + "sha256:93007def75ae22f7cd991c84e02d434876818661f8df9ad5df9e950ff4e52cfd" ], - "version": "==2022.6" + "version": "==2022.7" }, "pytz-deprecation-shim": { "hashes": [ @@ -1009,11 +1016,11 @@ }, "rich": { "hashes": [ - "sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e", - "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0" + "sha256:25f83363f636995627a99f6e4abc52ed0970ebbd544960cc63cbb43aaac3d6f0", + "sha256:41fe1d05f433b0f4724cda8345219213d2bfa472ef56b2f64f415b5b94d51b04" ], - "markers": "python_version < '4' and python_full_version >= '3.6.3'", - "version": "==12.6.0" + "markers": "python_version >= '3.7'", + "version": "==13.0.1" }, "rsa": { "hashes": [ @@ -1080,11 +1087,11 @@ }, "telethon": { "hashes": [ - "sha256:148ac8c27908853d5d8a116d55ce947e9ba167bb697c75226ae95645b2e5a504", - "sha256:de7a1619110a2c06390fb5340839c6503c6b108b5f1a2f3bbe1ef60f02cecacb" + "sha256:3ec7ea04e61e0179dd08b974b609814e1a5298eeda3d68368a34bba754f43aec", + "sha256:d894f6ef2bf2cb119f6413b9f620957503785bab0999694b4bf67dea36f8ee09" ], "index": "pypi", - "version": "==1.26.0" + "version": "==1.26.1" }, "text-unidecode": { "hashes": [ @@ -1289,11 +1296,11 @@ "develop": { "autopep8": { "hashes": [ - "sha256:8b1659c7f003e693199f52caffdc06585bb0716900bbc6a7442fd931d658c077", - "sha256:ad924b42c2e27a1ac58e432166cc4588f5b80747de02d0d35b1ecbd3e7d57207" + "sha256:be5bc98c33515b67475420b7b1feafc8d32c1a69862498eda4983b45bffd2687", + "sha256:d27a8929d8dcd21c0f4b3859d2d07c6c25273727b98afc984c039df0f0d86566" ], "index": "pypi", - "version": "==2.0.0" + "version": "==2.0.1" }, "pycodestyle": { "hashes": [ @@ -1308,7 +1315,7 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==2.0.1" } } diff --git a/src/configs/v2config.py b/src/configs/v2config.py index 7260d41..5b47d0f 100644 --- a/src/configs/v2config.py +++ b/src/configs/v2config.py @@ -6,6 +6,7 @@ from typing import List from archivers import Archiverv2 from feeders import Feeder from databases import Database +from formatters import Formatter from storages import StorageV2 from steps.step import Step from enrichers import Enricher @@ -21,13 +22,14 @@ class ConfigV2: Enricher, Archiverv2, Database, - StorageV2 + StorageV2, + Formatter # Util ] feeder: Step # TODO:= BaseFeeder + formatter: Formatter archivers: List[Archiverv2] = field(default_factory=[]) # TODO: fix type enrichers: List[Enricher] = field(default_factory=[]) - formatters: List[Step] = field(default_factory=[]) # TODO: fix type storages: List[Step] = field(default_factory=[]) # TODO: fix type databases: List[Database] = field(default_factory=[]) @@ -50,6 +52,7 @@ class ConfigV2: for configurable in self.configurable_parents: child: Step for child in configurable.__subclasses__(): + assert child.configs() is not None and type(child.configs()) == dict, f"class '{child.name}' should have a configs method returning a dict." for config, details in child.configs().items(): assert "." not in child.name, f"class prop name cannot contain dots('.'): {child.name}" assert "." not in config, f"config property cannot contain dots('.'): {config}" @@ -87,6 +90,7 @@ class ConfigV2: # print("config.py", self.config) self.feeder = Feeder.init(steps.get("feeder", "cli_feeder"), self.config) + self.formatter = Formatter.init(steps.get("formatter", "html_formatter"), self.config) self.enrichers = [Enricher.init(e, self.config) for e in steps.get("enrichers", [])] self.archivers = [Archiverv2.init(e, self.config) for e in steps.get("archivers", [])] self.databases = [Database.init(e, self.config) for e in steps.get("databases", [])] @@ -97,6 +101,7 @@ class ConfigV2: print("archivers", [e for e in self.archivers]) print("databases", [e for e in self.databases]) print("storages", [e for e in self.storages]) + print("formatter", self.formatter) def validate(self): pass diff --git a/src/formatters/__init__.py b/src/formatters/__init__.py new file mode 100644 index 0000000..07a52a0 --- /dev/null +++ b/src/formatters/__init__.py @@ -0,0 +1,2 @@ +from .formatter import Formatter +from .html_formatter import HtmlFormatter \ No newline at end of file diff --git a/src/formatters/formatter.py b/src/formatters/formatter.py new file mode 100644 index 0000000..7199be2 --- /dev/null +++ b/src/formatters/formatter.py @@ -0,0 +1,21 @@ +from __future__ import annotations +from dataclasses import dataclass +from abc import abstractmethod +from metadata import Metadata +from steps.step import Step + + +@dataclass +class Formatter(Step): + name = "formatter" + + def __init__(self, config: dict) -> None: + # without this STEP.__init__ is not called + super().__init__(config) + + def init(name: str, config: dict) -> Formatter: + # only for code typing + return Step.init(name, config, Formatter) + + @abstractmethod + def format(self, item) -> Metadata: return None \ No newline at end of file diff --git a/src/formatters/html_formatter.py b/src/formatters/html_formatter.py new file mode 100644 index 0000000..6c278f5 --- /dev/null +++ b/src/formatters/html_formatter.py @@ -0,0 +1,36 @@ +from __future__ import annotations +from dataclasses import dataclass +from abc import abstractmethod +from metadata import Metadata +from media import Media +from formatters import Formatter +from jinja2 import Environment, FileSystemLoader +import uuid, os, pathlib + + +@dataclass +class HtmlFormatter(Formatter): + name = "html_formatter" + + def __init__(self, config: dict) -> None: + # without this STEP.__init__ is not called + super().__init__(config) + self.environment = Environment(loader=FileSystemLoader(os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/"))) + self.template = self.environment.get_template("html_template.html") + + @staticmethod + def configs() -> dict: + return {} + + def format(self, item: Metadata) -> Media: + print("FORMATTING") + content = self.template.render( + url=item.get_url(), + title=item.get_title(), + media=item.media, + metadata=item.get_clean_metadata() + ) + html_path = os.path.join(item.get("tmp_dir"), f"formatted{str(uuid.uuid4())}.html") + with open(html_path, mode="w", encoding="utf-8") as outf: + outf.write(content) + return Media(filename=html_path) diff --git a/src/formatters/templates/html_template.html b/src/formatters/templates/html_template.html new file mode 100644 index 0000000..fa278eb --- /dev/null +++ b/src/formatters/templates/html_template.html @@ -0,0 +1,101 @@ +{# templates/results.html #} + + + + + + + + {{ url }} + + + + +

Archived media for {{ url }}

+

title: '{{ title }}'

+

content {{ media | length }} item(s)

+ + + + + + {% for m in media %} + + + + + {% endfor %} +
aboutpreview
+
    +
  • ARCHIVE
  • + {% if m.hash | length > 1 %} +
  • hash: {{ m.hash }}
  • + {% endif %} +
  • key: {{ m.key }}
  • +
  • type: {{ m.mimetype }}
  • +
+ +
+ {% if 'image' in m.mimetype %} + + {% elif 'video' in m.mimetype %} + + {% elif 'audio' in m.mimetype %} + + {% else %} + No preview available, please open the link. + {% endif %} +
+

metadata

+ + + + + + {% for key in metadata %} + + + + + {% endfor %} +
keyvalue
{{ key }}{{ metadata[key] }}
+ + + + \ No newline at end of file diff --git a/src/media.py b/src/media.py index c499b5b..58eae27 100644 --- a/src/media.py +++ b/src/media.py @@ -3,8 +3,7 @@ from __future__ import annotations from ast import List from typing import Any, Union, Dict from dataclasses import dataclass -from datetime import datetime -import json +import mimetypes @dataclass @@ -12,5 +11,11 @@ class Media: filename: str key: str = None cdn_url: str = None + mimetype: str = None # eg: image/jpeg # id: str = None # hash: str = None # TODO: added by enrichers + + def set_mimetype(self) -> Media: + if not self.mimetype: + self.mimetype = mimetypes.guess_type(self.filename)[0] + return self diff --git a/src/metadata.py b/src/metadata.py index f48c636..ceece8d 100644 --- a/src/metadata.py +++ b/src/metadata.py @@ -3,7 +3,8 @@ from __future__ import annotations from ast import List, Set from typing import Any, Union, Dict from dataclasses import dataclass, field -import datetime +import datetime, mimetypes +from loguru import logger # import json from media import Media @@ -12,9 +13,11 @@ from media import Media @dataclass class Metadata: status: str = "" + _processed_at: datetime = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc) metadata: Dict[str, Any] = field(default_factory=dict) tmp_keys: Set[str] = field(default_factory=set) # keys that are not to be saved in DBs media: List[Media] = field(default_factory=list) + final_media: Media = None # can be overwritten by formatters rearchivable: bool = False # def __init__(self, url, metadata = {}) -> None: @@ -85,13 +88,20 @@ class Metadata: return ts def add_media(self, media: Media) -> Metadata: - # print(f"adding {filename} to {self.metadata.get('media')}") - # return self.set("media", self.get_media() + [filename]) - # return self.get_media().append(media) + media.set_mimetype() return self.media.append(media) + def set_final_media(self, final: Media) -> Metadata: + if final: + if self.final_media: + logger.warning(f"overwriting final media value :{self.final_media} with {final}") + final.set_mimetype() + self.final_media = final + return self + def get_single_media(self) -> Media: - # TODO: check if formatters were applied and choose with priority + if self.final_media: + return self.final_media return self.media[0] # def as_json(self) -> str: @@ -99,6 +109,12 @@ class Metadata: # return json.dumps(self.metadata) # #TODO: datetime is not serializable + def get_clean_metadata(self) -> Metadata: + return dict( + {k: v for k, v in self.metadata.items() if k not in self.tmp_keys}, + **{"processed_at": self._processed_at} # TODO: move to enrichment + ) + def cleanup(self) -> Metadata: # TODO: refactor so it returns a JSON with all intended properties, except tmp_keys # the code below leads to errors if database needs tmp_keys after they are removed diff --git a/src/orchestrator.py b/src/orchestrator.py index 3bc5ea7..5a8ff31 100644 --- a/src/orchestrator.py +++ b/src/orchestrator.py @@ -4,6 +4,8 @@ from typing import Union, Dict from dataclasses import dataclass from archivers import Archiverv2 +from feeders import Feeder +from formatters import Formatter from storages import StorageV2 from enrichers import Enricher from databases import Database @@ -13,7 +15,6 @@ import tempfile, time, traceback from loguru import logger - """ how not to couple the different pieces of logic due to the use of constants for the metadata keys? @@ -132,7 +133,8 @@ class ArchivingOrchestrator: # Archiver.init(a, config) # for a in config.archivers # ] - self.feeder = config.feeder + self.feeder : Feeder = config.feeder + self.formatter : Formatter = config.formatter self.enrichers = config.enrichers self.archivers: List[Archiverv2] = config.archivers self.databases: List[Database] = config.databases @@ -237,14 +239,21 @@ class ArchivingOrchestrator: for e in self.enrichers: result.merge(e.enrich(result)) - # formatters, enrichers, and storages will sometimes look for specific properties: eg
  • Screenshot:
  • - for f in self.formatters: - result.merge(f.format(result)) - - # storage + # store media + unstored_media = result.media[::] + result.media = [] for s in self.storages: - for i, m in enumerate(result.media): - result.media[i] = s.store(m, result) + for m in unstored_media: + result.media.append(s.store(m, result)) + + # formatters, enrichers, and storages will sometimes look for specific properties: eg
  • Screenshot:
  • + # TODO: should there only be 1 formatter? + # for f in self.formatters: + # result.merge(f.format(result)) + # final format and store it + if (final_media := self.formatter.format(result)): + for s in self.storages: + result.set_final_media(s.store(final_media, result)) # signal completion to databases (DBs, Google Sheets, CSV, ...) # a hash registration service could be one database: forensic archiving diff --git a/src/steps/gsheet.py b/src/steps/gsheet.py index 6bfb5d7..262add1 100644 --- a/src/steps/gsheet.py +++ b/src/steps/gsheet.py @@ -12,6 +12,7 @@ class Gsheets(Step): super().__init__(config) self.gsheets_client = gspread.service_account(filename=self.service_account) assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}" + assert self.sheet is not None, "You need to define a sheet name in your orchestration file when using gsheets." @staticmethod def configs() -> dict: