From 319c1e8f92531db0d89f72fa72b3d06c7172c06c Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 14 Feb 2025 09:48:37 +0000 Subject: [PATCH] Add more tests. --- poetry.lock | 30 +++++------ .../whisper_enricher/whisper_enricher.py | 3 +- tests/conftest.py | 8 --- .../metadata_enricher_ytshort_expected.pickle | Bin 0 -> 12524 bytes .../metadata_enricher_ytshort_input.pickle | Bin 0 -> 10840 bytes tests/enrichers/test_metadata_enricher.py | 15 +++++- tests/enrichers/test_ssl_enricher.py | 2 + tests/enrichers/test_thumbnail_enricher.py | 6 +-- tests/enrichers/test_whisper_enricher.py | 48 ++++++++++++++++-- tests/test_metadata.py | 23 ++++++++- 10 files changed, 102 insertions(+), 33 deletions(-) create mode 100644 tests/data/metadata/metadata_enricher_ytshort_expected.pickle create mode 100644 tests/data/metadata/metadata_enricher_ytshort_input.pickle diff --git a/poetry.lock b/poetry.lock index decadca..d61b908 100644 --- a/poetry.lock +++ b/poetry.lock @@ -172,18 +172,18 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.36.17" +version = "1.36.19" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.36.17-py3-none-any.whl", hash = "sha256:59bcf0c4b04d9cc36f8b418ad17ab3c4a99a21a175d2fad7096aa21cbe84630b"}, - {file = "boto3-1.36.17.tar.gz", hash = "sha256:5ecae20e780a3ce9afb3add532b61c466a8cb8960618e4fa565b3883064c1346"}, + {file = "boto3-1.36.19-py3-none-any.whl", hash = "sha256:7784590369a9d545bb07b2de56b6ce4d5a5e232883a957f704c3f842caeba155"}, + {file = "boto3-1.36.19.tar.gz", hash = "sha256:8c2c2a4ccdfe35dd2611ee1b7473dd2383948415c777e42dc4e7f1ebe371fe8c"}, ] [package.dependencies] -botocore = ">=1.36.17,<1.37.0" +botocore = ">=1.36.19,<1.37.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -192,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.36.17" +version = "1.36.19" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.36.17-py3-none-any.whl", hash = "sha256:069858b2fd693548035d7fd53a774e37e4260fea64e0ac9b8a3aee904f9321df"}, - {file = "botocore-1.36.17.tar.gz", hash = "sha256:cec13e0a7ce78e71aad0b397581b4e81824c7981ef4c261d2e296d200c399b09"}, + {file = "botocore-1.36.19-py3-none-any.whl", hash = "sha256:98882c106fec4c08678ea028199f7f5119550fab95d682b30846f7aae04b7bec"}, + {file = "botocore-1.36.19.tar.gz", hash = "sha256:cdf6729f601f82b1acdb9004b1f88b57cfb470f576394cdb3bbf5150f7fafb5b"}, ] [package.dependencies] @@ -860,14 +860,14 @@ tool = ["click (>=6.0.0)"] [[package]] name = "googleapis-common-protos" -version = "1.66.0" +version = "1.67.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"}, - {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"}, + {file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"}, + {file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"}, ] [package.dependencies] @@ -1235,14 +1235,14 @@ files = [ [[package]] name = "myst-parser" -version = "4.0.0" +version = "4.0.1" description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," optional = false python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"}, - {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"}, + {file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"}, + {file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"}, ] [package.dependencies] @@ -1254,10 +1254,10 @@ pyyaml = "*" sphinx = ">=7,<9" [package.extras] -code-style = ["pre-commit (>=3.0,<4.0)"] +code-style = ["pre-commit (>=4.0,<5.0)"] linkify = ["linkify-it-py (>=2.0,<3.0)"] rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] -testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] +testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"] [[package]] diff --git a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py index 89579f9..917ab85 100644 --- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py +++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py @@ -29,8 +29,7 @@ class WhisperEnricher(Enricher): job_results = {} for i, m in enumerate(to_enrich.media): if m.is_video() or m.is_audio(): - # TODO: this used to pass all storage items to store now - # Now only passing S3, the rest will get added later in the usual order (?) + # Only storing S3, the rest will get added later in the usual order (?) m.store(url=url, metadata=to_enrich, storages=[self.s3]) try: job_id = self.submit_job(m) diff --git a/tests/conftest.py b/tests/conftest.py index d7f484f..f7ed4b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,14 +133,6 @@ def unpickle(): return _unpickle -@pytest.fixture -def mock_python_dependencies(): - with patch("auto_archiver.core.module") as mock_check_python_dep: - # Mock all Python dependencies as available - mock_check_python_dep.return_value = True - yield mock_check_python_dep - - @pytest.fixture def mock_binary_dependencies(): with patch("shutil.which") as mock_shutil_which: diff --git a/tests/data/metadata/metadata_enricher_ytshort_expected.pickle b/tests/data/metadata/metadata_enricher_ytshort_expected.pickle new file mode 100644 index 0000000000000000000000000000000000000000..23ce5f6101dbd8b63b5aa4062a97d753d748ebc8 GIT binary patch literal 12524 zcmbVSTW=iKk(Mmcq9`emlXViqi|oNV3p=sZocj!k&4r>ap=FM>vSe9{F^Ha-Gt+JM zOi%j4Aq6au07(`L^qYGhgZ!8LfXzdGLEe&ok+169dMG;?t|eNWnmVVdPTlLAUtay+ zKi4ktf5qE=p2bIgG77>InRuf(k={&Z{@Bm_ve-N{zkXT%@K@!3mG2d=rFCX^GVj~N)2weQ@^T5x<-hoKuG>`l& zjOSt$=h0Y1aeOSI@EAx;AEM6|WpER`{^iEoH(TGr8J5L2NT56mBQJZ2zqIkoJR5;e z92qlrBVRBAnwZh?~h#Fpr~n z8p`wo;m^k+h?gP_q>%HIFa_0AWcW0JeiKPFSI)>2`+*!C3))|yMUhzea~TO7AW%V6 z?rwdZMk3hTcI`hOMcvvXznxBhnub4a>^wX;Ieqp*cBid7Cl4oLYwI1QIq?qUdS|F) z9zk92SnkhoZll1TgYXd>LGSBGYOL*N`wyZ!4_`idA@`p*!{=Yj|KnbM`f_iN3^GsH z0Ln+77h5NxTpqy^=2?08f0v6JOF3NlQ>m!D|8-QBel`kzdh+8HU2cJAIgJw>b@_Q& z+`2!{WCC5yXA-;n#Vr^q{i?}5dac+ZyE7l(Dz0hf%VJAw9y^xBcZ}qdIEvz>Obox9 zzb-bF)ybp`dWAMYAdsPb`)BSbd{d{)rz({xFQf3@Ukjkq@JA zgd>go`83C|mBnjvUKZb+gpo|cm-5J8ATXSkANJq<hQJ6M-sdj|$kSF|dhn}iTgGC6lRy&hmhU4#?jOZ|>0F_Sugz?~1&ste%_ z`YY@;JQiv%G^88eP91_H_83T0q4=o-u$bfx4%F$=D)+fZM>Fyh18FI-mv%|ZYY<8n zg!5w~QphpSPdUO1pVo^d;^w+I=Y(r{uQ1y3I()K0g^w<2I-H-t*%4t4!)>og2N7E% zX?r`Q{}UO6BlwHK>v(MfQ?OQ%oz8{BuicUYwOMu{#6cfn+!5-w!O+q_bC4dLdWK|a zKwYm^12g?T}mKGlR6_)kt%pRnU&w2PJ}^jP1G|uf+mk{XqR<8&OS?o)K_Go?FDbh z_7i4s_jS<*ee0O@p4TF7P3*^K4v0}~x#)Ra3X$j>jLGeIb@F(V1nc-rL3k1;Q)!pM zmC19Lh_``T$RXV&7A#e%CZ3_vJtL3y(IsR=4GB{1sP^D=f~xK!c!NGVj3l#m*Qn{Z zWH@Tnl~R{sRspfRRRe2i&yJ{$G8zPs#kOh&fGx!OKL8w~=MrszVnpPDAY$4U)rJD9 zL;zSHZP34Ky}bd~R!KvjG61Zep&X%Lb3oR!;|(dDlx8iV(ej$)Scpid#musn*JEQg zA+4c3H=Hfd0Q&O`ebXEX2Bt{VNDSyu?Li+dj7L5f+9Pf&@|9u*3~*{uq292mee;#D z-RLOett8n7lB~)x=tYT^Dpz#sa68~Ts3oJ0buBDZFXM4YHHdFdud7l7wc3W$o;r5q z@;Fb>#XC6r`V59|B2HsRRX|QfJ02Gi%$cJmXn$%))sBMqAT?YBZ_tksV@xU0tf}ba zJQWbO1m%IKa+{9%NIn-e__ zf+~5Uyy7IP>w4OPHljrrLL2l~qG%JYOOz`5LAE4gLFR~z+ce>{=Za;3*rs^0WZ3Si zBBFXK$g@-@4<+K13Aa!X@2)hWW>YNO`dCOU*wN8tmTi~S?3q#!qdW9J_FPduvmRa1 z4^(a;k-p7tCI`C~4M?0PW2-h4)VcvnQEEUoWdAhQ)RbIQ5O6ya2LK?@t%(Qkv8?Da z-P+WSV%8b8;TegXltx}*o0sN}^1W&7PM}ruYAcou03S)3g_B4Mu<2KvPH!l93oh|5 zf>tSXT6I;}!2LAKfN*xM6w?6lF(VQhB%S#yg}M7ixBd>mr4#(k9AB%U)s|2=CeP=B z8rz#Pm3j|qFlmv9*F~fFW>2|=ct*ala>`BR95!A6iOcIzY+E^iI9|*!qBbI+_$RBX zW}y~B4+nURXJ&}$(V6Netwv}|tUyDc6VJsFEAblSxGi?>&}u7RGB$|M%+OF~F2exG zp{*)kM}_s&KdG3XF2!}a*JZPh@IaekObuDo0@FkTcDb!PIu&+=+;6V zLLC6QYY?c06ZodTk&%+#p>FqX$j2nf5KEl%5YWzq-iGvI`2qnKoB z0{f%H#Ex2AbPD8KYv2v~OVq4d1)#!mRWdn1ESDP58$4a;SNiETI(sUXZJ7Z8rx}z- zwlo5a6R>qc0Q5K!%^Lik>oa{Dz)d!6jW?h`hY8_2NT`?1wSzICHoRDk$*ldg^#ISb zZ1~LO!#H-$HekA2Q5I>WM`KQfklPC#IGF>d6h~Z1b0Bz2Yu41-TnG`X)&*Q+2|6-Vy0JFu0kQ>aSNKe4x}@tqw~PmbLmMVA)T4|7 zHRI${)1KEc%-4|~^_k+4hOzJizC0YFIIt_&^Vh&Z1z$- zR7Zdl918v60D((Mgp2j@P`P57%F1CfS{Dw$);P8`8<1M6*wpcv0emKvV{55~&dQAG zJ+&HC{Hi+`Ud3BiYD55acBh(%_@ap})28VG356h=6N6EfEY zhC&8};6ce+m%LyI4G95ock57EWD;XF1Yr~ousSLM0H3CPozbAECxEW$fuS8VMc-5z ziV!1I^3p}|IHq6&{cpfX5hU{l4^ zcQFdr9fpql460LXw26>G4cf~d1(xyLZyHls>*IDy(PnCR(tt9N)1DeCOH{4v;!)$M zN4`v#9Y__0_>78y_z3d(IMMSyhH{PW?j{(<7C%X1bvwZzo*$d3Iv~jJly^*7#1J+D zF5tPV@(nS48$lZinwQ%;e?2tRB=TKdJTe{DL{(PA29ixTf56ez{Y<3_2Jm!m8tVke zAYFtSSH~)z9=X9Lu(W6E#+VtgJ>%>Q+DWN*G^6tiq0uDInGWIu`H(5=nKWA=Ub(2G z%^=o;RjQs%OygK36QCem&{LZ~Ga*>YK%QC}lD=t_*m*&UU1U(}6SEWHqLRc(~BE6~>vZ!;itcQkMzgVWGO2=WE zpyjd-K%6CQIR*eHkBOAVVF2QAUDY1~86{Uc1L4it(0GVMjiIKkvJOfo+EJ;9R>A5p zY`OrR9Z-OjeuUuaK|t8@kxyMv!aW6>iIYuVa8i+D;scOWRMPJ?usix2sy-^i4FFN2 zQ1B#OKo+Y16fOG1`*G;fGmN#d27Pq&$(gy>104h>BJKhyAZ$cr9=b(hG>)zmR*d=S z5N66htYg6fy!>$}d`i_-2x>$RWsl*61yR&cW*}_g*&Fsyzpsxs=%Z;fgfVj^Dwuvc z2%DLHjR1-C(8UElKv)3PotMEidU^TX`={aLDK$@@%7otY30$NO@gA($Kyt0u@T&0v zX4az552bEb9<1r7QP=N;MF*H{N%5SY?12h$BsNySpR31v zfZ`qt(5H!yN21#Bg45!<0z5moR~ENN^=?g0>asQRyQ7W|`M#mP{>VNWydvjQydvC( zSR+A;Uf(_I3bh}pz>Ub{c1(}cgAJG5I*{i*o-a%|o+FG-7Uk&V}PaqHG%s!VuGotE;S z>vV#r>cF-R@jhq~o)YuL6(q$n&x`9vGpIIWgIqDNYey;EQ;y63$%ny*>WOx6Sp3V1 zvu@A)Qw%P~a^mMv=6$i4mchsU;PK~W@PuACzE^CF=v~(TD{Q>+>gL7TaiiH4$7{un zbDrEbp8WUX#NNh<`6094#LzXX8C?zt|t{C#~JaZcV(`5RD(gyQ>beSqI0b%;1WDY~X=R dqX7>Ie%B9v4~@;r@oql!{sx!S6FmR` literal 0 HcmV?d00001 diff --git a/tests/data/metadata/metadata_enricher_ytshort_input.pickle b/tests/data/metadata/metadata_enricher_ytshort_input.pickle new file mode 100644 index 0000000000000000000000000000000000000000..5f1a4eb4317bf69f2ccf59306fb645985a70c648 GIT binary patch literal 10840 zcmb7KdygB{6<<<9mQB)1MWrC5cGOTMFBpY^)&pnTO-e>&v^Zz(~QDSz>&@~iSr@j{x3EKkeg$Ghz9S+qKPl;l}H zk$1f`pH5|pPYx{sI&kxBwNB^*IXe>>J_Tu+g-coe^+|dE=-U6T+LEZzvRY?aJ@*o5!C#lKn;J?ahO^cXV`1S3mE&QqA{mX&^odz%I# z2IDh!g2A`3)L6%#-2YAV_JfO)NAmus?eNntVj&fmH@}U`R%FxQz0Es&wmgE&a*?cY)#Vpuar4)4CfBf4yp*6W zikom!`c<<#xl$aF-LT;Wr-^8xclg5=LPLlf1CXCs8=Xl|~|7WkGp5c)oZckl`Z0oqv5TFRmi$LxDTHQ@rwx zsfs==uAVZMO;U%%Bo>roe&Qcz%hi`>>w#QuCe;k!!=6iIj&r#yN5%8-9k$I8q*}o^ zWGdvZz=tc`faMm5Fy&#vtHq1+Jc?BQ2wn@`2;K}{FAiIcR;Sr)^>LGFKH*Q(GwA$T z^!mX;IR9m%ahL!4%~s=G59z4_84lm|G7+6SP@9ml!~r@T0G84Lx;+3mz#XhluzL>; zps9FOp4)_w%rhA~oWTGv;_iiWl1u%LEU}b2fgqe8s#BN38;|zHYx%6yz0k03`NJj@ zNA59@wn7o91F)JD4Gz@n(<%2npsSgRwSjb$+FN_177D&)#I1pb36%YCWFQ(4)Dirt(W7NVLk5(eJA`X;1%|^p;9r#_k>K%%q zm3rV0DJ=4(K^poEvRr5lG*ooZB8c_Oo*VidHn5G-q2FtRcq-;H+o2k!nFrR28OVbm zbs0p0>hq=0#-o|s$S7G+ozQYN8%i11xtI?S_^eTCEZS`P9Of?8nk{9%)JP8i$0ucg z3a@Hkl&8oDaK1F*JW&g4JUWk)tr4u{bMS5hn}EQUeZ2~5tNZ`MzWX=ZJ|{ucIjdG% z-S|9Qex6>vt)T zCTn367|yce|5m^a^cBh(dHBv#^gN=V&Up2)IDoByJ-j zb3#RrkQrBR_SMOX`}9?I3+=JK&(&v(l=_S=MBZb9>_1_KaIcFt9@)Tb4*U*DYjQuH zIUz=gm15xcDMey(FfKRrn-uY;2sZIdNqC;D7t$U>C{yHak#7TckW;!VEI6u4O|nF% zyQ7E}m=ZGLh61T5DjHl)(A8ZAZ#=?;vCgdDwHi7v8Hrj=Wz=n$RY0t7)xlaivLouF zj0FK?wXK-}U=OkJ4*=)rSYixNf_Qo05i{+Jil7HmA_3G#8;{<%(cVI6tD>QA82~oV zL{8DLIUw8F$%c|nMzfyK>iBI6EMz3~V&+)KAFwlrW}ueht&SDF&PHU(-AZMbX z&rJkN=D2x8pTYl4A`rd27QBrzNFI`#_yK?o zgu&mDLu3$=?a$gRl^w(^Ss^?<=3R+!(5FaY8Z`Z05^p@RNfey87`i;$emY>zfQN@NGzfXyKi_o>^>L}R(JIDIND2Xs9ew+kV@kD#YMO63oM1ly>p-Z8SM|)Yc zjnE}d75kuClCz+4L?&$72->HbWq{bHc(Y{0?yDxEdMl{2)F)3Q@{~!pFc4E$7E!w? zR<1r4N((4DzAUnxvf2aF3SxDK`N!TX+Gn<-Yx;r8BP7bVp=N5Z8_|HobuzJLLqn|x zuoSHZbVKz|6U|NOMGXOuGf4md5?xI^M2{6kx9QfWhDuoH)JA3$axxl4g&khnLlt|A z#N9xr;Wzd?834YLGz;gE^dP3F1f3@6F$r$*FN0QTbUIDd*dYA0$bfKlt~ApC@iik8 z8YEr$D~-APM34Rsz^xPF&0JrnrOmdcbWD-Y4K>J{HkD=%8ZhaQm()e0{AO>tgM3D@ zv3JXD6&yBS0Eye{X<|n?fH+?)v7$CAp!p}Ss#c*6QV%D1tY?CsNjlhz_c5+^Va z=)!Y(#7e#fJ06Q&ICQ!ymW&VLnFX52%vBfwIki>w>#4M!icQ7+bStjQy>6QY(gP90 zni{&O17?`~Zj|w8j%FMyU2}_Q4fNXF8z>W29hvuG?UED1AVlZps3Zwq(HG%18+RqqG#1602kJ) zQpo{gz0``{;OR!cGEa9g*;BJ@+YA7>%%DB8tr1{cfNc;0px22Q))4pHpXs*&JY>Vy zm;fa@YzXThpZcl~MA9InK`Uu!l z2I{M9Q1LoINHJ=ArOl>Yv=C%j;jtXFz+_$?Q#b9MK2sM9J&&EI!GISSb~OnA>zGV7 zVUnkjHKQFBL{tT`HRR(dW)0g{bO+kxNfN2~dG1+yD5pfhGiPY1sfh*e02;qw#tXuUO%qt^QAL57ar3De&zo50>r9XSOvy;gSi}LpJe;68 zuqQb3Yv7;*u&IX`uy{Ib0nlqb;}w9Q;FE*tp^E{m;v`p2$q-gZDb3FuIF)BkaFu6H zZ#MPt5w##yodN*5i0fRz08YcuTBSPV21rSL1SG+w&>v0^cqxf=Q6CSTE4Ha@9A=|! z;Q;K7GuyKPsiT@rou3)NcT%~wjvDB!&e-15s6oxIdI!U&m~^E_1W;FZ>WN4$+UPQG z+8&U?c(2MY5Y#D(*1CA;D$JI`D2H@H?z+NK$bgVMs95Wk7Xo1*ApqXpI+PB%#7r$g z7=;t8&Po6vrird|8Z`9;&^|yVQDs~3{n-?bTQ4lScU5eLuY;lH7PgRLdc*N(Xv;8W4!lU zB-GXlyxmf~nH#>+fHslKo?0qPbggyqXmHdkU*^jJq?$rJqhlaBf;^tB^}dgx+@rg9 z6AWXIpRW`3c7j2?KQ>!+Kv3VQ?wGcSA?yU)!1Gik3^V;Uf(UxFFSmVuJv8(r^0sas znGai{sw;8>>86K2;OOptsY(R{c)PbqbOB_LZbD6{;}ma?++q`0Iikk@w8?X>gZw}-WZHUW&0dIaxu~MeAhv@ws@_d3l0+2~pdek)T~JO?Lp8&A zAaL1;UM7(FQoW959KH@XB(3#mG$zJyP$I6#scupt&(=cX<#Ig3yC)rPO}+WY*4jbX zm?*ynqPLH3*#_C3&K>}8nzEB5a4;@#oK>zA{Q>u&Pz9S<5`-4!9+>HU8gdEGm0MDo z=Koq~WH;+G`t^W0X|&Mq1oj*kD+-d(jiD?xN2#r9*!QyDZ3Jkfmo6^x0m2HX>AcK~7tfY> z%aZL@65nmQc9v#Xc+JYc=ab+=^+nF$wD{@%SEH8VF~*yjoQpil{4ZCFGWci|{O*e~ m`1q?bxKmu6(w8^NG ThumbnailEnricher: - configs: dict = { +def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher: + config: dict = { "thumbnails_per_minute": 60, "max_thumbnails": 4, } - return setup_module("thumbnail_enricher", configs) + return setup_module("thumbnail_enricher", config) @pytest.fixture diff --git a/tests/enrichers/test_whisper_enricher.py b/tests/enrichers/test_whisper_enricher.py index 8a73ed7..873198f 100644 --- a/tests/enrichers/test_whisper_enricher.py +++ b/tests/enrichers/test_whisper_enricher.py @@ -8,6 +8,9 @@ from auto_archiver.modules.s3_storage import S3Storage from auto_archiver.modules.whisper_enricher import WhisperEnricher +TEST_S3_URL = "http://cdn.example.com/test.mp4" + + @pytest.fixture def enricher(): """Fixture with mocked S3 and API dependencies""" @@ -20,7 +23,7 @@ def enricher(): "steps": {"storages": ["s3_storage"]} } mock_s3 = MagicMock(spec=S3Storage) - mock_s3.get_cdn_url.return_value = "http://s3.example.com/media.mp3" + mock_s3.get_cdn_url.return_value = TEST_S3_URL instance = WhisperEnricher() instance.name = "whisper_enricher" instance.display_name = "Whisper Enricher" @@ -53,7 +56,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests): """Test successful media processing with S3 configured""" whisper, mock_s3 = enricher # Configure mock S3 URL to match test expectation - mock_s3.get_cdn_url.return_value = "http://cdn.example.com/test.mp4" + mock_s3.get_cdn_url.return_value = TEST_S3_URL # Create test media with matching CDN URL m = Media("test.mp4") @@ -78,6 +81,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests): mock_status_response, # First call: status check mock_artifacts_response # Second call: artifacts check ] + # Run enrichment (without opening file) whisper.enrich(metadata) # Check API interactions @@ -89,5 +93,43 @@ def test_successful_job_submission(enricher, metadata, mock_requests): # Verify job status checks assert mock_requests.get.call_count == 2 assert "artifact_0_text" in metadata.media[0].get("whisper_model") - assert "test transcript" in metadata.metadata.get("content") + assert metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript', 'job_artifacts_check': 'http://testapi/jobs/job123/artifacts', 'job_id': 'job123', 'job_status_check': 'http://testapi/jobs/job123'} + + + +def test_submit_job(enricher): + """Test job submission method""" + whisper, _ = enricher + m = Media("test.mp4") + m.add_url(TEST_S3_URL) + with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests: + mock_response = MagicMock() + mock_response.status_code = 201 + mock_response.json.return_value = {"id": "job123"} + mock_requests.post.return_value = mock_response + job_id = whisper.submit_job(m) + assert job_id == "job123" + +def test_submit_raises_status(enricher): + whisper, _ = enricher + m = Media("test.mp4") + m.add_url(TEST_S3_URL) + with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests: + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.json.return_value = {"id": "job123"} + mock_requests.post.return_value = mock_response + with pytest.raises(AssertionError) as exc_info: + whisper.submit_job(m) + assert str(exc_info.value) == "calling the whisper api http://testapi returned a non-success code: 400" + +# @pytest.mark.parametrize("test_url, status", ["http://cdn.example.com/test.mp4",]) +def test_submit_job_fails(enricher): + """Test assertion fails with non-S3 URL""" + whisper, mock_s3 = enricher + m = Media("test.mp4") + m.add_url("http://cdn.wrongurl.com/test.mp4") + with pytest.raises(AssertionError): + whisper.submit_job(m) + diff --git a/tests/test_metadata.py b/tests/test_metadata.py index b07e107..a753936 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -162,4 +162,25 @@ def test_get_context(): def test_choose_most_complete(): - pass \ No newline at end of file + m_more = Metadata() + m_more.set_title("Title 1") + m_more.set_content("Content 1") + m_more.set_url("https://example.com") + + m_less = Metadata() + m_less.set_title("Title 2") + m_less.set_content("Content 2") + m_less.set_url("https://example.com") + m_less.set_context("key", "value") + + res = Metadata.choose_most_complete([m_more, m_less]) + assert res.metadata.get("title") == "Title 1" + +def test_choose_most_complete_from_pickles(unpickle): + # test most complete from pickles before and after an enricher has run + # Only compares length of media, not the actual media + m_before_enriching = unpickle("/Users/erinclark/PycharmProjects/auto-archiver/tests/data/metadata/metadata_enricher_ytshort_input.pickle") + m_after_enriching = unpickle("/Users/erinclark/PycharmProjects/auto-archiver/tests/data/metadata/metadata_enricher_ytshort_expected.pickle") + # Iterates `for r in results[1:]:` + res = Metadata.choose_most_complete([Metadata(), m_after_enriching, m_before_enriching]) + assert res.media == m_after_enriching.media