From d252c6b8f3c02074c93a8eb911328080a21fd726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Sp=C3=B6ttel?= <1682504+fspoettel@users.noreply.github.com> Date: Mon, 4 Jul 2022 13:54:07 +0200 Subject: [PATCH] test: add test suite (#7) * add pre-commit hook * improve logging of inconsistent data --- .github/workflows/main.yml | 4 + .pre-commit-config.yaml | 10 + Pipfile | 2 + Pipfile.lock | 311 +++++++++++++++++++----- geoclustering/io.py | 36 ++- pytest.ini | 3 + setup.py | 4 +- tests/__init__.py | 0 tests/clustering.py | 42 ++++ tests/encoding.py | 30 +++ tests/fixtures/clustering.csv | 6 + tests/fixtures/io.csv | 9 + tests/fixtures/snapshots/result.geojson | 1 + tests/fixtures/snapshots/result.json | 1 + tests/fixtures/snapshots/result.txt | 7 + tests/helpers.py | 16 ++ tests/io.py | 25 ++ 17 files changed, 435 insertions(+), 72 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 pytest.ini create mode 100644 tests/__init__.py create mode 100644 tests/clustering.py create mode 100644 tests/encoding.py create mode 100644 tests/fixtures/clustering.csv create mode 100644 tests/fixtures/io.csv create mode 100644 tests/fixtures/snapshots/result.geojson create mode 100644 tests/fixtures/snapshots/result.json create mode 100644 tests/fixtures/snapshots/result.txt create mode 100644 tests/helpers.py create mode 100644 tests/io.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fa051dc..2d742de 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,6 +42,10 @@ jobs: name: "Style" run: | black --check . 
+ - python: "3.10" + task: + name: "Test" + run: pytest --exitfirst --failed-first steps: - uses: actions/checkout@v3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3fe2b64 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + # It is recommended to specify the latest version of Python + # supported by your project here, or alternatively use + # pre-commit's default_language_version, see + # https://pre-commit.com/#top_level-default_language_version + language_version: python3.9 diff --git a/Pipfile b/Pipfile index 24dcc3b..0072bc4 100644 --- a/Pipfile +++ b/Pipfile @@ -13,6 +13,8 @@ scikit-learn = "*" [dev-packages] black = "*" +pre-commit = "*" +pytest = "*" wheel = "*" [requires] diff --git a/Pipfile.lock b/Pipfile.lock index d7ddb11..16624fe 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "2c871d34049ba57c71502528723f7a03014d0bf5c768f77704cf13ef88215378" + "sha256": "5f06b83fcb7ea20fc04cdfdf3da353a1f003ea441738f7615cc3f789877a60bf" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,14 @@ ] }, "default": { + "appnope": { + "hashes": [ + "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", + "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" + ], + "markers": "platform_system == 'Darwin'", + "version": "==0.1.3" + }, "argon2-cffi": { "hashes": [ "sha256:8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80", @@ -99,58 +107,72 @@ }, "cffi": { "hashes": [ - "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", - "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", - "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", - "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", - 
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", - "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", - "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", - "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", - "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", - "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", - "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", - "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", - "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", - "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", - "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", - "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", - "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", - "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", - "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", - "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", - "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", - "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", - "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", - "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", - "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", - "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", - "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", - "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", - "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", - "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", 
- "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", - "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", - "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", - "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", - "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", - "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", - "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", - "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", - "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", - "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", - "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", - "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", - "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", - "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", - "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", - "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", - "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", - "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", - "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", - "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5", + "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef", + "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104", + "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426", + "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405", + 
"sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375", + "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a", + "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e", + "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc", + "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf", + "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185", + "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497", + "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3", + "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35", + "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c", + "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83", + "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21", + "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca", + "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984", + "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac", + "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd", + "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee", + "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a", + "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2", + "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192", + "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7", + "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585", + "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f", + "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e", + "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27", + "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b", 
+ "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e", + "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e", + "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d", + "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c", + "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415", + "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82", + "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02", + "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314", + "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325", + "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c", + "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3", + "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914", + "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045", + "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d", + "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9", + "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5", + "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2", + "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c", + "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3", + "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2", + "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8", + "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d", + "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d", + "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9", + "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162", + 
"sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76", + "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4", + "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e", + "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9", + "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6", + "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b", + "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01", + "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0" ], - "version": "==1.15.0" + "version": "==1.15.1" }, "click": { "hashes": [ @@ -440,11 +462,11 @@ }, "nbclient": { "hashes": [ - "sha256:cdef7757cead1735d2c70cc66095b072dced8a1e6d1c7639ef90cd3e04a11f2e", - "sha256:f251bba200a2b401a061dfd700a7a70b5772f664fb49d4a2d3e5536ec0e98c76" + "sha256:09bae4ea2df79fa6bc50aeb8278d8b79d2036792824337fa6eee834afae17312", + "sha256:0df76a7961d99a681b4796c74a1f2553b9f998851acc01896dce064ad19a9027" ], "markers": "python_version >= '3.7'", - "version": "==0.6.4" + "version": "==0.6.6" }, "nbconvert": { "hashes": [ @@ -1037,6 +1059,14 @@ } }, "develop": { + "attrs": { + "hashes": [ + "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", + "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.4.0" + }, "black": { "hashes": [ "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90", @@ -1066,6 +1096,14 @@ "index": "pypi", "version": "==22.6.0" }, + "cfgv": { + "hashes": [ + "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426", + "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==3.3.1" + }, "click": { "hashes": [ 
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", @@ -1074,6 +1112,36 @@ "index": "pypi", "version": "==8.1.3" }, + "distlib": { + "hashes": [ + "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b", + "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579" + ], + "version": "==0.3.4" + }, + "filelock": { + "hashes": [ + "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404", + "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04" + ], + "markers": "python_version >= '3.7'", + "version": "==3.7.1" + }, + "identify": { + "hashes": [ + "sha256:0dca2ea3e4381c435ef9c33ba100a78a9b40c0bab11189c7cf121f75815efeaa", + "sha256:3d11b16f3fe19f52039fb7e39c9c884b21cb1b586988114fbe42671f03de3e82" + ], + "markers": "python_version >= '3.7'", + "version": "==2.5.1" + }, + "iniconfig": { + "hashes": [ + "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", + "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" + ], + "version": "==1.1.1" + }, "mypy-extensions": { "hashes": [ "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", @@ -1081,6 +1149,22 @@ ], "version": "==0.4.3" }, + "nodeenv": { + "hashes": [ + "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e", + "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==1.7.0" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "markers": "python_version >= '3.6'", + "version": "==21.3" + }, "pathspec": { "hashes": [ "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", @@ -1096,6 +1180,109 @@ "markers": "python_version >= '3.7'", "version": "==2.5.2" 
}, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, + "pre-commit": { + "hashes": [ + "sha256:10c62741aa5704faea2ad69cb550ca78082efe5697d6f04e5710c3c229afdd10", + "sha256:4233a1e38621c87d9dda9808c6606d7e7ba0e087cd56d3fe03202a01d2919615" + ], + "index": "pypi", + "version": "==2.19.0" + }, + "py": { + "hashes": [ + "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", + "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.11.0" + }, + "pyparsing": { + "hashes": [ + "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb", + "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc" + ], + "markers": "python_full_version >= '3.6.8'", + "version": "==3.0.9" + }, + "pytest": { + "hashes": [ + "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c", + "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45" + ], + "index": "pypi", + "version": "==7.1.2" + }, + "pyyaml": { + "hashes": [ + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + 
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "markers": "python_version >= '3.6'", + "version": "==6.0" + }, + 
"setuptools": { + "hashes": [ + "sha256:990a4f7861b31532871ab72331e755b5f14efbe52d336ea7f6118144dd478741", + "sha256:c1848f654aea2e3526d17fc3ce6aeaa5e7e24e66e645b5be2171f3f6b4e5a178" + ], + "markers": "python_version >= '3.7'", + "version": "==62.6.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "toml": { + "hashes": [ + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" + }, "tomli": { "hashes": [ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", @@ -1106,11 +1293,19 @@ }, "typing-extensions": { "hashes": [ - "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708", - "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376" + "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02", + "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6" ], "markers": "python_version < '3.10'", - "version": "==4.2.0" + "version": "==4.3.0" + }, + "virtualenv": { + "hashes": [ + "sha256:288171134a2ff3bfb1a2f54f119e77cd1b81c29fc1265a2356f3e8d14c7d58c4", + "sha256:b30aefac647e86af6d82bfc944c556f8f1a9c90427b2fb4e3bfbf338cb82becf" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==20.15.1" }, "wheel": { "hashes": [ diff --git a/geoclustering/io.py b/geoclustering/io.py index 707d030..51ad016 100644 --- a/geoclustering/io.py +++ b/geoclustering/io.py @@ -1,3 +1,4 @@ +import math from keplergl import KeplerGl from pathlib import Path from pkg_resources import 
resource_filename @@ -38,23 +39,32 @@ def is_valid_lon(val: str) -> bool: return False +def is_not_none(val: any) -> bool: + return val is not None + + def read_csv_file(filename): """Read input csv file, dropping rows that don't have valid location data.""" - df = pd.read_csv(filename) - initial_rows = len(df) + # replace NaN for all fields not to break kepler parsing. + df = pd.read_csv(filename).replace({np.nan: None}) - df = df.dropna(subset=["lat", "lon"]) - df = df.replace( - {np.nan: None} - ) # replace for other fields not to break kepler parsing - print(f"Ignored {initial_rows - len(df)} coordinates with NaN") + # construct an index of values with valid lat & lon. + valid_index = df.lat.apply(is_valid_lat) & df.lon.apply(is_valid_lon) + df_invalid = df[~valid_index] + + if count_invalid := len(df_invalid): + df_not_empty = df_invalid[ + (df_invalid.lat.apply(is_not_none) | df_invalid.lon.apply(is_not_none)) + ] + count_not_empty = len(df_not_empty) + + if count_empty := count_invalid - count_not_empty: + print(f"Removed {count_empty} empty coordinate pairs.") + + if count_not_empty: + print(f"Removed {count_not_empty} invalid coordinate pairs:") + print(df_not_empty[["lat", "lon"]].to_string()) - valid_index = df.lat.astype(str).apply(is_valid_lat) & df.lon.astype(str).apply( - is_valid_lon - ) - if len(df_invalid := df[~valid_index]): - print(f"Found {len(df_invalid)} invalid coordinate pairs, ignoring:") - print(df_invalid[["lat", "lon"]].to_string()) return df[valid_index] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..f96d8d2 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests/ +python_files = *.py diff --git a/setup.py b/setup.py index b36e78e..5c363ab 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,9 @@ setup( "pandas", "scikit-learn", ], - extras_require={"dev": ["black", "wheel"]}, + extras_require={ + "dev": ["black", "wheel", "pre-commit", "pytest"], + }, include_package_data=True, 
zip_safe=False, ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/clustering.py b/tests/clustering.py new file mode 100644 index 0000000..8c49874 --- /dev/null +++ b/tests/clustering.py @@ -0,0 +1,42 @@ +from geoclustering.clustering import cluster_locations +from geoclustering.io import read_csv_file +from tests.helpers import get_fixture_path, read_fixture_csv + + +df = read_fixture_csv("clustering.csv") + + +def has_member(list, name): + return any(x for x in list if x["name"] == name) + + +def test_clustering_all(): + # there should be one cluster with all members but Erin. + res = cluster_locations( + df=df, algorithm="dbscan", radius_km=1.97, min_cluster_size=4 + ) + assert len(res.values()) == 1 + assert len(res[0]) == 4 + + +def test_clustering_split(): + res = cluster_locations( + df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=2 + ) + # there should be two clusters: Alice & Bob and Carol & Dan + assert len(res.values()) == 2 + cluster_one = res[0] + cluster_two = res[1] + assert len(cluster_one) == 2 + assert has_member(cluster_one, "Alice") + assert has_member(cluster_one, "Bob") + assert has_member(cluster_two, "Carol") + assert has_member(cluster_two, "Dan") + + +def test_clustering_none(): + # there should be no clusters now. 
+ res = cluster_locations( + df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=3 + ) + assert len(res.values()) == 0 diff --git a/tests/encoding.py b/tests/encoding.py new file mode 100644 index 0000000..7defb53 --- /dev/null +++ b/tests/encoding.py @@ -0,0 +1,30 @@ +from geoclustering.clustering import cluster_locations +from geoclustering.encoding import encode_clusters +from tests.helpers import read_fixture_csv, read_fixture_content + + +df = read_fixture_csv("clustering.csv") + + +def test_encoders(): + clusters = { + 0: [ + {"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, + {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}, + ], + 1: [ + {"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, + { + "id": 4, + "name": "Dan", + "lat": 52.52443559865125, + "lon": 13.41261723049818, + }, + ], + } + + res = encode_clusters(clusters) + + assert res["string"] == read_fixture_content("snapshots/result.txt") + assert res["json"] == read_fixture_content("snapshots/result.json") + assert res["geojson"] == read_fixture_content("snapshots/result.geojson") diff --git a/tests/fixtures/clustering.csv b/tests/fixtures/clustering.csv new file mode 100644 index 0000000..cc70863 --- /dev/null +++ b/tests/fixtures/clustering.csv @@ -0,0 +1,6 @@ +id,name,lat,lon +1,Alice,52.523955,13.442362 +2,Bob,52.526659,13.448097 +3,Carol,52.525626,13.419246 +4,Dan,52.52443559865125,13.41261723049818 +5,Erin,52.524838991760774,13.383188597040382 diff --git a/tests/fixtures/io.csv b/tests/fixtures/io.csv new file mode 100644 index 0000000..98cd317 --- /dev/null +++ b/tests/fixtures/io.csv @@ -0,0 +1,9 @@ +id,name,lat,lon +1,Alice,, +2,,52.523955,13.442362 +,,-90.12,132.23 +4,,78.234,-180.1212 +5,Bob,52.524838991760774,13.383188597040382 +6,Peter,91.234, +7,Horst,,23.23 +7,Erin,foo,bar diff --git a/tests/fixtures/snapshots/result.geojson b/tests/fixtures/snapshots/result.geojson new file mode 100644 index 0000000..7c83a01 --- /dev/null +++ 
b/tests/fixtures/snapshots/result.geojson @@ -0,0 +1 @@ +{"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.442362, 52.523955]}, "properties": {"id": 1, "name": "Alice", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.448097, 52.526659]}, "properties": {"id": 2, "name": "Bob", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.419246, 52.525626]}, "properties": {"id": 3, "name": "Carol", "cluster_id": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.412617, 52.524436]}, "properties": {"id": 4, "name": "Dan", "cluster_id": 1}}]} \ No newline at end of file diff --git a/tests/fixtures/snapshots/result.json b/tests/fixtures/snapshots/result.json new file mode 100644 index 0000000..e6c7331 --- /dev/null +++ b/tests/fixtures/snapshots/result.json @@ -0,0 +1 @@ +[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}] \ No newline at end of file diff --git a/tests/fixtures/snapshots/result.txt b/tests/fixtures/snapshots/result.txt new file mode 100644 index 0000000..c28b389 --- /dev/null +++ b/tests/fixtures/snapshots/result.txt @@ -0,0 +1,7 @@ +Cluster 0 +id 1, name Alice, lat 52.523955, lon 13.442362 +id 2, name Bob, lat 52.526659, lon 13.448097 + +Cluster 1 +id 3, name Carol, lat 52.525626, lon 13.419246 +id 4, name 
Dan, lat 52.52443559865125, lon 13.41261723049818 diff --git a/tests/helpers.py b/tests/helpers.py new file mode 100644 index 0000000..13440c3 --- /dev/null +++ b/tests/helpers.py @@ -0,0 +1,16 @@ +import os +from geoclustering.io import read_csv_file + + +def get_fixture_path(filename): + dir_path = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir_path, "fixtures", filename) + + +def read_fixture_csv(filename): + return read_csv_file(get_fixture_path(filename)) + + +def read_fixture_content(filename): + with open(get_fixture_path(filename)) as f: + return f.read() diff --git a/tests/io.py b/tests/io.py new file mode 100644 index 0000000..b43b277 --- /dev/null +++ b/tests/io.py @@ -0,0 +1,25 @@ +from pathlib import Path +import shutil +from geoclustering.io import write_output_file +from tests.helpers import read_fixture_csv + + +def test_csv_filters(): + df = read_fixture_csv("io.csv") + # entries 2 & 5 in fixture are valid. + assert len(df) == 2 + assert df.iloc[0]["name"] == None + assert df.iloc[1]["name"] == "Bob" + + +def test_write_output_file(): + p = "./this/dir/does/not/exist" + f = "test.txt" + write_output_file(p, f, "test") + + path = Path(p) / f + + with open(path) as f: + assert f.read() == "test" + + shutil.rmtree(Path("./this"))