diff --git a/Pipfile b/Pipfile index 8ef9502..7e8ed8b 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,8 @@ name = "pypi" requests = "*" numpy = "*" pandas = "*" +selenium = "*" +webdriver-manager = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index 8bf1cce..36e1895 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "864ee81413d2d76069a372c39e9bffb8bda1c636b2b78e2b99517f7828e10bf9" + "sha256": "66179ca2743007ccdf172c4d911e77f9518ef4471fbee749b73fe973fe46feaf" }, "pipfile-spec": 6, "requires": { @@ -16,29 +16,134 @@ ] }, "default": { + "async-generator": { + "hashes": [ + "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b", + "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144" + ], + "markers": "python_version >= '3.5'", + "version": "==1.10" + }, + "attrs": { + "hashes": [ + "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04", + "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015" + ], + "markers": "python_version >= '3.7'", + "version": "==23.1.0" + }, "certifi": { "hashes": [ - "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d", - "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412" + "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", + "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" ], - "markers": "python_full_version >= '3.6.0'", - "version": "==2022.6.15" + "markers": "python_version >= '3.6'", + "version": "==2022.12.7" }, "charset-normalizer": { "hashes": [ - "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5", - "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413" + "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6", + "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1", + "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e", + "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373", + "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62", + "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230", + "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be", + "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c", + "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0", + "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448", + "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f", + "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649", + "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d", + "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0", + "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706", + "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a", + "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59", + "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23", + "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5", + "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb", + "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e", + "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e", + "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c", + "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28", + "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d", + "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41", + "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974", + "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce", + "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f", + "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1", + "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d", + "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8", + "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017", + "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31", + "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7", + "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8", + "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e", + "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14", + "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd", + "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d", + "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795", + "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b", + "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b", + "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b", + "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203", + "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f", + "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19", + "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1", + "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a", + "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac", + "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9", + "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0", + "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137", + "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f", + "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6", + "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5", + "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909", + "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f", + "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0", + "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324", + "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755", + "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb", + "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854", + "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c", + "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60", + "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84", + "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0", + "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b", + "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1", + "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531", + "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1", + "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11", + "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326", + "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df", + "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab" ], - "markers": "python_full_version >= '3.6.0'", - "version": "==2.1.0" + "markers": "python_full_version >= '3.7.0'", + "version": "==3.1.0" + }, + "exceptiongroup": { + "hashes": [ + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.1" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" }, "idna": { "hashes": [ - "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", - "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", + "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" ], "markers": "python_version >= '3.5'", - "version": "==3.3" + "version": "==3.4" }, "numpy": { "hashes": [ @@ -68,6 +173,22 @@ "index": "pypi", "version": "==1.23.1" }, + "outcome": { + "hashes": [ + "sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672", + "sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.0" + }, + "packaging": { + "hashes": [ + "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", + "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" + ], + "markers": "python_version >= '3.7'", + "version": "==23.1" + }, "pandas": { "hashes": [ "sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6", @@ -95,6 +216,14 @@ "index": "pypi", "version": "==1.4.3" }, + "pysocks": { + "hashes": [ + "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", + "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", + "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" + ], + "version": "==1.7.1" + }, "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", @@ -103,6 +232,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, + "python-dotenv": { + "hashes": [ + "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba", + "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.0" + }, "pytz": { "hashes": [ "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", @@ -112,11 +249,19 @@ }, "requests": { "hashes": [ - "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", - "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" + "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", + "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf" ], "index": "pypi", - "version": "==2.28.1" + "version": "==2.28.2" + }, + "selenium": { + "hashes": [ + "sha256:478fae77cdfaec32adb1e68d59632c8c191f920535282abcaa2d1a3d98655624", + "sha256:4c19e6aac202719373108d53a5a8e9336ba8d2b25822ca32ae6ff37acbabbdbe" + ], + "index": "pypi", + "version": "==4.9.0" }, "six": { "hashes": [ @@ -126,13 +271,68 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, + "sortedcontainers": { + "hashes": [ + "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", + "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0" + ], + "version": "==2.4.0" + }, + "tqdm": { + "hashes": [ + "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5", + "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671" + ], + "markers": "python_version >= '3.7'", + "version": "==4.65.0" + }, + "trio": { + "hashes": [ + "sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf", + "sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0" + ], + "markers": "python_version >= '3.7'", + "version": "==0.22.0" + }, + "trio-websocket": { + "hashes": [ + "sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53", + "sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62" + ], + "markers": "python_version >= '3.7'", + "version": "==0.10.2" + }, "urllib3": { "hashes": [ - "sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc", - "sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a" + "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305", + "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", - "version": "==1.26.11" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.26.15" + }, + "webdriver-manager": { + "hashes": [ + "sha256:7d3aa8d67bd6c92a5d25f4abd75eea2c6dd24ea6617bff986f502280903a0e2b", + "sha256:ee788d389b8f45222a8a62f6f39b579360a1f87be46dad6da89918354af3ce73" + ], + "index": "pypi", + "version": "==3.8.6" + }, + "wsproto": { + "hashes": [ + "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", + "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==1.2.0" } }, "develop": {} diff --git a/README.md b/README.md index 4b98a00..cd58b91 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,30 @@ instagram-scraper @creds.txt --filename @location_ids.txt --location --include-l ## Getting Instagram cookies +### Improved Tool Usage Process + +The updated process allows you to obtain cookies through a pop-up browser window. To use this new flow, simply omit the `--cookie` parameter when running the command. + +__Important: An Instagram session ID should be treated like a password, as it provides full access to your Instagram account. Using this session ID in multiple places or on multiple computers may cause Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may violate the Instagram Terms of Service and could lead to account suspension.__ + +Follow these steps to use the improved tool usage process: + +1. Run the command without the `--cookie` parameter. +2. In the opened Google Chrome instance, create a new profile. +  +3. Click "Continue without email". +  +4. Enter a name for the profile you'd like to link to instagram account, and click "Done". +  +5. Select "Only allow essential cookies". +  +6. Log in to your Instagram account. The browser will close automatically shortly afterward. +  + +Once you've completed the initial setup, you can simply click on the created profile in subsequent runs to use the associated Instagram account. + + +### Old flow This now requires the entire cookie string, in the format of an HTTP request header. Details TK. __Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__ diff --git a/docs/selenium1.png b/docs/selenium1.png new file mode 100644 index 0000000..cac6449 Binary files /dev/null and b/docs/selenium1.png differ diff --git a/docs/selenium2.png b/docs/selenium2.png new file mode 100644 index 0000000..6dcd636 Binary files /dev/null and b/docs/selenium2.png differ diff --git a/docs/selenium3.png b/docs/selenium3.png new file mode 100644 index 0000000..5a70008 Binary files /dev/null and b/docs/selenium3.png differ diff --git a/docs/selenium4.png b/docs/selenium4.png new file mode 100644 index 0000000..cb63b3f Binary files /dev/null and b/docs/selenium4.png differ diff --git a/docs/selenium5.png b/docs/selenium5.png new file mode 100644 index 0000000..43e5c39 Binary files /dev/null and b/docs/selenium5.png differ diff --git a/instagram_locations/instagram_locations.py b/instagram_locations/instagram_locations.py index 7827a68..9c0a9f7 100644 --- a/instagram_locations/instagram_locations.py +++ b/instagram_locations/instagram_locations.py @@ -7,6 +7,11 @@ from datetime import datetime, timezone from itertools import product from statistics import pstdev from string import Template +from selenium import webdriver +from selenium.webdriver.chrome.service import Service as ChromiumService +from webdriver_manager.chrome import ChromeDriverManager +from webdriver_manager.core.utils import ChromeType +from time import sleep import requests @@ -113,6 +118,24 @@ def encode_date(date_str: str): return str(max_id_num) +def get_insta_cookies(): + """ + Attempts to run selenium, provide user with the login form and extract cookies from page to be used in program. + Returns cookies formatted as name=value;name=value;... + """ + options = webdriver.ChromeOptions() + options.add_argument(r"--user-data-dir=~/.instagram_location_searcher/data") + options.add_argument(r'--profile-directory=~/.instagram_location_searcher/profile') + driver = webdriver.Chrome(options=options, service=ChromiumService(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())) + driver.get("https://www.instagram.com/") + # Check that there is cookie with name sessionid (mean we logged in) + cookies = driver.get_cookies() + while not any([cookie.get("name") == "sessionid" for cookie in cookies]): + sleep(1) + cookies = driver.get_cookies() + return "; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies]) + + html_template = """