diff --git a/Pipfile b/Pipfile index 8ef9502..7e8ed8b 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,8 @@ name = "pypi" requests = "*" numpy = "*" pandas = "*" +selenium = "*" +webdriver-manager = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index 8bf1cce..36e1895 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "864ee81413d2d76069a372c39e9bffb8bda1c636b2b78e2b99517f7828e10bf9" + "sha256": "66179ca2743007ccdf172c4d911e77f9518ef4471fbee749b73fe973fe46feaf" }, "pipfile-spec": 6, "requires": { @@ -16,29 +16,134 @@ ] }, "default": { + "async-generator": { + "hashes": [ + "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b", + "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144" + ], + "markers": "python_version >= '3.5'", + "version": "==1.10" + }, + "attrs": { + "hashes": [ + "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04", + "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015" + ], + "markers": "python_version >= '3.7'", + "version": "==23.1.0" + }, "certifi": { "hashes": [ - "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d", - "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412" + "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", + "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" ], - "markers": "python_full_version >= '3.6.0'", - "version": "==2022.6.15" + "markers": "python_version >= '3.6'", + "version": "==2022.12.7" }, "charset-normalizer": { "hashes": [ - "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5", - "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413" + "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6", + "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1", + "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e", + "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373", + "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62", + "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230", + "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be", + "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c", + "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0", + "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448", + "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f", + "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649", + "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d", + "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0", + "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706", + "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a", + "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59", + "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23", + "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5", + "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb", + "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e", + "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e", + "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c", + "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28", + "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d", + "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41", + "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974", + "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce", + "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f", + "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1", + "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d", + "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8", + "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017", + "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31", + "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7", + "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8", + "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e", + "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14", + "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd", + "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d", + "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795", + "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b", + "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b", + "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b", + "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203", + "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f", + "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19", + "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1", + "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a", + "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac", + "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9", + "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0", + "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137", + "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f", + "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6", + "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5", + "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909", + "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f", + "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0", + "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324", + "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755", + "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb", + "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854", + "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c", + "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60", + "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84", + "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0", + "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b", + "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1", + "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531", + "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1", + "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11", + "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326", + "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df", + "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab" ], - "markers": "python_full_version >= '3.6.0'", - "version": "==2.1.0" + "markers": "python_full_version >= '3.7.0'", + "version": "==3.1.0" + }, + "exceptiongroup": { + "hashes": [ + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.1" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" }, "idna": { "hashes": [ - "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", - "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", + "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" ], "markers": "python_version >= '3.5'", - "version": "==3.3" + "version": "==3.4" }, "numpy": { "hashes": [ @@ -68,6 +173,22 @@ "index": "pypi", "version": "==1.23.1" }, + "outcome": { + "hashes": [ + "sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672", + "sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.0" + }, + "packaging": { + "hashes": [ + "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", + "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" + ], + "markers": "python_version >= '3.7'", + "version": "==23.1" + }, "pandas": { "hashes": [ "sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6", @@ -95,6 +216,14 @@ "index": "pypi", "version": "==1.4.3" }, + "pysocks": { + "hashes": [ + "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", + "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", + "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" + ], + "version": "==1.7.1" + }, "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", @@ -103,6 +232,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, + "python-dotenv": { + "hashes": [ + "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba", + "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.0" + }, "pytz": { "hashes": [ "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", @@ -112,11 +249,19 @@ }, "requests": { "hashes": [ - "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", - "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" + "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", + "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf" ], "index": "pypi", - "version": "==2.28.1" + "version": "==2.28.2" + }, + "selenium": { + "hashes": [ + "sha256:478fae77cdfaec32adb1e68d59632c8c191f920535282abcaa2d1a3d98655624", + "sha256:4c19e6aac202719373108d53a5a8e9336ba8d2b25822ca32ae6ff37acbabbdbe" + ], + "index": "pypi", + "version": "==4.9.0" }, "six": { "hashes": [ @@ -126,13 +271,68 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, + "sortedcontainers": { + "hashes": [ + "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", + "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0" + ], + "version": "==2.4.0" + }, + "tqdm": { + "hashes": [ + "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5", + "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671" + ], + "markers": "python_version >= '3.7'", + "version": "==4.65.0" + }, + "trio": { + "hashes": [ + "sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf", + "sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0" + ], + "markers": "python_version >= '3.7'", + "version": "==0.22.0" + }, + "trio-websocket": { + "hashes": [ + "sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53", + "sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62" + ], + "markers": "python_version >= '3.7'", + "version": "==0.10.2" + }, "urllib3": { "hashes": [ - "sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc", - "sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a" + "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305", + "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", - "version": "==1.26.11" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.26.15" + }, + "webdriver-manager": { + "hashes": [ + "sha256:7d3aa8d67bd6c92a5d25f4abd75eea2c6dd24ea6617bff986f502280903a0e2b", + "sha256:ee788d389b8f45222a8a62f6f39b579360a1f87be46dad6da89918354af3ce73" + ], + "index": "pypi", + "version": "==3.8.6" + }, + "wsproto": { + "hashes": [ + "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", + "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==1.2.0" } }, "develop": {} diff --git a/README.md b/README.md index 4b98a00..ffb353e 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,27 @@ instagram-scraper @creds.txt --filename @location_ids.txt --location --include-l ## Getting Instagram cookies +### New flow +Improved flow allows to get cookies trough pop-up browser window. +In order to use this flow you need to omit '--cookie' param. + +__Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__ + +1. Run command with --cookie param omited +2. In opened Google Chrome instance, create new profile +![Creating new profile](docs/selenium1.png) +3. Press continue without email +![Continue without email](docs/selenium2.png) +4. Enter name of the profile that would be liked to this account and create it +![Creating an account](docs/selenium3.png) +5. Click "Only allow essential cookies" +![Only allow essential cookies](docs/selenium4.png) +6. Log-in to Instagram. Browser will close automatically short after. +![Log-in to instagram](docs/selenium5.png) + +Once set up you'll need only click on created profile in the following runs to use associated instagram profile. + +### Old flow This now requires the entire cookie string, in the format of an HTTP request header. Details TK. __Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__ diff --git a/docs/selenium1.png b/docs/selenium1.png new file mode 100644 index 0000000..cac6449 Binary files /dev/null and b/docs/selenium1.png differ diff --git a/docs/selenium2.png b/docs/selenium2.png new file mode 100644 index 0000000..6dcd636 Binary files /dev/null and b/docs/selenium2.png differ diff --git a/docs/selenium3.png b/docs/selenium3.png new file mode 100644 index 0000000..5a70008 Binary files /dev/null and b/docs/selenium3.png differ diff --git a/docs/selenium4.png b/docs/selenium4.png new file mode 100644 index 0000000..cb63b3f Binary files /dev/null and b/docs/selenium4.png differ diff --git a/docs/selenium5.png b/docs/selenium5.png new file mode 100644 index 0000000..43e5c39 Binary files /dev/null and b/docs/selenium5.png differ diff --git a/instagram_locations/instagram_locations.py b/instagram_locations/instagram_locations.py index 7827a68..1a03b1c 100644 --- a/instagram_locations/instagram_locations.py +++ b/instagram_locations/instagram_locations.py @@ -7,6 +7,11 @@ from datetime import datetime, timezone from itertools import product from statistics import pstdev from string import Template +from selenium import webdriver +from selenium.webdriver.chrome.service import Service as ChromiumService +from webdriver_manager.chrome import ChromeDriverManager +from webdriver_manager.core.utils import ChromeType +from time import sleep import requests @@ -113,6 +118,25 @@ def encode_date(date_str: str): return str(max_id_num) +def get_insta_cookies(): + """ + Attempts to run selenium, provide user with the login form and extract cookies from page to be used in program. + Returns cookies formatted as name=value;name=value;... + """ + options = webdriver.ChromeOptions() + options.add_argument(r"--user-data-dir=~/.instagram_location_searcher/data") + options.add_argument(r'--profile-directory=~/.instagram_location_searcher/profile') + driver = webdriver.Chrome(options=options, service=ChromiumService(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())) + driver.get("https://www.instagram.com/") + # Check that there is cookie with name sessionid (mean we logged in) + cookies = driver.get_cookies() + while not any([cookie.get("name") == "sessionid" for cookie in cookies]): + sleep(1) + cookies = driver.get_cookies() + print("; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies])) + return "; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies]) + + html_template = """ Instagram location visualizations @@ -193,6 +217,9 @@ def main(): args = parser.parse_args() cookie = args.cookie + # If user run command without cookie we are trying to perform automated flow to acquire the cookie + if not cookie: + cookie = get_insta_cookies() date_var = "" if args.date is not None: