Merge pull request #13 from SozinM/feature/add-selenium

Add selenium flow
This commit is contained in:
Miguel Sozinho Ramalho
2023-05-01 09:47:35 +01:00
committed by GitHub
9 changed files with 271 additions and 19 deletions

View File

@@ -7,6 +7,8 @@ name = "pypi"
requests = "*"
numpy = "*"
pandas = "*"
selenium = "*"
webdriver-manager = "*"
[dev-packages]

238
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "864ee81413d2d76069a372c39e9bffb8bda1c636b2b78e2b99517f7828e10bf9"
"sha256": "66179ca2743007ccdf172c4d911e77f9518ef4471fbee749b73fe973fe46feaf"
},
"pipfile-spec": 6,
"requires": {
@@ -16,29 +16,134 @@
]
},
"default": {
"async-generator": {
"hashes": [
"sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b",
"sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"
],
"markers": "python_version >= '3.5'",
"version": "==1.10"
},
"attrs": {
"hashes": [
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
],
"markers": "python_version >= '3.7'",
"version": "==23.1.0"
},
"certifi": {
"hashes": [
"sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d",
"sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"
"sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3",
"sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2022.6.15"
"markers": "python_version >= '3.6'",
"version": "==2022.12.7"
},
"charset-normalizer": {
"hashes": [
"sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5",
"sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"
"sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6",
"sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1",
"sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e",
"sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373",
"sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62",
"sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230",
"sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be",
"sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c",
"sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0",
"sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448",
"sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f",
"sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649",
"sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d",
"sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0",
"sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706",
"sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a",
"sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59",
"sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23",
"sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5",
"sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb",
"sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e",
"sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e",
"sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c",
"sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28",
"sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d",
"sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41",
"sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974",
"sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce",
"sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f",
"sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1",
"sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d",
"sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8",
"sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017",
"sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31",
"sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7",
"sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8",
"sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e",
"sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14",
"sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd",
"sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d",
"sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795",
"sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b",
"sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b",
"sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b",
"sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203",
"sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f",
"sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19",
"sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1",
"sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a",
"sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac",
"sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9",
"sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0",
"sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137",
"sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f",
"sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6",
"sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5",
"sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909",
"sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f",
"sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0",
"sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324",
"sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755",
"sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb",
"sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854",
"sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c",
"sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60",
"sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84",
"sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0",
"sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b",
"sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1",
"sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531",
"sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1",
"sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11",
"sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326",
"sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df",
"sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2.1.0"
"markers": "python_full_version >= '3.7.0'",
"version": "==3.1.0"
},
"exceptiongroup": {
"hashes": [
"sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e",
"sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"
],
"markers": "python_version < '3.11'",
"version": "==1.1.1"
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"
],
"markers": "python_version >= '3.7'",
"version": "==0.14.0"
},
"idna": {
"hashes": [
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
],
"markers": "python_version >= '3.5'",
"version": "==3.3"
"version": "==3.4"
},
"numpy": {
"hashes": [
@@ -68,6 +173,22 @@
"index": "pypi",
"version": "==1.23.1"
},
"outcome": {
"hashes": [
"sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672",
"sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5"
],
"markers": "python_version >= '3.7'",
"version": "==1.2.0"
},
"packaging": {
"hashes": [
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
],
"markers": "python_version >= '3.7'",
"version": "==23.1"
},
"pandas": {
"hashes": [
"sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6",
@@ -95,6 +216,14 @@
"index": "pypi",
"version": "==1.4.3"
},
"pysocks": {
"hashes": [
"sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299",
"sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5",
"sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"
],
"version": "==1.7.1"
},
"python-dateutil": {
"hashes": [
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
@@ -103,6 +232,14 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.8.2"
},
"python-dotenv": {
"hashes": [
"sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba",
"sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"
],
"markers": "python_version >= '3.8'",
"version": "==1.0.0"
},
"pytz": {
"hashes": [
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
@@ -112,11 +249,19 @@
},
"requests": {
"hashes": [
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
"sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
"sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa",
"sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"
],
"index": "pypi",
"version": "==2.28.1"
"version": "==2.28.2"
},
"selenium": {
"hashes": [
"sha256:478fae77cdfaec32adb1e68d59632c8c191f920535282abcaa2d1a3d98655624",
"sha256:4c19e6aac202719373108d53a5a8e9336ba8d2b25822ca32ae6ff37acbabbdbe"
],
"index": "pypi",
"version": "==4.9.0"
},
"six": {
"hashes": [
@@ -126,13 +271,68 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0"
},
"sniffio": {
"hashes": [
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101",
"sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"
],
"markers": "python_version >= '3.7'",
"version": "==1.3.0"
},
"sortedcontainers": {
"hashes": [
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88",
"sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"
],
"version": "==2.4.0"
},
"tqdm": {
"hashes": [
"sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5",
"sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"
],
"markers": "python_version >= '3.7'",
"version": "==4.65.0"
},
"trio": {
"hashes": [
"sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf",
"sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0"
],
"markers": "python_version >= '3.7'",
"version": "==0.22.0"
},
"trio-websocket": {
"hashes": [
"sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53",
"sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62"
],
"markers": "python_version >= '3.7'",
"version": "==0.10.2"
},
"urllib3": {
"hashes": [
"sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc",
"sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a"
"sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305",
"sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
"version": "==1.26.11"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==1.26.15"
},
"webdriver-manager": {
"hashes": [
"sha256:7d3aa8d67bd6c92a5d25f4abd75eea2c6dd24ea6617bff986f502280903a0e2b",
"sha256:ee788d389b8f45222a8a62f6f39b579360a1f87be46dad6da89918354af3ce73"
],
"index": "pypi",
"version": "==3.8.6"
},
"wsproto": {
"hashes": [
"sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",
"sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==1.2.0"
}
},
"develop": {}

View File

@@ -61,6 +61,30 @@ instagram-scraper @creds.txt --filename @location_ids.txt --location --include-l
## Getting Instagram cookies
### Improved Tool Usage Process
The updated process allows you to obtain cookies through a pop-up browser window. To use this new flow, simply omit the `--cookie` parameter when running the command.
__Important: An Instagram session ID should be treated like a password, as it provides full access to your Instagram account. Using this session ID in multiple places or on multiple computers may cause Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may violate the Instagram Terms of Service and could lead to account suspension.__
Follow these steps to use the improved tool usage process:
1. Run the command without the `--cookie` parameter.
2. In the opened Google Chrome instance, create a new profile.
![Creating a new profile](docs/selenium1.png)
3. Click "Continue without email".
![Continue without email](docs/selenium2.png)
4. Enter a name for the profile you'd like to link to instagram account, and click "Done".
![Creating an account](docs/selenium3.png)
5. Select "Only allow essential cookies".
![Only allow essential cookies](docs/selenium4.png)
6. Log in to your Instagram account. The browser will close automatically shortly afterward.
![Log in to Instagram](docs/selenium5.png)
Once you've completed the initial setup, you can simply click on the created profile in subsequent runs to use the associated Instagram account.
### Old flow
This now requires the entire cookie string, in the format of an HTTP request header. Details TK.
__Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__

BIN
docs/selenium1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

BIN
docs/selenium2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

BIN
docs/selenium3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
docs/selenium4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 266 KiB

BIN
docs/selenium5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 354 KiB

View File

@@ -7,6 +7,11 @@ from datetime import datetime, timezone
from itertools import product
from statistics import pstdev
from string import Template
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromiumService
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.utils import ChromeType
from time import sleep
import requests
@@ -113,6 +118,24 @@ def encode_date(date_str: str):
return str(max_id_num)
def get_insta_cookies():
"""
Attempts to run selenium, provide user with the login form and extract cookies from page to be used in program.
Returns cookies formatted as name=value;name=value;...
"""
options = webdriver.ChromeOptions()
options.add_argument(r"--user-data-dir=~/.instagram_location_searcher/data")
options.add_argument(r'--profile-directory=~/.instagram_location_searcher/profile')
driver = webdriver.Chrome(options=options, service=ChromiumService(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()))
driver.get("https://www.instagram.com/")
# Check that there is cookie with name sessionid (mean we logged in)
cookies = driver.get_cookies()
while not any([cookie.get("name") == "sessionid" for cookie in cookies]):
sleep(1)
cookies = driver.get_cookies()
return "; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies])
html_template = """<html>
<head>
<title>Instagram location visualizations</title>
@@ -193,6 +216,9 @@ def main():
args = parser.parse_args()
cookie = args.cookie
# If user run command without cookie we are trying to perform automated flow to acquire the cookie
if not cookie:
cookie = get_insta_cookies()
date_var = ""
if args.date is not None: