Add selenium flow

This commit is contained in:
Mikhail Sozin
2023-04-23 17:28:08 +03:00
parent 9f02a008f8
commit f9a3e36d7a
9 changed files with 269 additions and 19 deletions

View File

@@ -7,6 +7,8 @@ name = "pypi"
requests = "*"
numpy = "*"
pandas = "*"
selenium = "*"
webdriver-manager = "*"
[dev-packages]

238
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "864ee81413d2d76069a372c39e9bffb8bda1c636b2b78e2b99517f7828e10bf9"
"sha256": "66179ca2743007ccdf172c4d911e77f9518ef4471fbee749b73fe973fe46feaf"
},
"pipfile-spec": 6,
"requires": {
@@ -16,29 +16,134 @@
]
},
"default": {
"async-generator": {
"hashes": [
"sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b",
"sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"
],
"markers": "python_version >= '3.5'",
"version": "==1.10"
},
"attrs": {
"hashes": [
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
],
"markers": "python_version >= '3.7'",
"version": "==23.1.0"
},
"certifi": {
"hashes": [
"sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d",
"sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"
"sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3",
"sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2022.6.15"
"markers": "python_version >= '3.6'",
"version": "==2022.12.7"
},
"charset-normalizer": {
"hashes": [
"sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5",
"sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"
"sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6",
"sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1",
"sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e",
"sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373",
"sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62",
"sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230",
"sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be",
"sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c",
"sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0",
"sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448",
"sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f",
"sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649",
"sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d",
"sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0",
"sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706",
"sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a",
"sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59",
"sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23",
"sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5",
"sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb",
"sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e",
"sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e",
"sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c",
"sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28",
"sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d",
"sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41",
"sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974",
"sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce",
"sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f",
"sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1",
"sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d",
"sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8",
"sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017",
"sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31",
"sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7",
"sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8",
"sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e",
"sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14",
"sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd",
"sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d",
"sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795",
"sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b",
"sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b",
"sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b",
"sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203",
"sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f",
"sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19",
"sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1",
"sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a",
"sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac",
"sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9",
"sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0",
"sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137",
"sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f",
"sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6",
"sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5",
"sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909",
"sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f",
"sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0",
"sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324",
"sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755",
"sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb",
"sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854",
"sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c",
"sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60",
"sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84",
"sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0",
"sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b",
"sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1",
"sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531",
"sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1",
"sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11",
"sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326",
"sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df",
"sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"
],
"markers": "python_full_version >= '3.6.0'",
"version": "==2.1.0"
"markers": "python_full_version >= '3.7.0'",
"version": "==3.1.0"
},
"exceptiongroup": {
"hashes": [
"sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e",
"sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"
],
"markers": "python_version < '3.11'",
"version": "==1.1.1"
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"
],
"markers": "python_version >= '3.7'",
"version": "==0.14.0"
},
"idna": {
"hashes": [
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
],
"markers": "python_version >= '3.5'",
"version": "==3.3"
"version": "==3.4"
},
"numpy": {
"hashes": [
@@ -68,6 +173,22 @@
"index": "pypi",
"version": "==1.23.1"
},
"outcome": {
"hashes": [
"sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672",
"sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5"
],
"markers": "python_version >= '3.7'",
"version": "==1.2.0"
},
"packaging": {
"hashes": [
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
],
"markers": "python_version >= '3.7'",
"version": "==23.1"
},
"pandas": {
"hashes": [
"sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6",
@@ -95,6 +216,14 @@
"index": "pypi",
"version": "==1.4.3"
},
"pysocks": {
"hashes": [
"sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299",
"sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5",
"sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"
],
"version": "==1.7.1"
},
"python-dateutil": {
"hashes": [
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
@@ -103,6 +232,14 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.8.2"
},
"python-dotenv": {
"hashes": [
"sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba",
"sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"
],
"markers": "python_version >= '3.8'",
"version": "==1.0.0"
},
"pytz": {
"hashes": [
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
@@ -112,11 +249,19 @@
},
"requests": {
"hashes": [
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
"sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
"sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa",
"sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"
],
"index": "pypi",
"version": "==2.28.1"
"version": "==2.28.2"
},
"selenium": {
"hashes": [
"sha256:478fae77cdfaec32adb1e68d59632c8c191f920535282abcaa2d1a3d98655624",
"sha256:4c19e6aac202719373108d53a5a8e9336ba8d2b25822ca32ae6ff37acbabbdbe"
],
"index": "pypi",
"version": "==4.9.0"
},
"six": {
"hashes": [
@@ -126,13 +271,68 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0"
},
"sniffio": {
"hashes": [
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101",
"sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"
],
"markers": "python_version >= '3.7'",
"version": "==1.3.0"
},
"sortedcontainers": {
"hashes": [
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88",
"sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"
],
"version": "==2.4.0"
},
"tqdm": {
"hashes": [
"sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5",
"sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"
],
"markers": "python_version >= '3.7'",
"version": "==4.65.0"
},
"trio": {
"hashes": [
"sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf",
"sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0"
],
"markers": "python_version >= '3.7'",
"version": "==0.22.0"
},
"trio-websocket": {
"hashes": [
"sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53",
"sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62"
],
"markers": "python_version >= '3.7'",
"version": "==0.10.2"
},
"urllib3": {
"hashes": [
"sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc",
"sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a"
"sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305",
"sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
"version": "==1.26.11"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==1.26.15"
},
"webdriver-manager": {
"hashes": [
"sha256:7d3aa8d67bd6c92a5d25f4abd75eea2c6dd24ea6617bff986f502280903a0e2b",
"sha256:ee788d389b8f45222a8a62f6f39b579360a1f87be46dad6da89918354af3ce73"
],
"index": "pypi",
"version": "==3.8.6"
},
"wsproto": {
"hashes": [
"sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",
"sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==1.2.0"
}
},
"develop": {}

View File

@@ -61,6 +61,27 @@ instagram-scraper @creds.txt --filename @location_ids.txt --location --include-l
## Getting Instagram cookies
### New flow
Improved flow allows to get cookies trough pop-up browser window.
In order to use this flow you need to omit '--cookie' param.
__Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__
1. Run command with --cookie param omited
2. In opened Google Chrome instance, create new profile
![Creating new profile](docs/selenium1.png)
3. Press continue without email
![Continue without email](docs/selenium2.png)
4. Enter name of the profile that would be liked to this account and create it
![Creating an account](docs/selenium3.png)
5. Click "Only allow essential cookies"
![Only allow essential cookies](docs/selenium4.png)
6. Log-in to Instagram. Browser will close automatically short after.
![Log-in to instagram](docs/selenium5.png)
Once set up you'll need only click on created profile in the following runs to use associated instagram profile.
### Old flow
This now requires the entire cookie string, in the format of an HTTP request header. Details TK.
__Important: an Instagram session ID should be treated like a password — it provides full access to the Instagram account. Using this session ID in multiple places or on multiple computers may trigger Instagram to invalidate all session IDs. Using this session ID for any purpose other than the official Instagram website or application may be a violation of the Instagram Terms of Service and could lead to account suspension.__

BIN
docs/selenium1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

BIN
docs/selenium2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

BIN
docs/selenium3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
docs/selenium4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 266 KiB

BIN
docs/selenium5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 354 KiB

View File

@@ -7,6 +7,11 @@ from datetime import datetime, timezone
from itertools import product
from statistics import pstdev
from string import Template
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromiumService
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.utils import ChromeType
from time import sleep
import requests
@@ -113,6 +118,25 @@ def encode_date(date_str: str):
return str(max_id_num)
def get_insta_cookies():
"""
Attempts to run selenium, provide user with the login form and extract cookies from page to be used in program.
Returns cookies formatted as name=value;name=value;...
"""
options = webdriver.ChromeOptions()
options.add_argument(r"--user-data-dir=~/.instagram_location_searcher/data")
options.add_argument(r'--profile-directory=~/.instagram_location_searcher/profile')
driver = webdriver.Chrome(options=options, service=ChromiumService(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()))
driver.get("https://www.instagram.com/")
# Check that there is cookie with name sessionid (mean we logged in)
cookies = driver.get_cookies()
while not any([cookie.get("name") == "sessionid" for cookie in cookies]):
sleep(1)
cookies = driver.get_cookies()
print("; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies]))
return "; ".join([f"{cookie['name']}={cookie['value'] }"for cookie in cookies])
html_template = """<html>
<head>
<title>Instagram location visualizations</title>
@@ -193,6 +217,9 @@ def main():
args = parser.parse_args()
cookie = args.cookie
# If user run command without cookie we are trying to perform automated flow to acquire the cookie
if not cookie:
cookie = get_insta_cookies()
date_var = ""
if args.date is not None: