mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-12 21:38:36 +03:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
798684a334 | ||
|
|
a556b237e9 | ||
|
|
283bc35658 | ||
|
|
cef70fb80d | ||
|
|
e66ef4f477 | ||
|
|
1f6a8368fd | ||
|
|
9a046fd1cb | ||
|
|
aae2bb5999 |
1020
Pipfile.lock
generated
1020
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -2,14 +2,94 @@
|
|||||||
# These requirements were autogenerated by pipenv
|
# These requirements were autogenerated by pipenv
|
||||||
# To regenerate from the project's Pipfile, run:
|
# To regenerate from the project's Pipfile, run:
|
||||||
#
|
#
|
||||||
# pipenv lock --requirements
|
# pipenv lock --requirements --dev
|
||||||
#
|
#
|
||||||
|
|
||||||
certifi==2022.6.15
|
# Note: in pipenv 2020.x, "--dev" changed to emit both default and development
|
||||||
charset-normalizer==2.0.12
|
# requirements. To emit only development requirements, pass "--dev-only".
|
||||||
idna==3.3
|
|
||||||
requests==2.28.0
|
# -i https://pypi.org/simple
|
||||||
urllib3==1.26.9
|
alabaster==0.7.13; python_version >= '3.6'
|
||||||
vk-api==11.9.8
|
attrs==22.2.0; python_version >= '3.6'
|
||||||
python-dotenv==0.20.0
|
babel==2.11.0; python_version >= '3.6'
|
||||||
yt-dlp==2022.7.18
|
beautifulsoup4==4.11.2; python_version >= '3.6'
|
||||||
|
black==22.3.0
|
||||||
|
bleach==6.0.0; python_version >= '3.7'
|
||||||
|
brotli==1.0.9; platform_python_implementation == 'CPython'
|
||||||
|
certifi==2022.12.7; python_version >= '3.6'
|
||||||
|
cffi==1.15.1
|
||||||
|
charset-normalizer==3.0.1; python_version >= '3.6'
|
||||||
|
click==8.1.3; python_version >= '3.7'
|
||||||
|
colorama==0.4.6; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
|
||||||
|
coverage[toml]==7.2.0; python_version >= '3.7'
|
||||||
|
cryptography==39.0.1; python_version >= '3.6'
|
||||||
|
docutils==0.18.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||||
|
exceptiongroup==1.1.0; python_version < '3.11'
|
||||||
|
flake8==6.0.0
|
||||||
|
furo==2022.6.21
|
||||||
|
idna==3.4; python_version >= '3.5'
|
||||||
|
imagesize==1.4.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||||
|
importlib-metadata==6.0.0; python_version >= '3.7'
|
||||||
|
iniconfig==2.0.0; python_version >= '3.7'
|
||||||
|
isort==5.10.1
|
||||||
|
jaraco.classes==3.2.3; python_version >= '3.7'
|
||||||
|
jeepney==0.8.0; sys_platform == 'linux'
|
||||||
|
jinja2==3.1.2; python_version >= '3.7'
|
||||||
|
keyring==23.13.1; python_version >= '3.7'
|
||||||
|
livereload==2.6.3
|
||||||
|
markdown-it-py==2.2.0; python_version >= '3.7'
|
||||||
|
markupsafe==2.1.2; python_version >= '3.7'
|
||||||
|
mccabe==0.7.0; python_version >= '3.6'
|
||||||
|
mdit-py-plugins==0.3.4; python_version >= '3.7'
|
||||||
|
mdurl==0.1.2; python_version >= '3.7'
|
||||||
|
more-itertools==9.0.0; python_version >= '3.7'
|
||||||
|
mutagen==1.46.0; python_version >= '3.7'
|
||||||
|
mypy-extensions==1.0.0; python_version >= '3.5'
|
||||||
|
mypy==0.961
|
||||||
|
myst-parser==0.18.1
|
||||||
|
packaging==23.0; python_version >= '3.7'
|
||||||
|
pathspec==0.11.0; python_version >= '3.7'
|
||||||
|
pkginfo==1.9.6; python_version >= '3.6'
|
||||||
|
platformdirs==3.0.0; python_version >= '3.7'
|
||||||
|
pluggy==1.0.0; python_version >= '3.6'
|
||||||
|
pycodestyle==2.10.0; python_version >= '3.6'
|
||||||
|
pycparser==2.21
|
||||||
|
pycryptodomex==3.17; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||||
|
pyflakes==3.0.1; python_version >= '3.6'
|
||||||
|
pygments==2.14.0; python_version >= '3.6'
|
||||||
|
pytest-cov==4.0.0
|
||||||
|
pytest-sphinx==0.5.0
|
||||||
|
pytest==7.2.1
|
||||||
|
python-dotenv==0.21.1
|
||||||
|
pytz==2022.7.1
|
||||||
|
pyyaml==6.0; python_version >= '3.6'
|
||||||
|
readme-renderer==37.3; python_version >= '3.7'
|
||||||
|
requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||||
|
requests==2.28.2; python_version >= '3.7' and python_version < '4'
|
||||||
|
rfc3986==2.0.0; python_version >= '3.7'
|
||||||
|
rich==13.3.1; python_version >= '3.7'
|
||||||
|
secretstorage==3.3.3; sys_platform == 'linux'
|
||||||
|
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||||
|
snowballstemmer==2.2.0
|
||||||
|
soupsieve==2.4; python_version >= '3.7'
|
||||||
|
sphinx-autobuild==2021.3.14
|
||||||
|
sphinx-autodoc-typehints==1.19.1
|
||||||
|
sphinx-basic-ng==1.0.0b1; python_version >= '3.7'
|
||||||
|
sphinx-copybutton==0.5.0
|
||||||
|
sphinx==5.0.2
|
||||||
|
sphinxcontrib-applehelp==1.0.4; python_version >= '3.8'
|
||||||
|
sphinxcontrib-devhelp==1.0.2; python_version >= '3.5'
|
||||||
|
sphinxcontrib-htmlhelp==2.0.1; python_version >= '3.8'
|
||||||
|
sphinxcontrib-jsmath==1.0.1; python_version >= '3.5'
|
||||||
|
sphinxcontrib-qthelp==1.0.3; python_version >= '3.5'
|
||||||
|
sphinxcontrib-serializinghtml==1.1.5; python_version >= '3.5'
|
||||||
|
tomli==2.0.1; python_version < '3.11'
|
||||||
|
tornado==6.2; python_version > '2.7'
|
||||||
|
twine==4.0.2
|
||||||
|
typing-extensions==4.5.0; python_version >= '3.7'
|
||||||
|
urllib3==1.26.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||||
|
vk-api==11.9.9
|
||||||
|
webencodings==0.5.1
|
||||||
|
websockets==10.4; python_version >= '3.7'
|
||||||
|
yt-dlp==2023.2.17
|
||||||
|
zipp==3.14.0; python_version >= '3.7'
|
||||||
|
|||||||
5
setup.py
5
setup.py
@@ -44,7 +44,10 @@ setup(
|
|||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
],
|
],
|
||||||
keywords=["scraper", "vk", "vkontakte", "vk-api", "media-downloader"],
|
keywords=["scraper", "vk", "vkontakte", "vk-api", "media-downloader"],
|
||||||
url="https://github.com/bellingcat/vk-url-scraper",
|
project_urls={
|
||||||
|
"Code": "https://github.com/bellingcat/vk-url-scraper",
|
||||||
|
"Documentation": "https://vk-url-scraper.readthedocs.io/en/latest/",
|
||||||
|
},
|
||||||
author="Bellingcat",
|
author="Bellingcat",
|
||||||
author_email="tech@bellingcat.com",
|
author_email="tech@bellingcat.com",
|
||||||
license="MIT",
|
license="MIT",
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ def test_login_custom_file():
|
|||||||
VkScraper(
|
VkScraper(
|
||||||
os.environ["VK_USERNAME"],
|
os.environ["VK_USERNAME"],
|
||||||
os.environ["VK_PASSWORD"],
|
os.environ["VK_PASSWORD"],
|
||||||
os.environ.get("VK_TOKEN"),
|
|
||||||
session_file=session_filename,
|
session_file=session_filename,
|
||||||
)
|
)
|
||||||
assert os.path.isfile(session_filename)
|
assert os.path.isfile(session_filename)
|
||||||
@@ -81,7 +80,7 @@ def test_scrape_wall_url_with_photos():
|
|||||||
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
|
== "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
|
||||||
)
|
)
|
||||||
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
|
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
|
||||||
assert len(res[0]["payload"]) == 16
|
assert len(res[0]["payload"]) == 17
|
||||||
assert len(res[0]["attachments"].keys()) == 1
|
assert len(res[0]["attachments"].keys()) == 1
|
||||||
assert list(res[0]["attachments"].keys()) == ["photo"]
|
assert list(res[0]["attachments"].keys()) == ["photo"]
|
||||||
assert len(res[0]["attachments"]["photo"]) == 9
|
assert len(res[0]["attachments"]["photo"]) == 9
|
||||||
@@ -93,7 +92,7 @@ def test_scrape_wall_url_with_photos_inner_videos_and_links_with_inner_photos():
|
|||||||
assert res[0]["id"] == "wall-17315087_74182"
|
assert res[0]["id"] == "wall-17315087_74182"
|
||||||
assert res[0]["text"] == ""
|
assert res[0]["text"] == ""
|
||||||
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 11, 1, 9))
|
assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 11, 1, 9))
|
||||||
assert len(res[0]["payload"]) == 15
|
assert len(res[0]["payload"]) == 17
|
||||||
assert len(res[0]["attachments"].keys()) == 3
|
assert len(res[0]["attachments"].keys()) == 3
|
||||||
for k in ["photo", "link", "video"]:
|
for k in ["photo", "link", "video"]:
|
||||||
assert k in list(res[0]["attachments"].keys())
|
assert k in list(res[0]["attachments"].keys())
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ class VkScraper:
|
|||||||
token : str
|
token : str
|
||||||
Access token received after authenticating; it can be found in the vk_config.v2.json file
|
Access token received after authenticating; it can be found in the vk_config.v2.json file
|
||||||
session_file : str
|
session_file : str
|
||||||
File name where the VK session is saved so future logins are easier
|
File name where the VK session is saved so future logins are easier; this file is not created if a token is passed
|
||||||
captcha_handler : func
|
captcha_handler : func
|
||||||
Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler
|
Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "3"
|
_MINOR = "3"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "7"
|
_PATCH = "13"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user