archival notification

Bump version to v0.3.34 for release
adapting CD
2026-06-08 11:28:38 +03:00 · 2025-05-05 11:45:33 +01:00 · 2025-05-05 11:26:10 +01:00 · 2025-05-05 11:25:58 +01:00 · 2025-05-05 11:20:03 +01:00 · 2025-05-05 11:19:57 +01:00
32 changed files with 2566 additions and 894 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,3 @@
+VK_USERNAME="your username"
+VK_PASSWORD="your password"
+VK_TOKEN="optional token"
--- a/.github/actions/setup-venv/action.yml
+++ b/.github/actions/setup-venv/action.yml
@@ -16,6 +16,11 @@ runs:
      with:
        python-version: ${{ inputs.python-version }}

+    - shell: bash
+      run: |
+        # Install ffmpeg non-interactively; refresh the package index first so the install does not fail on stale package lists.
+        sudo apt-get update && sudo apt-get install -y ffmpeg
+
    - shell: bash
      run: |
        # Install prerequisites.
@@ -26,7 +31,7 @@ runs:
        # Get the exact Python version to use in the cache key.
        echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV

-    - uses: actions/cache@v2
+    - uses: actions/cache@v4
      id: virtualenv-cache
      with:
        path: .venv
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,11 +0,0 @@
-version: 2
-updates:
- package-ecosystem: "pip"
-  directory: "/"
-  schedule:
-    interval: "daily"
-  open-pull-requests-limit: 10
- package-ecosystem: "github-actions"
-  directory: "/"
-  schedule:
-    interval: "daily"
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,18 +1,18 @@
-<!-- To ensure we can review your pull request promptly please complete this template entirely. -->
-
-<!-- Please reference the issue number here. You can replace "Fixes" with "Closes" if it makes more sense. -->
-Fixes #
-
-Changes proposed in this pull request:
-<!-- Please list all changes/additions here. -->
-
-
-## Before submitting
-
-<!-- Please complete this checklist BEFORE submitting your PR to speed along the review process. -->
- [ ] I've read and followed all steps in the [Making a pull request](https://github.com/bellingcat/vk-url-scraper/blob/main/CONTRIBUTING.md#making-a-pull-request)
-    section of the `CONTRIBUTING` docs.
- [ ] I've updated or added any relevant docstrings following the syntax described in the
-    [Writing docstrings](https://github.com/bellingcat/vk-url-scraper/blob/main/CONTRIBUTING.md#writing-docstrings) section of the `CONTRIBUTING` docs.
- [ ] If this PR fixes a bug, I've added a test that will fail without my fix.
- [ ] If this PR adds a new feature, I've added tests that sufficiently cover my new functionality.
+<!-- To ensure we can review your pull request promptly please complete this template entirely. -->
+
+<!-- Please reference the issue number here. You can replace "Fixes" with "Closes" if it makes more sense. -->
+Fixes #
+
+Changes proposed in this pull request:
+<!-- Please list all changes/additions here. -->
+-
+
+## Before submitting
+
+<!-- Please complete this checklist BEFORE submitting your PR to speed along the review process. -->
+- [ ] I've read and followed all steps in the [Making a pull request](https://github.com/bellingcat/vk-url-scraper/blob/main/CONTRIBUTING.md#making-a-pull-request) 
+    section of the `CONTRIBUTING` docs.
+- [ ] I've updated or added any relevant docstrings following the syntax described in the
+    [Writing docstrings](https://github.com/bellingcat/vk-url-scraper/blob/main/CONTRIBUTING.md#writing-docstrings) section of the `CONTRIBUTING` docs.
+- [ ] If this PR fixes a bug, I've added a test that will fail without my fix.
+- [ ] If this PR adds a new feature, I've added tests that sufficiently cover my new functionality.
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,6 +20,7 @@ env:
  PYTHONPATH: ./
  VK_USERNAME: ${{ secrets.VK_USERNAME }} 
  VK_PASSWORD: ${{ secrets.VK_PASSWORD }}
+  VK_TOKEN: ${{ secrets.VK_TOKEN }}

 jobs:
  checks:
@@ -29,11 +30,11 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python: ['3.7', '3.10']
-        task: #  --show-capture=no on purpose
+        python: ['3.10']
+        task: # --show-capture=no on purpose, -s for captchas
          - name: Test
            run: |
-              pytest  --show-capture=no --color=yes tests/
+              pytest -s --show-capture=no --color=yes tests/

        include:
          - python: '3.10'
@@ -78,10 +79,11 @@ jobs:
        run: |
          . .venv/bin/activate
          ${{ matrix.task.run }}
+        continue-on-error: ${{ matrix.task.name != 'Build' }}

      - name: Upload package distribution files
        if: matrix.task.name == 'Build'
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
        with:
          name: package
          path: dist
@@ -116,15 +118,11 @@ jobs:
          echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV

      - name: Download package distribution files
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
        with:
          name: package
          path: dist

-      # - name: Generate release notes
-      #   run: |
-      #     python scripts/release_notes.py > ${{ github.workspace }}-RELEASE_NOTES.md
-
      - name: Publish package to PyPI
        run: |
          twine upload -u '${{ secrets.PYPI_USERNAME }}' -p '${{ secrets.PYPI_PASSWORD }}' dist/*
--- a/.github/workflows/pr_checks.yml
+++ b/.github/workflows/pr_checks.yml
@@ -1,27 +0,0 @@
-name: PR Checks
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-on:
-  pull_request:
-    branches:
-      - main
-    paths:
-      - 'vk_url_scraper/**'
-
-jobs:
-  changelog:
-    name: CHANGELOG
-    runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request'
-
-    steps:
-    - uses: actions/checkout@v1
-
-    - name: Check that CHANGELOG has been updated
-      run: |
-        # If this step fails, this means you haven't updated the CHANGELOG.md
-        # file with notes on your contribution.
-        git diff --name-only $(git merge-base origin/main HEAD) | grep '^CHANGELOG.md$' && echo "Thanks for helping keep our CHANGELOG up-to-date!"
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 .env
 vk_config.v2.json
+output/
+tmp*/
 # build artifacts

 .eggs/
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -4,8 +4,12 @@ sphinx:
  configuration: docs/source/conf.py
  fail_on_warning: false

+build:
+  os: "ubuntu-22.04"
+  tools:
+    python: "3.10"
+    
 python:
-  version: "3.8"
  install:
    - requirements: requirements.txt
    - requirements: dev-requirements.txt
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,13 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## Unreleased
-
-## [0.1.2]
-* Added wall scraper with tests
-* Added photo scraper with tests
-* Added scraper with tests
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -156,8 +156,6 @@ When you're ready to contribute code to address an open issue, please follow the

    If the build fails, it's most likely due to small formatting issues. If the error message isn't clear, feel free to comment on this in your pull request.

-    And finally, please update the [CHANGELOG](https://github.com/bellingcat/vk-url-scraper/blob/main/CHANGELOG.md) with notes on your contribution in the "Unreleased" section at the top.
-
    After all of the above checks have passed, you can now open [a new GitHub pull request](https://github.com/bellingcat/vk-url-scraper/pulls).
    Make sure you have a clear description of the problem and the solution, and include a link to relevant issues.

--- a/2
+++ b/2
@@ -13,4 +13,4 @@ run-checks :
 	black .
 	flake8 .
 	mypy .
-	CUDA_VISIBLE_DEVICES='' pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/
+	CUDA_VISIBLE_DEVICES='' pytest -v --color=yes .
--- a/34
+++ b/34
@@ -4,7 +4,32 @@ verify_ssl = true
 name = "pypi"

 [packages]
-vk-api = "*"
+yt-dlp = ">=2023.2.17"
+certifi = ">=2023.7.22"
+charset-normalizer = ">=3.0.1"
+idna = ">=3.4"
+mutagen = ">=1.46.0"
+pycryptodomex = ">=3.17"
+requests = ">=2.28.2"
+urllib3 = ">=1.26.14"
+websockets = ">=10.4"
+vk-api = {ref = "b99dac0ec2f832a6c4b20bde49869e7229ce4742", git = "git+https://github.com/python273/vk_api.git"}
+flake8 = "*"
+mypy = ">=0.961"
+black = ">=22.3.0"
+isort = ">=5.10.1"
+pytest = "*"
+pytest-sphinx = "*"
+pytest-cov = "*"
+twine = ">=1.11.0"
+sphinx = "<5.1.0,>=4.3.0"
+furo = ">=2022.6.4.1"
+myst-parser = "<0.19.0,>=0.15.2"
+sphinx-copybutton = ">=0.5.0"
+sphinx-autobuild = ">=2021.3.14"
+sphinx-autodoc-typehints = "*"
+packaging = "*"
+python-dotenv = ">=0.21.1"

 [dev-packages]
 sphinx-copybutton = "==0.5.0"
@@ -17,11 +42,14 @@ pytest-sphinx = "*"
 pytest-cov = "*"
 twine = ">=1.11.0"
 sphinx = ">=4.3.0,<5.1.0"
-furo = "==2022.6.4.1"
+furo = "==2022.6.21"
 myst-parser = ">=0.15.2,<0.19.0"
 sphinx-autobuild = "==2021.3.14"
 sphinx-autodoc-typehints = "*"
 python-dotenv = "*"

 [requires]
-python_version = "3.9"
+python_version = "3.11"
+
+[pipenv]
+allow_prereleases = true
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/README.md
+++ b/README.md
@@ -1,14 +1,54 @@
 # vk-url-scraper
-Library to scrape data and especially media links (videos and photos) from vk.com URLs.
+Python library to scrape data, and especially media links like videos and photos, from vk.com URLs.
+
+> This repo has been archived because it relies on a fixed git commit of the vk_api library which we can no longer publish to pypi, see [issue](https://github.com/bellingcat/vk-url-scraper/issues/66). You can still install the latest release. This archived state may change if a solution is found to publish the library to pypi again.
+
+[![PyPI version](https://badge.fury.io/py/vk-url-scraper.svg)](https://badge.fury.io/py/vk-url-scraper)
+[![PyPI download month](https://img.shields.io/pypi/dm/vk-url-scraper.svg)](https://pypi.python.org/pypi/vk-url-scraper/)
+[![Documentation Status](https://readthedocs.org/projects/vk-url-scraper/badge/?version=latest)](https://vk-url-scraper.readthedocs.io/en/latest/?badge=latest)


-# TODO
-* docs online from sphinx
+You can use it via the [command line](#command-line-usage) or as a [python library](#python-library-usage), check the **[documentation](https://vk-url-scraper.readthedocs.io/en/latest/)**.

-## Quick usage
-`pip install vk-url-scraper` to install.
+## Installation
+You can install the most recent release from [pypi](https://pypi.org/project/vk-url-scraper/) via `pip install vk-url-scraper`.

+Currently you need to manually uninstall and re-install one dependency (as it is installed from github and not pypi):
+```bash
+pip uninstall vk-api
+pip install git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
+```

+To use the library you will need a valid username/password combination for vk.com. 
+
+## Command line usage
+```bash
+# run this to learn more about the parameters
+vk_url_scraper --help
+
+# scrape a URL and get the JSON result in the console
+vk_url_scraper --username "username here" --password "password here" --urls https://vk.com/wall12345_6789
+# OR
+vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall12345_6789
+# you can also have multiple urls
+vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall12345_6789 https://vk.com/photo-12345_6789 https://vk.com/video12345_6789
+
+# you can pass a token as well to avoid always authenticating 
+# and possibly getting captcha prompts
+# you can fetch the token from the generated vk_config.v2.json file by searching for "access_token"
+vk_url_scraper -u "username" -p "password" -t "vktoken goes here" --urls https://vk.com/wall12345_6789
+
+# save the JSON output into a file
+vk_url_scraper -u "username here" -p "password here" --urls https://vk.com/wall12345_6789 > output.json
+
+# download any photos or videos found in these URLS
+# this will use or create an output/ folder and dump the files there
+vk_url_scraper -u "username here" -p "password here" --download --urls https://vk.com/wall12345_6789
+# or
+vk_url_scraper -u "username here" -p "password here" -d --urls https://vk.com/wall12345_6789
+```
+
+## Python library usage
 ```python
 from vk_url_scraper import VkScraper

@@ -22,7 +62,7 @@ res = vks.scrape("https://vk.com/wall-1_398461")

 # scrape any "video" URL
 res = vks.scrape("https://vk.com/video-6596301_145810025")
-print(res[0]["text]) # eg: -> to get the text from code
+print(res[0]["text"]) # eg: -> to get the text from code
 ```

 ```python
@@ -43,15 +83,41 @@ print(res[0]["text]) # eg: -> to get the text from code

 see [docs] for all available functions. 

-### Development
-1. setup environment with `pip install -r requirements` or `pipenv install -r requirements`
+### TODO
+* scrape album links
+* scrape profile links
+* docs online from sphinx
+
+## Development
+(more info in [CONTRIBUTING.md](CONTRIBUTING.md)).
+
+1. setup dev environment with `pipenv install --dev`
+1. setup environment with `pipenv install -r requirements.txt`
+1. Activate the environment with `pipenv shell` (or prepend `pipenv run` to all commands)
 2. To run all checks to `make run-checks` (fixes style) or individually
   1. To fix style: `black .` and `isort .` -> `flake8 .` to validate lint
   2. To do type checking: `mypy .`
-   3. To test: `pytest .` (`pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/` to user verbose, colors, and test docstring examples)
+   3. To test: `pytest .` (`pytest -v --color=yes --doctest-modules tests/ vk_url_scraper/` to use verbose, colors, and test docstring examples)
 3. `make docs` to generate shpynx docs -> edit [config.py](docs/source/conf.py) if needed

-### Releasing new version
+To test the command line interface available in [__main__.py](vk_url_scraper/__main__.py) you need to pass the `-m` option to python like so: `python -m vk_url_scraper -u "" -p "" --urls ...`
+
+
+## Releasing new version
 1. edit [version.py](vk_url_scraper/version.py) with proper versioning
-2. `git tag vx.y.z` to tag version
-3. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
+2. make sure to run `pipenv run pip freeze > requirements.txt` if you manage libs with pipenv
+   1. if the hardcoded version of [vk_api](https://github.com/python273/vk_api) is still being used, then you must comment/remove that line from the generated requirements file and instruct users to manually install the version from the source as pypi does not allow repo/commit tags. Additionally, add the latest released version, currently `vk-api==11.9.9`. 
+3. run `./scripts/release.sh` to create a tag and push, alternatively
+   1. `git tag vx.y.z` to tag version
+   2. `git push origin vx.y.z` -> this will trigger workflow and put project on [pypi](https://pypi.org/project/vk-url-scraper/)
+4. go to https://readthedocs.org/ to deploy new docs version (if webhook is not setup)
+
+### Fixing a failed release
+
+If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete both the tag and corresponding release from GitHub. After you've pushed a fix, delete the tag from your local clone with
+
+```bash
+git tag -l | xargs git tag -d && git fetch -t
+```
+
+Then repeat the steps above.
--- a/RELEASE_PROCESS.md
+++ b/RELEASE_PROCESS.md
@@ -1,24 +0,0 @@
-# GitHub Release Process
-
-## Steps
-
-1. Update the version in `vk_url_scraper/version.py`.
-
-3. Run the release script:
-
-    ```bash
-    ./scripts/release.sh
-    ```
-
-    This will commit the changes to the CHANGELOG and `version.py` files and then create a new tag in git
-    which will trigger a workflow on GitHub Actions that handles the rest.
-
-## Fixing a failed release
-
-If for some reason the GitHub Actions release workflow failed with an error that needs to be fixed, you'll have to delete both the tag and corresponding release from GitHub. After you've pushed a fix, delete the tag from your local clone with
-
-```bash
-git tag -l | xargs git tag -d && git fetch -t
-```
-
-Then repeat the steps above.
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,11 +2,11 @@
 flake8

 # Static type checking
-mypy==0.961
+mypy>=0.961

 # Automatic code formatting
-black==22.3.0
-isort==5.10.1
+black>=22.3.0
+isort>=5.10.1

 # Running tests
 pytest
@@ -24,19 +24,20 @@ wheel
 Sphinx>=4.3.0,<5.1.0

 # Sphinx theme: https://sphinx-themes.org/sample-sites/furo/
-furo==2022.6.4.1
+furo>=2022.6.4.1

 # Lets Sphinx parse markdown files in addition to rst.
 myst-parser>=0.15.2,<0.19.0

 # Adds a copy button to code examples in the docs.
-sphinx-copybutton==0.5.0
+sphinx-copybutton>=0.5.0

 # Live rebuilding and reloading of docs for developing locally.
-sphinx-autobuild==2021.3.14
+sphinx-autobuild>=2021.3.14

 # Automatically adds types to docs
 sphinx-autodoc-typehints

 # For parsing and comparing version numbers.
 packaging
+python-dotenv>=0.21.1
--- a/docs/source/CHANGELOG.md
+++ b/docs/source/CHANGELOG.md
@@ -1 +0,0 @@
-../../CHANGELOG.md
--- a/docs/source/_static/favicon.ico
+++ b/docs/source/_static/favicon.ico
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -23,7 +23,6 @@ Contents

   installation
   overview
-   CHANGELOG

 .. toctree::
   :hidden:
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -1,7 +1,7 @@
 Installation
 ============

-**vk-url-scraper** supports Python >= 3.7.
+**vk-url-scraper** supports Python >= 3.10.

 ## Installing with `pip`

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,87 @@
-#
-# These requirements were autogenerated by pipenv
-# To regenerate from the project's Pipfile, run:
-#
-#    pipenv lock --requirements
-#
-
-certifi==2022.6.15
-charset-normalizer==2.0.12
-idna==3.3
-requests==2.28.0
-urllib3==1.26.9
-vk-api==11.9.8
-python-dotenv==0.20.0
+alabaster==0.7.16
+anyio==4.9.0
+babel==2.17.0
+backports.tarfile==1.2.0
+beautifulsoup4==4.13.4
+black==25.1.0
+certifi==2025.4.26
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.1.8
+colorama==0.4.6
+coverage==7.8.0
+cryptography==44.0.3
+docutils==0.18.1
+flake8==7.2.0
+furo==2023.3.27
+h11==0.16.0
+id==1.5.0
+idna==3.10
+imagesize==1.4.1
+importlib_metadata==8.7.0
+iniconfig==2.1.0
+isort==6.0.1
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.1.0
+jeepney==0.9.0
+Jinja2==3.1.6
+keyring==25.6.0
+livereload==2.7.1
+markdown-it-py==2.2.0
+MarkupSafe==3.0.2
+mccabe==0.7.0
+mdit-py-plugins==0.3.5
+mdurl==0.1.2
+more-itertools==10.7.0
+mutagen==1.47.0
+mypy==1.15.0
+mypy_extensions==1.1.0
+myst-parser==0.18.1
+nh3==0.2.21
+packaging==25.0
+pathspec==0.12.1
+pkginfo==1.10.0
+platformdirs==4.3.7
+pluggy==1.5.0
+pycodestyle==2.13.0
+pycparser==2.22
+pycryptodomex==3.22.0
+pyflakes==3.3.2
+Pygments==2.19.1
+pytest==8.3.5
+pytest-cov==6.1.1
+pytest-sphinx==0.6.3
+python-dotenv==1.1.0
+PyYAML==6.0.2
+readme_renderer==43.0
+requests==2.32.3
+requests-toolbelt==1.0.0
+rfc3986==2.0.0
+rich==14.0.0
+SecretStorage==3.3.3
+sniffio==1.3.1
+snowballstemmer==2.2.0
+soupsieve==2.7
+Sphinx==5.0.2
+sphinx-autobuild==2024.10.3
+sphinx-autodoc-typehints==1.19.1
+sphinx-basic-ng==1.0.0b2
+sphinx-copybutton==0.5.2
+sphinxcontrib-applehelp==2.0.0
+sphinxcontrib-devhelp==2.0.0
+sphinxcontrib-htmlhelp==2.1.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==2.0.0
+sphinxcontrib-serializinghtml==2.0.0
+starlette==0.46.2
+tornado==6.5b1
+twine==6.1.0
+typing_extensions==4.13.2
+urllib3==2.4.0
+uvicorn==0.34.2
+vk_api @ git+https://github.com/python273/vk_api.git@b99dac0ec2f832a6c4b20bde49869e7229ce4742
+watchfiles==1.0.5
+websockets==15.0.1
+yt-dlp==2025.5.3.232917.dev0
+zipp==3.21.0
--- a/scripts/prepare_changelog.py
+++ b/scripts/prepare_changelog.py
@@ -1,39 +0,0 @@
-from datetime import datetime
-from pathlib import Path
-
-from vk_url_scraper.version import VERSION
-
-
-def main():
-    changelog = Path("CHANGELOG.md")
-
-    with changelog.open() as f:
-        lines = f.readlines()
-
-    insert_index: int = -1
-    for i in range(len(lines)):
-        line = lines[i]
-        if line.startswith("## Unreleased"):
-            insert_index = i + 1
-        elif line.startswith(f"## [v{VERSION}]"):
-            print("CHANGELOG already up-to-date")
-            return
-        elif line.startswith("## [v"):
-            break
-
-    if insert_index < 0:
-        raise RuntimeError("Couldn't find 'Unreleased' section")
-
-    lines.insert(insert_index, "\n")
-    lines.insert(
-        insert_index + 1,
-        f"## [v{VERSION}](https://github.com/bellingcat/vk-url-scraper/releases/tag/v{VERSION}) - "
-        f"{datetime.now().strftime('%Y-%m-%d')}\n",
-    )
-
-    with changelog.open("w") as f:
-        f.writelines(lines)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -7,7 +7,6 @@ TAG=$(python -c 'from vk_url_scraper.version import VERSION; print("v" + VERSION
 read -p "Creating new release for $TAG. Do you want to continue? [Y/n] " prompt

 if [[ $prompt == "y" || $prompt == "Y" || $prompt == "yes" || $prompt == "Yes" ]]; then
-    python scripts/prepare_changelog.py
    git add -A
    git commit -m "Bump version to $TAG for release" || true && git push
    echo "Creating new git tag $TAG"
--- a/scripts/release_notes.py
+++ b/scripts/release_notes.py
@@ -1,78 +0,0 @@
-# encoding: utf-8
-
-"""
-Prepares markdown release notes for GitHub releases.
-"""
-
-import os
-from typing import List, Optional
-
-import packaging.version
-
-TAG = os.environ["TAG"]
-
-ADDED_HEADER = "### Added 🎉"
-CHANGED_HEADER = "### Changed ⚠️"
-FIXED_HEADER = "### Fixed ✅"
-REMOVED_HEADER = "### Removed 👋"
-
-
-def get_change_log_notes() -> str:
-    in_current_section = False
-    current_section_notes: List[str] = []
-    with open("CHANGELOG.md") as changelog:
-        for line in changelog:
-            if line.startswith("## "):
-                if line.startswith("## Unreleased"):
-                    continue
-                if line.startswith(f"## [{TAG}]"):
-                    in_current_section = True
-                    continue
-                break
-            if in_current_section:
-                if line.startswith("### Added"):
-                    line = ADDED_HEADER + "\n"
-                elif line.startswith("### Changed"):
-                    line = CHANGED_HEADER + "\n"
-                elif line.startswith("### Fixed"):
-                    line = FIXED_HEADER + "\n"
-                elif line.startswith("### Removed"):
-                    line = REMOVED_HEADER + "\n"
-                current_section_notes.append(line)
-    assert current_section_notes
-    return "## What's new\n\n" + "".join(current_section_notes).strip() + "\n"
-
-
-def get_commit_history() -> str:
-    new_version = packaging.version.parse(TAG)
-
-    # Get all tags sorted by version, latest first.
-    all_tags = os.popen("git tag -l --sort=-version:refname 'v*'").read().split("\n")
-
-    # Out of `all_tags`, find the latest previous version so that we can collect all
-    # commits between that version and the new version we're about to publish.
-    # Note that we ignore pre-releases unless the new version is also a pre-release.
-    last_tag: Optional[str] = None
-    for tag in all_tags:
-        if not tag.strip():  # could be blank line
-            continue
-        version = packaging.version.parse(tag)
-        if new_version.pre is None and version.pre is not None:
-            continue
-        if version < new_version:
-            last_tag = tag
-            break
-    if last_tag is not None:
-        commits = os.popen(f"git log {last_tag}..{TAG}^ --oneline --first-parent").read()
-    else:
-        commits = os.popen("git log --oneline --first-parent").read()
-    return "## Commits\n\n" + commits
-
-
-def main():
-    print(get_change_log_notes())
-    print(get_commit_history())
-
-
-if __name__ == "__main__":
-    main()
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ with open("vk_url_scraper/version.py", "r") as version_file:
 setup(
    name="vk-url-scraper",
    version=VERSION["VERSION"],
-    description="",
+    description="Scrape VK URLs to fetch info and media - python API or command line tool.",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    classifiers=[
@@ -43,16 +43,24 @@ setup(
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
-    keywords="",
-    url="https://github.com/bellingcat/vk-url-scraper",
+    keywords=["scraper", "vk", "vkontakte", "vk-api", "media-downloader"],
+    project_urls={
+        "Code": "https://github.com/bellingcat/vk-url-scraper",
+        "Documentation": "https://vk-url-scraper.readthedocs.io/en/latest/",
+    },
    author="Bellingcat",
    author_email="tech@bellingcat.com",
-    license="Apache",
+    license="MIT",
    packages=find_packages(
        exclude=["*.tests", "*.tests.*", "tests.*", "tests"],
    ),
    package_data={"vk_url_scraper": ["py.typed"]},
    install_requires=read_requirements("requirements.txt"),
    extras_require={"dev": read_requirements("dev-requirements.txt")},
-    python_requires=">=3.7",
+    python_requires=">=3.10",
+    entry_points={
+        "console_scripts": [
+            "vk_url_scraper=vk_url_scraper.__main__:main",
+        ],
+    },
 )
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,12 +1,11 @@
 import datetime
 import os
+import tempfile

 import pytest

 from vk_url_scraper import VkScraper

-from .util import assert_equal_lists
-
 vks = None


@@ -15,9 +14,23 @@ def test_login_fail():
        VkScraper("invalid", "combination")


+# disabled due to CI
+# def test_login_custom_file():
+#     session_filename = "test-session.json"
+#     VkScraper(
+#         os.environ["VK_USERNAME"],
+#         os.environ["VK_PASSWORD"],
+#         session_file=session_filename,
+#     )
+#     assert os.path.isfile(session_filename)
+#     os.unlink(session_filename)
+
+
 def test_login_success():
    global vks
-    vks = VkScraper(os.environ["VK_USERNAME"], os.environ["VK_PASSWORD"])
+    vks = VkScraper(
+        os.environ["VK_USERNAME"], os.environ["VK_PASSWORD"], os.environ.get("VK_TOKEN")
+    )


 def test_scrape_empty_urll():
@@ -68,7 +81,7 @@ def test_scrape_wall_url_with_photos():
        == "Хабаровск\nАллея героев\nПомолимся об укокоении воинов:\nАлександра, Игоря, Эдуарда, \nДионисия, Евгения, Александра, Артемия, Иннокентия, Андрея."
    )
    assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 15, 10, 37, 24))
-    assert len(res[0]["payload"]) == 16
+    assert len(res[0]["payload"]) == 19
    assert len(res[0]["attachments"].keys()) == 1
    assert list(res[0]["attachments"].keys()) == ["photo"]
    assert len(res[0]["attachments"]["photo"]) == 9
@@ -80,14 +93,32 @@ def test_scrape_wall_url_with_photos_inner_videos_and_links_with_inner_photos():
    assert res[0]["id"] == "wall-17315087_74182"
    assert res[0]["text"] == ""
    assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 11, 1, 9))
-    assert len(res[0]["payload"]) == 15
+    assert len(res[0]["payload"]) == 18
    assert len(res[0]["attachments"].keys()) == 3
-    assert_equal_lists(list(res[0]["attachments"].keys()), ["photo", "link", "video"])
+    for k in ["photo", "link", "video"]:
+        assert k in list(res[0]["attachments"].keys())
    assert len(res[0]["attachments"]["photo"]) == 5
    assert len(res[0]["attachments"]["link"]) == 1
    assert len(res[0]["attachments"]["video"]) == 1


+def test_scrape_download_multiple_media():
+    res = vks.scrape("https://vk.com/w=wall-17315087_74182")
+
+    with tempfile.TemporaryDirectory(dir="./") as tempdir:
+        vks.download_media(res, tempdir)
+        expect_files = {
+            "wall-17315087_74182_0.jpg",
+            "wall-17315087_74182_1.jpg",
+            "wall-17315087_74182_2.jpg",
+            "wall-17315087_74182_3.jpg",
+            "wall-17315087_74182_4.jpg",
+            "wall-17315087_74182_0.mp4",
+        }
+        found_files = set(os.listdir(tempdir))
+        assert len(expect_files) == len(expect_files & found_files)
+
+
 def test_scrape_photo_only():
    res = vks.scrape("https://vk.com/apiclub?z=photo-1_457242435%2Falbum-1_00%2Frev")
    assert len(res) == 1
@@ -97,7 +128,7 @@ def test_scrape_photo_only():
        == "Делимся расписанием конкурса [https://vk.com/wall-1_399468|«Код Петербурга»]. Все важные этапы — на одной схеме \n\nЕсли участвуете, обязательно сохраните себе. Так будет удобнее планировать работу над проектом, и вы точно не упустите лучший момент для отправки сервиса на модерацию."
    )
    assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 6, 7, 9, 43))
-    assert len(res[0]["payload"]) == 15
+    assert len(res[0]["payload"]) == 16
    assert len(res[0]["attachments"].keys()) == 1
    assert list(res[0]["attachments"].keys()) == ["photo"]
    assert len(res[0]["attachments"]["photo"]) == 1
@@ -108,11 +139,31 @@ def test_scrape_video_only():
    assert len(res) == 1
    assert res[0]["id"] == "video38556806_456251917"
    assert str(res[0]["datetime"]) == str(datetime.datetime(2022, 3, 24, 5, 42, 38))
-    assert len(res[0]["payload"]) == 31
    assert len(res[0]["attachments"].keys()) == 1
    assert list(res[0]["attachments"].keys()) == ["video"]


 def test_scrape_video_only2():
-    res = vks.scrape("https://vk.com/video-1_456239018")
-    print(res[0]["attachments"]["video"][0])
+    res = vks.scrape("https://vk.com/video-17546758_456239898")
+    with tempfile.TemporaryDirectory(dir="./") as tempdir:
+        vks.download_media(res, tempdir)
+        found_files = set(os.listdir(tempdir))
+        assert "video-17546758_456239898_0.mp4" in found_files
+
+
+def test_scrape_private_video():
+    """
+    > Some videos are kept private and cannot be accessed without a passkey . In this case, send the ID in {owner_id}_{video_id}_{access_key}.
+    From https://dev.vk.com/ru/method/video.get
+    """
+    res = vks.scrape("https://vk.com/wall-127774884_178565")
+
+    with tempfile.TemporaryDirectory(dir="./") as tempdir:
+        vks.download_media(res, tempdir)
+        expect_files = {
+            "wall-127774884_178565_0.mp4",
+            "wall-127774884_178565_1.mp4",
+            "wall-127774884_178565_2.mp4",
+        }
+        found_files = set(os.listdir(tempdir))
+        assert len(expect_files) == len(expect_files & found_files)
--- a/tests/util.py
+++ b/tests/util.py
@@ -1,3 +0,0 @@
-def assert_equal_lists(l1, l2):
-    assert len(l1) == len(l2)
-    assert str(sorted(l1)) == str(sorted(l2))
--- a/vk_url_scraper/init.py
+++ b/vk_url_scraper/init.py
@@ -1 +1,2 @@
 from .scraper import VkScraper
+from .utils import DateTimeEncoder, suppress_stdout
--- a/vk_url_scraper/main.py
+++ b/vk_url_scraper/main.py
@@ -0,0 +1,71 @@
+import argparse
+import json
+
+from .scraper import VkScraper
+from .utils import DateTimeEncoder
+
+
+def get_argument_parser():
+    """
+    Builds the argparse parser for the command line interface (use --help to list the options).
+    """
+    cli = argparse.ArgumentParser(
+        description="Authenticate and scrape information from vk.com based on a URL or set of URLs."
+    )
+
+    # One entry per option: (option strings, add_argument keyword arguments).
+    # They are registered in this order, which is also the order --help prints them in.
+    option_table = [
+        (
+            ("-u", "--username"),
+            {
+                "action": "store",
+                "dest": "username",
+                "required": True,
+                "help": "username for a valid vk.com account (pass empty if using --token)",
+            },
+        ),
+        (
+            ("-p", "--password"),
+            {
+                "action": "store",
+                "dest": "password",
+                "required": True,
+                "help": "password for the valid vk.com account (pass empty if using --token)",
+            },
+        ),
+        (
+            ("-t", "--token"),
+            {
+                "action": "store",
+                "dest": "token",
+                "required": False,
+                "help": "optional token, when passed username/password authentication will not be done - good to avoid captcha issues",
+            },
+        ),
+        (
+            ("-d", "--download"),
+            {
+                "action": argparse.BooleanOptionalAction,
+                "dest": "download",
+                "help": "if set then all photos and videos will be downloaded to folder output/",
+            },
+        ),
+        (
+            ("--urls",),
+            {
+                "action": "store",
+                "dest": "urls",
+                # REMAINDER collects everything after --urls, hence "must be the last argument"
+                "nargs": argparse.REMAINDER,
+                "required": True,
+                "help": "must be the last argument: any text with one or more urls to scrape",
+            },
+        ),
+    ]
+    for option_strings, settings in option_table:
+        cli.add_argument(*option_strings, **settings)
+    return cli
+
+
+def main():
+    """
+    Command line entry point: parses the arguments, authenticates, scrapes the text given in --urls
+    and prints the results as indented JSON. With --download the media is downloaded afterwards.
+    """
+    args = get_argument_parser().parse_args()
+    scraper = VkScraper(args.username, args.password, args.token)
+    # --urls arrives as a list of words; scrape() takes a single string
+    results = scraper.scrape(" ".join(args.urls))
+    print(json.dumps(results, ensure_ascii=False, indent=4, cls=DateTimeEncoder))
+    if not args.download:
+        return
+    scraper.download_media(results)
+
+
+if __name__ == "__main__":
+    main()
--- a/vk_url_scraper/scraper.py
+++ b/vk_url_scraper/scraper.py
@@ -1,10 +1,16 @@
+import os
 import re
+import shutil
 from collections import defaultdict
 from datetime import datetime
-from typing import List
+from typing import List, Optional
+from urllib.parse import urlparse

 import requests
 import vk_api  # used to get api_token after authentication
+import yt_dlp  # to download videos from url
+
+from .utils import captcha_handler, suppress_stdout


 class VkScraper:
@@ -31,12 +37,20 @@ class VkScraper:

    WALL_PATTERN = re.compile(r"(wall.{0,1}\d+_\d+)")
    PHOTO_PATTERN = re.compile(r"(photo.{0,1}\d+_\d+)")
-    VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+)")
+    VIDEO_PATTERN = re.compile(r"(video.{0,1}\d+_\d+(?:_\w+)?)")

-    def __init__(self, username: str, password: str) -> None:
+    def __init__(
+        self,
+        username: str,
+        password: str,
+        token: Optional[str] = None,
+        session_file="vk_config.v2.json",
+        captcha_handler=captcha_handler,
+    ) -> None:
        """Initializes the scraper.

-        This function receives a username and password and performs authentication on vk.com to then call api endpoints
+        This function receives a username and password (or access token) and performs
+        authentication on vk.com to then call api endpoints. If token is passed, authentication will not be performed again.

        Parameters
        ----------
@@ -44,9 +58,22 @@ class VkScraper:
            Username on vk.com, can be a phone number or email
        password : str
            Matching password on vk.com
+        token : str
+            Access token received after authenticating, can be found in the vk_config.v2.json file
+        session_file : str
+            File name where the VK session is saved so future logins are easier, this will not be created if token is passed
+        captcha_handler : func
+            Function that can receive a vk_api captcha instance and help the user solve it, default is a complete CLI handler
        """
-        self.session = vk_api.VkApi(username, password)
-        self.session.auth(token_only=True)
+        self.session = vk_api.VkApi(
+            username,
+            password,
+            token=token,
+            config_filename=session_file,
+            captcha_handler=captcha_handler,
+        )
+        if token is None or len(token) == 0:
+            self.session.auth(token_only=True)

    def scrape(self, url: str) -> List:
        """Scrapes a URL for multiple possibilities of inner links such as wall, video, photo, ...
@@ -117,10 +144,11 @@ class VkScraper:
                    first_type = a["type"]
                    attachment = a[first_type]
                    if first_type == "video":
+                        video_path = f'video{attachment["owner_id"]}_{attachment["id"]}'
+                        if "access_key" in attachment:
+                            video_path += f"_{attachment['access_key']}"
                        attachments["video"].extend(
-                            self.scrape_videos(f'video{attachment["owner_id"]}_{attachment["id"]}')[
-                                0
-                            ]
+                            self.scrape_videos(video_path)[0]
                            .get("attachments", {})
                            .get("video", [""])
                        )
@@ -273,3 +301,64 @@ class VkScraper:
                }
            )
        return res
+
+    def download_media(self, results: List[dict], destination: str = "./output/") -> List[str]:
+        """
+        Receives a list of dicts as returned by any of the scrape* methods and downloads the URLs present
+        if they are of type photo or video into the destination folder.
+
+        Photos are fetched with requests and videos with yt-dlp. Files are named
+        "<result id>_<attachment index><extension>". Empty URLs are skipped.
+
+        Parameters
+        ----------
+        results : List[dict]
+            list with valid dictionary results (see class definition)
+        destination : str
+            the directory to save the downloaded files to, created if missing. defaults to output/
+
+        Returns
+        -------
+        a list of filenames for the downloaded files
+
+        Raises
+        ------
+        requests.HTTPError
+            if a photo URL answers with an HTTP error status
+        """
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
+        }
+        os.makedirs(destination, exist_ok=True)
+        downloaded = []
+        for r in results:
+            for k, attachments in r["attachments"].items():
+                if k == "photo":
+                    for i, url in enumerate(attachments):
+                        if not url:
+                            continue
+                        ext = os.path.splitext(urlparse(url).path)[1]
+                        filename = os.path.join(destination, f"{r['id']}_{i}{ext}")
+                        # a timeout keeps one stalled server from hanging the whole run
+                        d = requests.get(url, headers=headers, timeout=30)
+                        # fail loudly instead of saving an error page as if it were the photo
+                        d.raise_for_status()
+                        with open(filename, "wb") as f:
+                            f.write(d.content)
+                        downloaded.append(filename)
+                elif k == "video":
+                    with suppress_stdout():  # ytdlp is not 100% quiet
+                        for i, url in enumerate(attachments):
+                            # scrape() pads with "" when a video has no url; nothing to download
+                            if not url:
+                                continue
+                            filename = os.path.join(destination, f"{r['id']}_{i}.%(ext)s")
+                            ydl_options = {
+                                "format": (
+                                    "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"
+                                ),
+                                "merge_output_format": "mp4",
+                                "retries": 5,
+                                "noplaylist": True,
+                                "outtmpl": filename,
+                                "quiet": True,
+                                "restrictfilenames": True,
+                                "forcefilename": True,
+                                "simulate": False,
+                            }
+                            # context manager so yt-dlp releases its resources after each video
+                            with yt_dlp.YoutubeDL(ydl_options) as ydl:
+                                info = ydl.extract_info(url, download=True)
+                                filename = ydl.prepare_filename(info)
+                            if "unknown_video" in filename:
+                                # yt-dlp could not name the extension: rename the file to .mp4
+                                fixed_filename = filename.replace("unknown_video", "mp4")
+                                os.replace(filename, fixed_filename)
+                                filename = fixed_filename
+                            downloaded.append(filename)
+        return downloaded
--- a/vk_url_scraper/utils.py
+++ b/vk_url_scraper/utils.py
@@ -0,0 +1,33 @@
+import json
+import os
+import sys
+from contextlib import contextmanager
+from datetime import datetime
+
+
+class DateTimeEncoder(json.JSONEncoder):
+    """JSON encoder that writes datetime objects as their str() form.
+
+    Usage: json.dumps(obj, cls=DateTimeEncoder)
+    """
+
+    def default(self, o):
+        if not isinstance(o, datetime):
+            # everything else is left to the base encoder, which raises TypeError
+            return super().default(o)
+        return str(o)  # str() includes the UTC offset when the datetime is timezone-aware
+
+
+def captcha_handler(captcha):
+    """Asks the user on the console to solve the captcha at captcha.get_url() and retries with the answer."""
+    prompt = f"CAPTCHA DETECTED, please solve it and input the solution. url= {captcha.get_url()} :"
+    solution = input(prompt).strip()
+    return captcha.try_again(solution)
+
+
+@contextmanager
+def suppress_stdout():
+    """Sends sys.stdout to os.devnull inside the with-block and restores the previous stream on exit, even on error.
+
+    Used to silence yt-dlp, which does not fully respect quiet=True and prints filenames to the console.
+    Based on https://thesmithfam.org/blog/2012/10/25/temporarily-suppress-console-output-in-python/
+    """
+    previous_stdout = sys.stdout
+    with open(os.devnull, "w") as sink:
+        sys.stdout = sink
+        try:
+            yield
+        finally:
+            sys.stdout = previous_stdout
--- a/vk_url_scraper/version.py
+++ b/vk_url_scraper/version.py
@@ -1,8 +1,8 @@
 _MAJOR = "0"
-_MINOR = "1"
+_MINOR = "3"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "5"
+_PATCH = "34"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
Author	SHA1	Message	Date
msramalho	8e5fba712c	archival notification	2025-05-05 11:45:33 +01:00
msramalho	f522f891cb	Bump version to v0.3.34 for release	2025-05-05 11:26:10 +01:00
msramalho	743ca9c165	adapting CD	2025-05-05 11:25:58 +01:00
msramalho	2130a33829	Bump version to v0.3.33 for release	2025-05-05 11:20:03 +01:00
msramalho	3d5b6de557	release requires build	2025-05-05 11:19:57 +01:00
msramalho	b9a6b2b747	Bump version to v0.3.32 for release	2025-05-05 11:17:54 +01:00
msramalho	d948044ae9	removes need for checks for release, as yt will always fail	2025-05-05 11:17:39 +01:00
msramalho	49c254a413	version bump	2025-05-05 11:02:55 +01:00
msramalho	d840b280d7	removes unnecessary dependency and does minor cleanup	2025-05-05 11:01:50 +01:00
Miguel Sozinho Ramalho	e6c98c73ea	Merge pull request #65 from bellingcat/migrate-gh-artifacts-to-v4 migrate gh artifact actions to v4	2025-01-09 15:16:05 +00:00
Miguel Sozinho Ramalho	e6fdd54518	cache v2 is also being deprecated https://github.com/actions/cache/discussions/1510	2025-01-09 15:05:01 +00:00
Miguel Sozinho Ramalho	f61204c4b1	Update main.yml from migration guide no breaking changes apply here.	2025-01-09 14:57:37 +00:00
msramalho	ea834c37e2	improved documentation of vk-api dependency	2024-07-16 16:40:04 +01:00
msramalho	3e22709430	Bump version to v0.3.30 for release	2024-07-16 16:18:32 +01:00
msramalho	9c7eadc716	attempts to circumvent pypi not allowing repo reference	2024-07-16 16:18:23 +01:00
msramalho	5d30d18b7b	Bump version to v0.3.29 for release	2024-07-16 16:05:59 +01:00
msramalho	b2d462441e	fixing issues with upstream vk api	2024-07-16 16:05:35 +01:00
msramalho	73f17407c0	reverting library dependencies	2024-01-23 18:09:56 +00:00
msramalho	95d249f5d0	min py to 3.10	2024-01-23 13:01:38 +00:00
msramalho	ccb8c1f5c7	min python to 3.8	2024-01-23 12:50:55 +00:00
msramalho	e525ff24b1	lint	2024-01-23 12:45:45 +00:00
msramalho	699b4ebdd8	fix lib dependencies in pypi version	2024-01-23 12:41:25 +00:00
msramalho	8d1a86a7fa	fix captcha processing	2024-01-23 12:41:14 +00:00
msramalho	b01dbe6299	fix vk_api dependency changes	2024-01-23 11:56:49 +00:00
msramalho	5b0f034c12	Bump version to v0.3.26 for release	2023-08-18 21:15:54 +01:00
msramalho	a1c098335c	fix: private videos	2023-08-18 21:15:34 +01:00
msramalho	12a5d22f64	fix: certifi	2023-08-18 21:12:44 +01:00
Miguel Sozinho Ramalho	ab602e5d31	Update .readthedocs.yaml https://blog.readthedocs.com/use-build-os-config/ https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python	2023-08-16 18:34:36 +01:00
msramalho	67bc8b5569	Bump version to v0.3.24 for release	2023-05-10 17:09:22 +01:00
msramalho	021e7c2304	disables test due to CI	2023-05-10 17:08:39 +01:00
msramalho	91b6dcf291	Bump version to v0.3.23 for release	2023-05-10 16:47:53 +01:00
msramalho	2a1a4e2cae	minor CI update	2023-05-10 16:47:39 +01:00
msramalho	fc6b914e2d	Bump version to v0.3.22 for release	2023-05-10 16:28:30 +01:00
Logan Williams	d155c1364a	Bump version number	2023-05-10 14:56:39 +02:00
Logan Williams	8882a87048	Fix import order	2023-05-10 14:33:47 +02:00
Logan Williams	a95c675e9c	No implicit optional	2023-05-10 14:28:59 +02:00
Logan Williams	8864e7c87d	Fix failing test	2023-05-10 14:25:50 +02:00
Logan Williams	db9b613ae4	Loosen dependency version requirements	2023-05-10 14:15:56 +02:00
Miguel Sozinho Ramalho	37828b4be4	Delete dependabot.yml	2023-02-27 10:21:19 +01:00
msramalho	1a3a7dc0f3	Bump version to v0.3.15 for release	2023-02-23 17:07:13 +01:00
msramalho	f67707a740	Bump version to v0.3.14 for release	2023-02-23 17:05:43 +01:00
msramalho	798684a334	Bump version to v0.3.13 for release	2023-02-23 17:02:14 +01:00
msramalho	a556b237e9	Bump version to v0.3.12 for release	2023-02-23 16:58:11 +01:00
msramalho	283bc35658	Bump version to v0.3.11 for release	2023-02-23 16:52:59 +01:00
msramalho	cef70fb80d	update yt-dlp	2023-02-23 16:52:52 +01:00
msramalho	e66ef4f477	fix tests	2023-02-23 16:52:45 +01:00
msramalho	1f6a8368fd	updates	2022-11-03 17:07:34 +00:00
msramalho	9a046fd1cb	Bump version to v0.3.9 for release	2022-11-03 16:35:59 +00:00
msramalho	aae2bb5999	Bump version to v0.3.8 for release	2022-11-03 16:19:30 +00:00
msramalho	9e30b81d16	Bump version to v0.3.7 for release	2022-11-03 16:05:18 +00:00
msramalho	72bc355606	updates readme	2022-11-03 16:03:12 +00:00
msramalho	7f59eefb73	Merge branch 'main' of https://github.com/bellingcat/vk-url-scraper	2022-11-03 16:02:50 +00:00
msramalho	30003c524e	Bump version to v0.3.6 for release	2022-11-03 16:01:15 +00:00
msramalho	d1b27bef1d	adds session_file name customization	2022-11-03 16:00:58 +00:00
Miguel Sozinho Ramalho	e5e9e08ee6	Update README.md	2022-09-30 16:27:07 +01:00
Miguel Sozinho Ramalho	3a8a3f54c0	Merge pull request #12 from bellingcat/dependabot/pip/yt-dlp-2022.7.18 Bump yt-dlp from 2022.5.18 to 2022.7.18	2022-07-20 12:54:08 +02:00
dependabot[bot]	4d73864dbb	Bump yt-dlp from 2022.5.18 to 2022.7.18 Bumps [yt-dlp](https://github.com/yt-dlp/yt-dlp) from 2022.5.18 to 2022.7.18. - [Release notes](https://github.com/yt-dlp/yt-dlp/releases) - [Changelog](https://github.com/yt-dlp/yt-dlp/blob/master/Changelog.md) - [Commits](https://github.com/yt-dlp/yt-dlp/compare/2022.05.18...2022.07.18) --- updated-dependencies: - dependency-name: yt-dlp dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2022-07-19 17:44:01 +00:00
Miguel Sozinho Ramalho	ceaa8e45f3	Merge pull request #10 from bellingcat/dependabot/pip/yt-dlp-2022.7.18	2022-07-19 10:13:13 +02:00
dependabot[bot]	007c8e07a8	Bump yt-dlp from 2022.5.18 to 2022.7.18 Bumps [yt-dlp](https://github.com/yt-dlp/yt-dlp) from 2022.5.18 to 2022.7.18. - [Release notes](https://github.com/yt-dlp/yt-dlp/releases) - [Changelog](https://github.com/yt-dlp/yt-dlp/blob/master/Changelog.md) - [Commits](https://github.com/yt-dlp/yt-dlp/compare/2022.05.18...2022.07.18) --- updated-dependencies: - dependency-name: yt-dlp dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2022-07-18 17:58:45 +00:00
Miguel Sozinho Ramalho	a515b2c3de	Update README.md	2022-06-27 16:12:55 +01:00
Miguel Sozinho Ramalho	54540cd132	Update README.md	2022-06-27 16:07:09 +01:00
Miguel Sozinho Ramalho	cfb13e5d82	Merge pull request #2 from bellingcat/dependabot/pip/yt-dlp-2022.6.22.1 Bump yt-dlp from 2022.5.18 to 2022.6.22.1	2022-06-24 13:11:52 +01:00
Miguel Sozinho Ramalho	926c3cb8a4	Merge pull request #3 from bellingcat/dependabot/pip/furo-2022.6.21 Bump furo from 2022.6.4.1 to 2022.6.21	2022-06-24 11:38:27 +01:00
dependabot[bot]	15ebe2e66c	Bump furo from 2022.6.4.1 to 2022.6.21 Bumps [furo](https://github.com/pradyunsg/furo) from 2022.6.4.1 to 2022.6.21. - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2022.06.04.1...2022.06.21) --- updated-dependencies: - dependency-name: furo dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com>	2022-06-22 22:44:17 +00:00
dependabot[bot]	eaff88b2d9	Bump yt-dlp from 2022.5.18 to 2022.6.22.1 Bumps [yt-dlp](https://github.com/yt-dlp/yt-dlp) from 2022.5.18 to 2022.6.22.1. - [Release notes](https://github.com/yt-dlp/yt-dlp/releases) - [Changelog](https://github.com/yt-dlp/yt-dlp/blob/master/Changelog.md) - [Commits](https://github.com/yt-dlp/yt-dlp/compare/2022.05.18...2022.06.22.1) --- updated-dependencies: - dependency-name: yt-dlp dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2022-06-22 22:42:51 +00:00
Miguel Ramalho	a6d066a192	Bump version to v0.3.5 for release	2022-06-21 19:27:27 +02:00
msramalho	9078a17400	fix py3.7	2022-06-21 19:27:19 +02:00
Miguel Ramalho	17b516bd7f	Bump version to v0.3.4 for release	2022-06-21 19:24:47 +02:00
msramalho	8bd182b041	fix py3-7	2022-06-21 19:24:36 +02:00
msramalho	0b8abfb5cb	workflow updates	2022-06-21 19:20:46 +02:00
msramalho	cf5fb91c84	workflow updates	2022-06-21 19:19:56 +02:00
Miguel Ramalho	5c965102a4	Bump version to v0.3.3 for release	2022-06-21 18:24:24 +02:00
msramalho	df10e6f55f	applying feedback	2022-06-21 18:24:04 +02:00
Miguel Ramalho	863dd44463	Bump version to v0.3.2 for release	2022-06-21 14:58:27 +02:00
Miguel Ramalho	578ec81443	Bump version to v0.3.1 for release	2022-06-21 14:46:56 +02:00
Miguel Ramalho	c32caec442	Bump version to v0.3.0 for release	2022-06-21 14:25:48 +02:00
msramalho	80b43f7c95	token functionality	2022-06-21 14:23:54 +02:00
msramalho	90b72b6d22	trying with token	2022-06-21 14:16:54 +02:00
msramalho	d96e0c0a3a	captcha fix	2022-06-21 14:05:33 +02:00
msramalho	db03a4c0f6	captch regex fix	2022-06-21 13:57:13 +02:00
msramalho	cf100ee69e	updated captcha logic	2022-06-21 12:59:45 +02:00
msramalho	a09cf32b3e	captch fix 2	2022-06-21 12:38:55 +02:00
msramalho	e1eb3ed620	-s	2022-06-21 12:23:23 +02:00
msramalho	72bd951d9c	show capture	2022-06-21 12:21:47 +02:00
msramalho	59d53be68b	attempts at captch fix in workflow	2022-06-21 12:16:58 +02:00
Miguel Ramalho	24a1313a65	Bump version to v0.2.4 for release	2022-06-21 01:33:38 +02:00
msramalho	64df4eec28	3.10 only due to test issues	2022-06-21 01:33:16 +02:00
Miguel Ramalho	42bdc1441c	Bump version to v0.2.3 for release	2022-06-21 01:23:29 +02:00
msramalho	c25880ee6d	fix tests	2022-06-21 01:21:53 +02:00
msramalho	e1e3648852	remove print	2022-06-21 01:17:47 +02:00
msramalho	c74dc280d8	fix ytdlp naming	2022-06-21 01:17:26 +02:00
Miguel Ramalho	ab15b35008	Bump version to v0.2.2 for release	2022-06-21 01:04:24 +02:00
msramalho	62c4536d0b	fix ytdl filenames	2022-06-21 01:03:48 +02:00
Miguel Ramalho	eac0fc4904	Bump version to v0.2.1 for release	2022-06-20 23:57:04 +02:00
Miguel Ramalho	1341cd866f	Bump version to v0.2.0 for release	2022-06-20 23:56:13 +02:00
Miguel Ramalho	7824c2922d	Bump version to v0.2.0 for release	2022-06-20 23:54:52 +02:00
msramalho	c9a3ece9af	adds command line interface	2022-06-20 23:52:14 +02:00
msramalho	50b78d618a	.txt	2022-06-20 13:45:23 +02:00
msramalho	c4a1333428	cleanup	2022-06-20 13:44:05 +02:00