mirror of
https://github.com/bellingcat/vk-url-scraper.git
synced 2026-06-13 13:58:36 +03:00
adds command line interface
This commit is contained in:
63
vk_url_scraper/__main__.py
Normal file
63
vk_url_scraper/__main__.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from .scraper import VkScraper
|
||||
from .utils import DateTimeEncoder
|
||||
|
||||
|
||||
def get_argument_parser():
    """
    Build the command line argument parser for the scraper CLI.

    Run ``python -m vk_url_scraper --help`` to see the generated usage text.

    Returns:
        argparse.ArgumentParser: a parser whose namespace exposes
        ``username`` (str), ``password`` (str), ``download`` (bool) and
        ``urls`` (list of str, everything that follows ``--urls``).
    """
    parser = argparse.ArgumentParser(
        # Pin the program name: this module is a package ``__main__``, and
        # without an explicit prog the usage line can show "__main__.py"
        # when launched through ``python -m``.
        prog="vk_url_scraper",
        description="Authenticate and scrape information from vk.com based on a URL or set of URLs.",
    )

    parser.add_argument(
        "-u",
        "--username",
        action="store",
        dest="username",
        required=True,
        help="username for a valid vk.com account",
    )
    parser.add_argument(
        "-p",
        "--password",
        action="store",
        dest="password",
        required=True,
        help="password for the valid vk.com account",
    )
    parser.add_argument(
        "-d",
        "--download",
        action=argparse.BooleanOptionalAction,
        dest="download",
        # Explicit default so callers always get a real bool instead of None
        # when neither --download nor --no-download is given.
        default=False,
        help="if set then all photos and videos will be downloaded to folder output/",
    )
    parser.add_argument(
        "--urls",
        action="store",
        dest="urls",
        # REMAINDER swallows every token after --urls (including ones that
        # start with "-"), which is why this option has to come last. Note
        # that REMAINDER also accepts zero tokens, so callers must check for
        # an empty list themselves.
        nargs=argparse.REMAINDER,
        required=True,
        help="must be the last argument: any text with one or more urls to scrape",
    )
    return parser
|
||||
|
||||
|
||||
def main():
    """
    Command line entry point: parse arguments, scrape, print, optionally download.

    The tokens given after ``--urls`` are joined with single spaces into one
    text blob and handed to ``VkScraper.scrape``. The result is printed to
    stdout as indented JSON (non-ASCII characters kept as-is, serialized with
    ``DateTimeEncoder``). When ``--download`` is set, the same result is then
    passed to ``VkScraper.download_media``.

    Exits through ``parser.error`` (usage message, exit status 2) when no URL
    text was supplied.
    """
    parser = get_argument_parser()
    args = parser.parse_args()

    text = " ".join(args.urls)
    # ``--urls`` uses nargs=REMAINDER, which accepts zero values, so a bare
    # ``--urls`` passes argparse validation. Reject it here, before a scraper
    # is created with the supplied credentials.
    if not text.strip():
        parser.error("--urls requires at least one URL to scrape")

    vks = VkScraper(args.username, args.password)
    res = vks.scrape(text)
    res_json = json.dumps(res, ensure_ascii=False, indent=4, cls=DateTimeEncoder)
    print(res_json)
    if args.download:
        vks.download_media(res)


# Run the CLI only when this module is executed, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user