mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 20:38:29 +03:00
Add --progress option that prints a status update every 100 results and at the end
Closes #116
This commit is contained in:
@@ -8,6 +8,7 @@ import requests.models
|
|||||||
#import snscrape.base
|
#import snscrape.base
|
||||||
#import snscrape.modules
|
#import snscrape.modules
|
||||||
#import snscrape.version
|
#import snscrape.version
|
||||||
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
@@ -171,6 +172,7 @@ def parse_args():
|
|||||||
group.add_argument('--jsonl', dest = 'jsonl', action = 'store_true', default = False, help = 'Output JSONL')
|
group.add_argument('--jsonl', dest = 'jsonl', action = 'store_true', default = False, help = 'Output JSONL')
|
||||||
parser.add_argument('--with-entity', dest = 'withEntity', action = 'store_true', default = False, help = 'Include the entity (e.g. user, channel) as the first output item')
|
parser.add_argument('--with-entity', dest = 'withEntity', action = 'store_true', default = False, help = 'Include the entity (e.g. user, channel) as the first output item')
|
||||||
parser.add_argument('--since', type = parse_datetime_arg, metavar = 'DATETIME', help = 'Only return results newer than DATETIME')
|
parser.add_argument('--since', type = parse_datetime_arg, metavar = 'DATETIME', help = 'Only return results newer than DATETIME')
|
||||||
|
parser.add_argument('--progress', action = 'store_true', default = False, help = 'Report progress on stderr')
|
||||||
|
|
||||||
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
|
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
|
||||||
classes = snscrape.base.Scraper.__subclasses__()
|
classes = snscrape.base.Scraper.__subclasses__()
|
||||||
@@ -251,8 +253,14 @@ def main():
|
|||||||
print(args.format.format(**item._asdict()))
|
print(args.format.format(**item._asdict()))
|
||||||
else:
|
else:
|
||||||
print(item)
|
print(item)
|
||||||
|
if args.progress and i % 100 == 0:
|
||||||
|
print(f'Scraping, {i} results so far', file = sys.stderr)
|
||||||
if args.maxResults and i >= args.maxResults:
|
if args.maxResults and i >= args.maxResults:
|
||||||
logger.info(f'Exiting after {i} results')
|
logger.info(f'Exiting after {i} results')
|
||||||
|
if args.progress:
|
||||||
|
print(f'Stopped scraping after {i} results due to --max-results', file = sys.stderr)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
logger.info(f'Done, found {i} results')
|
logger.info(f'Done, found {i} results')
|
||||||
|
if args.progress:
|
||||||
|
print(f'Finished, {i} results', file = sys.stderr)
|
||||||
|
|||||||
Reference in New Issue
Block a user