From 2b5444f89e8491f81102804787e4286f90da1c69 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Fri, 11 Sep 2020 15:37:22 +0000 Subject: [PATCH] Restrict --max-results to zero or positive values; use zero to indicate fetching only the entity --- snscrape/cli.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/snscrape/cli.py b/snscrape/cli.py index 9904e1a..350e7da 100644 --- a/snscrape/cli.py +++ b/snscrape/cli.py @@ -166,7 +166,7 @@ def parse_args(): parser.add_argument('--dump-locals', dest = 'dumpLocals', action = 'store_true', default = False, help = 'Dump local variables on serious log messages (warnings or higher)') parser.add_argument('--retry', '--retries', dest = 'retries', type = int, default = 3, metavar = 'N', help = 'When the connection fails or the server returns an unexpected response, retry up to N times with an exponential backoff') - parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results') + parser.add_argument('-n', '--max-results', dest = 'maxResults', type = lambda x: int(x) if int(x) >= 0 else parser.error('--max-results N must be zero or positive'), metavar = 'N', help = 'Only return the first N results') group = parser.add_mutually_exclusive_group(required = False) group.add_argument('-f', '--format', dest = 'format', type = str, default = None, help = 'Output format') group.add_argument('--jsonl', dest = 'jsonl', action = 'store_true', default = False, help = 'Output JSONL') @@ -188,6 +188,9 @@ def parse_args(): if not args.scraper: raise RuntimeError('Error: no scraper specified') + if not args.withEntity and args.maxResults == 0: + parser.error('--max-results 0 is only valid when used with --with-entity') + return args @@ -258,6 +261,9 @@ def main(): print(json.dumps(namedtuple_to_dict_recursive(entity), default = json_serialise_datetime)) else: print(entity) + if args.maxResults == 0: + logger.info('Exiting after 0 results') + return for i, item in enumerate(scraper.get_items(), start = 1): if args.since is not None and item.date < args.since: logger.info(f'Exiting due to reaching older results than {args.since}')