mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-13 04:48:28 +03:00
Add JSONL output format
This commit is contained in:
@@ -2,6 +2,7 @@ import argparse
|
|||||||
import contextlib
|
import contextlib
|
||||||
import datetime
|
import datetime
|
||||||
import inspect
|
import inspect
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import requests.models
|
import requests.models
|
||||||
# Imported in parse_args() after setting up the logger:
|
# Imported in parse_args() after setting up the logger:
|
||||||
@@ -166,7 +167,9 @@ def parse_args():
|
|||||||
parser.add_argument('--retry', '--retries', dest = 'retries', type = int, default = 3, metavar = 'N',
|
parser.add_argument('--retry', '--retries', dest = 'retries', type = int, default = 3, metavar = 'N',
|
||||||
help = 'When the connection fails or the server returns an unexpected response, retry up to N times with an exponential backoff')
|
help = 'When the connection fails or the server returns an unexpected response, retry up to N times with an exponential backoff')
|
||||||
parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')
|
parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')
|
||||||
parser.add_argument('-f', '--format', dest = 'format', type = str, default = None, help = 'Output format')
|
group = parser.add_mutually_exclusive_group(required = False)
|
||||||
|
group.add_argument('-f', '--format', dest = 'format', type = str, default = None, help = 'Output format')
|
||||||
|
group.add_argument('--jsonl', dest = 'jsonl', action = 'store_true', default = False, help = 'Output JSONL')
|
||||||
parser.add_argument('--since', type = parse_datetime_arg, metavar = 'DATETIME', help = 'Only return results newer than DATETIME')
|
parser.add_argument('--since', type = parse_datetime_arg, metavar = 'DATETIME', help = 'Only return results newer than DATETIME')
|
||||||
|
|
||||||
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
|
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
|
||||||
@@ -219,6 +222,12 @@ def configure_logging(verbosity, dumpLocals_):
|
|||||||
rootLogger.addHandler(handler)
|
rootLogger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
|
def json_serialise_datetime(obj):
    '''Serialise date and datetime objects for json.dumps(default = ...).

    json.dumps calls this hook only for objects it cannot encode natively.
    datetime.date and datetime.datetime instances are converted to their
    ISO 8601 string via isoformat(); anything else is rejected.

    Args:
        obj: The object json.dumps could not serialise on its own.

    Returns:
        The ISO 8601 string representation of obj.

    Raises:
        TypeError: If obj is neither a datetime.date nor a datetime.datetime.
    '''
    # datetime.datetime is a subclass of datetime.date, so this single check
    # covers both plain dates and full timestamps.
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    # Use the bare type name so the message reads like the one raised by the
    # standard library's JSON encoder.
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
setup_logging()
|
setup_logging()
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
@@ -231,7 +240,9 @@ def main():
|
|||||||
if args.since is not None and item.date < args.since:
|
if args.since is not None and item.date < args.since:
|
||||||
logger.info(f'Exiting due to reaching older results than {args.since}')
|
logger.info(f'Exiting due to reaching older results than {args.since}')
|
||||||
break
|
break
|
||||||
if args.format is not None:
|
if args.jsonl:
|
||||||
|
print(json.dumps(item._asdict(), default = json_serialise_datetime))
|
||||||
|
elif args.format is not None:
|
||||||
print(args.format.format(**item._asdict()))
|
print(args.format.format(**item._asdict()))
|
||||||
else:
|
else:
|
||||||
print(item)
|
print(item)
|
||||||
|
|||||||
Reference in New Issue
Block a user