mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-10 19:38:29 +03:00
Clean Facebook group post URLs
Most of the time, the URLs are already clean, but occasionally, Facebook includes tracking parameters (__xts__[0], __tn__)...
This commit is contained in:
@@ -41,7 +41,7 @@ class FacebookCommonScraper(snscrape.base.Scraper):
|
||||
if setVal.rstrip('0123456789').endswith('.a.'):
|
||||
setVal = f'a.{setVal.rsplit(".", 1)[1]}'
|
||||
clean = (u.scheme, u.netloc, u.path, urllib.parse.urlencode((('set', setVal),)), '')
|
||||
elif u.path.split('/')[2] == 'posts' or u.path.startswith('/events/') or u.path.startswith('/notes/'):
|
||||
elif u.path.split('/')[2] == 'posts' or u.path.startswith('/events/') or u.path.startswith('/notes/') or u.path.split('/')[1:4:2] == ['groups', 'permalink']:
|
||||
# No manipulation of the path needed, but strip the query string
|
||||
clean = (u.scheme, u.netloc, u.path, '', '')
|
||||
elif u.path.split('/')[2] in ('photos', 'videos'):
|
||||
|
||||
Reference in New Issue
Block a user