diff --git a/snscrape/modules/vkontakte.py b/snscrape/modules/vkontakte.py
index aef0610..9ee4419 100644
--- a/snscrape/modules/vkontakte.py
+++ b/snscrape/modules/vkontakte.py
@@ -26,6 +26,14 @@ else:
 
 
 logger = logging.getLogger(__name__)
+months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+# Matches 'today', 'yesterday', '<day> <Mon>' with an optional ' <yyyy>', or '<Mon> <day>, <yyyy>', each followed by ' at <h>:<mm> am|pm'. Compiled once at import time.
+datePattern = re.compile(r'^(?P<date>today'
+	r'|yesterday'
+	r'|(?P<day1>\d+)\s+(?P<month1>' + '|'.join(months) + r')(\s+(?P<year1>\d{4}))?'
+	r'|(?P<month2>' + '|'.join(months) + r')\s+(?P<day2>\d+),\s+(?P<year2>\d{4})'
+	')'
+	r'\s+at\s+(?P<hour>\d+):(?P<minute>\d+)\s+(?P<ampm>[ap]m)$')
 
 
 @dataclasses.dataclass
@@ -112,8 +120,7 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 			return None
 		if 'time' in dateSpan.attrs:
 			return datetime.datetime.fromtimestamp(int(dateSpan['time']), datetime.timezone.utc)
-		months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
-		if (match := re.match(r'^(?P<date>today|yesterday|(?P<day1>\d+)\s+(?P<month1>' + '|'.join(months) + ')|(?P<month2>' + '|'.join(months) + r')\s+(?P<day2>\d+),\s+(?P<year2>\d{4}))\s+at\s+(?P<hour>\d+):(?P<minute>\d+)\s+(?P<ampm>[ap]m)$', dateSpan.text)):
+		if (match := datePattern.match(dateSpan.text)):
 			# Datetime information down to minutes
 			tz = timezone('Europe/Moscow')
 			if match.group('date') in ('today', 'yesterday'):
@@ -122,7 +129,7 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 					date -= datetime.timedelta(days = 1)
 				year, month, day = date.year, date.month, date.day
 			else:
-				year = int(match.group('year2') or datetime.datetime.now(tz = tz).year)
+				year = int(match.group('year1') or match.group('year2') or datetime.datetime.now(tz = tz).year)
 				month = months.index(match.group('month1') or match.group('month2')) + 1
 				day = int(match.group('day1') or match.group('day2'))
 			hour = int(match.group('hour'))