Merge branch 'main' into channel-db

2026-06-29 05:28:40 +03:00 · 2022-03-22 11:49:07 +01:00
parent fa516da763 d5bf3629c2
commit 2a3b5c8200
41 changed files with 970 additions and 313 deletions
--- a/cisticola/base.py
+++ b/cisticola/base.py
@@ -1,33 +1,47 @@
 from typing import List
 from dataclasses import dataclass
 from datetime import datetime
+import tempfile 
+import json
+import io
+
 from sqlalchemy.orm import registry
 from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey, Boolean
 import pytesseract
 import PIL
-import io
 import exiftool
-import json
-import os

 from .utils import make_request

-mapper_registry = registry()
-
@dataclass
 class ScraperResult:
-    """A minimally processed result from a scraper"""
+    """A minimally processed result from a scraper
+    """

+    #: String specifying name and version of scraper used to generate result, e.g. ``"TwitterScraper 0.0.1"``.
    scraper: str
+
+    #: Name of platform from which result was scraped, e.g. ``"Twitter"``.
    platform: str
+
+    #: Foreign key of channel ID that this was scraped from
    channel: int
+
+    #: String that uniquely identifies the scraped post on the given platform, e.g. ``"1503397267675533313"``
    platform_id: str
+
+    #: Datetime (relative to UTC) that the scraped post was created at.
    date: datetime
+
+    #: JSON dump of dict that contains all data scraped for the post.
    raw_data: str
+
+    #: Datetime (relative to UTC) that the scraped post was archived at.
    date_archived: datetime
+
+    #: Dict in which the keys are the original media URLs from the post, and the corresponding values are the URLs of the archived media files. 
    archived_urls: dict
-
-
+      
 raw_data_table = Table('raw_data', mapper_registry.metadata,
                       Column('id', Integer, primary_key=True,
                              autoincrement=True),
@@ -40,22 +54,45 @@ raw_data_table = Table('raw_data', mapper_registry.metadata,
                       Column('date_archived', DateTime),
                       Column('archived_urls', JSON))

-mapper_registry.map_imperatively(ScraperResult, raw_data_table)
-
-
@dataclass
 class Channel:
+    """Information about a specific channel to be scraped.
+    """
+
+    #: Name of channel (different from username because it can be non-unique and contain emojis), e.g. ``"T🕊Редакция Президент Гордон🕊"``.
    name: str
+
+    #: String that uniquely identifies the channel on the given platform, e.g. ``"-1001101170442"``.
    platform_id: str
+
+    #: User-specified category for the channel, e.g. ``"explicit_qanon"``.
    category: str
+
+    #: Name of platform the given channel is on, e.g. ``"Telegram"``.
    platform: str
+
+    #: URL for the given channel on the platform, e.g. ``"https://t.me/prezidentgordonteam"``
    url: str
+
+    #: Screen name/username of channel.
    screenname: str
+      
+    #: Two-letter country code for the country of origin for the channel, e.g. ``"RU"``.
    country: str = None
+    
+    #: Name of influencer, if channel belongs to an influencer that operates on multiple platforms.    
    influencer: str = None
+      
+    #: Whether or not the channel is publicly-accessible. 
    public: bool = None
+      
+    #: Whether or not the channel is a chat (i.e. allows users who are not the channel creator to post/message)
    chat: bool = None
+      
+    #: Any other additional notes about the channel.
    notes: str = ""
+      
+    #: Did the channel come from a researcher or a scraping process?
    source: str = None

    def hydrate(self):
@@ -82,26 +119,52 @@ mapper_registry.map_imperatively(Channel, channel_table)
@dataclass
 class Post:
    """An object with fields for columns in the analysis table"""
+
+    #: ID number of the scraped post in the ``raw_data`` table
    raw_id: int
+      
+    #: Platform specific post ID
    platform_id: str
+
+    #: String specifying name and version of scraper used to generate result, e.g. ``"TwitterScraper 0.0.1"``.
    scraper: str
+
+    #: String specifying name and version of transformer used to transform result, e.g. ``"TwitterTransformer 0.0.1"``.
    transformer: str
+
+    #: Name of platform from which result was scraped, e.g. ``"Twitter"``.
    platform: str
+
+    #: User-specified integer that uniquely identifies a channel, e.g. ``15``.
    channel: int
+
+    #: Datetime (relative to UTC) that the scraped post was created at.
    date: datetime
+
+    #: Datetime (relative to UTC) that the scraped post was archived at.
    date_archived: datetime
+    
+    #: URL of the original post
    url: str
+
+    #: String that uniquely identifies the channel on the given platform, e.g. ``"-1001101170442"``.
    author_id: str
+    
+    #: Username of author who made post.
    author_username: str
+      
+    #: Text of the original post
    content: str
+
+    #: The ID of the Channel that the post was forwarded or quoted from
    forwarded_from: int = None
+      
+    #: The ID of the Post that this Post is a reply to or reblog of
    reply_to: int = None

    def hydrate(self):
        pass

-
-
 post_table = Table('posts', mapper_registry.metadata,
                       Column('id', Integer, primary_key=True,
                              autoincrement=True),
@@ -125,39 +188,64 @@ mapper_registry.map_imperatively(Post, post_table)

@dataclass
 class Media:
+    """Base class for organizing information about a media file.
+    """
+
+    #: ID number of the media's corresponding scraped post in the ``raw_data`` table.
    raw_id: int
+
+    #: ID number of the media's corresponding scraped post in the ``analysis`` table.
    post: int
+
+    #: URL of the original post.
    url: str
+
+    #: Original URL of the media from the original post.
    original_url: str

+    #: JSON dump of the dict containing metadata information for the media file.
    exif: str = None

    def get_blob(self):
+        """Download media file as bytes blob.
+        """
+
        blob = make_request(self.url)
        return blob.content

    def hydrate(self, blob = None):
+        """Download media file as bytes blob and extract data from content.
+        """
+
        if blob is None:
            blob = self.get_blob()

        self.hydrate_exif(blob)

    def hydrate_exif(self, blob):
-        f = open('tmp', 'wb')
-        f.write(blob)
-        f.close()
+        """Extract Exif metadata from bytes blob.
+        """

-        with exiftool.ExifTool() as et:
-            exif = et.get_metadata('tmp')
-            self.exif = json.dumps(exif)
+        with tempfile.NamedTemporaryFile() as temp_file:
+            temp_file.write(blob)

-        os.remove('tmp')
+            with exiftool.ExifTool() as et:
+                exif = et.get_metadata(temp_file.name)
+                self.exif = json.dumps(exif)

@dataclass
 class Image(Media):
+    """Class for organizing information about an image file. 
+    """
+
+    #: Extracted OCR content from image
    ocr: str = None

    def hydrate(self, blob=None):
+        """Download image file as bytes blob and extract Exif and OCR content 
+        from the image.
+        """
+
        if blob is None:
            blob = self.get_blob()

@@ -165,25 +253,62 @@ class Image(Media):
        self.hydrate_ocr(blob)

    def hydrate_ocr(self, blob):
+        """Extract OCR (optical character recognition) data from image bytes blob.
+        """
+
        image = PIL.Image.open(io.BytesIO(blob))
        self.ocr = pytesseract.image_to_string(image)

@dataclass
 class Video(Media):
+    """Class for organizing information about a video file. 
+    """
+    
    pass

+mapper_registry = registry()
+
+raw_data_table = Table('raw_data', mapper_registry.metadata,
+                       Column('id', Integer, primary_key=True,
+                              autoincrement=True),
+                       Column('scraper', String),
+                       Column('platform', String),
+                       Column('channel', Integer),
+                       Column('platform_id', String),
+                       Column('date', DateTime),
+                       Column('raw_data', String),
+                       Column('date_archived', DateTime),
+                       Column('archived_urls', JSON))
+
+
+analysis_table = Table('analysis', mapper_registry.metadata,
+                       Column('id', Integer, primary_key=True,
+                              autoincrement=True),
+                       Column('raw_id', Integer, ForeignKey('raw_data.id')),
+                       Column('scraper', String),
+                       Column('transformer', String),
+                       Column('platform', String),
+                       Column('channel', Integer),
+                       Column('date', DateTime),
+                       Column('date_archived', DateTime),
+                       Column('url', String),
+                       Column('author_id', String),
+                       Column('author_username', String),
+                       Column('content', String))
+
 media_table = Table('media', mapper_registry.metadata,
                       Column('id', Integer, primary_key=True,
                              autoincrement=True),
-                        Column('type', String),
+                       Column('type', String),
                       Column('raw_id', Integer, ForeignKey('raw_data.id')),
                       Column('post', Integer, ForeignKey('posts.id')),
                       Column('url', String),
                       Column('original_url', String),
                       Column('exif', String),
-                       Column('ocr', String)
-                       )
+                       Column('ocr', String))

+mapper_registry.map_imperatively(TransformedResult, analysis_table)
+mapper_registry.map_imperatively(ScraperResult, raw_data_table)
 mapper_registry.map_imperatively(Media, media_table, polymorphic_on='type', polymorphic_identity='media')
 mapper_registry.map_imperatively(Image, media_table, inherits=Media, polymorphic_on='type', polymorphic_identity='image')
 mapper_registry.map_imperatively(Video, media_table, inherits=Media, polymorphic_on='type', polymorphic_identity='video')
--- a/cisticola/scraper/base.py
+++ b/cisticola/scraper/base.py
@@ -14,29 +14,91 @@ from cisticola.base import Channel, ScraperResult, mapper_registry
 from cisticola.utils import make_request

 class Scraper:
+    """Base class for defining platform-specific scrapers for scraping all posts 
+    from a given channel on that specific platform. 
+    """
+
    __version__ = "Scraper 0.0.0"

    def __init__(self):
-        self.s3_client = boto3.client('s3',
-                                      region_name=os.environ['DO_SPACES_REGION'],
-                                      endpoint_url='https://{}.digitaloceanspaces.com'.format(
-                                          os.environ['DO_SPACES_REGION']),
-                                      aws_access_key_id=os.environ['DO_SPACES_KEY'],
-                                      aws_secret_access_key=os.environ['DO_SPACES_SECRET'])

+        # Initialize client to transfer files to the storage archive
+        self.s3_client = boto3.client(
+            service_name='s3',
+            region_name=os.environ['DO_SPACES_REGION'],
+            endpoint_url=f'https://{os.environ["DO_SPACES_REGION"]}.digitaloceanspaces.com',
+            aws_access_key_id=os.environ['DO_SPACES_KEY'],
+            aws_secret_access_key=os.environ['DO_SPACES_SECRET'])
+        
+        # Define request headers (necessary to bypass scraping protection 
+        # for several platform scrapers)
        self.headers = {
            'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'}

-        pass
-
    def __str__(self):
        return self.__version__

+    def get_username_from_url(self, url: str) -> str:
+        """Extract a channel's username from its URL. 
+
+        Parameters
+        ----------
+        url: str
+            URL of the channel on a given platform
+            e.g. ``"https://twitter.com/EliotHiggins"``
+        
+        Returns
+        -------
+        username: str
+            Extracted username of the channel.
+            e.g. ``"EliotHiggins"``
+        """
+        
+        raise NotImplementedError
+
    def url_to_key(self, url: str, content_type: str) -> str:
+        """Generate a unique identifier for media from a specified post.
+
+        Parameters
+        ----------
+        url: str
+            URL of original post. 
+            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"image/jpeg"``
+
+        Returns
+        -------
+        key: str
+            Unique identifier for the media file from a specified post based on 
+            the original post URL and the media's Content-Type. 
+        """
+
        key = urlparse(url).path.split('/')[-1]
        return key 

    def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
+        """Download media file from a specified media file URL.
+
+        Parameters
+        ----------
+        url: str
+            URL of media file from original post. 
+            e.g. ``"https://pbs.twimg.com/media/FN0j0dYWUAcQxfK?format=png&name=medium"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"image/jpeg"``.
+        key: str
+            Unique identifier for the media file.
+        """

        r = make_request(url, headers = self.headers)

@@ -49,6 +111,27 @@ class Scraper:
        return blob, content_type, key

    def m3u8_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
+        """Download media file from a specified media URL, where the media file 
+        is formatted as an m3u8 playlist, which is then decoded to an mp4 file.
+
+        Parameters
+        ----------
+        url: str
+            URL of m3u8 playlist file from original post. 
+            e.g. ``"https://media.gettr.com/group47/origin/2022/03/15/01/cbc436c1-1a1a-4b97-671d-c42109f3ec9b/out.m3u8"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+        """
        
        content_type = 'video/mp4'
        ext = '.' + content_type.split('/')[-1]
@@ -71,7 +154,28 @@ class Scraper:
        return blob, content_type, key

    def ytdlp_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
-        
+        """Download media file from a specified media URL, using a fork of 
+        youtube-dl that enables faster downloading.
+
+        Parameters
+        ----------
+        url: str
+            URL of media file from original post. 
+            e.g. ``"https://rumble.com/embed/vgt7gh/"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+        """
+
        content_type = 'video/mp4'

        with tempfile.TemporaryDirectory() as temp_dir:
@@ -103,6 +207,23 @@ class Scraper:
        return blob, content_type, key

    def archive_blob(self, blob: bytes, content_type: str, key: str) -> str:
+        """Upload raw bytes of a media file to the storage archive. 
+
+        Parameters
+        ----------
+        blob: bytes
+            Raw bytes of the media file to be archived.
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+
+        Returns
+        -------
+        archived_url: str
+            URL specifying the file on the storage archive.
+        """

        filename = self.__version__.replace(' ', '_') + '/' + key

@@ -114,9 +235,42 @@ class Scraper:
        return archived_url

    def can_handle(self, channel: Channel) -> bool:
+        """Whether or not the scraper can scrape the specified channel.
+
+        Parameters
+        ----------
+        channel: Channel
+            Channel to be scraped. 
+        
+        Returns
+        -------
+        bool
+            ``True`` if the scraper is capable of scraping ``channel``,
+            ``False`` if not. 
+        """
+
        raise NotImplementedError

    def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
+        """Scrape all posts from the specified Channel.
+
+        Parameters
+        ----------
+        channel: Channel
+            Channel to be scraped.
+        since: ScraperResult or None
+            Most recently scraped ScraperResult from a previous scrape, or 
+            ``None`` if scraper has not run before.
+        archive_media: bool
+            If ``True``, any media files (images, video, etc.) from posts are archived. 
+            If ``False``, media files are not archived. 
+
+        Yields
+        ------
+        ScraperResult
+            Scraper result from a single post/comment from the specified Channel.
+        """
+        
        raise NotImplementedError


@@ -129,9 +283,13 @@ class ScraperController:
        self.session = None

    def register_scraper(self, scraper: Scraper):
+        """Register a single Scraper instance to the controller.
+        """
        self.scrapers.append(scraper)

    def register_scrapers(self, scraper: List[Scraper]):
+        """Register a list of Scraper instances to the controller.
+        """
        self.scrapers.extend(scraper)

    def scrape_all_channels(self, archive_media: bool = True):
@@ -147,6 +305,17 @@ class ScraperController:
    
    @logger.catch(reraise = True)
    def scrape_channels(self, channels: List[Channel], archive_media: bool = True):
+        """Scrape all posts for all specified channels. 
+
+        Parameters
+        ----------
+        channels: list<Channel>
+            List of Channel instances to be scraped
+        archive_media: bool
+            If ``True``, any media files (images, video, etc.) from posts are archived. 
+            If ``False``, media files are not archived. 
+        """
+
        if self.session is None:
            logger.error("No DB session")
            return
@@ -185,6 +354,9 @@ class ScraperController:
                logger.warning(f"No handler found for Channel {channel}")

    def connect_to_db(self, engine):
+        """Connect the specified SQLAlchemy engine to the controller.
+        """
+        
        # create tables
        mapper_registry.metadata.create_all(bind=engine)

@@ -193,8 +365,8 @@ class ScraperController:
        self.session.configure(bind=self.engine)

    def reset_db(self):
+        """Drop all data from the connected SQLAlchemy database.
+        """

        mapper_registry.metadata.drop_all(bind=self.engine)
-        self.connect_to_db(self.engine)
-
-
+        self.connect_to_db(self.engine)
--- a/cisticola/scraper/bitchute.py
+++ b/cisticola/scraper/bitchute.py
@@ -17,7 +17,7 @@ class BitchuteScraper(Scraper):
    library"""
    __version__ = "BitchuteScraper 0.0.1"

-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
        username = url.split('bitchute.com/channel/')[-1].strip('/')

        return username
@@ -33,7 +33,7 @@ class BitchuteScraper(Scraper):

        detail = 'comments'

-        username = BitchuteScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
        scraper = get_videos_user(session, username, csrftoken, detail)

        for post in scraper:
@@ -61,7 +61,7 @@ class BitchuteScraper(Scraper):
                archived_urls=archived_urls)

    def can_handle(self, channel):
-        if channel.platform == "Bitchute" and BitchuteScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
            return True

 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
--- a/cisticola/scraper/gab.py
+++ b/cisticola/scraper/gab.py
@@ -11,14 +11,14 @@ class GabScraper(Scraper):
    """An implementation of a Scraper for Gab, using GARC library"""
    __version__ = "GabScraper 0.0.1"

-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
        username = url.split('https://gab.com/')[-1]

        return username

    def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
        client = Garc(profile = 'main')
-        username = GabScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)

        scraper = client.userposts(username)

@@ -52,5 +52,5 @@ class GabScraper(Scraper):
                archived_urls=archived_urls)

    def can_handle(self, channel):
-        if channel.platform == "Gab" and GabScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
            return True
--- a/cisticola/scraper/gettr.py
+++ b/cisticola/scraper/gettr.py
@@ -12,7 +12,7 @@ class GettrScraper(Scraper):
    """An implementation of a Scraper for Gettr, using gogettr library"""
    __version__ = "GettrScraper 0.0.1"

-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
        username = url.split("gettr.com/user/")[1]
        if len(username.split("/")) > 1:
            return None
@@ -21,7 +21,7 @@ class GettrScraper(Scraper):

    def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
        client = PublicClient()
-        username = GettrScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
        scraper = client.user_activity(username=username, type="posts")

        for post in scraper:
@@ -62,7 +62,7 @@ class GettrScraper(Scraper):
                archived_urls=archived_urls)

    def can_handle(self, channel):
-        if channel.platform == "Gettr" and GettrScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Gettr" and self.get_username_from_url(channel.url) is not None:
            return True

    def url_to_key(self, url: str, content_type: str) -> str:
--- a/cisticola/scraper/instagram.py
+++ b/cisticola/scraper/instagram.py
@@ -18,6 +18,7 @@ CONTENT_TYPES = {
    'mp4' : 'video/mp4'}

 class InstagramScraper(Scraper):
+    """An implementation of a Scraper for Instagram, using instaloader library"""
    __version__ = "InstagramScraper 0.0.1"

    def get_username_from_url(self, url):
--- a/cisticola/scraper/odysee.py
+++ b/cisticola/scraper/odysee.py
@@ -13,7 +13,7 @@ class OdyseeScraper(Scraper):
    """An implementation of a Scraper for Odysee, using polyphemus library"""
    __version__ = "OdyseeScraper 0.0.1"

-    def get_username_from_url(url):
+    def get_username_from_url(self, url):

        username = url.split('odysee.com/')[-1].strip('@').split(':')[0]

@@ -21,7 +21,7 @@ class OdyseeScraper(Scraper):

    def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:

-        username = OdyseeScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
        odysee_channel = OdyseeChannel(channel_name = username)
        
        all_videos = odysee_channel.get_all_videos()
@@ -70,7 +70,7 @@ class OdyseeScraper(Scraper):
                    archived_urls={})

    def can_handle(self, channel):
-        if channel.platform == "Odysee" and OdyseeScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Odysee" and self.get_username_from_url(channel.url) is not None:
            return True

    def url_to_key(self, url: str, content_type: str) -> str:
--- a/cisticola/scraper/rumble.py
+++ b/cisticola/scraper/rumble.py
@@ -14,14 +14,14 @@ class RumbleScraper(Scraper):
    """An implementation of a Scraper for Rumble, using custom functions"""
    __version__ = "RumbleScraper 0.0.1"

-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
        username = url.split('https://rumble.com/c/')[1]

        return username

    def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:

-        username = RumbleScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
        scraper = get_channel_videos(username)

        for post in scraper:
@@ -54,7 +54,7 @@ class RumbleScraper(Scraper):
        return key 

    def can_handle(self, channel):
-        if channel.platform == "Rumble" and RumbleScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Rumble" and self.get_username_from_url(channel.url) is not None:
            return True

 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
--- a/cisticola/scraper/telegram_snscrape.py
+++ b/cisticola/scraper/telegram_snscrape.py
@@ -8,6 +8,7 @@ from cisticola.base import Channel, ScraperResult
 from cisticola.scraper.base import Scraper

 class TelegramSnscrapeScraper(Scraper):
+    """An implementation of a Scraper for Telegram, using snscrape library"""
    __version__ = "TelegramSnscrapeScraper 0.0.1"

    def can_handle(self, channel):
--- a/cisticola/scraper/telegram_telethon.py
+++ b/cisticola/scraper/telegram_telethon.py
@@ -14,6 +14,7 @@ from cisticola.scraper.base import Scraper
 MEDIA_TYPES = ['photo', 'video', 'document', 'webpage']

 class TelegramTelethonScraper(Scraper):
+    """An implementation of a Scraper for Telegram, using Telethon library"""
    __version__ = "TelegramTelethonScraper 0.0.1"

    def get_username_from_url(self, url):
@@ -30,9 +31,9 @@ class TelegramTelethonScraper(Scraper):

        username = self.get_username_from_url(channel.url)

-        api_id = os.environ['TELEGRAM_API_ID_1']
-        api_hash = os.environ['TELEGRAM_API_HASH_1']
-        phone = os.environ['TELEGRAM_PHONE_1']
+        api_id = os.environ['TELEGRAM_API_ID']
+        api_hash = os.environ['TELEGRAM_API_HASH']
+        phone = os.environ['TELEGRAM_PHONE']

        with TelegramClient(phone, api_id, api_hash) as client: