From f038233125713f008a549a1a31a66cf5861ef43d Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:33:16 -0800 Subject: [PATCH] adapt scraping code to mainline library version --- src/scraper.py | 4 ++-- src/tweety_utils.py | 36 +++++++++++++++++++----------------- src/util.py | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/scraper.py b/src/scraper.py index 49eb392..df20f45 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -19,7 +19,7 @@ class Scraper: def __init__(self): Scraper.instance = self self.__account = AccountPool() - self.try_login() + self.try_login(0) def try_login(self, account_idx: int = None) -> bool: # decide on which account to use @@ -159,7 +159,7 @@ class Scraper: search = self.app.search( f"from:{username}", filter_=SearchFilters.Latest(), cursor=cur ) - cur_page = search.tweets + cur_page = search.results print(f"obtained {len(cur_page)} tweets") if len(cur_page) == 0: diff --git a/src/tweety_utils.py b/src/tweety_utils.py index ad21e05..02ba186 100644 --- a/src/tweety_utils.py +++ b/src/tweety_utils.py @@ -1,24 +1,26 @@ from tweety.types import * + def url(t: Tweet): - return f'https://twitter.com/{t.author.username}/status/{t.id}' + return f"https://twitter.com/{t.author.username}/status/{t.id}" -def print_tweets(tweets: list[Tweet | TweetThread]): - print(f'{len(tweets)} tweets:') - for t in tweets: - if isinstance(t, Tweet): - print(f'{t.date} : {url(t)} :', end=' ') - if t.is_retweet: - print(f'RT ({t.retweeted_tweet.author.username})', end=' ') +def print_tweets(tweets: list[Tweet | SelfThread]): + print(f"{len(tweets)} tweets:") + for t in tweets: + if isinstance(t, Tweet): + print(f"{t.date} : {url(t)} :", end=" ") - if t.is_reply: - print(f'is reply!', end=' ') - if t.replied_to is not None: - print(f'reply to {t.replied_to.author.username}', end=' ') + if t.is_retweet: + print(f"RT ({t.retweeted_tweet.author.username})", end=" ") - print("m=" + ",".join([x.username for x in t.user_mentions])) - elif isinstance(t, TweetThread): - print('-----------TTd----------') - print_tweets(t.tweets) - print('-----------end----------') \ No newline at end of file + if t.is_reply: + print(f"is reply!", end=" ") + if t.replied_to is not None: + print(f"reply to {t.replied_to.author.username}", end=" ") + + print("m=" + ",".join([x.username for x in t.user_mentions])) + elif isinstance(t, SelfThread): + print("-----------TTd----------") + print_tweets(t.tweets) + print("-----------end----------") diff --git a/src/util.py b/src/util.py index 14be2c6..6f8e8cb 100644 --- a/src/util.py +++ b/src/util.py @@ -26,7 +26,7 @@ def project_root(dir_path: tuple[str] = tuple(), file: str = None): def working_path(dir_path: tuple[str] = tuple(), file: str = None): - """Returns file path relative to the working ephemeral directory.""" + """Returns file path relative to the working ephemeral directory "run".""" dir_path = project_root(("run", *dir_path)) Path(dir_path).mkdir(parents=True, exist_ok=True)