From 4878716460cb476f663e10876ffa13b99a59b4d6 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Fri, 26 Jan 2024 22:15:13 -0800 Subject: [PATCH 1/8] format --- src/main.py | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/src/main.py b/src/main.py index ab9c57e..0fde8ef 100644 --- a/src/main.py +++ b/src/main.py @@ -14,27 +14,49 @@ from twapi import TwAPI PROGRAM_ARGS = None -MODES_HELP_STR = '''mode to run the bot at: +MODES_HELP_STR = """mode to run the bot at: scrape accounts in lists and post cross-company tweets if relevant -cmd drop into Python interpretor with access to initialized variables''' +cmd drop into Python interpretor with access to initialized variables""" + def init_argparse(): - p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter) - p.add_argument('mode', nargs='?', help=MODES_HELP_STR) - p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.') - p.add_argument('--refresh-queue', action='store_true', help='Refresh the details on each tweet currently in queue.') - p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.') - p.add_argument('--post-id', action='append', help='ID of a tweet to try and post right away. Specify multiple to post multiple tweets in a row.') + p = argparse.ArgumentParser( + description="Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.", + formatter_class=RawTextHelpFormatter, + ) + p.add_argument("mode", nargs="?", help=MODES_HELP_STR) + p.add_argument( + "--no-listen", + action="store_true", + help="Run one scraping-posting cycle without waiting to run again.", + ) + p.add_argument( + "--refresh-queue", + action="store_true", + help="Refresh the details on each tweet currently in queue.", + ) + p.add_argument( + "--straight-to-queue", + action="store_true", + help="Go through queue first before attempting to pull tweets.", + ) + p.add_argument( + "--post-id", + action="append", + help="ID of a tweet to try and post right away. Specify multiple to post multiple tweets in a row.", + ) return p + def command_line(): # TODO (extra): implement command line mode for manually controlling the bot - print('Here\'s a Python interpretor.') + print("Here's a Python interpreter.") try: code.interact(local=globals()) except SystemExit: pass + async def async_main(): global PROGRAM_ARGS @@ -44,12 +66,13 @@ async def async_main(): else: listen.run(PROGRAM_ARGS) return - + mode = PROGRAM_ARGS.mode.lower() - if mode == 'cmd': + if mode == "cmd": command_line() else: - print('\nunknown mode. run with no arguments or -h for help and modes') + print("\nunknown mode. run with no arguments or -h for help and modes") + def init_data(): # Initialize shared API instance @@ -60,12 +83,13 @@ def init_data(): if PROGRAM_ARGS.mode: mode = PROGRAM_ARGS.mode.lower() - if mode != 'cmd': + if mode != "cmd": # Initialize queue files system ttq.TalentTweetQueue() else: ttq.TalentTweetQueue() + def main(): global PROGRAM_ARGS @@ -81,7 +105,7 @@ def main(): ## Asynchronous execution nest_asyncio.apply() asyncio.run(async_main()) - + if __name__ == "__main__": main() From 3e93b533a781dc93248adb49c391dcfe2c55803b Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Fri, 26 Jan 2024 23:34:32 -0800 Subject: [PATCH 2/8] begin transition to original tweety code --- .gitignore | 2 +- requirements.txt | 2 +- src/scraper.py | 4 +- src/talenttweet.py | 275 ++++++++++++++++++++++++++++----------------- 4 files changed, 176 insertions(+), 107 deletions(-) diff --git a/.gitignore b/.gitignore index bf619a7..3546b59 100644 --- a/.gitignore +++ b/.gitignore @@ -144,4 +144,4 @@ cython_debug/ # project-specific run/ -*.json \ No newline at end of file +*.tw_session \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b687e64..74ef182 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ python-dotenv nest-asyncio pytz -git+https://github.com/muskit/tweety.git +git+https://github.com/mahrtayyab/tweety.git@e3d330280cb3b2e8f9d2bf2f20425c476f7671a5 tweepy tweet-capture opencv-python-headless diff --git a/src/scraper.py b/src/scraper.py index 68fd414..49eb392 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -81,7 +81,7 @@ class Scraper: if tweet.is_reply and tweet.replied_to is None: # print(f'{tweet.author.username}/{tweet.id} is missing reply-to tweet! Recovering...') tweet.replied_to = self.get_tweet( - tweet.original_tweet["in_reply_to_status_id_str"] + tweet._original_tweet["in_reply_to_status_id_str"] ) return tweet @@ -168,7 +168,7 @@ class Scraper: for e in cur_page: if isinstance(e, Tweet): add_tweet(e) - elif isinstance(e, TweetThread): + elif isinstance(e, SelfThread): # FIXME: rework when replied_to is fixed (currently populates user_mentions) # latest tweet in thread = og author's reply for t in e: diff --git a/src/talenttweet.py b/src/talenttweet.py index 53ca8ad..3fc1a00 100644 --- a/src/talenttweet.py +++ b/src/talenttweet.py @@ -8,80 +8,84 @@ from tweety.types import * import talent_lists as tl import util + class TalentTweet: # Serialized one-liner format: # {tweet} {author} {time in seconds since epoch UTC} m {mention set} r {reply to author} q {quote tweet author} rt {retweeted user's id} rtm {mentions in retweet} def serialize(self): - s = f'{self.tweet_id} {self.author_id} {int(self.date_time.timestamp())} ' + s = f"{self.tweet_id} {self.author_id} {int(self.date_time.timestamp())} " if self.date_time.tzinfo is None: - print(f'warning: serialized tweet {self.tweet_id} has a NAIVE timestamp!') + print(f"warning: serialized tweet {self.tweet_id} has a NAIVE timestamp!") if len(self.rt_mentions) > 0: - s += 'rtm ' + s += "rtm " for n in self.rt_mentions: - s += f'{n} ' + s += f"{n} " if self.rt_author_id != None: - s += f'rt {self.rt_author_id} ' - return s[:-1] # stop here since retweets can't have other info - + s += f"rt {self.rt_author_id} " + return s[:-1] # stop here since retweets can't have other info + if len(self.mentions) > 0: - s += 'm ' + s += "m " for id in self.mentions: - s += f'{id} ' + s += f"{id} " if self.reply_to: - s += f'r {self.reply_to} ' + s += f"r {self.reply_to} " if self.quote_tweeted: - s += f'q {self.quote_tweeted} ' - + s += f"q {self.quote_tweeted} " + return s[:-1] @staticmethod def deserialize(serialized_str: str): - token_check = serialized_str.split('#')[0] + token_check = serialized_str.split("#")[0] if len(token_check) < 3: - raise ValueError('not enough tokens to reconstruct a TalentTweet') - + raise ValueError("not enough tokens to reconstruct a TalentTweet") + tokens = serialized_str.split() - + tweet_id, author_id = int(tokens[0]), int(tokens[1]) date_time = datetime.fromtimestamp(float(tokens[2]), tz=pytz.utc) - + mentions = list() reply_to = None quote_retweeted = None rt = None rtm = list() - mode = '' + mode = "" for i in range(3, len(tokens)): - if not tokens[i].isnumeric(): # mode switch + if not tokens[i].isnumeric(): # mode switch mode = tokens[i] continue - + if tokens[i].isnumeric(): - if mode == 'm': # mentions + if mode == "m": # mentions mentions.append(int(tokens[i])) continue - elif mode == 'r': # reply_to + elif mode == "r": # reply_to reply_to = int(tokens[i]) continue - elif mode == 'q': # quote_retweeted + elif mode == "q": # quote_retweeted quote_retweeted = int(tokens[i]) - elif mode == 'rt': # retweeted user + elif mode == "rt": # retweeted user rt = int(tokens[i]) - elif mode == 'rtm': # retweet/qrt mentions + elif mode == "rtm": # retweet/qrt mentions rtm.append(int(tokens[i])) else: - raise ValueError(f'encountered invalid mode token {mode}') - + raise ValueError(f"encountered invalid mode token {mode}") + return TalentTweet( - tweet_id=tweet_id, author_id=author_id, - date_time=date_time, mrq=(mentions, reply_to, quote_retweeted), - rt_author_id=rt, rt_mentions=rtm + tweet_id=tweet_id, + author_id=author_id, + date_time=date_time, + mrq=(mentions, reply_to, quote_retweeted), + rt_author_id=rt, + rt_mentions=rtm, ) - - ## Creates a TalentTweet from a Tweety-library Tweet. + + ## Creates a TalentTweet from a Tweety Tweet. @staticmethod def create_from_tweety(tweety: Tweet): if tweety.is_retweet: @@ -94,18 +98,35 @@ class TalentTweet: rtm = set() return TalentTweet( - tweet_id=int(tweety.id), author_id=int(tweety.author.id), - date_time=tweety.date, text=tweety.text, + tweet_id=int(tweety.id), + author_id=int(tweety.author.id), + date_time=tweety.date, + text=tweety.text, mrq=( {int(x.id) for x in tweety.user_mentions}, - int(tweety.original_tweet['in_reply_to_user_id_str']) if tweety.is_reply else None, - int(tweety.quoted_tweet.author.id) if tweety.quoted_tweet is not None else None + int(tweety._original_tweet["in_reply_to_user_id_str"]) + if tweety.is_reply + else None, + int(tweety.quoted_tweet.author.id) + if tweety.quoted_tweet is not None + else None, ), - rt_author_id=tweety.retweeted_tweet.author.id if tweety.is_retweet else None, - rt_mentions=rtm + rt_author_id=tweety.retweeted_tweet.author.id + if tweety.is_retweet + else None, + rt_mentions=rtm, ) - def __init__(self, tweet_id: int, author_id: int, date_time: datetime, text: str = None, mrq: tuple[list[int], int|None, int|None]=None, rt_author_id: int=None, rt_mentions: list[int]=None): + def __init__( + self, + tweet_id: int, + author_id: int, + date_time: datetime, + text: str = None, + mrq: tuple[list[int], int | None, int | None] = None, + rt_author_id: int = None, + rt_mentions: list[int] = None, + ): # basic information self.tweet_id, self.author_id = tweet_id, author_id self.username = util.get_username_local(self.author_id) @@ -116,47 +137,66 @@ class TalentTweet: self.mentions = {x for x in mrq[0] if x in tl.talents} self.rt_mentions = {x for x in rt_mentions if x in tl.talents} self.mentions.difference_update(self.rt_mentions) - try: self.rt_mentions.remove(self.author_id) - except: pass + try: + self.rt_mentions.remove(self.author_id) + except: + pass self.reply_to = mrq[1] self.quote_tweeted = mrq[2] self.rt_author_id = rt_author_id - try: self.mentions.remove(self.reply_to) - except: pass + try: + self.mentions.remove(self.reply_to) + except: + pass # -1 if user is not in company - self.reply_to = self.reply_to if self.reply_to is None or self.reply_to in tl.talents else -1 - self.quote_tweeted = self.quote_tweeted if self.quote_tweeted is None or self.quote_tweeted in tl.talents else -1 - self.rt_author_id = self.rt_author_id if self.rt_author_id is None or self.rt_author_id in tl.talents else -1 + self.reply_to = ( + self.reply_to + if self.reply_to is None or self.reply_to in tl.talents + else -1 + ) + self.quote_tweeted = ( + self.quote_tweeted + if self.quote_tweeted is None or self.quote_tweeted in tl.talents + else -1 + ) + self.rt_author_id = ( + self.rt_author_id + if self.rt_author_id is None or self.rt_author_id in tl.talents + else -1 + ) # all users involved except for the author self.all_parties = {self.reply_to, self.quote_tweeted, rt_author_id} self.all_parties.update(self.mentions, self.rt_mentions) - try: self.all_parties.remove(None) - except: pass - try: self.all_parties.remove(self.author_id) - except: pass + try: + self.all_parties.remove(None) + except: + pass + try: + self.all_parties.remove(self.author_id) + except: + pass # if not self.is_cross_company(): # print(f'WARNING: {self.tweet_id} is not cross-company!') - def __repr__(self) -> str: return ( - f'======================================================\n' - f'{self.tweet_id} from {self.username}:\n' - f'{self.get_datetime_str()}\n' - f'parties: {self.get_all_parties_usernames()}\n' - f'mentions: {self.mentions}\n' - f'reply_to: {self.reply_to}\n' - f'rtm: {self.rt_mentions}\n' - f'quote_retweeted: {self.quote_tweeted}\n' - f'cross-company? {self.is_cross_company()}\n' - f'{self.serialize()}\n' - f'----\n{self.announce_text()}\n----\n' - f'{self.url()}' + f"======================================================\n" + f"{self.tweet_id} from {self.username}:\n" + f"{self.get_datetime_str()}\n" + f"parties: {self.get_all_parties_usernames()}\n" + f"mentions: {self.mentions}\n" + f"reply_to: {self.reply_to}\n" + f"rtm: {self.rt_mentions}\n" + f"quote_retweeted: {self.quote_tweeted}\n" + f"cross-company? {self.is_cross_company()}\n" + f"{self.serialize()}\n" + f"----\n{self.announce_text()}\n----\n" + f"{self.url()}" ) def url(self): @@ -165,89 +205,118 @@ class TalentTweet: def is_cross_company(self): if self.author_id == self.rt_author_id: return False - + for other_id in self.all_parties: if tl.is_cross_company(self.author_id, other_id): return True - + return False - + def get_all_parties_usernames(self): if len(self.all_parties) > 0: s = str() for id in self.all_parties: - s += f'{util.get_username_local(id)}, ' + s += f"{util.get_username_local(id)}, " return s[0:-2] - return 'none' + return "none" def get_datetime_str(self): - unpad = '#' if platform.system() == 'Windows' else '-' - return self.date_time.strftime(f'%b %{unpad}d, %Y · %{unpad}I:%M%p (%Z)') + unpad = "#" if platform.system() == "Windows" else "-" + return self.date_time.strftime(f"%b %{unpad}d, %Y · %{unpad}I:%M%p (%Z)") def announce_text(self): # templates - TWEET = '{0} tweeted mentioning {1}!' - REPLY = '{0} replied to {1}!' - REPLY_TO_MENTION_B = '{0} replied to a tweet{1}mentioning {2}!' ######################### - RETWEET = '{0} retweeted {1}!' - RETWEET_MENTIONS_B = '{0} shared a tweet{1}mentioning {2}!' ######################### - QUOTE_TWEET = '{0} quote tweeted {1}!' - QUOTED_TWEET_MENTIONS_B = '{0} quoted a tweet{1}mentioning {2}!' ######################### + TWEET = "{0} tweeted mentioning {1}!" + REPLY = "{0} replied to {1}!" + REPLY_TO_MENTION_B = ( + "{0} replied to a tweet{1}mentioning {2}!" ######################### + ) + RETWEET = "{0} retweeted {1}!" + RETWEET_MENTIONS_B = ( + "{0} shared a tweet{1}mentioning {2}!" ######################### + ) + QUOTE_TWEET = "{0} quote tweeted {1}!" + QUOTED_TWEET_MENTIONS_B = ( + "{0} quoted a tweet{1}mentioning {2}!" ######################### + ) - author_username = f'@/{util.get_username_with_company(self.author_id)}' + author_username = f"@/{util.get_username_with_company(self.author_id)}" ret = str() print_mention_ids = set(self.mentions) - try: print_mention_ids.remove(None) - except: pass - mention_usernames = [f'@/{util.get_username_with_company(x)}' for x in print_mention_ids] + try: + print_mention_ids.remove(None) + except: + pass + mention_usernames = [ + f"@/{util.get_username_with_company(x)}" for x in print_mention_ids + ] def rtm_msg(TEMPLATE: str, rtm_author_username: str): nonlocal ret - if (self.rt_author_id is not None and self.rt_author_id != -1) \ - or (self.quote_tweeted is not None and self.quote_tweeted != -1) \ - or (self.reply_to is not None and self.reply_to != -1): # rtm tweet is from talent; rtm should be everyone - rtm_names = [f'@/{util.get_username_with_company(x)}' for x in self.rt_mentions] - between = f' from {rtm_author_username} ' + if ( + (self.rt_author_id is not None and self.rt_author_id != -1) + or (self.quote_tweeted is not None and self.quote_tweeted != -1) + or (self.reply_to is not None and self.reply_to != -1) + ): # rtm tweet is from talent; rtm should be everyone + rtm_names = [ + f"@/{util.get_username_with_company(x)}" for x in self.rt_mentions + ] + between = f" from {rtm_author_username} " ret += TEMPLATE.format(author_username, between, ", ".join(rtm_names)) - else: # rtm tweet is not from a talent; rtm should just be cross company - rtm_names = [f'@/{util.get_username_with_company(x)}' for x in self.rt_mentions if tl.is_cross_company(self.author_id, x)] - ret += TEMPLATE.format(author_username, ' ', ", ".join(rtm_names)) + else: # rtm tweet is not from a talent; rtm should just be cross company + rtm_names = [ + f"@/{util.get_username_with_company(x)}" + for x in self.rt_mentions + if tl.is_cross_company(self.author_id, x) + ] + ret += TEMPLATE.format(author_username, " ", ", ".join(rtm_names)) # Tweet types - if self.rt_author_id is not None: # retweet - rt_username = f'@/{util.get_username_with_company(self.rt_author_id)}' if self.rt_author_id != -1 else None + if self.rt_author_id is not None: # retweet + rt_username = ( + f"@/{util.get_username_with_company(self.rt_author_id)}" + if self.rt_author_id != -1 + else None + ) if len(self.rt_mentions) > 0: rtm_msg(RETWEET_MENTIONS_B, rt_username) else: ret += RETWEET.format(author_username, rt_username) mention_usernames.clear() - elif self.reply_to is not None: # reply - reply_username = f'@/{util.get_username_with_company(self.reply_to)}' if self.reply_to != -1 else None + elif self.reply_to is not None: # reply + reply_username = ( + f"@/{util.get_username_with_company(self.reply_to)}" + if self.reply_to != -1 + else None + ) if len(self.rt_mentions) > 0: rtm_msg(REPLY_TO_MENTION_B, reply_username) else: ret += REPLY.format(author_username, reply_username) - elif self.quote_tweeted is not None: # qrt - quoted_username = f'@/{util.get_username_with_company(self.quote_tweeted)}' if self.quote_tweeted != -1 else None + elif self.quote_tweeted is not None: # qrt + quoted_username = ( + f"@/{util.get_username_with_company(self.quote_tweeted)}" + if self.quote_tweeted != -1 + else None + ) if len(self.rt_mentions) > 0: rtm_msg(QUOTED_TWEET_MENTIONS_B, quoted_username) else: ret += QUOTE_TWEET.format(author_username, quoted_username) - elif len(self.mentions) > 0: # standalone tweet + elif len(self.mentions) > 0: # standalone tweet ret += TWEET.format(author_username, ", ".join(mention_usernames)) mention_usernames.clear() else: - raise ValueError(f'TalentTweet {self.tweet_id} has insufficient other parties') + raise ValueError( + f"TalentTweet {self.tweet_id} has insufficient other parties" + ) # mention line if len(mention_usernames) > 0: - ret += ( - '\nMentions: ' - f'{", ".join(mention_usernames)}' - ) - + ret += "\nMentions: " f'{", ".join(mention_usernames)}' + # date - ret += f'\n\n{self.get_datetime_str()}' + ret += f"\n\n{self.get_datetime_str()}" return ret From 8fda47331629fd49307db6bced2985271b66dee0 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Fri, 2 Feb 2024 02:23:09 -0800 Subject: [PATCH 3/8] docs: update README.md --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fe6336c..f00833a 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,20 @@ Twitter bot that tracks cross-company interactions between the non-JP branches o **This project was created to run [this account](https://twitter.com/NijiHolo_EN_ID).** ## Running -Install dependencies. -``` -pip install -r requirements.txt -``` -Setup the `.env` in the project root. Refer to the `.env` section for variables. +With the way packages are setup, **you must have Docker installed and running!!** -Run the program from project root (not in `src`). Refer to the following section for options. +Setup the `.env` in the project root. Refer to the [`.env`](#env) section for variables. + +Build and run the Docker container: +```bash +# to run attached (can CTRL+P,CTRL+Q to detach) +sh run.sh + +# ... or to run headless +sh run_detached.sh +``` + +If attached to a container prepared by Dockerfile, you can run the program from project root (not in `src`). Refer to the following section for options. ``` python src/main.py ``` From 84e56d1ead88a3f10077b76e198f097ea4f0b782 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:04:02 -0800 Subject: [PATCH 4/8] why were there full-width spaces --- lists/nijien.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lists/nijien.txt b/lists/nijien.txt index 08964d1..afadc18 100644 --- a/lists/nijien.txt +++ b/lists/nijien.txt @@ -3,7 +3,7 @@ # ----- [NIJISANJI EN] ----- # --- [Lazulight] --- -1390637197167038464 PomuRainpuff +1390637197167038464 PomuRainpuff p 1390620618001838086 EliraPendora 1390209302120394754 FinanaRyugu @@ -26,11 +26,11 @@ 1465858739970273281 luca_kaneshiro # --- [Noctyx] --- -1490867613915828224 alban_knox -1491195742123397124 uki_violeta -1492604168145539072 Yugo_Asuma p -1493392149664219138 Fulgur_Ovid -1493394108014292993 sonny_brisko +1490867613915828224 alban_knox +1491195742123397124 uki_violeta +1492604168145539072 Yugo_Asuma p +1493392149664219138 Fulgur_Ovid +1493394108014292993 sonny_brisko # --- [ILUNA] --- 1545351225293426688 MariaMari0nette From 6d61814f00ae89162bb4412a7e2329709266d851 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:05:32 -0800 Subject: [PATCH 5/8] add requirements list for dev --- .gitignore | 3 ++- requirements_dev.txt | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 requirements_dev.txt diff --git a/.gitignore b/.gitignore index 3546b59..fa95045 100644 --- a/.gitignore +++ b/.gitignore @@ -144,4 +144,5 @@ cython_debug/ # project-specific run/ -*.tw_session \ No newline at end of file +*.tw_session +.venv* \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..b8178f8 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,7 @@ +python-dotenv +nest-asyncio +pytz +git+https://github.com/mahrtayyab/tweety.git@e3d330280cb3b2e8f9d2bf2f20425c476f7671a5 +tweepy +tweet-capture +opencv-python-headless \ No newline at end of file From f038233125713f008a549a1a31a66cf5861ef43d Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:33:16 -0800 Subject: [PATCH 6/8] adapt scraping code to mainline library version --- src/scraper.py | 4 ++-- src/tweety_utils.py | 36 +++++++++++++++++++----------------- src/util.py | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/scraper.py b/src/scraper.py index 49eb392..df20f45 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -19,7 +19,7 @@ class Scraper: def __init__(self): Scraper.instance = self self.__account = AccountPool() - self.try_login() + self.try_login(0) def try_login(self, account_idx: int = None) -> bool: # decide on which account to use @@ -159,7 +159,7 @@ class Scraper: search = self.app.search( f"from:{username}", filter_=SearchFilters.Latest(), cursor=cur ) - cur_page = search.tweets + cur_page = search.results print(f"obtained {len(cur_page)} tweets") if len(cur_page) == 0: diff --git a/src/tweety_utils.py b/src/tweety_utils.py index ad21e05..02ba186 100644 --- a/src/tweety_utils.py +++ b/src/tweety_utils.py @@ -1,24 +1,26 @@ from tweety.types import * + def url(t: Tweet): - return f'https://twitter.com/{t.author.username}/status/{t.id}' + return f"https://twitter.com/{t.author.username}/status/{t.id}" -def print_tweets(tweets: list[Tweet | TweetThread]): - print(f'{len(tweets)} tweets:') - for t in tweets: - if isinstance(t, Tweet): - print(f'{t.date} : {url(t)} :', end=' ') - if t.is_retweet: - print(f'RT ({t.retweeted_tweet.author.username})', end=' ') +def print_tweets(tweets: list[Tweet | SelfThread]): + print(f"{len(tweets)} tweets:") + for t in tweets: + if isinstance(t, Tweet): + print(f"{t.date} : {url(t)} :", end=" ") - if t.is_reply: - print(f'is reply!', end=' ') - if t.replied_to is not None: - print(f'reply to {t.replied_to.author.username}', end=' ') + if t.is_retweet: + print(f"RT ({t.retweeted_tweet.author.username})", end=" ") - print("m=" + ",".join([x.username for x in t.user_mentions])) - elif isinstance(t, TweetThread): - print('-----------TTd----------') - print_tweets(t.tweets) - print('-----------end----------') \ No newline at end of file + if t.is_reply: + print(f"is reply!", end=" ") + if t.replied_to is not None: + print(f"reply to {t.replied_to.author.username}", end=" ") + + print("m=" + ",".join([x.username for x in t.user_mentions])) + elif isinstance(t, SelfThread): + print("-----------TTd----------") + print_tweets(t.tweets) + print("-----------end----------") diff --git a/src/util.py b/src/util.py index 14be2c6..6f8e8cb 100644 --- a/src/util.py +++ b/src/util.py @@ -26,7 +26,7 @@ def project_root(dir_path: tuple[str] = tuple(), file: str = None): def working_path(dir_path: tuple[str] = tuple(), file: str = None): - """Returns file path relative to the working ephemeral directory.""" + """Returns file path relative to the working ephemeral directory "run".""" dir_path = project_root(("run", *dir_path)) Path(dir_path).mkdir(parents=True, exist_ok=True) From 5636443581f71de50ec18730ea5ce2cc0ace5f82 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Sat, 10 Feb 2024 01:25:30 -0800 Subject: [PATCH 7/8] shorten catchup interval --- src/catchup.py | 2 +- src/listen.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/catchup.py b/src/catchup.py index be70804..29ffc80 100644 --- a/src/catchup.py +++ b/src/catchup.py @@ -59,7 +59,7 @@ async def get_cross_tweets_online(): print(f"Queue has {queue.get_count()} tweets so far") except KeyboardInterrupt as e: print( - "Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!" + "Interrupting tweet pulling. The remaining dates in queue file will not be updated!" ) queue.save_file() raise e diff --git a/src/listen.py b/src/listen.py index 9df17fb..202a21e 100644 --- a/src/listen.py +++ b/src/listen.py @@ -11,9 +11,9 @@ def run(PROGRAM_ARGS): while True: try: asyncio.run(catchup.run(PROGRAM_ARGS)) - print('Sleeping for 60 minutes...') - sleep(60*60) # run every hour + print("Sleeping for 60 minutes...") + sleep(60 * 30) # run every half-hour except KeyboardInterrupt: - print('Interrupt signal received. Exiting listen mode.') - print(f'errors encountered throughout session.') + print("Interrupt signal received. Exiting listen mode.") + print(f"errors encountered throughout session.") break From ca3da14bd5c40c00d64c64a7576454146392c7c3 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:33:11 -0800 Subject: [PATCH 8/8] add dry run, handling of None names (#7) --- src/catchup.py | 2 +- src/main.py | 5 +++++ src/talenttweet.py | 6 +++--- src/twapi.py | 10 +++++----- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/catchup.py b/src/catchup.py index 29ffc80..77c2c5b 100644 --- a/src/catchup.py +++ b/src/catchup.py @@ -144,7 +144,7 @@ async def run(PROGRAM_ARGS): print(f"Invalid tweet {id}!") continue - posted = await TwAPI.instance.post_ttweet_by_id(i) + posted = await TwAPI.instance.post_ttweet_by_id(i, PROGRAM_ARGS.dry_run) if posted: queue.add_finished_tweet(i) print("Successfully posted tweet. Sleeping for 5 minutes") diff --git a/src/main.py b/src/main.py index 0fde8ef..076a844 100644 --- a/src/main.py +++ b/src/main.py @@ -40,6 +40,11 @@ def init_argparse(): action="store_true", help="Go through queue first before attempting to pull tweets.", ) + p.add_argument( + "--dry-run", + action="store_true", + help="Don't actually post anything to Twitter; use to check outputs from console.", + ) p.add_argument( "--post-id", action="append", diff --git a/src/talenttweet.py b/src/talenttweet.py index e76863a..88812a2 100644 --- a/src/talenttweet.py +++ b/src/talenttweet.py @@ -278,7 +278,7 @@ class TalentTweet: rt_username = ( util.get_username_with_company(self.rt_author_id) if self.rt_author_id != -1 - else None + else "someone" ) if rt_username == author_username: rt_username = "themselves" @@ -291,7 +291,7 @@ class TalentTweet: reply_username = ( util.get_username_with_company(self.reply_to) if self.reply_to != -1 - else None + else "someone" ) if reply_username == author_username: reply_username = "themselves" @@ -303,7 +303,7 @@ class TalentTweet: quoted_username = ( util.get_username_with_company(self.quote_tweeted) if self.quote_tweeted != -1 - else None + else "someone" ) if quoted_username == author_username: quoted_username = "themselves" diff --git a/src/twapi.py b/src/twapi.py index 368c328..a7adb6a 100644 --- a/src/twapi.py +++ b/src/twapi.py @@ -136,6 +136,8 @@ class TwAPI: # return True = successfully posted a single ttweet # return False = did not post ttweet (duplicate) async def post_ttweet(self, ttweet: tt.TalentTweet, dry_run=False): + import main + print( f"------{ttweet.tweet_id} ({util.get_username_local(ttweet.author_id)})------" ) @@ -145,11 +147,9 @@ class TwAPI: if dry_run: print("-------------------- DRY RUN --------------------") - print(ttweet) - if dry_run: + print(ttweet) return False - # NO DRY-RUN: actually post tweet # main tweet: text + screenshot try: print("creating main QRT w/ screenshot...") @@ -188,7 +188,7 @@ class TwAPI: raise e return True - async def post_ttweet_by_id(self, id: int): + async def post_ttweet_by_id(self, id: int, dry_run=False): from scraper import Scraper print(f"Manually posting tweet {id}") @@ -204,4 +204,4 @@ class TwAPI: return False print(f"Posting {ttweet.username}/{ttweet.tweet_id}...") - return await self.post_ttweet(ttweet) + return await self.post_ttweet(ttweet, dry_run)