From 596598f31bd6da0491bd16e9b171fa7841f50e30 Mon Sep 17 00:00:00 2001 From: muskit <15199219+muskit@users.noreply.github.com> Date: Sun, 27 Aug 2023 02:55:18 -0700 Subject: [PATCH] rework error handling slightly --- src/catchup.py | 60 +++++++++++++++++++++++--------------------------- src/listen.py | 13 +---------- src/scraper.py | 12 ++++++---- 3 files changed, 37 insertions(+), 48 deletions(-) diff --git a/src/catchup.py b/src/catchup.py index 2e12669..0c15023 100644 --- a/src/catchup.py +++ b/src/catchup.py @@ -17,8 +17,6 @@ import ttweetqueue as ttq PROGRAM_ARGS = None safe_to_post_tweets = True -errored = False - scraper: Scraper # Updates TTweetQueue @@ -42,10 +40,10 @@ async def get_cross_tweets_online(): queue.add_ttweet(ttweet) except KeyboardInterrupt as e: raise e - except: - print('Error occurred processing tweet data.') + except Exception as e: + print('Unhandled error occurred processing tweet data.') safe_to_post_tweets = False - traceback.print_exc() + raise e else: queue.finished_user_dates[talent_id] = get_current_date() queue.save_file() @@ -56,19 +54,22 @@ async def get_cross_tweets_online(): except: print('Unhandled error occurred while pulling tweets.') traceback.print_exc() + with open("error_catchup.txt", "a") as f: + traceback.print_exc(file=f) safe_to_post_tweets = False else: print('Successfully saved all tweets from online!') queue.save_file() -# return False = errored or we posted at least one ttweet +# return False = we posted at least one ttweet # return True = we didn't post a single ttweet async def process_queue() -> bool: - global errored + ''' + Go through the queue and post stored TalentTweets. + ''' global scraper global queue - errored = False queued_ttweets_count = queue.get_count() WAIT_TIME = 60*15 @@ -98,19 +99,17 @@ async def process_queue() -> bool: await asyncio.sleep(WAIT_TIME-5) print('5 second warning!') await asyncio.sleep(5) - except: + except Exception as e: print('Unhandled error occurred while posting tweets from queue.') - errored = True traceback.print_exc() - if errored or ttweets_posted > 0: + if ttweets_posted > 0: return False return True # return True = no problems # return False = issue occurred where we couldn't post all past tweets properly async def run(PROGRAM_ARGS): - global errored global safe_to_post_tweets global scraper global queue @@ -118,15 +117,7 @@ async def run(PROGRAM_ARGS): scraper = Scraper() queue = ttq.TalentTweetQueue.instance - if PROGRAM_ARGS.refresh_queue: - PROGRAM_ARGS.refresh_queue = False - print('Refreshing queue tweets...') - for id in queue.ttweets_dict: - t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts) - queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t) - queue.save_file() - - # post tweets given in command line first + # post tweets given in command line first if PROGRAM_ARGS.post_id is not None and len(PROGRAM_ARGS.post_id) > 0: PROGRAM_ARGS.post_id.sort() print('Posting specified tweets first.') @@ -144,42 +135,47 @@ async def run(PROGRAM_ARGS): await asyncio.sleep(60*5) else: print('Did not post tweet') - print('Done processing specified tweets') PROGRAM_ARGS.post_id = None + # refresh stored queue first + if PROGRAM_ARGS.refresh_queue: + PROGRAM_ARGS.refresh_queue = False + print('Refreshing queue tweets...') + for id in queue.ttweets_dict: + t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts) + queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t) + queue.save_file() + async def queue_loop(): while True: print(f'{queue.get_count()} cross-company tweets to announce.') try: if safe_to_post_tweets: if await process_queue(): + print("Finished processing queue") + else: print('Posted no new tweets; we\'re caught up!') - return True + return else: print('Tweets were not retrieved cleanly. Not processing queue.') - return False + return except KeyboardInterrupt as e: print('Interrupting queue processing...') raise e except: print('Unhandled error occurred while running catch up in posting phase.') traceback.print_exc() - return False - - if errored: - return False - await get_cross_tweets_online() try: if PROGRAM_ARGS.straight_to_queue: PROGRAM_ARGS.straight_to_queue = False - print('Processing queue first before pulling tweets...') - return await queue_loop() + print('Processing queue first before fetching tweets...') + await queue_loop() else: await get_cross_tweets_online() - return await queue_loop() + await queue_loop() except KeyboardInterrupt: print('Interrupt received. Ending catchup mode...') return False \ No newline at end of file diff --git a/src/listen.py b/src/listen.py index 131942c..4e28a8e 100644 --- a/src/listen.py +++ b/src/listen.py @@ -3,14 +3,11 @@ from time import sleep import asyncio -import traceback import catchup -errors_encountered = 0 def run(PROGRAM_ARGS): - global errors_encountered while True: try: asyncio.run(catchup.run(PROGRAM_ARGS)) @@ -18,13 +15,5 @@ def run(PROGRAM_ARGS): sleep(60*10) # run every 10 minutes except KeyboardInterrupt: print('Interrupt signal received. Exiting listen mode.') - print(f'{errors_encountered} errors encountered throughout session.') + print(f'errors encountered throughout session.') break - except: - errors_encountered += 1 - print('Ran into an error while in listen mode.') - traceback.print_exc() - else: - print('API stream exited gracefully.') - print('Re-running listen mode...') - print(f'(Had {errors_encountered} errors so far.)') diff --git a/src/scraper.py b/src/scraper.py index 97b786f..4ffab51 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -65,7 +65,7 @@ class Scraper: # tweet.retweeted_tweet = self.app.tweet_detail(str(tweet.id)).retweeted_tweet tweet.is_retweet = False elif tweet.retweeted_tweet.author is None: - print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...') + print(f'{tweet.author.username}/{tweet.id} is missing the RT author! Fetching RT\'d...') tweet.retweeted_tweet = self.get_tweet(tweet.retweeted_tweet.id) if tweet.is_quoted: @@ -74,11 +74,11 @@ class Scraper: # tweet.quoted_tweet = self.app.tweet_detail(str(tweet.id)).quoted_tweet tweet.is_quoted = False elif tweet.quoted_tweet.author is None: - print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...') + print(f'{tweet.author.username}/{tweet.id} is missing the QRT author! Fetching QRT\'d...') tweet.quoted_tweet = self.get_tweet(tweet.quoted_tweet.id) if tweet.is_reply and tweet.replied_to is None: - print('missing reply-to tweet. recovering...') + print(f'{tweet.author.username}/{tweet.id} is missing reply-to tweet! Recovering...') tweet.replied_to = self.get_tweet(tweet.original_tweet['in_reply_to_status_id_str']) return tweet @@ -90,8 +90,12 @@ class Scraper: try: t = self.app.tweet_detail(str(id)) return self.fix_tweet(t) if t is not None else None + except RateLimitReached: + print("RateLimitReached occurred") + self.login_wait(private_user) except UnknownError: print("UnknownError occurred, probably rate-limited") + #traceback.print_exc() self.login_wait(private_user) except Exception as e: if private_user: @@ -157,7 +161,7 @@ class Scraper: add_tweet(t) cur = search.cursor - except UnknownError: + except (UnknownError, RateLimitReached): print("UnknownError occurred, probably rate-limited") self.login_wait(uid in talent_lists.privated_accounts)