Ignore privated tweets entirely (probably the reason my scrapers get kicked)

This commit is contained in:
muskit
2024-04-29 23:55:29 -07:00
parent 85b4bfe939
commit cb921ee911
+20 -16
View File
@@ -89,7 +89,7 @@ class Scraper:
return tweet return tweet
def get_tweet(self, id: int, private_user=False): def get_tweet(self, id: int, private_user=False):
# print(f'{id}{" on private" if private_user else ""}') # print(f'getting {id}{" on private" if private_user else ""}')
if private_user: if private_user:
self.try_login(0) self.try_login(0)
while True: while True:
@@ -101,23 +101,27 @@ class Scraper:
self.login_wait(private_user) self.login_wait(private_user)
except UnknownError as e: except UnknownError as e:
print(f"UnknownError occurred: {e.message.rstrip()}") print(f"UnknownError occurred: {e.message.rstrip()}")
if any(x in e.message.lower() for x in ["missing", "post is unavailable"]) : # tweet is probably unavailable print(f"skipping attempt to get tweet {id}...")
print(f"tweet {id} seems unavailable; skipping...")
return None return None
print("treating like RateLimitReached and using the next scraper...") # if any(x in e.message.lower() for x in ["missing", "post is unavailable", "delete"]) : # tweet is probably unavailable
# print(f"tweet {id} seems unavailable; skipping...")
# return None
# if "account owner limits" in e.message.lower(): # private tweet
# print("trying again as pvt-accessible...\n")
# return self.get_tweet(id, True)
# print("treating like RateLimitReached and using the next scraper...")
# traceback.print_exc() # traceback.print_exc()
self.login_wait(private_user) # self.login_wait(private_user)
except Exception as e: except Exception as e:
if not private_user: # if not private_user:
print("Unhandled exception occurred getting tweet!") # print("Unhandled exception occurred getting tweet!")
# traceback.print_exc()
# print("trying again as pvt-accessible...\n")
# return self.get_tweet(id, True)
# else:
print("Unhandled exception occurred")
traceback.print_exc() traceback.print_exc()
print("trying again as pvt-accessible...\n") print(f"skipping tweet {id}")
return self.get_tweet(id, True)
else:
print(
f"Unhandled exception occurred, tweet {id} is probably unavailable"
)
print(e)
return None return None
# since MUST BE TIMEZONE AWARE # since MUST BE TIMEZONE AWARE
@@ -187,10 +191,10 @@ class Scraper:
cur = search.cursor cur = search.cursor
except RateLimitReached: except RateLimitReached:
print("RateLimitReached occurred") print("RateLimitReached occurred getting tweets from user")
self.login_wait(uid in talent_lists.privated_accounts) self.login_wait(uid in talent_lists.privated_accounts)
except UnknownError as e: except UnknownError as e:
print(f"UnknownError occurred: {e.message.rstrip()}") print(f"UnknownError occurred getting tweets from user: {e.message.rstrip()}")
print("treating like RateLimitReached...") print("treating like RateLimitReached...")
self.login_wait(uid in talent_lists.privated_accounts) self.login_wait(uid in talent_lists.privated_accounts)
sleep(5) # FIXME: temporary attempt to avoid scraper lock-up sleep(5) # FIXME: temporary attempt to avoid scraper lock-up