Ignore privated tweets entirely (probably the reason why my scrapers get kicked)
This commit is contained in:
+20
-16
@@ -89,7 +89,7 @@ class Scraper:
|
|||||||
return tweet
|
return tweet
|
||||||
|
|
||||||
def get_tweet(self, id: int, private_user=False):
|
def get_tweet(self, id: int, private_user=False):
|
||||||
# print(f'{id}{" on private" if private_user else ""}')
|
# print(f'getting {id}{" on private" if private_user else ""}')
|
||||||
if private_user:
|
if private_user:
|
||||||
self.try_login(0)
|
self.try_login(0)
|
||||||
while True:
|
while True:
|
||||||
@@ -101,23 +101,27 @@ class Scraper:
|
|||||||
self.login_wait(private_user)
|
self.login_wait(private_user)
|
||||||
except UnknownError as e:
|
except UnknownError as e:
|
||||||
print(f"UnknownError occurred: {e.message.rstrip()}")
|
print(f"UnknownError occurred: {e.message.rstrip()}")
|
||||||
if any(x in e.message.lower() for x in ["missing", "post is unavailable"]) : # tweet is probably unavailable
|
print(f"skipping attempt to get tweet {id}...")
|
||||||
print(f"tweet {id} seems unavailable; skipping...")
|
|
||||||
return None
|
return None
|
||||||
print("treating like RateLimitReached and using the next scraper...")
|
# if any(x in e.message.lower() for x in ["missing", "post is unavailable", "delete"]) : # tweet is probably unavailable
|
||||||
|
# print(f"tweet {id} seems unavailable; skipping...")
|
||||||
|
# return None
|
||||||
|
# if "account owner limits" in e.message.lower(): # private tweet
|
||||||
|
# print("trying again as pvt-accessible...\n")
|
||||||
|
# return self.get_tweet(id, True)
|
||||||
|
# print("treating like RateLimitReached and using the next scraper...")
|
||||||
# traceback.print_exc()
|
# traceback.print_exc()
|
||||||
self.login_wait(private_user)
|
# self.login_wait(private_user)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if not private_user:
|
# if not private_user:
|
||||||
print("Unhandled exception occurred getting tweet!")
|
# print("Unhandled exception occurred getting tweet!")
|
||||||
|
# traceback.print_exc()
|
||||||
|
# print("trying again as pvt-accessible...\n")
|
||||||
|
# return self.get_tweet(id, True)
|
||||||
|
# else:
|
||||||
|
print("Unhandled exception occurred")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
print("trying again as pvt-accessible...\n")
|
print(f"skipping tweet {id}")
|
||||||
return self.get_tweet(id, True)
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
f"Unhandled exception occurred, tweet {id} is probably unavailable"
|
|
||||||
)
|
|
||||||
print(e)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# since MUST BE TIMEZONE AWARE
|
# since MUST BE TIMEZONE AWARE
|
||||||
@@ -187,10 +191,10 @@ class Scraper:
|
|||||||
|
|
||||||
cur = search.cursor
|
cur = search.cursor
|
||||||
except RateLimitReached:
|
except RateLimitReached:
|
||||||
print("RateLimitReached occurred")
|
print("RateLimitReached occurred getting tweets from user")
|
||||||
self.login_wait(uid in talent_lists.privated_accounts)
|
self.login_wait(uid in talent_lists.privated_accounts)
|
||||||
except UnknownError as e:
|
except UnknownError as e:
|
||||||
print(f"UnknownError occurred: {e.message.rstrip()}")
|
print(f"UnknownError occurred getting tweets from user: {e.message.rstrip()}")
|
||||||
print("treating like RateLimitReached...")
|
print("treating like RateLimitReached...")
|
||||||
self.login_wait(uid in talent_lists.privated_accounts)
|
self.login_wait(uid in talent_lists.privated_accounts)
|
||||||
sleep(5) # FIXME: temporary attempt to avoid scraper lock-up
|
sleep(5) # FIXME: temporary attempt to avoid scraper lock-up
|
||||||
|
|||||||
Reference in New Issue
Block a user