(de)serialization and queuing work now
This commit is contained in:
+20
-40
@@ -8,9 +8,9 @@ import traceback
|
||||
import datetime
|
||||
import asyncio
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
import twint
|
||||
|
||||
from scraper import Scraper
|
||||
from util import *
|
||||
from talent_lists import *
|
||||
from twapi import TwAPI
|
||||
@@ -21,41 +21,12 @@ PROGRAM_ARGS = None
|
||||
safe_to_post_tweets = False
|
||||
errored = False
|
||||
|
||||
## Returns the list of tweets (up to limit) scraped for a user ID, optionally only those since since_date.
|
||||
def get_user_tweets(id, since_date=None, limit=None):
    """Scrape a user's tweets with twint and return the collected tweet objects.

    Args:
        id: User ID to scrape; assigned to the twint config's ``User_id``.
            The name shadows the builtin but is kept so existing keyword
            callers keep working.
        since_date: Optional date string. When given, the twint ``Since``
            filter is set to ``'<since_date> 00:00:00'``; ``None`` leaves the
            filter empty (no lower bound is passed to twint).
        limit: Optional value assigned to the twint config's ``Limit``.

    Returns:
        The list that twint was configured to store tweets into. If the
        search raised, the list contains whatever had been stored before the
        failure (possibly nothing).

    Side effects:
        Prints progress to stdout. If the search raises, prints the
        traceback and sets the module-level ``safe_to_post_tweets`` to
        ``False``.
    """
    global safe_to_post_tweets

    tweets = []
    since = '' if since_date is None else f'{since_date} 00:00:00'

    config = twint.Config()
    config.User_id = id
    config.Limit = limit
    config.Store_object = True
    config.Store_object_tweets_list = tweets
    config.Hide_output = True
    config.Since = since

    user_str = f'@{util.get_username_local(id)}'
    print(f'Scraping tweets from {user_str} since {"forever ago" if since == "" else since}...')
    try:
        twint.run.Search(config)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit
        # must propagate so the caller's own interrupt handling still runs.
        print(f'Had trouble getting tweets from {user_str}')
        safe_to_post_tweets = False
        traceback.print_exc()

    # A tweet counts as a quote tweet when it carries a non-empty quote URL.
    qrt_count = sum(
        1 for twt in tweets
        if isinstance(twt.quote_url, str) and twt.quote_url != ''
    )

    print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.')
    return tweets
|
||||
|
||||
# Returns a list of sorted and filtered TalentTweets (should
|
||||
# be equivalent to queue.txt)
|
||||
async def get_cross_talent_tweets():
|
||||
global safe_to_post_tweets
|
||||
|
||||
scraper = Scraper()
|
||||
queue = ttq.TalentTweetQueue.instance
|
||||
|
||||
# Begin getting tweets from online
|
||||
@@ -64,19 +35,28 @@ async def get_cross_talent_tweets():
|
||||
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
|
||||
print(f'[{i+1}/{len(talent_lists.talents)}] {talent_username}-----------------------------------')
|
||||
try:
|
||||
tweets = get_user_tweets(talent_id, since_date=queue.finished_user_dates.get(talent_id, None))
|
||||
for tweet in tweets:
|
||||
if tweet.id not in queue.ttweets_dict and tweet.id not in queue.finished_ttweets:
|
||||
ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet)
|
||||
if ttweet.is_cross_company():
|
||||
queue.add_ttweet(ttweet)
|
||||
# tweets = get_user_tweets(talent_id, since_date=queue.finished_user_dates.get(talent_id, None))
|
||||
since_date = queue.finished_user_dates.get(talent_id, None)
|
||||
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
|
||||
for ttweet in ttweets:
|
||||
if ttweet.tweet_id not in queue.ttweets_dict \
|
||||
and ttweet.tweet_id not in queue.finished_ttweets \
|
||||
and ttweet.is_cross_company():
|
||||
queue.add_ttweet(ttweet)
|
||||
except KeyboardInterrupt as e:
|
||||
raise e
|
||||
except:
|
||||
print('Error occurred processing tweet data.')
|
||||
safe_to_post_tweets = False
|
||||
print(traceback.format_exc())
|
||||
queue.finished_user_dates[talent_id] = '2000-01-01'
|
||||
traceback.print_exc()
|
||||
if talent_id not in queue.finished_user_dates:
|
||||
queue.finished_user_dates[talent_id] = '2023-04-26' # date is when bot token first got revoked
|
||||
else:
|
||||
queue.finished_user_dates[talent_id] = util.get_current_date()
|
||||
queue.save_file()
|
||||
except KeyboardInterrupt:
|
||||
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
|
||||
queue.save_file()
|
||||
except:
|
||||
print('Unhandled error occurred while pulling tweets.')
|
||||
traceback.print_exc()
|
||||
|
||||
Reference in New Issue
Block a user