diff --git a/src/catchup.py b/src/catchup.py
index 8c1ecf3..fd8ec3b 100644
--- a/src/catchup.py
+++ b/src/catchup.py
@@ -25,7 +25,7 @@ def get_local_queue():
     pass
 
-## Returns the ID of all tweets (up to limit) from a user ID.
-def get_user_tweet_ids(id, limit=None):
+## Returns all tweets (up to limit) from a user ID.
+def get_user_tweets(id, limit=None):
     tweets = list()
     c = twint.Config()
     c.User_id = id
@@ -41,53 +41,8 @@ def get_user_tweet_ids(id, limit=None):
     except:
         print(f'Had trouble getting tweets from {user_str}')
 
-    ret = [x.id for x in tweets]
-    print(f'Scraped {len(ret)} tweets')
-    return ret
-
-# Produce tweet IDs from talent_list.talents for the producer/consumer model.
-# Put lists of tweet IDs as we create them.
-# Put None to queue to indicate end.
-async def produce_ids_from_talents(queue: asyncio.Queue, finished_users):
-    def debug(str):
-        print(f'[prd] {str}')
-
-    for talent_id in talents.keys():
-        if talent_id in finished_users:
-            debug(f'@{util.get_username(talent_id)} already done, skipping...')
-        else:
-            tweet_ids = get_user_tweet_ids(talent_id)
-            debug(f'adding {util.get_username(talent_id)}\'s tweets to queue')
-            await queue.put(tweet_ids)
-
-    await queue.put(None)
-
-async def consume_ids_into_ttweets(queue: asyncio.Queue, queue_file: str):
-    def debug(str):
-        print(f'[con] {str}')
-
-    ttweets_dict = dict()
-    with open(queue_file, 'w') as f:
-        while True:
-            tweet_ids = await queue.get()
-            if tweet_ids is None: break
-            try:
-                for tweet_id in tweet_ids:
-                    ttweet = await tt.TalentTweet.create_from_id(id=tweet_id)
-                    if ttweet.is_cross_company():
-                        ttweets_dict['tweet_id'] = ttweet
-            except:
-                debug(traceback.format_exc())
-                debug(f'Error retrieving Tweet #{tweet_id} from api!')
-                f.write('1\n') # 1 = error/incomplete
-                break
-        else:
-            f.write('0\n') # 0 = success
-        f.write('\n')
-        ttweets_dict = dict(sorted(ttweets_dict.items()))
-        for ttweet in ttweets_dict.values():
-            f.write(f'{ttweet.serialize()}\n')
-    return ttweets_dict
+    print(f'Scraped {len(tweets)} tweets')
+    return tweets
 
 # If queue.txt doesn't exist, creates and populates it.
 # Returns a list of sorted and filtered TalentTweets (should
@@ -120,11 +75,7 @@ async def get_cross_talent_tweets(queue_file):
     except FileNotFoundError:
         print('Couldn\'t find queue.txt.')
 
-    async_queue = asyncio.Queue()
-    consumer = asyncio.create_task(consume_ids_into_ttweets(queue=async_queue, queue_file=queue_file))
-    await produce_ids_from_talents(queue=async_queue, finished_users=finished_users)
-    ttweets_dict = await consumer
-    return ttweets_dict
+    # TODO: implement ordered cross-company ttweets dict creation using twint
 
 def process_queue(file):
     print('TODO: implement process_queue')
diff --git a/src/talenttweet.py b/src/talenttweet.py
index 4e14dd7..0c499b8 100644
--- a/src/talenttweet.py
+++ b/src/talenttweet.py
@@ -2,6 +2,7 @@ from datetime import datetime
 import platform
 import pytz
 
+import twint
 from twapi import *
 import talent_lists
 
@@ -41,6 +42,33 @@ class TalentTweet:
             date_time=date_time, mrq=(mentions, reply_to, quote_retweeted)
         )
 
+    @staticmethod
+    def create_from_twint_tweet(tweet):
+        # Builds a TalentTweet from a tweet object scraped by twint.
+        # qrt: constructed from the tweet ID alone
+        if tweet.quote_url != '':
+            return TalentTweet(tweet_id=tweet.id)
+
+        # MRQ (Q is guaranteed to be None)
+        mentions = set()
+        reply_to = None
+
+        # reply_to/mentions
+        # A reply's ID differs from the ID of the conversation it belongs to.
+        # NOTE(review): set() needs hashable entries -- confirm what twint
+        # stores in reply_to/mentions.
+        is_reply = tweet.id != int(tweet.conversation_id)
+        if is_reply:
+            reply_to = tweet.reply_to[0]
+            mentions = set(tweet.reply_to[1:])
+        # update() takes an iterable; add() accepts exactly one element.
+        mentions.update(tweet.mentions)
+
+        # Named date_time so the imported datetime class is not shadowed.
+        date_time = datetime.strptime(tweet.datetime, '%Y-%m-%d %H:%M:%S %Z')
+        return TalentTweet(tweet_id=tweet.id, author_id=tweet.user_id, date_time=date_time, mrq=(mentions, reply_to, None))
+
+
     @staticmethod
     async def create_from_id(id):
         resp = await TwAPI.instance.get_tweet_response(id)