transition to using twint for ttweet construction

This commit is contained in:
muskit
2022-09-26 03:50:11 -07:00
committed by msk
parent 4575466874
commit a8e30809e7
2 changed files with 26 additions and 53 deletions
+4 -53
View File
@@ -25,7 +25,7 @@ def get_local_queue():
pass
## Returns all tweets (up to limit) scraped from a user ID.
def get_user_tweet_ids(id, limit=None):
def get_user_tweets(id, limit=None):
tweets = list()
c = twint.Config()
c.User_id = id
@@ -41,53 +41,8 @@ def get_user_tweet_ids(id, limit=None):
except:
print(f'Had trouble getting tweets from {user_str}')
ret = [x.id for x in tweets]
print(f'Scraped {len(ret)} tweets')
return ret
# Produce tweet IDs from talent_list.talents for the producer/consumer model.
# Put lists of tweet IDs as we create them.
# Put None to queue to indicate end.
async def produce_ids_from_talents(queue: asyncio.Queue, finished_users):
    """Producer side of the tweet-ID producer/consumer pair.

    Iterates over every talent ID in ``talents``. Talents already present in
    ``finished_users`` are skipped; for every other talent the list returned
    by ``get_user_tweet_ids`` is put on ``queue`` as a single item. After the
    last talent, ``None`` is put on the queue to signal end of stream.
    """
    def log(message):
        print(f'[prd] {message}')

    for talent_id in talents.keys():
        if talent_id in finished_users:
            log(f'@{util.get_username(talent_id)} already done, skipping...')
            continue

        # Scrape first, announce second, so the log line only appears once
        # the batch actually exists.
        batch = get_user_tweet_ids(talent_id)
        log(f'adding {util.get_username(talent_id)}\'s tweets to queue')
        await queue.put(batch)

    # End-of-stream sentinel for the consumer.
    await queue.put(None)
async def consume_ids_into_ttweets(queue: asyncio.Queue, queue_file: str):
def debug(str):
print(f'[con] {str}')
ttweets_dict = dict()
with open(queue_file, 'w') as f:
while True:
tweet_ids = await queue.get()
if tweet_ids is None: break
try:
for tweet_id in tweet_ids:
ttweet = await tt.TalentTweet.create_from_id(id=tweet_id)
if ttweet.is_cross_company():
ttweets_dict['tweet_id'] = ttweet
except:
debug(traceback.format_exc())
debug(f'Error retrieving Tweet #{tweet_id} from api!')
f.write('1\n') # 1 = error/incomplete
break
else:
f.write('0\n') # 0 = success
f.write('\n')
ttweets_dict = dict(sorted(ttweets_dict.items()))
for ttweet in ttweets_dict.values():
f.write(f'{ttweet.serialize()}\n')
return ttweets_dict
print(f'Scraped {len(tweets)} tweets')
return tweets
# If queue.txt doesn't exist, creates and populates it.
# Returns a list of sorted and filtered TalentTweets (should
@@ -120,11 +75,7 @@ async def get_cross_talent_tweets(queue_file):
except FileNotFoundError:
print('Couldn\'t find queue.txt.')
async_queue = asyncio.Queue()
consumer = asyncio.create_task(consume_ids_into_ttweets(queue=async_queue, queue_file=queue_file))
await produce_ids_from_talents(queue=async_queue, finished_users=finished_users)
ttweets_dict = await consumer
return ttweets_dict
# TODO: implement ordered cross-company ttweets dict creation using twint
def process_queue(file):
    """Placeholder for queue processing; currently only prints a TODO notice.

    ``file`` is accepted but not used yet.
    """
    notice = 'TODO: implement process_queue'
    print(notice)