transition to using twint for ttweet construction
This commit is contained in:
+4
-53
@@ -25,7 +25,7 @@ def get_local_queue():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
## Returns the ID of all tweets (up to limit) from a user ID.
|
## Returns all tweets (up to limit) scraped for a user ID.
|
||||||
def get_user_tweet_ids(id, limit=None):
|
def get_user_tweets(id, limit=None):
|
||||||
tweets = list()
|
tweets = list()
|
||||||
c = twint.Config()
|
c = twint.Config()
|
||||||
c.User_id = id
|
c.User_id = id
|
||||||
@@ -41,53 +41,8 @@ def get_user_tweet_ids(id, limit=None):
|
|||||||
except:
|
except:
|
||||||
print(f'Had trouble getting tweets from {user_str}')
|
print(f'Had trouble getting tweets from {user_str}')
|
||||||
|
|
||||||
ret = [x.id for x in tweets]
|
print(f'Scraped {len(tweets)} tweets')
|
||||||
print(f'Scraped {len(ret)} tweets')
|
return tweets
|
||||||
return ret
|
|
||||||
|
|
||||||
# Produce tweet IDs from talent_list.talents for the producer/consumer model.
|
|
||||||
# Put lists of tweet IDs as we create them.
|
|
||||||
# Put None to queue to indicate end.
|
|
||||||
async def produce_ids_from_talents(queue: asyncio.Queue, finished_users):
    """Feed per-talent tweet-ID batches into ``queue`` (producer side).

    Walks every key of ``talents``. Talents found in ``finished_users`` are
    only logged; for the others, the result of ``get_user_tweet_ids`` is put
    on the queue as a single item. A final ``None`` is always enqueued so the
    consumer knows no more batches are coming.

    Args:
        queue: asyncio queue that receives one batch per talent, then ``None``.
        finished_users: container of talent IDs to skip (tested with ``in``).
    """
    def log(message):
        print(f'[prd] {message}')

    for talent_id in talents.keys():
        # Guard clause: nothing to enqueue for talents already handled.
        if talent_id in finished_users:
            log(f'@{util.get_username(talent_id)} already done, skipping...')
            continue

        batch = get_user_tweet_ids(talent_id)
        log(f'adding {util.get_username(talent_id)}\'s tweets to queue')
        await queue.put(batch)

    # End-of-stream sentinel.
    await queue.put(None)
|
|
||||||
|
|
||||||
async def consume_ids_into_ttweets(queue: asyncio.Queue, queue_file: str):
    """Consume tweet-ID batches from ``queue`` and persist cross-company tweets.

    Each queue item is an iterable of tweet IDs; ``None`` marks the end of the
    stream. Every ID is turned into a ``tt.TalentTweet`` via
    ``create_from_id`` and kept when ``is_cross_company()`` is true.

    ``queue_file`` is overwritten with:
        * one status line: ``0`` when every batch was processed, ``1`` when
          processing stopped early because of an error,
        * one blank line,
        * one ``serialize()`` line per kept tweet, ordered by tweet ID.

    Args:
        queue: asyncio queue fed by the producer.
        queue_file: path of the file to (re)write.

    Returns:
        dict mapping tweet ID -> TalentTweet, ordered by tweet ID.
    """
    def debug(msg):
        print(f'[con] {msg}')

    ttweets_dict = dict()
    failed = False
    with open(queue_file, 'w') as f:
        while not failed:
            tweet_ids = await queue.get()
            if tweet_ids is None:
                break

            # Pre-bind so the error message below is always printable, even if
            # iterating the batch itself is what fails.
            tweet_id = None
            try:
                for tweet_id in tweet_ids:
                    ttweet = await tt.TalentTweet.create_from_id(id=tweet_id)
                    if ttweet.is_cross_company():
                        # Key by the actual ID; a constant key would keep only
                        # the most recent tweet.
                        ttweets_dict[tweet_id] = ttweet
            except Exception:
                # `Exception` rather than a bare except so task cancellation
                # and KeyboardInterrupt still propagate.
                debug(traceback.format_exc())
                debug(f'Error retrieving Tweet #{tweet_id} from api!')
                failed = True

        # Exactly one status line, whichever way the loop ended.
        f.write('1\n' if failed else '0\n')  # 1 = error/incomplete, 0 = success
        f.write('\n')

        ttweets_dict = dict(sorted(ttweets_dict.items()))
        for ttweet in ttweets_dict.values():
            f.write(f'{ttweet.serialize()}\n')
    return ttweets_dict
|
|
||||||
|
|
||||||
# If queue.txt doesn't exist, creates and populates it.
|
# If queue.txt doesn't exist, creates and populates it.
|
||||||
# Returns a list of sorted and filtered TalentTweets (should
|
# Returns a list of sorted and filtered TalentTweets (should
|
||||||
@@ -120,11 +75,7 @@ async def get_cross_talent_tweets(queue_file):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print('Couldn\'t find queue.txt.')
|
print('Couldn\'t find queue.txt.')
|
||||||
|
|
||||||
async_queue = asyncio.Queue()
|
# TODO: implement ordered cross-company ttweets dict creation using twint
|
||||||
consumer = asyncio.create_task(consume_ids_into_ttweets(queue=async_queue, queue_file=queue_file))
|
|
||||||
await produce_ids_from_talents(queue=async_queue, finished_users=finished_users)
|
|
||||||
ttweets_dict = await consumer
|
|
||||||
return ttweets_dict
|
|
||||||
|
|
||||||
def process_queue(file):
|
def process_queue(file):
|
||||||
print('TODO: implement process_queue')
|
print('TODO: implement process_queue')
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
|||||||
import platform
|
import platform
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
import twint
|
||||||
|
|
||||||
from twapi import *
|
from twapi import *
|
||||||
import talent_lists
|
import talent_lists
|
||||||
@@ -41,6 +42,27 @@ class TalentTweet:
|
|||||||
date_time=date_time, mrq=(mentions, reply_to, quote_retweeted)
|
date_time=date_time, mrq=(mentions, reply_to, quote_retweeted)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
def create_from_twint_tweet(tweet):
    """Build a TalentTweet from a tweet object scraped by twint.

    A tweet with a non-empty ``quote_url`` is returned as a bare
    ``TalentTweet(tweet_id=...)``. Any other tweet gets its author, timestamp
    and MRQ tuple filled in, with the quote slot set to ``None``.

    Args:
        tweet: twint tweet object. Reads ``id``, ``quote_url``,
            ``conversation_id``, ``reply_to``, ``mentions``, ``datetime`` and
            ``user_id``.
            NOTE(review): ``reply_to`` / ``mentions`` entries are placed in a
            set, so they must be hashable -- confirm against the twint
            version in use.

    Returns:
        The constructed TalentTweet.

    Raises:
        ValueError: if ``tweet.datetime`` does not match
            ``'%Y-%m-%d %H:%M:%S %Z'``.
    """
    # Quote retweet: only the tweet ID is recorded on this path.
    if tweet.quote_url != '':
        return TalentTweet(tweet_id=tweet.id)

    # MRQ (Q is guaranteed to be None from here on).
    mentions = set()
    reply_to = None

    # The tweet that starts a conversation has id == conversation_id, so a
    # reply is any tweet whose id differs from its conversation_id.
    is_reply = tweet.id != int(tweet.conversation_id)
    if is_reply and tweet.reply_to:
        reply_to = tweet.reply_to[0]
        mentions = set(tweet.reply_to[1:])
    # update() takes any number of mentions (including none); add(*...) would
    # raise TypeError unless there was exactly one.
    mentions.update(tweet.mentions)

    # Keep the local name distinct from the imported `datetime` class;
    # rebinding `datetime` inside this function would make the call below
    # raise UnboundLocalError.
    date_time = datetime.strptime(tweet.datetime, '%Y-%m-%d %H:%M:%S %Z')
    return TalentTweet(
        tweet_id=tweet.id,
        author_id=tweet.user_id,
        date_time=date_time,
        mrq=(mentions, reply_to, None),
    )
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def create_from_id(id):
|
async def create_from_id(id):
|
||||||
resp = await TwAPI.instance.get_tweet_response(id)
|
resp = await TwAPI.instance.get_tweet_response(id)
|
||||||
|
|||||||
Reference in New Issue
Block a user