2022-09-24 17:56:58 -07:00
|
|
|
## The bot's catch-up mode
|
|
|
|
|
# Scan all accounts for cross-company interactions.
|
|
|
|
|
# Terminates when finished scanning and posting.
|
|
|
|
|
#
|
|
|
|
|
# Post at most one tweet per minute.
|
|
|
|
|
|
2022-09-26 02:44:26 -07:00
|
|
|
import traceback
|
|
|
|
|
import sys
|
2022-09-24 17:56:58 -07:00
|
|
|
import os
|
2022-09-25 03:39:15 -07:00
|
|
|
import asyncio
|
2022-09-24 17:56:58 -07:00
|
|
|
|
|
|
|
|
import twint
|
2022-09-26 02:44:26 -07:00
|
|
|
import tweepy
|
2022-09-24 17:56:58 -07:00
|
|
|
|
|
|
|
|
from util import *
|
|
|
|
|
from talent_lists import *
|
2022-09-25 18:31:50 -07:00
|
|
|
from twapi import TwAPI
|
2022-09-24 17:56:58 -07:00
|
|
|
import talenttweet as tt
|
|
|
|
|
|
2022-09-25 18:31:50 -07:00
|
|
|
def get_queue_file():
    """Return the path of ``queue.txt`` inside the project directory."""
    project_dir = util.get_project_dir()
    return f'{project_dir}/queue.txt'
|
|
|
|
|
|
2022-09-24 17:56:58 -07:00
|
|
|
def get_local_queue():
    """Placeholder for reading the local queue; not implemented yet.

    Currently does nothing and returns ``None``.
    """
    # Earlier sketch of the intended first step:
    # f = open(os.path.join(get_project_dir(), 'queue.txt'))
    return None
|
|
|
|
|
|
|
|
|
|
def get_user_tweets(id, limit=None):
    """Scrape a user's tweets with twint and return them as a list.

    Args:
        id: User ID of the account to scrape. (The name shadows the builtin
            ``id``; it is kept so existing keyword callers keep working.)
        limit: Value assigned to twint's ``Limit`` setting. Defaults to
            ``None``.

    Returns:
        The list that twint was configured to store its results in
        (``Store_object_tweets_list``). Presumably these are twint tweet
        objects rather than bare IDs -- TODO confirm against twint.

    Scraping is best effort: if twint raises, a message is printed and
    whatever was collected up to that point is returned.
    """
    tweets = []

    c = twint.Config()
    c.User_id = id
    c.Limit = limit
    c.Store_object = True
    c.Store_object_tweets_list = tweets
    c.Hide_output = True

    user_str = f'{util.get_username(id)}'
    print(f'Scraping tweets from {user_str}...')
    try:
        twint.run.Search(c)
    except Exception:
        # Deliberately not a bare ``except:`` -- KeyboardInterrupt and
        # SystemExit must still be able to stop a long-running scrape.
        print(f'Had trouble getting tweets from {user_str}')

    print(f'Scraped {len(tweets)} tweets')
    return tweets
|
2022-09-26 02:44:26 -07:00
|
|
|
|
|
|
|
|
async def get_cross_talent_tweets(queue_file):
    """Load the TalentTweets already serialized in ``queue_file``.

    Expected file layout:
      * a leading run of account status lines, ``# {user_id} {status_num}``;
      * then one serialized TalentTweet per line (blank lines and lines
        starting with ``#`` are skipped).

    Args:
        queue_file: Path of the queue file to read.

    Returns:
        A dict mapping ``tweet_id`` to the deserialized TalentTweet. It is
        empty when the file is missing or holds no tweets.

    A missing file is reported with a message and is not an error.
    Creating and populating the queue from scratch is still a TODO.
    """
    # Accounts whose status line marks them as finished (status_num == 0).
    # Not consumed yet; kept for the scan that the TODO below will add.
    finished_users = set()
    ttweets_dict = dict()

    # Populate structures with existing data from queue.txt
    try:
        print('Processing existing data in queue.txt...')
        with open(queue_file, 'r') as f:
            in_header = True
            for line in f:
                tokens = line.split()

                if in_header:
                    # LINE FORMAT: "# {user_id} {status_num}"
                    if len(tokens) == 3 and tokens[0].startswith('#'):
                        # Tokens are strings, so compare against '0', not 0.
                        if tokens[2] == '0':
                            finished_users.add(tokens[1])
                        continue
                    # Reached the end of the accounts list. Fall through so
                    # this very line is still considered as a tweet line
                    # instead of being silently dropped.
                    in_header = False

                if not tokens or tokens[0].startswith('#'):
                    continue
                ttweet = tt.TalentTweet.deserialize(line)
                ttweets_dict[ttweet.tweet_id] = ttweet
    except FileNotFoundError:
        print('Couldn\'t find queue.txt.')

    # TODO: implement ordered cross-company ttweets dict creation using twint
    return ttweets_dict
|
2022-09-26 02:44:26 -07:00
|
|
|
|
|
|
|
|
def process_queue(file):
    """Placeholder for posting the queued tweets; only prints a TODO notice.

    Planned behaviour (not implemented yet):
      * while Queue.txt still has lines:
          - try to deserialize its first line; exit the program with an
            error message if that fails
          - retry posting the tweet until the post succeeds
          - delete the serialized line from Queue.txt and save the file
      * once the queue is empty, post a tweet announcing that the archive
        run is done

    Args:
        file: Not used yet.
    """
    print('TODO: implement process_queue')
|
2022-09-25 03:39:15 -07:00
|
|
|
|
2022-09-25 04:39:43 -07:00
|
|
|
async def run():
    """Load the cross-company tweet queue and print every tweet in it.

    Intended overall flow (only the load-and-print part exists so far):
      * if Queue.txt exists, work through the tweets in it
      * otherwise:
          - look through every talent's tweets, keeping only cross-company
            tweets
          - sort them by tweet_id
          - create Queue.txt and save all tweets (serialized) there
          - post a tweet announcing archival intent
          - work through the tweets in Queue.txt
    """
    queue_path = get_queue_file()

    # Treat a None result as an empty queue so there is simply nothing to
    # print, instead of failing with AttributeError on ``.values()``.
    ttweet_dict = await get_cross_talent_tweets(queue_path) or {}
    for ttweet in ttweet_dict.values():
        print(ttweet)
|