2022-09-28 20:00:02 -07:00
|
|
|
# TODO: move queue structures and file handling here
|
|
|
|
|
import os
|
|
|
|
|
import shutil
|
2023-08-17 02:28:29 -07:00
|
|
|
import traceback
|
2022-09-28 20:00:02 -07:00
|
|
|
|
|
|
|
|
import util
|
|
|
|
|
import talenttweet as tt
|
|
|
|
|
|
|
|
|
|
# User timestamps line format:
|
2023-01-11 17:11:41 -08:00
|
|
|
# {user_id} {status_num} {UNIX_timestamp}
|
2022-09-28 20:00:02 -07:00
|
|
|
|
|
|
|
|
class TalentTweetQueue:
    """File-backed queue of TalentTweets waiting to be posted.

    State is kept in four files:

    * ``queue_path`` -- leading ``#`` lines holding per-user dates, followed
      by one serialized TalentTweet per line.
    * ``queue_backup_path`` -- copy of the queue file; its presence at
      start-up is treated as "the previous run did not end cleanly".
    * ``current_ttweet_path`` -- the single TalentTweet handed out by
      ``get_next_ttweet()`` that has not yet been confirmed via ``good()``.
    * ``finished_ttweets_path`` -- one finished tweet id per line.

    The most recently constructed object is published as
    ``TalentTweetQueue.instance``.
    """

    instance = None

    def __init__(self):
        """Resolve file paths, restore/create the backup and load all state."""
        TalentTweetQueue.instance = self

        self.queue_path = util.get_queue_path()
        self.queue_backup_path = util.get_queue_backup_path()
        project_dir = util.get_project_dir()
        self.current_ttweet_path = f'{project_dir}/_current_ttweet.txt'
        self.finished_ttweets_path = f'{project_dir}/finished_ttweets.txt'

        self.is_good = True
        self.__sorted = False
        # int(second token) -> third token (kept as a string) for every '#'
        # line of the queue file whose third token is not '-1'.
        self.finished_user_dates = dict()
        # tweet_id -> TalentTweet
        self.ttweets_dict = dict()
        # ids read from / appended to finished_ttweets_path
        self.finished_ttweets = list()

        ## file check, backup copy
        if os.path.exists(self.queue_backup_path):
            # A leftover backup wins over the (possibly half-written) queue.
            print('Found backup queue! We errored in the previous run.')
            shutil.copyfile(self.queue_backup_path, self.queue_path)
        elif os.path.exists(self.queue_path):
            print('Creating backup queue...')
            shutil.copyfile(self.queue_path, self.queue_backup_path)

        ## initialize structures
        self._load_user_dates()
        self._load_queued_ttweets()
        self._load_finished_ttweets()

    def _load_user_dates(self):
        """Fill ``finished_user_dates`` from the leading '#' lines of the queue file."""
        try:
            with open(self.queue_path, 'r') as f:
                for line in f:
                    tokens = line.split()
                    if not tokens:
                        continue
                    if tokens[0][0] != '#':
                        print(f'Stopped finding user dates at {line}')
                        # reached end of accounts list
                        break
                    try:
                        user_id, date = int(tokens[1]), tokens[2]
                    except (IndexError, ValueError):
                        # One bad line must not discard the accounts after it.
                        print(f'Skipping malformed user date line: {line.rstrip()}')
                        continue
                    if date != '-1':
                        self.finished_user_dates[user_id] = date
        except FileNotFoundError:
            # No queue file yet: nothing to load.
            pass
        except Exception:
            # Best effort: report the problem but keep constructing.
            traceback.print_exc()

    def _load_queued_ttweets(self):
        """Fill ``ttweets_dict`` from the non-'#' lines of the queue file."""
        try:
            with open(self.queue_path, 'r') as f:
                # Get existing queued TalentTweets
                for line in f:
                    tokens = line.split()
                    if not tokens or tokens[0][0] == '#':
                        continue
                    ttweet = tt.TalentTweet.deserialize(line)
                    self.ttweets_dict[ttweet.tweet_id] = ttweet
            print(f'Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue.')
        except Exception:
            # Best effort: report the problem but keep constructing.
            traceback.print_exc()

    def _load_finished_ttweets(self):
        """Fill ``finished_ttweets`` with the ids stored in the finished file."""
        try:
            with open(self.finished_ttweets_path, 'r') as f:
                for line in f:
                    entry = line.strip()
                    if not entry:
                        continue
                    try:
                        self.finished_ttweets.append(int(entry))
                    except ValueError:
                        # One bad line must not discard the ids after it.
                        print(f'Skipping malformed finished tweet id: {entry}')
        except FileNotFoundError:
            # Nothing has been finished yet.
            pass
        except Exception:
            # Best effort: report the problem but keep constructing.
            traceback.print_exc()

    def is_empty(self):
        """Return True when no TalentTweets are queued."""
        return self.get_count() <= 0

    def add_ttweet(self, ttweet):
        """Queue ``ttweet`` under its ``tweet_id``, replacing any existing entry."""
        self.ttweets_dict[ttweet.tweet_id] = ttweet
        self.__sorted = False

    def get_ttweet(self, id):
        """Return the queued TalentTweet with the given id.

        Raises:
            KeyError: if no tweet with that id is queued.
        """
        return self.ttweets_dict[id]

    def get_next_ttweet(self):
        """Hand out the next TalentTweet and mark the queue as "not good".

        If a current-tweet file exists, the tweet stored in it is returned
        again (and dropped from the in-memory queue if present). Otherwise
        the tweet with the smallest key is removed from the queue, written to
        the current-tweet file and returned.

        Raises:
            IndexError: if there is no current-tweet file and the queue is
                empty.
        """
        self.is_good = False

        if os.path.exists(self.current_ttweet_path):
            with open(self.current_ttweet_path, 'r') as f:
                ttweet = tt.TalentTweet.deserialize(f.readline())
            self.ttweets_dict.pop(ttweet.tweet_id, None)
            return ttweet

        if not self.ttweets_dict:
            raise IndexError('TalentTweet queue is empty')

        self.__sort_ttweets_dict()
        # The dict is sorted by key, so the first key is the smallest one.
        key = next(iter(self.ttweets_dict))
        ttweet = self.ttweets_dict.pop(key)
        with open(self.current_ttweet_path, 'w') as f:
            f.write(ttweet.serialize())
        return ttweet

    def get_count(self):
        """Return the number of queued TalentTweets."""
        return len(self.ttweets_dict)

    ## Call when the TalentTweet retrieved from get_next_ttweet() was
    # posted successfully.
    def good(self):
        """Record the current tweet as finished and persist the queue.

        Reads the tweet from the current-tweet file, appends its id to the
        finished list, deletes the current-tweet file, rewrites the queue
        file and marks the queue as good again.

        Raises:
            FileNotFoundError: if there is no current-tweet file.
        """
        with open(self.current_ttweet_path, 'r') as f:
            ttweet = tt.TalentTweet.deserialize(f.readline())

        self.add_finished_tweet(ttweet.tweet_id)
        os.remove(self.current_ttweet_path)
        self.save_file()
        self.is_good = True

    # overwrite queue.txt
    def save_file(self):
        """Back up the existing queue file, then rewrite it from memory."""
        print('saving file...', end='')
        # On a first run there is no queue file to back up yet.
        if os.path.exists(self.queue_path):
            shutil.copyfile(self.queue_path, self.queue_backup_path)
        self.__sort_ttweets_dict()
        with open(self.queue_path, 'w') as f:
            # write dates
            for (id, date) in self.finished_user_dates.items():
                f.write(f'# {id} {date}\n')

            f.write('\n')

            # write sorted ttweets
            for ttweet in self.ttweets_dict.values():
                f.write(ttweet.serialize() + '\n')
        print('done')

    def add_finished_tweet(self, id):
        """Remember ``id`` as finished, in memory and in the finished file."""
        self.finished_ttweets.append(id)
        with open(self.finished_ttweets_path, 'a') as f:
            f.write(f'{id}\n')

    def __sort_ttweets_dict(self):
        """Re-order ``ttweets_dict`` by key unless it is already sorted."""
        if not self.__sorted:
            self.ttweets_dict = dict(sorted(self.ttweets_dict.items()))
            self.__sorted = True

    # destructor
    def __del__(self):
        """Delete the backup queue if the object ends in a good state."""
        # is_good may be missing if __init__ raised before assigning it.
        if getattr(self, 'is_good', False):
            print('Ended in good state, deleting backup queue...')
            try:
                os.remove(self.queue_backup_path)
            except FileNotFoundError:
                # No backup was ever created (e.g. the queue file never existed).
                pass
        else:
            print('Ended in bad state, keeping backup queue.')
|