Files
NijiHolo_EN_ID_Bot/src/ttweetqueue.py
T

158 lines
5.4 KiB
Python
Raw Normal View History

# TODO: move queue structures and file handling here
import os
import shutil
2023-08-17 02:28:29 -07:00
import traceback
import util
import talenttweet as tt
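# User timestamps line format (as read by __init__ and written by save_file):
# # {user_id} {UNIX_timestamp}
# Lines whose timestamp is -1 are skipped when the queue file is loaded.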
2024-01-25 16:29:01 -08:00
class TalentTweetQueue:
    """File-backed queue of TalentTweets keyed by tweet id.

    State lives in four files whose paths come from ``util.working_path``:

    * ``queue.txt`` -- ``# {user_id} {date}`` header lines followed by one
      serialized TalentTweet per line.
    * ``_queue_backup.txt`` -- copy of ``queue.txt``; its presence at
      start-up is treated as a sign that the previous run errored.
    * ``_current_ttweet.txt`` -- the tweet handed out by
      ``get_next_ttweet()`` that has not yet been confirmed via ``good()``.
    * ``finished_ttweets.txt`` -- one finished tweet id per line.

    The most recently constructed queue is published as
    ``TalentTweetQueue.instance``.
    """

    instance = None

    def __init__(self):
        TalentTweetQueue.instance = self

        self.queue_path = util.working_path(file="queue.txt")
        self.queue_backup_path = util.working_path(file="_queue_backup.txt")
        self.current_ttweet_path = util.working_path(file="_current_ttweet.txt")
        self.finished_ttweets_path = util.working_path(file="finished_ttweets.txt")
        self.is_good = True
        self.__sorted = False

        self.finished_user_dates: dict[int, str] = dict()
        self.ttweets_dict: dict[int, tt.TalentTweet] = dict()
        self.finished_ttweets: set[int] = set()

        ## file check, backup copy
        if os.path.exists(self.queue_backup_path):
            print("Found backup queue! We errored in the previous run.")
            shutil.copyfile(self.queue_backup_path, self.queue_path)
        elif os.path.exists(self.queue_path):
            print("Creating backup queue...")
            shutil.copyfile(self.queue_path, self.queue_backup_path)

        ## initialize structures
        self._load_user_dates()
        self._load_queued_ttweets()
        self._load_unfinished_ttweet()
        self._load_finished_ttweets()

    def _load_user_dates(self) -> None:
        """Read the leading ``# {user_id} {date}`` lines of the queue file.

        Reading stops at the first non-blank line that does not start with
        ``#``. Entries whose date is ``-1`` are ignored. A malformed header
        line is reported and skipped instead of aborting the whole load.
        """
        try:
            with open(self.queue_path, "r") as f:
                for line in f:
                    tokens = line.split()
                    if not tokens:
                        continue
                    if not tokens[0].startswith("#"):
                        print(f"Stopped finding user dates at {line}")
                        # reached end of accounts list
                        break
                    try:
                        user_id, date = int(tokens[1]), tokens[2]
                    except (IndexError, ValueError):
                        print(f"Skipping malformed user date line: {line!r}")
                        continue
                    if date != "-1":
                        self.finished_user_dates[user_id] = date
        except FileNotFoundError:
            # No queue file yet (e.g. first run): nothing to load.
            pass
        except (OSError, UnicodeError) as err:
            print(f"Could not read user dates from {self.queue_path}: {err}")

    def _load_queued_ttweets(self) -> None:
        """Deserialize every non-blank, non-``#`` line of the queue file."""
        try:
            with open(self.queue_path, "r") as f:
                for line in f:
                    tokens = line.split()
                    if not tokens or tokens[0].startswith("#"):
                        continue
                    ttweet = tt.TalentTweet.deserialize(line)
                    self.ttweets_dict[ttweet.tweet_id] = ttweet
            print(
                f"Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue."
            )
        except Exception:
            # Best effort: deserialize() may raise anything; report and keep
            # whatever was loaded so far. Unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            traceback.print_exc()

    def _load_unfinished_ttweet(self) -> None:
        """Re-queue the tweet left in ``_current_ttweet.txt``, if any."""
        if not os.path.exists(self.current_ttweet_path):
            return
        with open(self.current_ttweet_path, "r") as f:
            for line in f:
                # Lines read from a file always include their newline, so a
                # length check cannot detect blank lines; strip first.
                if not line.strip():
                    continue
                ttweet = tt.TalentTweet.deserialize(line)
                if ttweet.tweet_id not in self.ttweets_dict:
                    print(f"adding unfinished tweet {ttweet.tweet_id}")
                    self.ttweets_dict[ttweet.tweet_id] = ttweet
                    self.__sorted = False

    def _load_finished_ttweets(self) -> None:
        """Load finished tweet ids (one integer per line), skipping bad lines."""
        try:
            with open(self.finished_ttweets_path, "r") as f:
                for line in f:
                    entry = line.strip()
                    if not entry:
                        continue
                    try:
                        self.finished_ttweets.add(int(entry))
                    except ValueError:
                        print(f"Skipping malformed finished tweet id: {entry!r}")
        except FileNotFoundError:
            # No finished-tweets file yet: start with an empty set.
            pass
        except (OSError, UnicodeError) as err:
            print(
                f"Could not read finished tweets from {self.finished_ttweets_path}: {err}"
            )

    def is_empty(self) -> bool:
        """Return True when no tweets are queued."""
        return self.get_count() <= 0

    def add_ttweet(self, ttweet: "tt.TalentTweet") -> None:
        """Insert (or replace) ``ttweet`` under its ``tweet_id``."""
        self.ttweets_dict[ttweet.tweet_id] = ttweet
        self.__sorted = False

    def get_ttweet(self, id) -> "tt.TalentTweet":
        """Return the queued tweet with the given id; raises KeyError if absent."""
        return self.ttweets_dict[id]

    def get_next_ttweet(self) -> "tt.TalentTweet":
        """Pop the tweet with the smallest id and record it as in progress.

        The popped tweet is written to ``_current_ttweet.txt`` and
        ``is_good`` is cleared until ``good()`` is called.

        Raises:
            IndexError: if the queue is empty (state is left untouched).
        """
        if not self.ttweets_dict:
            raise IndexError("get_next_ttweet() called on an empty queue")
        self.is_good = False
        self.__sort_ttweets_dict()
        # The dict is sorted by key, so the first key is the smallest id.
        key = next(iter(self.ttweets_dict))
        ttweet = self.ttweets_dict.pop(key)
        with open(self.current_ttweet_path, "w") as f:
            f.write(ttweet.serialize())
        return ttweet

    def get_count(self) -> int:
        """Return the number of queued tweets."""
        return len(self.ttweets_dict)

    ## Call when the TalentTweet retrieved from get_next_ttweet() was
    # posted successfully.
    def good(self, tweet_id: int) -> None:
        """Mark ``tweet_id`` as finished, persist the queue, and set ``is_good``."""
        try:
            os.remove(self.current_ttweet_path)
        except FileNotFoundError:
            pass
        except OSError as err:
            # Keep this best-effort: failing to clean up must not block saving.
            print(f"Could not remove {self.current_ttweet_path}: {err}")
        self.add_finished_tweet(tweet_id)
        self.save_file()
        self.is_good = True

    # overwrite queue.txt
    def save_file(self, replace_backup=True) -> None:
        """Rewrite ``queue.txt`` from memory.

        Args:
            replace_backup: when true, first copy the current ``queue.txt``
                over the backup file. The copy is skipped if ``queue.txt``
                does not exist yet, so the first save of a fresh run works.
        """
        print("saving queue...", end="")
        if replace_backup and os.path.exists(self.queue_path):
            print("overwriting backup...", end="")
            shutil.copyfile(self.queue_path, self.queue_backup_path)
        self.__sort_ttweets_dict()
        with open(self.queue_path, "w") as f:
            # write dates
            for user_id, date in self.finished_user_dates.items():
                f.write(f"# {user_id} {date}\n")
            f.write("\n")
            # write sorted ttweets
            for ttweet in self.ttweets_dict.values():
                f.write(ttweet.serialize() + "\n")
        print("done")

    def add_finished_tweet(self, id) -> None:
        """Record ``id`` as finished in memory and append it to the finished file."""
        self.finished_ttweets.add(id)
        with open(self.finished_ttweets_path, "a") as f:
            f.write(f"{id}\n")

    def __sort_ttweets_dict(self) -> None:
        """Rebuild ``ttweets_dict`` in ascending key order if it may be unsorted."""
        if not self.__sorted:
            self.ttweets_dict = dict(sorted(self.ttweets_dict.items()))
            self.__sorted = True

    # destructor
    def __del__(self):
        # getattr: __init__ may have failed before is_good was assigned.
        if getattr(self, "is_good", False):
            print("Ended in good state, deleting backup queue...")
            try:
                os.remove(self.queue_backup_path)
            except FileNotFoundError:
                # No backup was ever created; nothing to delete.
                pass
        else:
            print("Ended in bad state, keeping backup queue.")