Move working files into their own directory

This commit is contained in:
muskit
2024-01-25 16:29:01 -08:00
parent 22743c58ef
commit bfdcaf37fa
8 changed files with 280 additions and 188 deletions
+2 -6
View File
@@ -143,9 +143,5 @@ cython_debug/
.vscode .vscode
# project-specific # project-specific
*.png run/
*.json *.json
queue.txt
_queue_backup.txt
finished_ttweets.txt
_current_ttweet.txt
+1 -1
View File
@@ -53,7 +53,7 @@ This is the authentication token obtained from a browser when signed in on the T
``` ```
web_auth_token= web_auth_token=
``` ```
### Example contents of `.env` without values ### Example `.env` without values
``` ```
scraper_username0= scraper_username0=
scraper_password0= scraper_password0=
+10 -9
View File
@@ -1,23 +1,24 @@
import os
from dotenv import dotenv_values from dotenv import dotenv_values
from util import working_path
## Track multiple accounts in a pool, cycling to the next one when requested. ## Track multiple accounts in a pool, cycling to the next one when requested.
class AccountPool: class AccountPool:
def __init__(self):
    """Load numbered scraper credentials from the working directory's `.env` file.

    Pairs are read as `scraper_username0`/`scraper_password0`,
    `scraper_username1`/`scraper_password1`, ... and collection stops at the
    first index for which either key is absent.
    """
    self.__accounts: list[tuple[str, str]] = []
    self.__idx = 0

    env = dotenv_values(working_path(file=".env"))
    slot = 0
    while True:
        user_key = f"scraper_username{slot}"
        pass_key = f"scraper_password{slot}"
        if user_key not in env or pass_key not in env:
            break
        self.__accounts.append((env[user_key], env[pass_key]))
        slot += 1
def use_index(self, idx):
    """Make `idx` the active position and return whatever `current()` yields for it."""
    self.__idx = idx
    selected = self.current()
    return selected
@@ -26,7 +27,7 @@ class AccountPool:
if 0 <= self.__idx < len(self.__accounts): if 0 <= self.__idx < len(self.__accounts):
return self.__accounts[self.__idx] return self.__accounts[self.__idx]
return None return None
def next(self) -> tuple[str, str] | None: def next(self) -> tuple[str, str] | None:
self.__idx += 1 self.__idx += 1
if self.__idx >= len(self.__accounts): if self.__idx >= len(self.__accounts):
+59 -44
View File
@@ -19,6 +19,7 @@ PROGRAM_ARGS = None
safe_to_post_tweets = True safe_to_post_tweets = True
scraper: Scraper scraper: Scraper
# Updates TTweetQueue # Updates TTweetQueue
async def get_cross_tweets_online(): async def get_cross_tweets_online():
global safe_to_post_tweets global safe_to_post_tweets
@@ -26,91 +27,101 @@ async def get_cross_tweets_online():
global scraper global scraper
safe_to_post_tweets = True safe_to_post_tweets = True
dbg_curr_user = '' dbg_curr_user = ""
# Begin getting tweets from online # Begin getting tweets from online
print('Pulling tweets from online!') print("Pulling tweets from online!")
try: try:
for i, (talent_id, talent_username) in enumerate(talents.items()): for i, (talent_id, talent_username) in enumerate(talents.items()):
print(f'[{i+1}/{len(talents)}] {talent_username}-----------------------------------') print(
dbg_curr_user = f'{talent_id}: {talent_username}' f"[{i+1}/{len(talents)}] {talent_username}-----------------------------------"
)
dbg_curr_user = f"{talent_id}: {talent_username}"
try: try:
since_date = queue.finished_user_dates.get(talent_id, None) since_date = queue.finished_user_dates.get(talent_id, None)
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date) ttweets = scraper.get_cross_ttweets_from_user(
print(f'got {len(ttweets)} TalentTweets') talent_username, since_date=since_date
)
print(f"got {len(ttweets)} TalentTweets")
for ttweet in ttweets: for ttweet in ttweets:
if ttweet.tweet_id not in queue.finished_ttweets \ if (
and ttweet.is_cross_company(): ttweet.tweet_id not in queue.finished_ttweets
and ttweet.is_cross_company()
):
queue.add_ttweet(ttweet) queue.add_ttweet(ttweet)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
except Exception as e: except Exception as e:
print('Unhandled error occurred processing tweet data.') print("Unhandled error occurred processing tweet data.")
safe_to_post_tweets = False safe_to_post_tweets = False
raise e raise e
else: else:
queue.finished_user_dates[talent_id] = get_current_date() queue.finished_user_dates[talent_id] = get_current_date()
queue.save_file() queue.save_file()
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!') print(
"Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!"
)
queue.save_file() queue.save_file()
raise e raise e
except: except:
print('Unhandled error occurred while pulling tweets.') print("Unhandled error occurred while pulling tweets.")
traceback.print_exc() traceback.print_exc()
with open("error_catchup.txt", "a") as f: with open(working_path(file="error_catchup.txt"), "a") as f:
f.write(f'Error getting tweets from user {dbg_curr_user}\n') f.write(f"Error getting tweets from user {dbg_curr_user}\n")
traceback.print_exc(file=f) traceback.print_exc(file=f)
safe_to_post_tweets = False safe_to_post_tweets = False
else: else:
print('Successfully saved all tweets from online!') print("Successfully saved all tweets from online!")
queue.save_file() queue.save_file()
# return False = we posted at least one ttweet # return False = we posted at least one ttweet
# return True = we didn't post a single ttweet # return True = we didn't post a single ttweet
async def process_queue() -> bool: async def process_queue() -> bool:
''' """
Go through the queue and post stored TalentTweets. Go through the queue and post stored TalentTweets.
''' """
global scraper global scraper
global queue global queue
queued_ttweets_count = queue.get_count() queued_ttweets_count = queue.get_count()
WAIT_TIME = 60*15 WAIT_TIME = 60 * 15
ttweets_posted = 0 ttweets_posted = 0
if queued_ttweets_count == 0: if queued_ttweets_count == 0:
print('Posting queue is empty!') print("Posting queue is empty!")
return True return True
try: try:
while not queue.is_empty(): while not queue.is_empty():
ttweet = queue.get_next_ttweet() ttweet = queue.get_next_ttweet()
if ttweet.tweet_id in queue.finished_ttweets: if ttweet.tweet_id in queue.finished_ttweets:
print('skipping finished tweet...') print("skipping finished tweet...")
queue.good(ttweet.tweet_id) queue.good(ttweet.tweet_id)
continue continue
tweet_was_successful = await TwAPI.instance.post_ttweet(ttweet) tweet_was_successful = await TwAPI.instance.post_ttweet(ttweet)
print('running queue.good()...') print("running queue.good()...")
queue.good(ttweet.tweet_id) queue.good(ttweet.tweet_id)
if tweet_was_successful: if tweet_was_successful:
ttweets_posted += 1 ttweets_posted += 1
print(f'({ttweets_posted}/{queued_ttweets_count}) done') print(f"({ttweets_posted}/{queued_ttweets_count}) done")
if not queue.is_empty(): if not queue.is_empty():
print(f'resting for {WAIT_TIME}s...') print(f"resting for {WAIT_TIME}s...")
await asyncio.sleep(WAIT_TIME-5) await asyncio.sleep(WAIT_TIME - 5)
print('5 second warning!') print("5 second warning!")
await asyncio.sleep(5) await asyncio.sleep(5)
except Exception as e: except Exception as e:
print('Unhandled error occurred while posting tweets from queue.') print("Unhandled error occurred while posting tweets from queue.")
traceback.print_exc() traceback.print_exc()
if ttweets_posted > 0: if ttweets_posted > 0:
return False return False
return True return True
# return True = no problems # return True = no problems
# return False = issue occurred where we couldn't post all past tweets properly # return False = issue occurred where we couldn't post all past tweets properly
async def run(PROGRAM_ARGS): async def run(PROGRAM_ARGS):
@@ -124,63 +135,67 @@ async def run(PROGRAM_ARGS):
# post tweets given in command line first # post tweets given in command line first
if PROGRAM_ARGS.post_id is not None and len(PROGRAM_ARGS.post_id) > 0: if PROGRAM_ARGS.post_id is not None and len(PROGRAM_ARGS.post_id) > 0:
PROGRAM_ARGS.post_id.sort() PROGRAM_ARGS.post_id.sort()
print('Posting specified tweets first.') print("Posting specified tweets first.")
for id in PROGRAM_ARGS.post_id: for id in PROGRAM_ARGS.post_id:
try: try:
i = int(id) i = int(id)
except ValueError: except ValueError:
print(f'Invalid tweet {id}!') print(f"Invalid tweet {id}!")
continue continue
posted = await TwAPI.instance.post_ttweet_by_id(i) posted = await TwAPI.instance.post_ttweet_by_id(i)
if posted: if posted:
queue.add_finished_tweet(i) queue.add_finished_tweet(i)
print('Successfully posted tweet. Sleeping for 5 minutes') print("Successfully posted tweet. Sleeping for 5 minutes")
await asyncio.sleep(60*5) await asyncio.sleep(60 * 5)
else: else:
print('Did not post tweet') print("Did not post tweet")
print('Done processing specified tweets') print("Done processing specified tweets")
PROGRAM_ARGS.post_id = None PROGRAM_ARGS.post_id = None
# refresh stored queue first # refresh stored queue first
if PROGRAM_ARGS.refresh_queue: if PROGRAM_ARGS.refresh_queue:
PROGRAM_ARGS.refresh_queue = False PROGRAM_ARGS.refresh_queue = False
print('Refreshing queue tweets...') print("Refreshing queue tweets...")
for id in queue.ttweets_dict: for id in queue.ttweets_dict:
t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts) t = scraper.get_tweet(
id, queue.ttweets_dict[id].author_id in privated_accounts
)
queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t) queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t)
queue.save_file() queue.save_file()
async def queue_loop(): async def queue_loop():
while True: while True:
print(f'{queue.get_count()} cross-company tweets to announce.') print(f"{queue.get_count()} cross-company tweets to announce.")
try: try:
if safe_to_post_tweets: if safe_to_post_tweets:
if await process_queue(): if await process_queue():
print("Finished processing queue") print("Finished processing queue")
return return
else: else:
print('Posted no new tweets; we\'re caught up!') print("Posted no new tweets; we're caught up!")
return return
else: else:
print('Tweets were not retrieved cleanly. Not processing queue.') print("Tweets were not retrieved cleanly. Not processing queue.")
return return
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
print('Interrupting queue processing...') print("Interrupting queue processing...")
raise e raise e
except: except:
print('Unhandled error occurred while running catch up in posting phase.') print(
"Unhandled error occurred while running catch up in posting phase."
)
traceback.print_exc() traceback.print_exc()
await get_cross_tweets_online() await get_cross_tweets_online()
try: try:
if PROGRAM_ARGS.straight_to_queue: if PROGRAM_ARGS.straight_to_queue:
PROGRAM_ARGS.straight_to_queue = False PROGRAM_ARGS.straight_to_queue = False
print('Processing queue first before fetching tweets...') print("Processing queue first before fetching tweets...")
await queue_loop() await queue_loop()
else: else:
await get_cross_tweets_online() await get_cross_tweets_online()
await queue_loop() await queue_loop()
except KeyboardInterrupt: except KeyboardInterrupt:
print('Interrupt received. Ending catchup mode...') print("Interrupt received. Ending catchup mode...")
return False return False
+19 -12
View File
@@ -1,4 +1,4 @@
from util import get_project_dir from util import project_root
holo_en: dict[int, str] = dict() holo_en: dict[int, str] = dict()
holo_id: dict[int, str] = dict() holo_id: dict[int, str] = dict()
@@ -10,22 +10,25 @@ privated_accounts: dict[int, str] = dict()
test_talents = dict() test_talents = dict()
# TODO: talents(id) -> (name, company) # TODO: talents(id) -> (name, company)
def __create_dict(file, _dict, company):
    """Load one company's talent list file into the module-level lookup tables.

    Each usable line is whitespace-separated: `<numeric id> <username> [p]`.
    Lines with fewer than two fields, and lines whose first field starts with
    `#`, are ignored. A third field equal to `p` also records the account in
    `privated_accounts`.

    Args:
        file: Path of the list file to read.
        _dict: Per-company dict that receives `id -> username`.
        company: Label stored in `talents_company` for every id read.

    Raises:
        ValueError: if the first field of a non-comment line is not an integer.
    """
    print(f"Initializing talents' account list from {file}...")
    with open(file, "r") as f:
        for line in f:
            words = line.split()
            # Check the first *field*, not the first character, so an indented
            # comment line is skipped instead of reaching int() below.
            if len(words) < 2 or words[0].startswith("#"):
                continue
            talent_id, name = int(words[0]), words[1]
            # NOTE: the username is taken from the file as-is; refreshing it
            # online (util.get_username_online) is currently disabled.
            talents[talent_id] = name
            _dict[talent_id] = name
            talents_company[talent_id] = company
            if len(words) > 2 and words[2] == "p":
                privated_accounts[talent_id] = name
def init(): def init():
global holo_en global holo_en
global holo_id global holo_id
@@ -34,26 +37,30 @@ def init():
global test_talents global test_talents
# holoEN # holoEN
__create_dict(f'{get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN') __create_dict(project_root(("lists",), "holoen.txt"), holo_en, "holoEN")
# holoID # holoID
__create_dict(f'{get_project_dir()}/lists/holoid.txt', holo_id, 'holoID') __create_dict(project_root(("lists",), "holoid.txt"), holo_id, "holoID")
# nijiEN # nijiEN
__create_dict(f'{get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN') __create_dict(project_root(("lists",), "nijien.txt"), niji_en, "nijiEN")
# nijiexID # nijiexID
__create_dict(f'{get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID') __create_dict(project_root(("lists",), "nijiexid.txt"), niji_exid, "nijiex'ID")
# TODO: nijiex-KR # TODO: nijiex-KR
test_talents = holo_en test_talents = holo_en
def is_niji(id: int) -> bool:
    """Return True when `id` is a key of `niji_en` or `niji_exid`."""
    return any(id in roster for roster in (niji_en, niji_exid))
def is_holo(id: int) -> bool:
    """Return True when `id` is a key of `holo_en` or `holo_id`."""
    return any(id in roster for roster in (holo_en, holo_id))
def is_cross_company(id1: int, id2: int):
    """Return True when one id passes `is_niji` and the other passes `is_holo`."""
    if is_niji(id1) and is_holo(id2):
        return True
    return is_holo(id1) and is_niji(id2)
# For filtered stream # For filtered stream
# DEPRECATED: thx elon # DEPRECATED: thx elon
def get_twitter_rules(): def get_twitter_rules():
@@ -61,12 +68,12 @@ def get_twitter_rules():
rules = list() rules = list()
names = list(talents.values()) names = list(talents.values())
curr_rule = f'from:{names}' curr_rule = f"from:{names}"
for name in list(talents.values())[1:]: for name in list(talents.values())[1:]:
test_rule = curr_rule + f' OR from:{name}' test_rule = curr_rule + f" OR from:{name}"
if len(test_rule) > 512: if len(test_rule) > 512:
rules.append(curr_rule) rules.append(curr_rule)
curr_rule = f'from:{name}' curr_rule = f"from:{name}"
else: else:
curr_rule = test_rule curr_rule = test_rule
rules.append(curr_rule) rules.append(curr_rule)
+48 -41
View File
@@ -9,15 +9,16 @@ import talenttweet as tt
# User timestamps line format: # User timestamps line format:
# {user_id} {status_num} {UNIX_timestamp} # {user_id} {status_num} {UNIX_timestamp}
class TalentTweetQueue: class TalentTweetQueue:
instance = None instance = None
def __init__(self): def __init__(self):
TalentTweetQueue.instance = self TalentTweetQueue.instance = self
self.queue_path = util.get_queue_path() self.queue_path = util.working_path(file="queue.txt")
self.queue_backup_path = util.get_queue_backup_path() self.queue_backup_path = util.working_path(file="_queue_backup.txt")
self.current_ttweet_path = f'{util.get_project_dir()}/_current_ttweet.txt' self.current_ttweet_path = util.working_path(file="_current_ttweet.txt")
self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt' self.finished_ttweets_path = util.working_path(file="finished_ttweets.txt")
self.is_good = True self.is_good = True
self.__sorted = False self.__sorted = False
self.finished_user_dates: dict[int, str] = dict() self.finished_user_dates: dict[int, str] = dict()
@@ -26,58 +27,62 @@ class TalentTweetQueue:
## file check, backup copy ## file check, backup copy
if os.path.exists(self.queue_backup_path): if os.path.exists(self.queue_backup_path):
print('Found backup queue! We errored in the previous run.') print("Found backup queue! We errored in the previous run.")
shutil.copyfile(self.queue_backup_path, self.queue_path) shutil.copyfile(self.queue_backup_path, self.queue_path)
elif os.path.exists(self.queue_path): elif os.path.exists(self.queue_path):
print('Creating backup queue...') print("Creating backup queue...")
shutil.copyfile(self.queue_path, self.queue_backup_path) shutil.copyfile(self.queue_path, self.queue_backup_path)
## initialize structures ## initialize structures
# user timestamps # user timestamps
try: try:
with open(self.queue_path, 'r') as f: with open(self.queue_path, "r") as f:
for line in f: for line in f:
tokens = line.split() tokens = line.split()
if len(tokens) == 0: continue if len(tokens) == 0:
continue
if tokens[0][0] != '#': if tokens[0][0] != "#":
print(f'Stopped finding user dates at {line}') print(f"Stopped finding user dates at {line}")
# reached end of accounts list # reached end of accounts list
break break
if tokens[2] != '-1': if tokens[2] != "-1":
self.finished_user_dates[int(tokens[1])] = tokens[2] self.finished_user_dates[int(tokens[1])] = tokens[2]
except: pass except:
pass
# ttweets # ttweets
try: try:
with open(self.queue_path, 'r') as f: # reset seek head with open(self.queue_path, "r") as f: # reset seek head
# Get existing queued TalentTweets # Get existing queued TalentTweets
for line in f: for line in f:
tokens = line.split() tokens = line.split()
if len(tokens) == 0 or tokens[0][0] == '#': if len(tokens) == 0 or tokens[0][0] == "#":
continue continue
ttweet = tt.TalentTweet.deserialize(line) ttweet = tt.TalentTweet.deserialize(line)
# print(f'{ttweet.tweet_id}:\n{ttweet}') # print(f'{ttweet.tweet_id}:\n{ttweet}')
self.ttweets_dict[ttweet.tweet_id] = ttweet self.ttweets_dict[ttweet.tweet_id] = ttweet
print(f'Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue.') print(
f"Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue."
)
except: except:
traceback.print_exc() traceback.print_exc()
pass pass
# unfinished ttweet # unfinished ttweet
if os.path.exists(self.current_ttweet_path): if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f: with open(self.current_ttweet_path, "r") as f:
for line in f: for line in f:
if len(line) > 0: if len(line) > 0:
ttweet = tt.TalentTweet.deserialize(line) ttweet = tt.TalentTweet.deserialize(line)
if ttweet.tweet_id in self.ttweets_dict: if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict[ttweet.tweet_id] = ttweet self.ttweets_dict[ttweet.tweet_id] = ttweet
print(f'adding unfinished tweet {ttweet.tweet_id}') print(f"adding unfinished tweet {ttweet.tweet_id}")
# finished ttweets # finished ttweets
try: try:
with open(self.finished_ttweets_path, 'r') as f: with open(self.finished_ttweets_path, "r") as f:
for line in f: for line in f:
self.finished_ttweets.add(int(line)) self.finished_ttweets.add(int(line))
except: pass except:
pass
def is_empty(self):
    """Return True when `get_count()` reports no queued TalentTweets."""
    remaining = self.get_count()
    return remaining <= 0
@@ -94,57 +99,59 @@ class TalentTweetQueue:
self.__sort_ttweets_dict() self.__sort_ttweets_dict()
key = list(self.ttweets_dict.keys())[0] key = list(self.ttweets_dict.keys())[0]
ttweet = self.ttweets_dict.pop(key) ttweet = self.ttweets_dict.pop(key)
with open(self.current_ttweet_path, 'w') as f: with open(self.current_ttweet_path, "w") as f:
f.write(ttweet.serialize()) f.write(ttweet.serialize())
return ttweet return ttweet
def get_count(self):
    """Return the number of entries currently held in `ttweets_dict`."""
    pending = self.ttweets_dict
    return len(pending)
## Call when the TalentTweet retrieved from get_next_ttweet() was ## Call when the TalentTweet retrieved from get_next_ttweet() was
# posted successfully. # posted successfully.
def good(self, tweet_id: int):
    """Mark the TalentTweet handed out by get_next_ttweet() as finished.

    Removes the in-progress marker file if present, records `tweet_id` via
    `add_finished_tweet`, rewrites the queue file via `save_file`, and sets
    `is_good` to True.

    Args:
        tweet_id: ID of the tweet that has been dealt with.
    """
    try:
        os.remove(self.current_ttweet_path)
    except OSError:
        # Best-effort cleanup: a missing or unremovable marker file must not
        # stop the tweet from being recorded. Unlike a bare `except`, this
        # still lets KeyboardInterrupt and programming errors through.
        pass
    self.add_finished_tweet(tweet_id)
    self.save_file()
    self.is_good = True
# overwrite queue.txt # overwrite queue.txt
def save_file(self, replace_backup=True):
    """Rewrite the queue file from the in-memory state.

    The file gets one `# {id} {date}` line per entry of
    `finished_user_dates`, a blank line, then one serialized TalentTweet per
    line in sorted key order.

    Args:
        replace_backup: When True, the queue file as it exists on disk before
            this write is first copied over the backup file.
    """
    print("saving queue...", end="")
    # A fresh working directory has no queue file yet, so there is nothing to
    # back up; copying unconditionally would raise FileNotFoundError.
    if replace_backup and os.path.exists(self.queue_path):
        print("overwriting backup...", end="")
        shutil.copyfile(self.queue_path, self.queue_backup_path)
    self.__sort_ttweets_dict()
    with open(self.queue_path, "w") as f:
        # write dates
        for user_id, date in self.finished_user_dates.items():
            f.write(f"# {user_id} {date}\n")
        f.write("\n")
        # write sorted ttweets
        for ttweet in self.ttweets_dict.values():
            f.write(ttweet.serialize() + "\n")
    print("done")
def add_finished_tweet(self, id):
    """Add `id` to the in-memory finished set and append it to the finished-tweets file."""
    self.finished_ttweets.add(id)
    entry = f"{id}\n"
    with open(self.finished_ttweets_path, "a") as log:
        log.write(entry)
def __sort_ttweets_dict(self):
    """Rebuild `ttweets_dict` in ascending key order unless already flagged as sorted."""
    if self.__sorted:
        return
    ordered = {}
    for tweet_id in sorted(self.ttweets_dict):
        ordered[tweet_id] = self.ttweets_dict[tweet_id]
    self.ttweets_dict = ordered
    self.__sorted = True
# destructor # destructor
def __del__(self):
    """On teardown, delete the backup queue file if the queue ended in a good state.

    A bad state keeps the backup on disk.
    """
    # `is_good` is assigned inside __init__; if construction failed before
    # that point the attribute does not exist, so treat that as a bad state.
    if getattr(self, "is_good", False):
        print("Ended in good state, deleting backup queue...")
        try:
            os.remove(self.queue_backup_path)
        except FileNotFoundError:
            # No backup file exists, so there is nothing to delete.
            pass
    else:
        print("Ended in bad state, keeping backup queue.")
+89 -49
View File
@@ -9,13 +9,14 @@ import talenttweet as tt
import talent_lists as tl import talent_lists as tl
import util import util
class TwAPI: class TwAPI:
tweets_fetched = 0 tweets_fetched = 0
instance = None instance = None
TWEET_MEDIA_FIELDS = ['url'] TWEET_MEDIA_FIELDS = ["url"]
TWEET_FIELDS = ['created_at', 'in_reply_to_user_id', 'referenced_tweets'] TWEET_FIELDS = ["created_at", "in_reply_to_user_id", "referenced_tweets"]
TWEET_EXPANSIONS = ['entities.mentions.username', 'referenced_tweets.id.author_id'] TWEET_EXPANSIONS = ["entities.mentions.username", "referenced_tweets.id.author_id"]
# Returns a tuple of user IDs:(reply_to, qrt, {mentions}) # Returns a tuple of user IDs:(reply_to, qrt, {mentions})
# for a single tweet. # for a single tweet.
# #
@@ -35,9 +36,9 @@ class TwAPI:
# mentions # mentions
try: try:
mention_list = tweet.entities['mentions'] mention_list = tweet.entities["mentions"]
for mention in mention_list: for mention in mention_list:
mentions.add(int(mention['id'])) mentions.add(int(mention["id"]))
except: except:
pass pass
# reply-to # reply-to
@@ -46,17 +47,19 @@ class TwAPI:
# qrt # qrt
if tweet.referenced_tweets: if tweet.referenced_tweets:
for ref_tweet in tweet.referenced_tweets: for ref_tweet in tweet.referenced_tweets:
if ref_tweet.type == 'quoted': if ref_tweet.type == "quoted":
for incl_tweet in response.includes['tweets']: for incl_tweet in response.includes["tweets"]:
if incl_tweet.id == ref_tweet.id: if incl_tweet.id == ref_tweet.id:
qrt = incl_tweet.author_id qrt = incl_tweet.author_id
try: try:
mentions.remove(reply_to) mentions.remove(reply_to)
except: pass except:
pass
try: try:
mentions.remove(qrt) mentions.remove(qrt)
except: pass except:
pass
mention_list = list(mentions) mention_list = list(mentions)
for uid in mention_list: for uid in mention_list:
@@ -64,39 +67,66 @@ class TwAPI:
mentions.remove(uid) mentions.remove(uid)
if reply_to not in tl.talents.keys(): if reply_to not in tl.talents.keys():
reply_to = None reply_to = None
return (mentions, reply_to, qrt) return (mentions, reply_to, qrt)
def __init__(self):
    """Build the `tweepy.Client` and `tweepy.API` handles from the working `.env` file.

    Reads `app_key`, `app_secret`, `user_token` and `user_secret` from the
    `.env` file in the working directory, registers this object as
    `TwAPI.instance`, and tries to look up the authenticated account.
    `self.me` stays None when that lookup fails.

    Raises:
        KeyError: if any of the four credential keys is missing from `.env`.
    """
    creds = dotenv_values(util.working_path(file=".env"))
    TwAPI.instance = self

    # Both handles authenticate with the same four credential values.
    oauth = {
        "consumer_key": creds["app_key"],
        "consumer_secret": creds["app_secret"],
        "access_token": creds["user_token"],
        "access_token_secret": creds["user_secret"],
    }
    self.client = tweepy.Client(**oauth)
    self.api = tweepy.API(auth=tweepy.OAuthHandler(**oauth))

    # Define the attribute up front so later reads never hit AttributeError.
    self.me = None
    try:
        self.me = self.client.get_me().data
        print(
            f'Assuming the account of @{self.me.data["username"]} ({self.me["id"]})'
        )
    except Exception:
        # Startup continues without the lookup, but the reason is reported
        # instead of being silently discarded.
        print("Could not look up the authenticated account:")
        traceback.print_exc()
async def post_tweet(
    self,
    text="",
    media_ids: list = None,
    reply_to_tweet: int = None,
    quote_tweet_id: int = None,
):
    """Create a tweet through `self.client`, retrying after rate-limit responses.

    On `tweepy.TooManyRequests` the coroutine sleeps until the time given by
    the response's `x-rate-limit-reset` header (plus one second) and tries
    again with the same arguments. Any other exception propagates.

    Returns:
        Whatever `self.client.create_tweet` returns.
    """
    while True:
        try:
            return self.client.create_tweet(
                text=text,
                media_ids=media_ids,
                in_reply_to_tweet_id=reply_to_tweet,
                quote_tweet_id=quote_tweet_id,
            )
        except tweepy.TooManyRequests as e:
            reset_at = float(e.response.headers["x-rate-limit-reset"])
            now = datetime.datetime.now().timestamp()
            wait_for = abs(reset_at - now) + 1
            print(f"\thit rate limit: trying again in {wait_for}s...")
            await asyncio.sleep(wait_for)
async def get_ttweet_image_media_id(self, ttweet): async def get_ttweet_image_media_id(self, ttweet):
img = await util.create_ttweet_image(ttweet) img = await util.create_ttweet_image(ttweet)
@@ -106,43 +136,53 @@ class TwAPI:
# return True = successfully posted a single ttweet # return True = successfully posted a single ttweet
# return False = did not post ttweet (duplicate) # return False = did not post ttweet (duplicate)
async def post_ttweet(self, ttweet: tt.TalentTweet, dry_run=False): async def post_ttweet(self, ttweet: tt.TalentTweet, dry_run=False):
print(f'------{ttweet.tweet_id} ({util.get_username_local(ttweet.author_id)})------') print(
f"------{ttweet.tweet_id} ({util.get_username_local(ttweet.author_id)})------"
)
text = ttweet.announce_text() text = ttweet.announce_text()
ttweet_url = ttweet.url() ttweet_url = ttweet.url()
if dry_run: print('-------------------- DRY RUN --------------------') if dry_run:
print("-------------------- DRY RUN --------------------")
print(ttweet) print(ttweet)
if dry_run: return False if dry_run:
return False
# NO DRY-RUN: actually post tweet # NO DRY-RUN: actually post tweet
# main tweet: text + screenshot # main tweet: text + screenshot
try: try:
print('creating main QRT w/ screenshot...') print("creating main QRT w/ screenshot...")
media_ids = [await self.get_ttweet_image_media_id(ttweet)] media_ids = [await self.get_ttweet_image_media_id(ttweet)]
twt_resp = await self.post_tweet(text, media_ids=media_ids, quote_tweet_id=ttweet.tweet_id) twt_resp = await self.post_tweet(
print('done') text, media_ids=media_ids, quote_tweet_id=ttweet.tweet_id
)
print("done")
except: except:
print('error occurred trying to create main tweet, falling back to URL-main + reply screencap format') print(
"error occurred trying to create main tweet, falling back to URL-main + reply screencap format"
)
traceback.print_exc() traceback.print_exc()
try: try:
print('posting main tweet...') print("posting main tweet...")
twt_resp = await self.post_tweet(text, quote_tweet_id=ttweet.tweet_id) twt_resp = await self.post_tweet(text, quote_tweet_id=ttweet.tweet_id)
print('done') print("done")
twt_id = twt_resp.data['id'] twt_id = twt_resp.data["id"]
try: try:
print('creating reply img...', end='') print("creating reply img...", end="")
media_ids = [await self.get_ttweet_image_media_id(ttweet)] media_ids = [await self.get_ttweet_image_media_id(ttweet)]
print('posting reply tweet...', end='') print("posting reply tweet...", end="")
await self.post_tweet(reply_to_tweet=twt_id, media_ids=media_ids) await self.post_tweet(reply_to_tweet=twt_id, media_ids=media_ids)
print('done') print("done")
except: except:
print('Had trouble posting reply image tweet.') print("Had trouble posting reply image tweet.")
print('successfully posted ttweet!') print("successfully posted ttweet!")
except tweepy.Forbidden as e: except tweepy.Forbidden as e:
if 'duplicate content' in e.api_messages[0]: if "duplicate content" in e.api_messages[0]:
print('Twitter says the TalentTweet is a duplicate; skipping error-free...') print(
"Twitter says the TalentTweet is a duplicate; skipping error-free..."
)
return False return False
else: else:
raise e raise e
@@ -151,17 +191,17 @@ class TwAPI:
async def post_ttweet_by_id(self, id: int):
    """Fetch tweet `id` with a fresh `Scraper` and post it if it is cross-company.

    Returns:
        False when the tweet cannot be retrieved or is not cross-company;
        otherwise the result of `post_ttweet`.
    """
    from scraper import Scraper

    print(f"Manually posting tweet {id}")
    fetched = Scraper().get_tweet(id, True)
    if not fetched:
        print("Tweet could not be retrieved")
        return False

    ttweet = tt.TalentTweet.create_from_tweety(fetched)
    if ttweet.is_cross_company():
        print(f"Posting {ttweet.username}/{ttweet.tweet_id}...")
        return await self.post_ttweet(ttweet)

    print(f"{ttweet.username}/{ttweet.tweet_id} is not cross-company!")
    return False
+52 -26
View File
@@ -3,6 +3,7 @@
import os import os
import sys import sys
import traceback import traceback
from pathlib import Path
from datetime import datetime from datetime import datetime
from dotenv import dotenv_values from dotenv import dotenv_values
@@ -13,36 +14,52 @@ import tweepy
from recrop import fix_aspect_ratio from recrop import fix_aspect_ratio
import talent_lists import talent_lists
# returns system path to this project, which is
# up one level from this file's directory (effective path: ..../src/../).
def get_project_dir():
return os.path.join(os.path.dirname(__file__), os.pardir)
def get_queue_path(): def project_root(dir_path: tuple[str] = tuple(), file: str = None):
return f'{get_project_dir()}/queue.txt' """Returns path relative to the project root."""
dir_path = os.path.join(os.path.dirname(__file__), os.pardir, *dir_path)
Path(dir_path).mkdir(parents=True, exist_ok=True)
if file is not None:
return os.path.join(dir_path, file)
return os.path.join(dir_path)
def working_path(dir_path: tuple[str, ...] = (), file: str = None):
    """Return a path inside the ``run`` working directory, creating it.

    Args:
        dir_path: Directory components below ``run``. A single string is
            accepted and treated as one component.
        file: Optional file name appended to the directory path. The file
            itself is not created.

    Returns:
        The path to ``file`` inside the working directory when ``file`` is
        given, otherwise the directory path.
    """
    if isinstance(dir_path, str):
        dir_path = (dir_path,)
    # Pass the components as ONE tuple: spreading them positionally would
    # hand "run" to project_root as a bare string and push the first
    # component into its ``file`` parameter.
    return project_root(("run", *dir_path), file=file)
def get_queue_backup_path():
    """Return the path of the queue backup file in the project directory."""
    project_dir = get_project_dir()
    return f"{project_dir}/_queue_backup.txt"
def clamp(n, smallest, largest):
    """Limit ``n`` to the range ``[smallest, largest]``.

    The upper bound is applied first, then the lower bound, so when
    ``smallest`` exceeds ``largest`` the result is ``smallest``.
    """
    capped = min(n, largest)
    return max(smallest, capped)
def datetime_to_tdate(date_time: datetime):
    """Format ``date_time`` as a ``YYYY-MM-DD`` date string."""
    tdate_format = "%Y-%m-%d"
    return date_time.strftime(tdate_format)
def tdate_to_datetime(tdate: str):
    """Parse a ``YYYY-MM-DD`` date string into a naive ``datetime``.

    Args:
        tdate: Date string in ``%Y-%m-%d`` format.

    Returns:
        A ``datetime`` at midnight of the given date.

    Raises:
        ValueError: If ``tdate`` does not match the expected format.
    """
    # strptime takes the string to parse first and the format second.
    return datetime.strptime(tdate, "%Y-%m-%d")
def timestamp_to_tdate(timestamp=None):
    """Convert a POSIX timestamp to a ``YYYY-MM-DD`` date string in UTC.

    Args:
        timestamp: Seconds since the epoch. When omitted (``None``), the
            current time is used.

    Returns:
        The UTC calendar date of the timestamp, formatted by
        ``datetime_to_tdate``.
    """
    # Identity check: ``None`` is a singleton, and ``is`` cannot be
    # overridden by a custom ``__eq__`` the way ``==`` can.
    if timestamp is None:
        timestamp = datetime.now().timestamp()
    return datetime_to_tdate(datetime.fromtimestamp(timestamp, tz=pytz.utc))
def get_current_timestamp():
    """Return the current time as a POSIX timestamp (float seconds)."""
    now = datetime.now()
    return now.timestamp()
def get_current_date():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    today = datetime.today()
    return today.strftime("%Y-%m-%d")
def get_key_from_value(d: dict, val): def get_key_from_value(d: dict, val):
keys = [k for k, v in d.items() if v == val] keys = [k for k, v in d.items() if v == val]
@@ -50,38 +67,43 @@ def get_key_from_value(d: dict, val):
return keys[0] return keys[0]
return None return None
# FIXME: web_auth_token under rate-limitation will fail to screenshot
async def create_ttweet_image(ttweet):
    """Capture a screenshot of a tweet and save it as ``img.png``.

    The image is written into the working directory, replacing any image
    left over from a previous call. If ``web_auth_token`` is present in the
    working directory's ``.env`` file, it is sent as the ``auth_token``
    cookie.

    Args:
        ttweet: Object whose ``url()`` method gives the tweet URL to capture.

    Returns:
        The value returned by ``fix_aspect_ratio`` for the captured image,
        or ``None`` when the capture or the aspect-ratio fix fails.
    """
    tc = TweetCapture()
    auth_token = dotenv_values(working_path(file=".env")).get("web_auth_token")
    if auth_token:
        tc.cookies = [{"name": "auth_token", "value": auth_token}]
    if "linux" in sys.platform:
        # Linux chromedriver path
        tc.driver_path = "/usr/bin/chromedriver"

    filename = working_path(file="img.png")

    # Best-effort removal of a stale image; a missing file is the common
    # case and is not an error.
    try:
        os.remove(filename)
    except OSError:
        pass

    try:
        img = await tc.screenshot(
            url=ttweet.url(),
            path=filename,
            mode=4,
            night_mode=1,
            show_parent_tweets=True,
        )
        img = fix_aspect_ratio(img)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so task cancellation
        # and KeyboardInterrupt still propagate out of the await.
        print("unable to create tweet image")
        traceback.print_exc()
        return None

    print(f"successfully saved {img}")
    return img
def get_tweet_url(id, username):
    """Build the twitter.com status URL for a tweet ID and its author."""
    url = f"https://www.twitter.com/{username}/status/{id}"
    return url
## Attempt to pull username from local; pull from online if doesn't exist. ## Attempt to pull username from local; pull from online if doesn't exist.
def get_username(id): def get_username(id):
@@ -90,22 +112,26 @@ def get_username(id):
return get_username_online(id) return get_username_online(id)
return ret return ret
def get_username_with_company(id):
    """Return the username for ``id`` followed by its company in parentheses.

    When no company is recorded for ``id`` the username is still followed
    by a single trailing space.
    """
    company = talent_lists.talents_company.get(id, None)
    username = get_username(id)
    if company is not None:
        suffix = f"({company})"
    else:
        suffix = ""
    return f"{username} {suffix}"
def get_username_local(id: int):
    """Look up a username in the local talent list.

    Falls back to the ID rendered as a string when it is not listed.
    """
    fallback = f"{id}"
    return talent_lists.talents.get(id, fallback)
# Retrieve username via API v2 (tweepy)
def get_username_online(id, default=None):
    """Look up a username through the Twitter API client.

    Args:
        id: User ID to look up.
        default: Value to fall back on when the lookup fails; it is
            converted with ``str``. When ``None``, ``"id:<id>"`` is used.

    Returns:
        The username from the API response, or the fallback string when the
        request is rate-limited or fails for any other reason.
    """
    # Imported here rather than at module level, as in the original.
    import twapi

    try:
        resp = twapi.TwAPI.instance.client.get_user(id=id)
        return resp.data.username
    except tweepy.TooManyRequests:
        # Rate limiting is handled quietly: fall through to the fallback.
        pass
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        print(f"Unhandled error retrieving username for {id}!")
        traceback.print_exc()
    return str(default) if default is not None else f"id:{id}"