clean ups, fixes, and additions

- cleaned up unused imports
- fix circular import occurring with util
- add queue refreshing
This commit is contained in:
muskit
2023-08-18 18:20:53 -07:00
parent 45461954e3
commit a5119ae298
9 changed files with 130 additions and 81 deletions
+6 -2
View File
@@ -142,6 +142,10 @@ cython_debug/
# VS Code files # VS Code files
.vscode .vscode
# project-specific (secret.ini: can't ignore existing file?) # project-specific
*.png *.png
*.json *.json
queue.txt
_queue_backup.txt
finished_ttweets.txt
_current_ttweet.txt
+24 -13
View File
@@ -5,10 +5,7 @@
# We should post, at the fastest, one tweet per minute. # We should post, at the fastest, one tweet per minute.
import traceback import traceback
import datetime
import asyncio import asyncio
import shutil
from datetime import datetime
from scraper import Scraper from scraper import Scraper
from util import * from util import *
@@ -20,19 +17,20 @@ import ttweetqueue as ttq
safe_to_post_tweets = True safe_to_post_tweets = True
errored = False errored = False
scraper: Scraper
# Returns a list of sorted and filtered TalentTweets (should # Returns a list of sorted and filtered TalentTweets (should
# be equivalent to queue.txt) # be equivalent to queue.txt)
async def get_cross_tweets_online(): async def get_cross_tweets_online():
global safe_to_post_tweets global safe_to_post_tweets
global queue
scraper = Scraper() global scraper
queue = ttq.TalentTweetQueue.instance
# Begin getting tweets from online # Begin getting tweets from online
print('Pulling tweets from online!') print('Pulling tweets from online!')
try: try:
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()): for i, (talent_id, talent_username) in enumerate(talents.items()):
print(f'[{i+1}/{len(talent_lists.talents)}] {talent_username}-----------------------------------') print(f'[{i+1}/{len(talents)}] {talent_username}-----------------------------------')
try: try:
since_date = queue.finished_user_dates.get(talent_id, None) since_date = queue.finished_user_dates.get(talent_id, None)
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date) ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
@@ -48,7 +46,7 @@ async def get_cross_tweets_online():
safe_to_post_tweets = False safe_to_post_tweets = False
traceback.print_exc() traceback.print_exc()
else: else:
queue.finished_user_dates[talent_id] = util.get_current_date() queue.finished_user_dates[talent_id] = get_current_date()
queue.save_file() queue.save_file()
except KeyboardInterrupt: except KeyboardInterrupt:
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!') print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
@@ -65,13 +63,14 @@ async def get_cross_tweets_online():
# return True = we didn't post a single ttweet # return True = we didn't post a single ttweet
async def process_queue() -> bool: async def process_queue() -> bool:
global errored global errored
global scraper
global queue
errored = False
queued_ttweets_count = queue.get_count()
WAIT_TIME = 60*15 WAIT_TIME = 60*15
ttweets_posted = 0 ttweets_posted = 0
errored = False
queue = ttq.TalentTweetQueue.instance
queued_ttweets_count = queue.get_count()
if queued_ttweets_count == 0: if queued_ttweets_count == 0:
print('Posting queue is empty!') print('Posting queue is empty!')
@@ -106,9 +105,20 @@ async def process_queue() -> bool:
async def run(PROGRAM_ARGS): async def run(PROGRAM_ARGS):
global errored global errored
global safe_to_post_tweets global safe_to_post_tweets
global scraper
global queue
scraper = Scraper()
queue = ttq.TalentTweetQueue.instance queue = ttq.TalentTweetQueue.instance
if PROGRAM_ARGS.refresh_queue:
PROGRAM_ARGS.refresh_queue = False
print('Refreshing queue tweets...')
for id in queue.ttweets_dict:
t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts)
queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t)
queue.save_file()
async def queue_loop(): async def queue_loop():
while True: while True:
print(f'{queue.get_count()} cross-company tweets to attempt sharing.') print(f'{queue.get_count()} cross-company tweets to attempt sharing.')
@@ -134,6 +144,7 @@ async def run(PROGRAM_ARGS):
await get_cross_tweets_online() await get_cross_tweets_online()
if PROGRAM_ARGS.straight_to_queue: if PROGRAM_ARGS.straight_to_queue:
PROGRAM_ARGS.straight_to_queue = False
print('Processing queue first before pulling tweets...') print('Processing queue first before pulling tweets...')
return await queue_loop() return await queue_loop()
else: else:
+12 -8
View File
@@ -22,6 +22,7 @@ def init_argparse():
p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter) p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter)
p.add_argument('mode', nargs='?', help=MODES_HELP_STR) p.add_argument('mode', nargs='?', help=MODES_HELP_STR)
p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.') p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.')
p.add_argument('--refresh-queue', action='store_true', help='Refresh the details on each tweet currently in queue.')
p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.') p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.')
return p return p
@@ -46,6 +47,16 @@ async def async_main():
else: else:
print('\nunknown mode. run with no arguments or -h for help and modes') print('\nunknown mode. run with no arguments or -h for help and modes')
def init_data():
    """Set up the program's shared data before anything else runs.

    Runs three steps in a fixed order: the shared API instance, the talent
    account lists, then the queue file system.
    NOTE(review): the constructors appear to register themselves as
    ``.instance`` singletons, which is why their return values are
    discarded here -- confirm against TwAPI and TalentTweetQueue.
    """
    TwAPI()                  # shared API instance
    talent_lists.init()      # talent account lists
    ttq.TalentTweetQueue()   # queue files system
def main(): def main():
global PROGRAM_ARGS global PROGRAM_ARGS
@@ -56,14 +67,7 @@ def main():
PROGRAM_ARGS = parser.parse_args() PROGRAM_ARGS = parser.parse_args()
# Initialize shared API instance init_data()
TwAPI()
# Initialize talent account lists
talent_lists.init()
# Initialize queue files system
ttq.TalentTweetQueue()
## Asynchronous execution ## Asynchronous execution
nest_asyncio.apply() nest_asyncio.apply()
+63 -35
View File
@@ -40,6 +40,65 @@ class Scraper:
return True return True
print('exhausted all accounts!') print('exhausted all accounts!')
return False return False
# Switch logins after a suspected rate limit, sleeping for two minutes where needed.
#   private: when true, wait first and then log back in with account index 0
#            (the "pvt-accessible" account named in the message below);
#            otherwise call try_login() with no index.
# NOTE(review): indentation is not visible here, so it is unclear whether the
# `if not l:` retry below belongs only to the `else` branch or runs after both
# branches -- confirm against the real file before relying on either reading.
def login_wait(self, private=False):
if private:
# Private case: sleep before re-using account index 0.
print(f"keeping pvt-accessible account ({self.__account.use_index(0)[0]}). sleeping for 2 minutes...")
sleep(120)
print()
l = self.try_login(0)
else:
# Non-private case: try_login() with no index (presumably picks another account -- verify).
l = self.try_login()
# A falsy login result triggers one more two-minute sleep and a final login attempt.
if not l:
print("sleeping for 2 minutes...")
sleep(120)
print()
self.try_login()
# recover lost info
def fix_tweet(self, tweet: Tweet):
    """Repair a tweet whose retweet, quote, or reply details are incomplete.

    The tweet is modified in place and the same object is returned.
    Missing linked tweets are re-fetched through ``self.get_tweet``.
    """
    if tweet.is_retweet:
        retweeted = tweet.retweeted_tweet
        if retweeted is None:
            # Nothing to recover: stop treating this tweet as a retweet.
            print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
            tweet.is_retweet = False
        elif retweeted.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
            tweet.retweeted_tweet = self.get_tweet(retweeted.id)

    if tweet.is_quoted:
        quoted = tweet.quoted_tweet
        if quoted is None:
            # quoted tweet is deleted, so clear the flag instead of recovering
            tweet.is_quoted = False
        elif quoted.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
            tweet.quoted_tweet = self.get_tweet(quoted.id)

    reply_target_missing = tweet.is_reply and tweet.replied_to is None
    if reply_target_missing:
        print('missing reply-to tweet. recovering...')
        parent_id = tweet.original_tweet['in_reply_to_status_id_str']
        tweet.replied_to = self.get_tweet(parent_id)

    return tweet
def get_tweet(self, id: int, private_user=False):
    """Fetch one tweet by ID and pass it through ``fix_tweet``.

    Args:
        id: ID of the tweet; converted to ``str`` for ``tweet_detail``.
        private_user: when true, log in with account index 0 before
            fetching (used for tweets from privated accounts).

    Returns:
        The repaired tweet, or ``None`` when ``tweet_detail`` returns
        ``None`` or the fetch still fails on the private attempt.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import traceback

    print(f'{id}{" on private" if private_user else ""}')
    if private_user:
        self.try_login(0)

    while True:
        try:
            t = self.app.tweet_detail(str(id))
            return self.fix_tweet(t) if t is not None else None
        except UnknownError:
            # Treated as a rate limit: switch/wait, then retry the same fetch.
            print("UnknownError occurred, probably rate-limited")
            self.login_wait(private_user)
        except Exception:
            if private_user:
                print("Unknown exception occurred, tweet is probably unavailable")
                # with_traceback() needs an argument and raised TypeError here;
                # print the active traceback instead.
                traceback.print_exc()
                return None
            print("Unknown exception occurred, trying again as private...")
            # Return the private attempt's result. Without the return, the loop
            # retried the public fetch forever and discarded the recovered tweet.
            return self.get_tweet(id, True)
# since MUST BE TIMEZONE AWARE # since MUST BE TIMEZONE AWARE
# usage example: since=datetime(2023, 8, 1).replace(tzinfo=pytz.utc) # usage example: since=datetime(2023, 8, 1).replace(tzinfo=pytz.utc)
@@ -66,28 +125,8 @@ class Scraper:
print(f"skipping malformed tweet: {tweet}") print(f"skipping malformed tweet: {tweet}")
return return
# recover lost info tweet = self.fix_tweet(self, tweet)
if tweet.is_retweet:
if tweet.retweeted_tweet is None:
print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
# tweet.retweeted_tweet = self.app.tweet_detail(str(tweet.id)).retweeted_tweet
tweet.is_retweet = False
elif tweet.retweeted_tweet.author is None:
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
tweet.retweeted_tweet = self.app.tweet_detail(tweet.retweeted_tweet.id)
if tweet.is_quoted:
if tweet.quoted_tweet is None: # quoted tweet is deleted
# print(f'{tweet.author.username}/{tweet.id} is missing the QRT! Recovering...')
# tweet.quoted_tweet = self.app.tweet_detail(str(tweet.id)).quoted_tweet
tweet.is_quoted = False
elif tweet.quoted_tweet.author is None:
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
tweet.quoted_tweet = self.app.tweet_detail(tweet.quoted_tweet.id)
# fix reply if it exists
# if tweet.is_reply and tweet.replied_to is None:
# tweet.replied_to = self.app.tweet_detail(tweet.original_tweet['in_reply_to_status_id_str'])
tweets.append(tweet) tweets.append(tweet)
if not reached_backdate and int(tweet.author.id) == uid and tweet.date <= since: if not reached_backdate and int(tweet.author.id) == uid and tweet.date <= since:
@@ -118,18 +157,7 @@ class Scraper:
cur = search.cursor cur = search.cursor
except UnknownError: except UnknownError:
print("UnknownError occurred, probably rate-limited") print("UnknownError occurred, probably rate-limited")
if uid in talent_lists.privated_accounts: self.login_wait(uid in talent_lists.privated_accounts)
print("sticking pvt-accessible account. sleeping for 2 minutes...")
sleep(120)
print()
l = self.try_login(0)
else:
l = self.try_login()
if not l:
print("sleeping for 2 minutes...")
sleep(120)
print()
self.try_login()
tweets.sort(key=lambda t: t.id) tweets.sort(key=lambda t: t.id)
return tweets return tweets
@@ -153,4 +181,4 @@ if __name__== '__main__':
talent_lists.init() talent_lists.init()
s = Scraper() s = Scraper()
ttweets = s.get_cross_ttweets_from_user("pomurainpuff", since=datetime(2023, 7, 30).replace(tzinfo=pytz.utc)) ttweets = s.get_cross_ttweets_from_user("pomurainpuff", since=datetime(2023, 7, 30).replace(tzinfo=pytz.utc))
print("\n".join([x.__repr__() for x in ttweets])) print("\n".join([x.__repr__() for x in ttweets]))
+5 -5
View File
@@ -1,4 +1,4 @@
import util from util import get_project_dir
holo_en: dict[int, str] = dict() holo_en: dict[int, str] = dict()
holo_id: dict[int, str] = dict() holo_id: dict[int, str] = dict()
@@ -34,13 +34,13 @@ def init():
global test_talents global test_talents
# holoEN # holoEN
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN') __create_dict(f'{get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
# holoID # holoID
__create_dict(f'{util.get_project_dir()}/lists/holoid.txt', holo_id, 'holoID') __create_dict(f'{get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
# nijiEN # nijiEN
__create_dict(f'{util.get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN') __create_dict(f'{get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
# nijiexID # nijiexID
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID') __create_dict(f'{get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
# TODO: nijiex-KR # TODO: nijiex-KR
test_talents = holo_en test_talents = holo_en
+5 -3
View File
@@ -1,5 +1,4 @@
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo
import platform import platform
import pytz import pytz
@@ -85,7 +84,7 @@ class TalentTweet:
def create_from_tweety(tweety: Tweet): def create_from_tweety(tweety: Tweet):
if tweety.is_retweet: if tweety.is_retweet:
rtm = [int(x.id) for x in tweety.retweeted_tweet.user_mentions] rtm = [int(x.id) for x in tweety.retweeted_tweet.user_mentions]
elif tweety.is_quoted: elif tweety.quoted_tweet:
rtm = [int(x.id) for x in tweety.quoted_tweet.user_mentions] rtm = [int(x.id) for x in tweety.quoted_tweet.user_mentions]
else: else:
rtm = list() rtm = list()
@@ -132,6 +131,9 @@ class TalentTweet:
except: pass except: pass
try: self.all_parties.remove(self.author_id) try: self.all_parties.remove(self.author_id)
except: pass except: pass
if not self.is_cross_company():
print(f'WARNING: {self.tweet_id} is not cross-company!')
def __repr__(self) -> str: def __repr__(self) -> str:
@@ -169,7 +171,7 @@ class TalentTweet:
def get_datetime_str(self): def get_datetime_str(self):
unpad = '#' if platform.system() == 'Windows' else '-' unpad = '#' if platform.system() == 'Windows' else '-'
return self.date_time.strftime(f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)') return self.date_time.strftime(f'%{unpad}I:%M%p (%Z) · %b %{unpad}d, %Y')
def announce_text(self): def announce_text(self):
# templates # templates
+12 -11
View File
@@ -20,9 +20,9 @@ class TalentTweetQueue:
self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt' self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt'
self.is_good = True self.is_good = True
self.__sorted = False self.__sorted = False
self.finished_user_dates = dict() self.finished_user_dates: dict[int, str] = dict()
self.ttweets_dict = dict() self.ttweets_dict: dict[int, tt.TalentTweet] = dict()
self.finished_ttweets = list() self.finished_ttweets: list[int] = list()
## file check, backup copy ## file check, backup copy
if os.path.exists(self.queue_backup_path): if os.path.exists(self.queue_backup_path):
@@ -62,6 +62,14 @@ class TalentTweetQueue:
except: except:
traceback.print_exc() traceback.print_exc()
pass pass
# unfinished ttweet
if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f:
for line in f:
if len(line) > 0:
ttweet = tt.TalentTweet.deserialize(line)
if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict[ttweet.tweet_id] = ttweet
# finished ttweets # finished ttweets
try: try:
with open(self.finished_ttweets_path, 'r') as f: with open(self.finished_ttweets_path, 'r') as f:
@@ -82,13 +90,6 @@ class TalentTweetQueue:
def get_next_ttweet(self): def get_next_ttweet(self):
self.is_good = False self.is_good = False
if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f:
ttweet = tt.TalentTweet.deserialize(f.readline())
if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict.pop(ttweet.tweet_id)
return ttweet
self.__sort_ttweets_dict() self.__sort_ttweets_dict()
key = list(self.ttweets_dict.keys())[0] key = list(self.ttweets_dict.keys())[0]
ttweet = self.ttweets_dict.pop(key) ttweet = self.ttweets_dict.pop(key)
@@ -112,7 +113,7 @@ class TalentTweetQueue:
# overwrite queue.txt # overwrite queue.txt
def save_file(self): def save_file(self):
print('saving file...', end='') print('saving queue files...', end='')
shutil.copyfile(self.queue_path, self.queue_backup_path) shutil.copyfile(self.queue_path, self.queue_backup_path)
self.__sort_ttweets_dict() self.__sort_ttweets_dict()
with open(self.queue_path, 'w') as f: with open(self.queue_path, 'w') as f:
-1
View File
@@ -7,7 +7,6 @@ import tweepy
import talenttweet as tt import talenttweet as tt
import talent_lists as tl import talent_lists as tl
import ttweetqueue as ttq
import util import util
class TwAPI: class TwAPI:
+3 -3
View File
@@ -6,11 +6,9 @@ import traceback
from datetime import datetime from datetime import datetime
from dotenv import dotenv_values from dotenv import dotenv_values
import tweepy
import pytz import pytz
import twint
import twapi
from tweetcapture import TweetCapture from tweetcapture import TweetCapture
import tweepy
from recrop import fix_aspect_ratio from recrop import fix_aspect_ratio
import talent_lists import talent_lists
@@ -52,6 +50,7 @@ def get_key_from_value(d: dict, val):
return keys[0] return keys[0]
return None return None
# FIXME: web_auth_token under rate-limitation will fail to screenshot
async def create_ttweet_image(ttweet): async def create_ttweet_image(ttweet):
tc = TweetCapture() tc = TweetCapture()
tc.cookies = [{'name': 'auth_token', 'value': dotenv_values()['web_auth_token']}] tc.cookies = [{'name': 'auth_token', 'value': dotenv_values()['web_auth_token']}]
@@ -100,6 +99,7 @@ def get_username_local(id: int):
# Retrieve username via API v2 (tweepy) # Retrieve username via API v2 (tweepy)
def get_username_online(id, default=None): def get_username_online(id, default=None):
import twapi
try: try:
resp = twapi.TwAPI.instance.client.get_user(id=id) resp = twapi.TwAPI.instance.client.get_user(id=id)
return resp.data.username return resp.data.username