clean ups, fixes, and additions

- cleaned up unused imports
- fix circular import occurring with util
- add queue refreshing
This commit is contained in:
muskit
2023-08-18 18:20:53 -07:00
parent 45461954e3
commit a5119ae298
9 changed files with 130 additions and 81 deletions
+6 -2
View File
@@ -142,6 +142,10 @@ cython_debug/
# VS Code files
.vscode
# project-specific (secret.ini: can't ignore existing file?)
# project-specific
*.png
*.json
*.json
queue.txt
_queue_backup.txt
finished_ttweets.txt
_current_ttweet.txt
+24 -13
View File
@@ -5,10 +5,7 @@
# We should post, at the fastest, one tweet per minute.
import traceback
import datetime
import asyncio
import shutil
from datetime import datetime
from scraper import Scraper
from util import *
@@ -20,19 +17,20 @@ import ttweetqueue as ttq
safe_to_post_tweets = True
errored = False
scraper: Scraper
# Returns a list of sorted and filtered TalentTweets (should
# be equivalent to queue.txt)
async def get_cross_tweets_online():
global safe_to_post_tweets
scraper = Scraper()
queue = ttq.TalentTweetQueue.instance
global queue
global scraper
# Begin getting tweets from online
print('Pulling tweets from online!')
try:
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
print(f'[{i+1}/{len(talent_lists.talents)}] {talent_username}-----------------------------------')
for i, (talent_id, talent_username) in enumerate(talents.items()):
print(f'[{i+1}/{len(talents)}] {talent_username}-----------------------------------')
try:
since_date = queue.finished_user_dates.get(talent_id, None)
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
@@ -48,7 +46,7 @@ async def get_cross_tweets_online():
safe_to_post_tweets = False
traceback.print_exc()
else:
queue.finished_user_dates[talent_id] = util.get_current_date()
queue.finished_user_dates[talent_id] = get_current_date()
queue.save_file()
except KeyboardInterrupt:
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
@@ -65,13 +63,14 @@ async def get_cross_tweets_online():
# return True = we didn't post a single ttweet
async def process_queue() -> bool:
global errored
global scraper
global queue
errored = False
queued_ttweets_count = queue.get_count()
WAIT_TIME = 60*15
ttweets_posted = 0
errored = False
queue = ttq.TalentTweetQueue.instance
queued_ttweets_count = queue.get_count()
if queued_ttweets_count == 0:
print('Posting queue is empty!')
@@ -106,9 +105,20 @@ async def process_queue() -> bool:
async def run(PROGRAM_ARGS):
global errored
global safe_to_post_tweets
global scraper
global queue
scraper = Scraper()
queue = ttq.TalentTweetQueue.instance
if PROGRAM_ARGS.refresh_queue:
PROGRAM_ARGS.refresh_queue = False
print('Refreshing queue tweets...')
for id in queue.ttweets_dict:
t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts)
queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t)
queue.save_file()
async def queue_loop():
while True:
print(f'{queue.get_count()} cross-company tweets to attempt sharing.')
@@ -134,6 +144,7 @@ async def run(PROGRAM_ARGS):
await get_cross_tweets_online()
if PROGRAM_ARGS.straight_to_queue:
PROGRAM_ARGS.straight_to_queue = False
print('Processing queue first before pulling tweets...')
return await queue_loop()
else:
+12 -8
View File
@@ -22,6 +22,7 @@ def init_argparse():
p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter)
p.add_argument('mode', nargs='?', help=MODES_HELP_STR)
p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.')
p.add_argument('--refresh-queue', action='store_true', help='Refresh the details on each tweet currently in queue.')
p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.')
return p
@@ -46,6 +47,16 @@ async def async_main():
else:
print('\nunknown mode. run with no arguments or -h for help and modes')
def init_data():
    """Create the shared objects the bot needs before any mode runs.

    The three steps run in a fixed order: API instance first, then the
    talent account lists, then the queue file system.
    """
    TwAPI()                 # shared API instance
    talent_lists.init()     # talent account lists
    ttq.TalentTweetQueue()  # queue files system
def main():
global PROGRAM_ARGS
@@ -56,14 +67,7 @@ def main():
PROGRAM_ARGS = parser.parse_args()
# Initialize shared API instance
TwAPI()
# Initialize talent account lists
talent_lists.init()
# Initialize queue files system
ttq.TalentTweetQueue()
init_data()
## Asynchronous execution
nest_asyncio.apply()
+63 -35
View File
@@ -40,6 +40,65 @@ class Scraper:
return True
print('exhausted all accounts!')
return False
def login_wait(self, private=False):
    """Switch or re-establish the login after a suspected rate limit.

    Args:
        private: When true, stay on the account at index 0 (the one the
            log message calls "pvt-accessible"): wait two minutes, then
            log in with it again. Otherwise just attempt a regular login.

    If that first attempt reports failure, wait another two minutes and
    attempt a regular login once more. Nothing is returned.
    """
    if not private:
        logged_in = self.try_login()
    else:
        print(f"keeping pvt-accessible account ({self.__account.use_index(0)[0]}). sleeping for 2 minutes...")
        sleep(120)
        print()
        logged_in = self.try_login(0)

    if logged_in:
        return

    # First attempt failed: back off once more before the final try.
    print("sleeping for 2 minutes...")
    sleep(120)
    print()
    self.try_login()
# recover lost info
def fix_tweet(self, tweet: Tweet):
    """Repair a tweet whose linked tweets came back incomplete.

    The tweet is modified in place and also returned.

    * Retweet / quote with no linked tweet at all: the corresponding
      ``is_retweet`` / ``is_quoted`` flag is cleared.
    * Retweet / quote whose linked tweet has no author: the linked tweet
      is re-fetched through ``get_tweet``. If that fetch yields ``None``
      the flag is cleared as well, so later code never sees a set flag
      paired with a missing linked tweet.
    * Reply with no ``replied_to``: the parent is fetched using the ID
      stored in ``original_tweet['in_reply_to_status_id_str']``.

    Args:
        tweet: The tweet object to repair.

    Returns:
        The same ``tweet`` object.
    """
    if tweet.is_retweet:
        if tweet.retweeted_tweet is None:
            print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
            tweet.is_retweet = False
        elif tweet.retweeted_tweet.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
            tweet.retweeted_tweet = self.get_tweet(tweet.retweeted_tweet.id)
            if tweet.retweeted_tweet is None:
                # Recovery failed; treat it like the "missing RT" case above
                # instead of leaving is_retweet set with nothing behind it.
                tweet.is_retweet = False

    if tweet.is_quoted:
        if tweet.quoted_tweet is None:  # quoted tweet is deleted
            tweet.is_quoted = False
        elif tweet.quoted_tweet.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
            tweet.quoted_tweet = self.get_tweet(tweet.quoted_tweet.id)
            if tweet.quoted_tweet is None:
                # Same reasoning as for retweets: no quoted tweet, no flag.
                tweet.is_quoted = False

    if tweet.is_reply and tweet.replied_to is None:
        print('missing reply-to tweet. recovering...')
        tweet.replied_to = self.get_tweet(tweet.original_tweet['in_reply_to_status_id_str'])

    return tweet
def get_tweet(self, id: int, private_user=False):
    """Fetch one tweet by ID and pass it through ``fix_tweet``.

    Args:
        id: ID of the tweet to look up (converted to ``str`` for the
            ``tweet_detail`` call).
        private_user: When true, log in with the account at index 0
            before fetching.

    Returns:
        The repaired tweet, or ``None`` when ``tweet_detail`` returns
        ``None`` or the tweet still cannot be fetched on the private
        attempt.

    On ``UnknownError`` (treated as a rate limit) the method waits via
    ``login_wait`` and retries. On any other exception a non-private
    call is retried exactly once as private and that result is returned;
    a private call gives up and returns ``None``.
    """
    # Function-scope import so this block does not depend on the
    # module's import list.
    import traceback

    print(f'{id}{" on private" if private_user else ""}')
    if private_user:
        self.try_login(0)

    while True:
        try:
            t = self.app.tweet_detail(str(id))
            return self.fix_tweet(t) if t is not None else None
        except UnknownError:
            print("UnknownError occurred, probably rate-limited")
            self.login_wait(private_user)
        except Exception:
            if private_user:
                print("Unknown exception occurred, tweet is probably unavailable")
                # with_traceback() needs a traceback argument; calling it
                # bare raised TypeError here. Print the active traceback.
                traceback.print_exc()
                return None
            print("Unknown exception occurred, trying again as private...")
            # Return the private attempt's result; without the return the
            # loop kept retrying an unavailable tweet forever.
            return self.get_tweet(id, True)
# since MUST BE TIMEZONE AWARE
# usage example: since=datetime(2023, 8, 1).replace(tzinfo=pytz.utc)
@@ -66,28 +125,8 @@ class Scraper:
print(f"skipping malformed tweet: {tweet}")
return
# recover lost info
if tweet.is_retweet:
if tweet.retweeted_tweet is None:
print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
# tweet.retweeted_tweet = self.app.tweet_detail(str(tweet.id)).retweeted_tweet
tweet.is_retweet = False
elif tweet.retweeted_tweet.author is None:
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
tweet.retweeted_tweet = self.app.tweet_detail(tweet.retweeted_tweet.id)
if tweet.is_quoted:
if tweet.quoted_tweet is None: # quoted tweet is deleted
# print(f'{tweet.author.username}/{tweet.id} is missing the QRT! Recovering...')
# tweet.quoted_tweet = self.app.tweet_detail(str(tweet.id)).quoted_tweet
tweet.is_quoted = False
elif tweet.quoted_tweet.author is None:
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
tweet.quoted_tweet = self.app.tweet_detail(tweet.quoted_tweet.id)
# fix reply if it exists
# if tweet.is_reply and tweet.replied_to is None:
# tweet.replied_to = self.app.tweet_detail(tweet.original_tweet['in_reply_to_status_id_str'])
tweet = self.fix_tweet(self, tweet)
tweets.append(tweet)
if not reached_backdate and int(tweet.author.id) == uid and tweet.date <= since:
@@ -118,18 +157,7 @@ class Scraper:
cur = search.cursor
except UnknownError:
print("UnknownError occurred, probably rate-limited")
if uid in talent_lists.privated_accounts:
print("sticking pvt-accessible account. sleeping for 2 minutes...")
sleep(120)
print()
l = self.try_login(0)
else:
l = self.try_login()
if not l:
print("sleeping for 2 minutes...")
sleep(120)
print()
self.try_login()
self.login_wait(uid in talent_lists.privated_accounts)
tweets.sort(key=lambda t: t.id)
return tweets
@@ -153,4 +181,4 @@ if __name__== '__main__':
talent_lists.init()
s = Scraper()
ttweets = s.get_cross_ttweets_from_user("pomurainpuff", since=datetime(2023, 7, 30).replace(tzinfo=pytz.utc))
print("\n".join([x.__repr__() for x in ttweets]))
print("\n".join([x.__repr__() for x in ttweets]))
+5 -5
View File
@@ -1,4 +1,4 @@
import util
from util import get_project_dir
holo_en: dict[int, str] = dict()
holo_id: dict[int, str] = dict()
@@ -34,13 +34,13 @@ def init():
global test_talents
# holoEN
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
__create_dict(f'{get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
# holoID
__create_dict(f'{util.get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
__create_dict(f'{get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
# nijiEN
__create_dict(f'{util.get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
__create_dict(f'{get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
# nijiexID
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
__create_dict(f'{get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
# TODO: nijiex-KR
test_talents = holo_en
+5 -3
View File
@@ -1,5 +1,4 @@
from datetime import datetime
from zoneinfo import ZoneInfo
import platform
import pytz
@@ -85,7 +84,7 @@ class TalentTweet:
def create_from_tweety(tweety: Tweet):
if tweety.is_retweet:
rtm = [int(x.id) for x in tweety.retweeted_tweet.user_mentions]
elif tweety.is_quoted:
elif tweety.quoted_tweet:
rtm = [int(x.id) for x in tweety.quoted_tweet.user_mentions]
else:
rtm = list()
@@ -132,6 +131,9 @@ class TalentTweet:
except: pass
try: self.all_parties.remove(self.author_id)
except: pass
if not self.is_cross_company():
print(f'WARNING: {self.tweet_id} is not cross-company!')
def __repr__(self) -> str:
@@ -169,7 +171,7 @@ class TalentTweet:
def get_datetime_str(self):
unpad = '#' if platform.system() == 'Windows' else '-'
return self.date_time.strftime(f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)')
return self.date_time.strftime(f'%{unpad}I:%M%p (%Z) · %b %{unpad}d, %Y')
def announce_text(self):
# templates
+12 -11
View File
@@ -20,9 +20,9 @@ class TalentTweetQueue:
self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt'
self.is_good = True
self.__sorted = False
self.finished_user_dates = dict()
self.ttweets_dict = dict()
self.finished_ttweets = list()
self.finished_user_dates: dict[int, str] = dict()
self.ttweets_dict: dict[int, tt.TalentTweet] = dict()
self.finished_ttweets: list[int] = list()
## file check, backup copy
if os.path.exists(self.queue_backup_path):
@@ -62,6 +62,14 @@ class TalentTweetQueue:
except:
traceback.print_exc()
pass
# unfinished ttweet
if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f:
for line in f:
if len(line) > 0:
ttweet = tt.TalentTweet.deserialize(line)
if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict[ttweet.tweet_id] = ttweet
# finished ttweets
try:
with open(self.finished_ttweets_path, 'r') as f:
@@ -82,13 +90,6 @@ class TalentTweetQueue:
def get_next_ttweet(self):
self.is_good = False
if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f:
ttweet = tt.TalentTweet.deserialize(f.readline())
if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict.pop(ttweet.tweet_id)
return ttweet
self.__sort_ttweets_dict()
key = list(self.ttweets_dict.keys())[0]
ttweet = self.ttweets_dict.pop(key)
@@ -112,7 +113,7 @@ class TalentTweetQueue:
# overwrite queue.txt
def save_file(self):
print('saving file...', end='')
print('saving queue files...', end='')
shutil.copyfile(self.queue_path, self.queue_backup_path)
self.__sort_ttweets_dict()
with open(self.queue_path, 'w') as f:
-1
View File
@@ -7,7 +7,6 @@ import tweepy
import talenttweet as tt
import talent_lists as tl
import ttweetqueue as ttq
import util
class TwAPI:
+3 -3
View File
@@ -6,11 +6,9 @@ import traceback
from datetime import datetime
from dotenv import dotenv_values
import tweepy
import pytz
import twint
import twapi
from tweetcapture import TweetCapture
import tweepy
from recrop import fix_aspect_ratio
import talent_lists
@@ -52,6 +50,7 @@ def get_key_from_value(d: dict, val):
return keys[0]
return None
# FIXME: web_auth_token under rate-limitation will fail to screenshot
async def create_ttweet_image(ttweet):
tc = TweetCapture()
tc.cookies = [{'name': 'auth_token', 'value': dotenv_values()['web_auth_token']}]
@@ -100,6 +99,7 @@ def get_username_local(id: int):
# Retrieve username via API v2 (tweepy)
def get_username_online(id, default=None):
import twapi
try:
resp = twapi.TwAPI.instance.client.get_user(id=id)
return resp.data.username