clean ups, fixes, and additions
- cleaned up unused imports - fix circular import occurring with util - add queue refreshing
This commit is contained in:
+5
-1
@@ -142,6 +142,10 @@ cython_debug/
|
||||
# VS Code files
|
||||
.vscode
|
||||
|
||||
# project-specific (secret.ini: can't ignore existing file?)
|
||||
# project-specific
|
||||
*.png
|
||||
*.json
|
||||
queue.txt
|
||||
_queue_backup.txt
|
||||
finished_ttweets.txt
|
||||
_current_ttweet.txt
|
||||
+24
-13
@@ -5,10 +5,7 @@
|
||||
# We should post, at the fastest, one tweet per minute.
|
||||
|
||||
import traceback
|
||||
import datetime
|
||||
import asyncio
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
from scraper import Scraper
|
||||
from util import *
|
||||
@@ -20,19 +17,20 @@ import ttweetqueue as ttq
|
||||
safe_to_post_tweets = True
|
||||
errored = False
|
||||
|
||||
scraper: Scraper
|
||||
|
||||
# Returns a list of sorted and filtered TalentTweets (should
|
||||
# be equivalent to queue.txt)
|
||||
async def get_cross_tweets_online():
|
||||
global safe_to_post_tweets
|
||||
|
||||
scraper = Scraper()
|
||||
queue = ttq.TalentTweetQueue.instance
|
||||
global queue
|
||||
global scraper
|
||||
|
||||
# Begin getting tweets from online
|
||||
print('Pulling tweets from online!')
|
||||
try:
|
||||
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
|
||||
print(f'[{i+1}/{len(talent_lists.talents)}] {talent_username}-----------------------------------')
|
||||
for i, (talent_id, talent_username) in enumerate(talents.items()):
|
||||
print(f'[{i+1}/{len(talents)}] {talent_username}-----------------------------------')
|
||||
try:
|
||||
since_date = queue.finished_user_dates.get(talent_id, None)
|
||||
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
|
||||
@@ -48,7 +46,7 @@ async def get_cross_tweets_online():
|
||||
safe_to_post_tweets = False
|
||||
traceback.print_exc()
|
||||
else:
|
||||
queue.finished_user_dates[talent_id] = util.get_current_date()
|
||||
queue.finished_user_dates[talent_id] = get_current_date()
|
||||
queue.save_file()
|
||||
except KeyboardInterrupt:
|
||||
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
|
||||
@@ -65,13 +63,14 @@ async def get_cross_tweets_online():
|
||||
# return True = we didn't post a single ttweet
|
||||
async def process_queue() -> bool:
|
||||
global errored
|
||||
global scraper
|
||||
global queue
|
||||
|
||||
errored = False
|
||||
queued_ttweets_count = queue.get_count()
|
||||
|
||||
WAIT_TIME = 60*15
|
||||
ttweets_posted = 0
|
||||
errored = False
|
||||
|
||||
queue = ttq.TalentTweetQueue.instance
|
||||
queued_ttweets_count = queue.get_count()
|
||||
|
||||
if queued_ttweets_count == 0:
|
||||
print('Posting queue is empty!')
|
||||
@@ -106,9 +105,20 @@ async def process_queue() -> bool:
|
||||
async def run(PROGRAM_ARGS):
|
||||
global errored
|
||||
global safe_to_post_tweets
|
||||
global scraper
|
||||
global queue
|
||||
|
||||
scraper = Scraper()
|
||||
queue = ttq.TalentTweetQueue.instance
|
||||
|
||||
if PROGRAM_ARGS.refresh_queue:
|
||||
PROGRAM_ARGS.refresh_queue = False
|
||||
print('Refreshing queue tweets...')
|
||||
for id in queue.ttweets_dict:
|
||||
t = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts)
|
||||
queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t)
|
||||
queue.save_file()
|
||||
|
||||
async def queue_loop():
|
||||
while True:
|
||||
print(f'{queue.get_count()} cross-company tweets to attempt sharing.')
|
||||
@@ -134,6 +144,7 @@ async def run(PROGRAM_ARGS):
|
||||
await get_cross_tweets_online()
|
||||
|
||||
if PROGRAM_ARGS.straight_to_queue:
|
||||
PROGRAM_ARGS.straight_to_queue = False
|
||||
print('Processing queue first before pulling tweets...')
|
||||
return await queue_loop()
|
||||
else:
|
||||
|
||||
+12
-8
@@ -22,6 +22,7 @@ def init_argparse():
|
||||
p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter)
|
||||
p.add_argument('mode', nargs='?', help=MODES_HELP_STR)
|
||||
p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.')
|
||||
p.add_argument('--refresh-queue', action='store_true', help='Refresh the details on each tweet currently in queue.')
|
||||
p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.')
|
||||
return p
|
||||
|
||||
@@ -46,6 +47,16 @@ async def async_main():
|
||||
else:
|
||||
print('\nunknown mode. run with no arguments or -h for help and modes')
|
||||
|
||||
def init_data():
    """Run the start-up initialisers for the program's shared data.

    Three steps are executed in a fixed order and their return values are
    discarded:

    1. ``TwAPI()`` - the shared API instance.
    2. ``talent_lists.init()`` - the talent account lists.
    3. ``ttq.TalentTweetQueue()`` - the queue files system.

    NOTE(review): other modules read ``TwAPI.instance`` and
    ``TalentTweetQueue.instance``, so the constructors presumably register
    themselves as singletons - confirm in their definitions.
    """
    startup_steps = (
        TwAPI,                  # shared API instance
        talent_lists.init,      # talent account lists
        ttq.TalentTweetQueue,   # queue files system
    )
    for initialise in startup_steps:
        initialise()
|
||||
|
||||
def main():
|
||||
global PROGRAM_ARGS
|
||||
|
||||
@@ -56,14 +67,7 @@ def main():
|
||||
|
||||
PROGRAM_ARGS = parser.parse_args()
|
||||
|
||||
# Initialize shared API instance
|
||||
TwAPI()
|
||||
|
||||
# Initialize talent account lists
|
||||
talent_lists.init()
|
||||
|
||||
# Initialize queue files system
|
||||
ttq.TalentTweetQueue()
|
||||
init_data()
|
||||
|
||||
## Asynchronous execution
|
||||
nest_asyncio.apply()
|
||||
|
||||
+61
-33
@@ -41,6 +41,65 @@ class Scraper:
|
||||
print('exhausted all accounts!')
|
||||
return False
|
||||
|
||||
def login_wait(self, private=False):
    """Switch or re-establish the scraper login, pausing where needed.

    Args:
        private: When true, stay on the account at index 0 (the one the
            log message calls "pvt-accessible"): sleep two minutes, then
            log in with index 0 again. When false, call ``try_login()``
            with no index straight away.

    If the first login attempt reports failure, sleep a further two
    minutes and call ``try_login()`` once more; the outcome of that last
    attempt is not checked. Nothing is returned.
    """
    # NOTE(review): the original indentation was not recoverable from the
    # rendered diff. The retry below is placed after the if/else because
    # both branches store the login result - confirm against the repo.
    if not private:
        logged_in = self.try_login()
    else:
        print(f"keeping pvt-accessible account ({self.__account.use_index(0)[0]}). sleeping for 2 minutes...")
        sleep(120)
        print()
        logged_in = self.try_login(0)

    if logged_in:
        return

    print("sleeping for 2 minutes...")
    sleep(120)
    print()
    self.try_login()
|
||||
|
||||
# recover lost info
|
||||
def fix_tweet(self, tweet: Tweet):
    """Repair a scraped tweet whose retweet/quote/reply details are incomplete.

    The tweet is modified in place and also returned.

    * Retweet: if the retweeted tweet is missing, the retweet flag is
      cleared. If it is present but has no author, it is re-fetched with
      ``self.get_tweet``.
    * Quote: same handling, using the quoted tweet and the quote flag.
    * Reply: if the replied-to tweet is missing, it is fetched using the
      ``in_reply_to_status_id_str`` entry of ``tweet.original_tweet``.

    ``self.get_tweet`` returns ``None`` when the tweet cannot be fetched.
    In that case the retweet/quote flag is cleared as well, so callers never
    see a flag set to true alongside a ``None`` payload.

    Args:
        tweet: The tweet object to repair.

    Returns:
        The same ``tweet`` object.
    """
    if tweet.is_retweet:
        if tweet.retweeted_tweet is None:
            print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
            tweet.is_retweet = False
        elif tweet.retweeted_tweet.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
            tweet.retweeted_tweet = self.get_tweet(tweet.retweeted_tweet.id)
            if tweet.retweeted_tweet is None:
                # Recovery failed: treat it like a missing RT so later code
                # does not dereference a None retweeted_tweet.
                tweet.is_retweet = False

    if tweet.is_quoted:
        if tweet.quoted_tweet is None:
            # The quoted tweet is deleted; nothing to recover.
            tweet.is_quoted = False
        elif tweet.quoted_tweet.author is None:
            print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
            tweet.quoted_tweet = self.get_tweet(tweet.quoted_tweet.id)
            if tweet.quoted_tweet is None:
                # Recovery failed: same outcome as a deleted quoted tweet.
                tweet.is_quoted = False

    if tweet.is_reply and tweet.replied_to is None:
        print('missing reply-to tweet. recovering...')
        tweet.replied_to = self.get_tweet(tweet.original_tweet['in_reply_to_status_id_str'])

    return tweet
|
||||
|
||||
def get_tweet(self, id: int, private_user=False):
    """Fetch a single tweet by id and repair it with ``fix_tweet``.

    Args:
        id: The tweet id; it is converted to ``str`` before the lookup.
        private_user: When true, log in with account index 0 before the
            lookup (the log output calls this fetching "on private").

    Returns:
        The result of ``self.fix_tweet`` for the fetched tweet, or ``None``
        when the lookup yields ``None`` or the tweet cannot be fetched even
        with the private login.

    Error handling:
        * ``UnknownError`` is treated as a probable rate limit:
          ``login_wait`` is called and the lookup is retried.
        * Any other exception on a non-private lookup triggers one retry
          as a private lookup, and that retry's result is returned.
        * Any other exception on a private lookup prints the traceback and
          returns ``None``.
    """
    # Imported locally so this method does not depend on the module-level
    # import list.
    import traceback

    print(f'{id}{" on private" if private_user else ""}')
    if private_user:
        self.try_login(0)

    while True:
        try:
            t = self.app.tweet_detail(str(id))
            return self.fix_tweet(t) if t is not None else None
        except UnknownError:
            print("UnknownError occurred, probably rate-limited")
            self.login_wait(private_user)
        except Exception:
            if private_user:
                print("Unknown exception occurred, tweet is probably unavailable")
                # BaseException.with_traceback() needs a traceback argument
                # and returns the exception; format_exc() gives the text.
                print(traceback.format_exc())
                return None
            print("Unknown exception occurred, trying again as private...")
            # Return the private attempt's result rather than discarding it
            # and looping again.
            return self.get_tweet(id, True)
|
||||
|
||||
# since MUST BE TIMEZONE AWARE
|
||||
# usage example: since=datetime(2023, 8, 1).replace(tzinfo=pytz.utc)
|
||||
def get_tweets_from_user(self, username: str, since: datetime = None) -> list[Tweet]:
|
||||
@@ -66,28 +125,8 @@ class Scraper:
|
||||
print(f"skipping malformed tweet: {tweet}")
|
||||
return
|
||||
|
||||
# recover lost info
|
||||
if tweet.is_retweet:
|
||||
if tweet.retweeted_tweet is None:
|
||||
print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
|
||||
# tweet.retweeted_tweet = self.app.tweet_detail(str(tweet.id)).retweeted_tweet
|
||||
tweet.is_retweet = False
|
||||
elif tweet.retweeted_tweet.author is None:
|
||||
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
|
||||
tweet.retweeted_tweet = self.app.tweet_detail(tweet.retweeted_tweet.id)
|
||||
tweet = self.fix_tweet(self, tweet)
|
||||
|
||||
if tweet.is_quoted:
|
||||
if tweet.quoted_tweet is None: # quoted tweet is deleted
|
||||
# print(f'{tweet.author.username}/{tweet.id} is missing the QRT! Recovering...')
|
||||
# tweet.quoted_tweet = self.app.tweet_detail(str(tweet.id)).quoted_tweet
|
||||
tweet.is_quoted = False
|
||||
elif tweet.quoted_tweet.author is None:
|
||||
print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
|
||||
tweet.quoted_tweet = self.app.tweet_detail(tweet.quoted_tweet.id)
|
||||
|
||||
# fix reply if it exists
|
||||
# if tweet.is_reply and tweet.replied_to is None:
|
||||
# tweet.replied_to = self.app.tweet_detail(tweet.original_tweet['in_reply_to_status_id_str'])
|
||||
tweets.append(tweet)
|
||||
|
||||
if not reached_backdate and int(tweet.author.id) == uid and tweet.date <= since:
|
||||
@@ -118,18 +157,7 @@ class Scraper:
|
||||
cur = search.cursor
|
||||
except UnknownError:
|
||||
print("UnknownError occurred, probably rate-limited")
|
||||
if uid in talent_lists.privated_accounts:
|
||||
print("sticking pvt-accessible account. sleeping for 2 minutes...")
|
||||
sleep(120)
|
||||
print()
|
||||
l = self.try_login(0)
|
||||
else:
|
||||
l = self.try_login()
|
||||
if not l:
|
||||
print("sleeping for 2 minutes...")
|
||||
sleep(120)
|
||||
print()
|
||||
self.try_login()
|
||||
self.login_wait(uid in talent_lists.privated_accounts)
|
||||
|
||||
tweets.sort(key=lambda t: t.id)
|
||||
return tweets
|
||||
|
||||
+5
-5
@@ -1,4 +1,4 @@
|
||||
import util
|
||||
from util import get_project_dir
|
||||
|
||||
holo_en: dict[int, str] = dict()
|
||||
holo_id: dict[int, str] = dict()
|
||||
@@ -34,13 +34,13 @@ def init():
|
||||
global test_talents
|
||||
|
||||
# holoEN
|
||||
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
|
||||
__create_dict(f'{get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
|
||||
# holoID
|
||||
__create_dict(f'{util.get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
|
||||
__create_dict(f'{get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
|
||||
# nijiEN
|
||||
__create_dict(f'{util.get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
|
||||
__create_dict(f'{get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
|
||||
# nijiexID
|
||||
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
|
||||
__create_dict(f'{get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
|
||||
# TODO: nijiex-KR
|
||||
|
||||
test_talents = holo_en
|
||||
|
||||
+5
-3
@@ -1,5 +1,4 @@
|
||||
from datetime import datetime
|
||||
from zoneinfo import ZoneInfo
|
||||
import platform
|
||||
|
||||
import pytz
|
||||
@@ -85,7 +84,7 @@ class TalentTweet:
|
||||
def create_from_tweety(tweety: Tweet):
|
||||
if tweety.is_retweet:
|
||||
rtm = [int(x.id) for x in tweety.retweeted_tweet.user_mentions]
|
||||
elif tweety.is_quoted:
|
||||
elif tweety.quoted_tweet:
|
||||
rtm = [int(x.id) for x in tweety.quoted_tweet.user_mentions]
|
||||
else:
|
||||
rtm = list()
|
||||
@@ -133,6 +132,9 @@ class TalentTweet:
|
||||
try: self.all_parties.remove(self.author_id)
|
||||
except: pass
|
||||
|
||||
if not self.is_cross_company():
|
||||
print(f'WARNING: {self.tweet_id} is not cross-company!')
|
||||
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
@@ -169,7 +171,7 @@ class TalentTweet:
|
||||
|
||||
def get_datetime_str(self):
|
||||
unpad = '#' if platform.system() == 'Windows' else '-'
|
||||
return self.date_time.strftime(f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)')
|
||||
return self.date_time.strftime(f'%{unpad}I:%M%p (%Z) · %b %{unpad}d, %Y')
|
||||
|
||||
def announce_text(self):
|
||||
# templates
|
||||
|
||||
+12
-11
@@ -20,9 +20,9 @@ class TalentTweetQueue:
|
||||
self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt'
|
||||
self.is_good = True
|
||||
self.__sorted = False
|
||||
self.finished_user_dates = dict()
|
||||
self.ttweets_dict = dict()
|
||||
self.finished_ttweets = list()
|
||||
self.finished_user_dates: dict[int, str] = dict()
|
||||
self.ttweets_dict: dict[int, tt.TalentTweet] = dict()
|
||||
self.finished_ttweets: list[int] = list()
|
||||
|
||||
## file check, backup copy
|
||||
if os.path.exists(self.queue_backup_path):
|
||||
@@ -62,6 +62,14 @@ class TalentTweetQueue:
|
||||
except:
|
||||
traceback.print_exc()
|
||||
pass
|
||||
# unfinished ttweet
|
||||
if os.path.exists(self.current_ttweet_path):
|
||||
with open(self.current_ttweet_path, 'r') as f:
|
||||
for line in f:
|
||||
if len(line) > 0:
|
||||
ttweet = tt.TalentTweet.deserialize(line)
|
||||
if ttweet.tweet_id in self.ttweets_dict:
|
||||
self.ttweets_dict[ttweet.tweet_id] = ttweet
|
||||
# finished ttweets
|
||||
try:
|
||||
with open(self.finished_ttweets_path, 'r') as f:
|
||||
@@ -82,13 +90,6 @@ class TalentTweetQueue:
|
||||
|
||||
def get_next_ttweet(self):
|
||||
self.is_good = False
|
||||
if os.path.exists(self.current_ttweet_path):
|
||||
with open(self.current_ttweet_path, 'r') as f:
|
||||
ttweet = tt.TalentTweet.deserialize(f.readline())
|
||||
if ttweet.tweet_id in self.ttweets_dict:
|
||||
self.ttweets_dict.pop(ttweet.tweet_id)
|
||||
return ttweet
|
||||
|
||||
self.__sort_ttweets_dict()
|
||||
key = list(self.ttweets_dict.keys())[0]
|
||||
ttweet = self.ttweets_dict.pop(key)
|
||||
@@ -112,7 +113,7 @@ class TalentTweetQueue:
|
||||
|
||||
# overwrite queue.txt
|
||||
def save_file(self):
|
||||
print('saving file...', end='')
|
||||
print('saving queue files...', end='')
|
||||
shutil.copyfile(self.queue_path, self.queue_backup_path)
|
||||
self.__sort_ttweets_dict()
|
||||
with open(self.queue_path, 'w') as f:
|
||||
|
||||
@@ -7,7 +7,6 @@ import tweepy
|
||||
|
||||
import talenttweet as tt
|
||||
import talent_lists as tl
|
||||
import ttweetqueue as ttq
|
||||
import util
|
||||
|
||||
class TwAPI:
|
||||
|
||||
+3
-3
@@ -6,11 +6,9 @@ import traceback
|
||||
from datetime import datetime
|
||||
from dotenv import dotenv_values
|
||||
|
||||
import tweepy
|
||||
import pytz
|
||||
import twint
|
||||
import twapi
|
||||
from tweetcapture import TweetCapture
|
||||
import tweepy
|
||||
|
||||
from recrop import fix_aspect_ratio
|
||||
import talent_lists
|
||||
@@ -52,6 +50,7 @@ def get_key_from_value(d: dict, val):
|
||||
return keys[0]
|
||||
return None
|
||||
|
||||
# FIXME: web_auth_token under rate-limitation will fail to screenshot
|
||||
async def create_ttweet_image(ttweet):
|
||||
tc = TweetCapture()
|
||||
tc.cookies = [{'name': 'auth_token', 'value': dotenv_values()['web_auth_token']}]
|
||||
@@ -100,6 +99,7 @@ def get_username_local(id: int):
|
||||
|
||||
# Retrieve username via API v2 (tweepy)
|
||||
def get_username_online(id, default=None):
|
||||
import twapi
|
||||
try:
|
||||
resp = twapi.TwAPI.instance.client.get_user(id=id)
|
||||
return resp.data.username
|
||||
|
||||
Reference in New Issue
Block a user