fixes, example queue from beginning of time

This commit is contained in:
msk
2022-09-28 02:20:06 -07:00
parent 5665a22325
commit 2e78cb1ed7
5 changed files with 1304 additions and 31 deletions
File diff suppressed because it is too large Load Diff
+36 -28
View File
@@ -15,6 +15,7 @@ from twapi import TwAPI
import talenttweet as tt import talenttweet as tt
PROGRAM_ARGS = None PROGRAM_ARGS = None
safe_to_post_tweets = True
def write_user_timestamp(user_id, file, timestamp = None, error = False): def write_user_timestamp(user_id, file, timestamp = None, error = False):
if timestamp is None: if timestamp is None:
@@ -42,15 +43,16 @@ def get_user_tweets(id, since_timestamp=None, limit=None):
c.Hide_output = True c.Hide_output = True
c.Since = '' if since_timestamp == None else util.timestamp_to_tdate(since_timestamp) c.Since = '' if since_timestamp == None else util.timestamp_to_tdate(since_timestamp)
user_str = f'{util.get_username_local(id)}' user_str = f'@{util.get_username_local(id)}'
print(f'Scraping tweets from {user_str} since {"forever ago" if c.Since == "" else c.Since}...') print(f'Scraping tweets from {user_str} since {"forever ago" if c.Since == "" else c.Since}...')
try: try:
twint.run.Search(c) twint.run.Search(c)
except: except:
print(f'Had trouble getting tweets from {user_str}') print(f'Had trouble getting tweets from {user_str}')
traceback.print_exc()
for twt in tweets: for twt in tweets:
if twt.quote_url != '': if type(twt.quote_url) is str and twt.quote_url != '':
qrt_count += 1 qrt_count += 1
print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.') print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.')
@@ -89,13 +91,14 @@ async def get_cross_talent_tweets(queue_path):
finished_user_timestamps = dict() finished_user_timestamps = dict()
ttweets_dict = dict() ttweets_dict = dict()
posted_ttweets = set() # TODO: don't add TTweet to ttweets_dict if its id exists in posted_ttweets posted_ttweets = set() # TODO: don't add TTweet to ttweets_dict if its id exists in posted_ttweets
global safe_to_post_tweets
# Populate structures with existing data from queue.txt # Populate structures with existing data from queue.txt
try: try:
with open(queue_path, 'r') as f: with open(queue_path, 'r') as f:
finished_user_timestamps = get_finished_user_timestamps(f) finished_user_timestamps = get_finished_user_timestamps(f)
print(finished_user_timestamps)
with open(queue_path, 'r') as f: # reset seek head
# Get existing queued TalentTweets # Get existing queued TalentTweets
for line in f: for line in f:
tokens = line.split() tokens = line.split()
@@ -107,30 +110,27 @@ async def get_cross_talent_tweets(queue_path):
except FileNotFoundError: except FileNotFoundError:
print('queue.txt not found.') print('queue.txt not found.')
# Pull tweets from twint # Begin getting tweets from online
with open(queue_path, 'w') as f: with open(queue_path, 'w') as f:
print('Pulling tweets from online!') print('Pulling tweets from online!')
try: try:
for talent_id in talent_lists.talents: for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
if talent_id not in finished_user_timestamps or \ print(f'[{i+1}/{len(talent_lists.talents)}]{util.get_username(talent_id)}----------------------------')
finished_user_timestamps[talent_id] < datetime.datetime.now().timestamp(): try:
try: # tweets = get_user_tweets(talent_id, since_timestamp=1663698621) # shorten test runs
# tweets = get_user_tweets(talent_id, since_timestamp=1663698621) # shorten test runs tweets = get_user_tweets(talent_id, since_timestamp=finished_user_timestamps.get(talent_id, None))
tweets = get_user_tweets(talent_id, since_timestamp=finished_user_timestamps.get(talent_id, None)) for tweet in tweets:
for tweet in tweets: if tweet.id not in ttweets_dict:
if tweet.id not in ttweets_dict: ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet)
ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet) if ttweet.is_cross_company():
if ttweet.is_cross_company(): ttweets_dict[ttweet.tweet_id] = ttweet
ttweets_dict[ttweet.tweet_id] = ttweet except:
except: print('Error occurred processing tweet data.')
print('Error occurred processing tweet data.') safe_to_post_tweets = False
print(traceback.format_exc()) print(traceback.format_exc())
write_user_timestamp(user_id=talent_id, file=f, error=True) write_user_timestamp(user_id=talent_id, file=f, error=True)
else:
write_user_timestamp(user_id=talent_id, file=f)
else: else:
print(f'Skipping already completed {util.get_username_local(talent_id)}') write_user_timestamp(user_id=talent_id, file=f)
write_user_timestamp(user_id=talent_id, file=f, timestamp=finished_user_timestamps[talent_id])
f.write('\n') f.write('\n')
ttweets_dict = dict(sorted(ttweets_dict.items())) ttweets_dict = dict(sorted(ttweets_dict.items()))
for ttweet in ttweets_dict.values(): for ttweet in ttweets_dict.values():
@@ -139,10 +139,11 @@ async def get_cross_talent_tweets(queue_path):
print('Unhandled error occurred while pulling tweets.') print('Unhandled error occurred while pulling tweets.')
traceback.print_exc() traceback.print_exc()
print('Saving queue.txt and exiting.') print('Saving queue.txt and exiting.')
exit(1) safe_to_post_tweets = False
return ttweets_dict return ttweets_dict
# Return number of TalentTweets successfully posted
async def process_queue(ttweets_dict: dict) -> int: async def process_queue(ttweets_dict: dict) -> int:
global PROGRAM_ARGS global PROGRAM_ARGS
ttweets_posted = 0 ttweets_posted = 0
@@ -177,13 +178,20 @@ async def process_queue(ttweets_dict: dict) -> int:
return ttweets_posted return ttweets_posted
# return True = no problems
# return False = issue occurred where we couldn't post all past tweets properly
async def run(program_args): async def run(program_args):
global PROGRAM_ARGS global PROGRAM_ARGS
global safe_to_post_tweets
PROGRAM_ARGS = program_args PROGRAM_ARGS = program_args
queue_path = get_queue_path() queue_path = get_queue_path()
while True: while True:
ttweets_dict = await get_cross_talent_tweets(queue_path) ttweets_dict = await get_cross_talent_tweets(queue_path)
print(f'found {len(ttweets_dict)} cross-company tweets') print(f'found {len(ttweets_dict)} cross-company tweets')
if await process_queue(ttweets_dict) == 0: if safe_to_post_tweets:
print('Posted no new tweets; we\'re caught up!') if await process_queue(ttweets_dict) == 0:
break print('Posted no new tweets; we\'re caught up!')
return True
else:
print('Tweets were not retrieved cleanly.')
return False
+3 -2
View File
@@ -50,8 +50,9 @@ async def async_main():
await listen.run() await listen.run()
case 'c' | 'catchup': case 'c' | 'catchup':
print('RUNNING IN CATCH-UP MODE\n') print('RUNNING IN CATCH-UP MODE\n')
await catchup.run(PROGRAM_ARGS) if await catchup.run(PROGRAM_ARGS):
await listen.run() print('CATCH-UP MODE DONE, GOING INTO LISTEN MODE')
await listen.run()
case 'd' | 'delete-all': case 'd' | 'delete-all':
print('WARNING: SELF-DESTRUCT MODE') print('WARNING: SELF-DESTRUCT MODE')
await self_destruct() await self_destruct()
-1
View File
@@ -16,7 +16,6 @@ def __create_dict(file, _dict):
words = line.split() words = line.split()
if len(words) == 2 and line[0] != '#': if len(words) == 2 and line[0] != '#':
name, id = line.split() name, id = line.split()
talents[int(id)] = name
name = util.get_username_online(id, default=name) # attempt to get updated name name = util.get_username_online(id, default=name) # attempt to get updated name
talents[int(id)] = name talents[int(id)] = name
_dict[int(id)] = name _dict[int(id)] = name
+1
View File
@@ -0,0 +1 @@
# TODO: move queue structures and file handling here