fixes, example queue from beginning of time
This commit is contained in:
File diff suppressed because it is too large
Load Diff
+20
-12
@@ -15,6 +15,7 @@ from twapi import TwAPI
|
|||||||
import talenttweet as tt
|
import talenttweet as tt
|
||||||
|
|
||||||
PROGRAM_ARGS = None
|
PROGRAM_ARGS = None
|
||||||
|
safe_to_post_tweets = True
|
||||||
|
|
||||||
def write_user_timestamp(user_id, file, timestamp = None, error = False):
|
def write_user_timestamp(user_id, file, timestamp = None, error = False):
|
||||||
if timestamp is None:
|
if timestamp is None:
|
||||||
@@ -42,15 +43,16 @@ def get_user_tweets(id, since_timestamp=None, limit=None):
|
|||||||
c.Hide_output = True
|
c.Hide_output = True
|
||||||
c.Since = '' if since_timestamp == None else util.timestamp_to_tdate(since_timestamp)
|
c.Since = '' if since_timestamp == None else util.timestamp_to_tdate(since_timestamp)
|
||||||
|
|
||||||
user_str = f'{util.get_username_local(id)}'
|
user_str = f'@{util.get_username_local(id)}'
|
||||||
print(f'Scraping tweets from {user_str} since {"forever ago" if c.Since == "" else c.Since}...')
|
print(f'Scraping tweets from {user_str} since {"forever ago" if c.Since == "" else c.Since}...')
|
||||||
try:
|
try:
|
||||||
twint.run.Search(c)
|
twint.run.Search(c)
|
||||||
except:
|
except:
|
||||||
print(f'Had trouble getting tweets from {user_str}')
|
print(f'Had trouble getting tweets from {user_str}')
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
for twt in tweets:
|
for twt in tweets:
|
||||||
if twt.quote_url != '':
|
if type(twt.quote_url) is str and twt.quote_url != '':
|
||||||
qrt_count += 1
|
qrt_count += 1
|
||||||
|
|
||||||
print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.')
|
print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.')
|
||||||
@@ -89,13 +91,14 @@ async def get_cross_talent_tweets(queue_path):
|
|||||||
finished_user_timestamps = dict()
|
finished_user_timestamps = dict()
|
||||||
ttweets_dict = dict()
|
ttweets_dict = dict()
|
||||||
posted_ttweets = set() # TODO: don't add TTweet to ttweets_dict if its id exists in posted_ttweets
|
posted_ttweets = set() # TODO: don't add TTweet to ttweets_dict if its id exists in posted_ttweets
|
||||||
|
global safe_to_post_tweets
|
||||||
|
|
||||||
# Populate structures with existing data from queue.txt
|
# Populate structures with existing data from queue.txt
|
||||||
try:
|
try:
|
||||||
with open(queue_path, 'r') as f:
|
with open(queue_path, 'r') as f:
|
||||||
finished_user_timestamps = get_finished_user_timestamps(f)
|
finished_user_timestamps = get_finished_user_timestamps(f)
|
||||||
print(finished_user_timestamps)
|
|
||||||
|
|
||||||
|
with open(queue_path, 'r') as f: # reset seek head
|
||||||
# Get existing queued TalentTweets
|
# Get existing queued TalentTweets
|
||||||
for line in f:
|
for line in f:
|
||||||
tokens = line.split()
|
tokens = line.split()
|
||||||
@@ -107,13 +110,12 @@ async def get_cross_talent_tweets(queue_path):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print('queue.txt not found.')
|
print('queue.txt not found.')
|
||||||
|
|
||||||
# Pull tweets from twint
|
# Begin getting tweets from online
|
||||||
with open(queue_path, 'w') as f:
|
with open(queue_path, 'w') as f:
|
||||||
print('Pulling tweets from online!')
|
print('Pulling tweets from online!')
|
||||||
try:
|
try:
|
||||||
for talent_id in talent_lists.talents:
|
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
|
||||||
if talent_id not in finished_user_timestamps or \
|
print(f'[{i+1}/{len(talent_lists.talents)}]{util.get_username(talent_id)}----------------------------')
|
||||||
finished_user_timestamps[talent_id] < datetime.datetime.now().timestamp():
|
|
||||||
try:
|
try:
|
||||||
# tweets = get_user_tweets(talent_id, since_timestamp=1663698621) # shorten test runs
|
# tweets = get_user_tweets(talent_id, since_timestamp=1663698621) # shorten test runs
|
||||||
tweets = get_user_tweets(talent_id, since_timestamp=finished_user_timestamps.get(talent_id, None))
|
tweets = get_user_tweets(talent_id, since_timestamp=finished_user_timestamps.get(talent_id, None))
|
||||||
@@ -124,13 +126,11 @@ async def get_cross_talent_tweets(queue_path):
|
|||||||
ttweets_dict[ttweet.tweet_id] = ttweet
|
ttweets_dict[ttweet.tweet_id] = ttweet
|
||||||
except:
|
except:
|
||||||
print('Error occurred processing tweet data.')
|
print('Error occurred processing tweet data.')
|
||||||
|
safe_to_post_tweets = False
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
write_user_timestamp(user_id=talent_id, file=f, error=True)
|
write_user_timestamp(user_id=talent_id, file=f, error=True)
|
||||||
else:
|
else:
|
||||||
write_user_timestamp(user_id=talent_id, file=f)
|
write_user_timestamp(user_id=talent_id, file=f)
|
||||||
else:
|
|
||||||
print(f'Skipping already completed {util.get_username_local(talent_id)}')
|
|
||||||
write_user_timestamp(user_id=talent_id, file=f, timestamp=finished_user_timestamps[talent_id])
|
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
ttweets_dict = dict(sorted(ttweets_dict.items()))
|
ttweets_dict = dict(sorted(ttweets_dict.items()))
|
||||||
for ttweet in ttweets_dict.values():
|
for ttweet in ttweets_dict.values():
|
||||||
@@ -139,10 +139,11 @@ async def get_cross_talent_tweets(queue_path):
|
|||||||
print('Unhandled error occurred while pulling tweets.')
|
print('Unhandled error occurred while pulling tweets.')
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
print('Saving queue.txt and exiting.')
|
print('Saving queue.txt and exiting.')
|
||||||
exit(1)
|
safe_to_post_tweets = False
|
||||||
|
|
||||||
return ttweets_dict
|
return ttweets_dict
|
||||||
|
|
||||||
|
# Return number of TalentTweets successfully posted
|
||||||
async def process_queue(ttweets_dict: dict) -> int:
|
async def process_queue(ttweets_dict: dict) -> int:
|
||||||
global PROGRAM_ARGS
|
global PROGRAM_ARGS
|
||||||
ttweets_posted = 0
|
ttweets_posted = 0
|
||||||
@@ -177,13 +178,20 @@ async def process_queue(ttweets_dict: dict) -> int:
|
|||||||
|
|
||||||
return ttweets_posted
|
return ttweets_posted
|
||||||
|
|
||||||
|
# return True = no problems
|
||||||
|
# return False = issue occurred where we couldn't post all past tweets properly
|
||||||
async def run(program_args):
|
async def run(program_args):
|
||||||
global PROGRAM_ARGS
|
global PROGRAM_ARGS
|
||||||
|
global safe_to_post_tweets
|
||||||
PROGRAM_ARGS = program_args
|
PROGRAM_ARGS = program_args
|
||||||
queue_path = get_queue_path()
|
queue_path = get_queue_path()
|
||||||
while True:
|
while True:
|
||||||
ttweets_dict = await get_cross_talent_tweets(queue_path)
|
ttweets_dict = await get_cross_talent_tweets(queue_path)
|
||||||
print(f'found {len(ttweets_dict)} cross-company tweets')
|
print(f'found {len(ttweets_dict)} cross-company tweets')
|
||||||
|
if safe_to_post_tweets:
|
||||||
if await process_queue(ttweets_dict) == 0:
|
if await process_queue(ttweets_dict) == 0:
|
||||||
print('Posted no new tweets; we\'re caught up!')
|
print('Posted no new tweets; we\'re caught up!')
|
||||||
break
|
return True
|
||||||
|
else:
|
||||||
|
print('Tweets were not retrieved cleanly.')
|
||||||
|
return False
|
||||||
+2
-1
@@ -50,7 +50,8 @@ async def async_main():
|
|||||||
await listen.run()
|
await listen.run()
|
||||||
case 'c' | 'catchup':
|
case 'c' | 'catchup':
|
||||||
print('RUNNING IN CATCH-UP MODE\n')
|
print('RUNNING IN CATCH-UP MODE\n')
|
||||||
await catchup.run(PROGRAM_ARGS)
|
if await catchup.run(PROGRAM_ARGS):
|
||||||
|
print('CATCH-UP MODE DONE, GOING INTO LISTEN MODE')
|
||||||
await listen.run()
|
await listen.run()
|
||||||
case 'd' | 'delete-all':
|
case 'd' | 'delete-all':
|
||||||
print('WARNING: SELF-DESTRUCT MODE')
|
print('WARNING: SELF-DESTRUCT MODE')
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ def __create_dict(file, _dict):
|
|||||||
words = line.split()
|
words = line.split()
|
||||||
if len(words) == 2 and line[0] != '#':
|
if len(words) == 2 and line[0] != '#':
|
||||||
name, id = line.split()
|
name, id = line.split()
|
||||||
talents[int(id)] = name
|
|
||||||
name = util.get_username_online(id, default=name) # attempt to get updated name
|
name = util.get_username_online(id, default=name) # attempt to get updated name
|
||||||
talents[int(id)] = name
|
talents[int(id)] = name
|
||||||
_dict[int(id)] = name
|
_dict[int(id)] = name
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
# TODO: move queue structures and file handling here
|
||||||
Reference in New Issue
Block a user