fix code, implement twint stuff

This commit is contained in:
muskit
2022-09-26 14:44:46 -07:00
committed by msk
parent a8e30809e7
commit c06e712e06
4 changed files with 79 additions and 24 deletions
+46 -11
View File
@@ -5,18 +5,25 @@
# We should post, at the fastest, one tweet per minute. # We should post, at the fastest, one tweet per minute.
import traceback import traceback
import datetime
import sys import sys
import os import os
import asyncio import asyncio
import twint import twint
import tweepy
from util import * from util import *
from talent_lists import * from talent_lists import *
from twapi import TwAPI from twapi import TwAPI
import talenttweet as tt import talenttweet as tt
def write_user_date(user_id, file, date_str=None, error=False):
    """Write one per-account status line to an open queue file.

    The line has the form ``# {user_id} {date}``.

    Args:
        user_id: Account ID to record.
        file: Writable text file object; exactly one line is written to it.
        date_str: Date string to record. When ``None``, today's date as
            formatted by ``util.datetime_to_tdate`` is used.
        error: When true, the marker ``-1`` is written in place of the date,
            regardless of ``date_str``.
    """
    if error:
        # Failed accounts never need a date, so skip computing one.
        stamp = '-1'
    elif date_str is None:
        stamp = util.datetime_to_tdate(datetime.datetime.now())
    else:
        stamp = date_str
    file.write(f'# {user_id} {stamp}\n')
def get_queue_file():
    """Return the path of ``queue.txt`` inside the project directory."""
    project_dir = util.get_project_dir()
    return f'{project_dir}/queue.txt'
@@ -25,7 +32,7 @@ def get_local_queue():
pass pass
## Returns the ID of all tweets (up to limit) from a user ID. ## Returns the ID of all tweets (up to limit) from a user ID.
def get_user_tweets(id, limit=None): def get_user_tweets(id, since_date='', limit=None):
tweets = list() tweets = list()
c = twint.Config() c = twint.Config()
c.User_id = id c.User_id = id
@@ -33,6 +40,7 @@ def get_user_tweets(id, limit=None):
c.Store_object = True c.Store_object = True
c.Store_object_tweets_list = tweets c.Store_object_tweets_list = tweets
c.Hide_output = True c.Hide_output = True
c.Since = since_date
user_str = f'{util.get_username(id)}' user_str = f'{util.get_username(id)}'
print(f'Scraping tweets from {user_str}...') print(f'Scraping tweets from {user_str}...')
@@ -47,23 +55,24 @@ def get_user_tweets(id, limit=None):
# If queue.txt doesn't exist, creates and populates it. # If queue.txt doesn't exist, creates and populates it.
# Returns a list of sorted and filtered TalentTweets (should # Returns a list of sorted and filtered TalentTweets (should
# be equivalent to queue.txt) # be equivalent to queue.txt)
async def get_cross_talent_tweets(queue_file): async def get_cross_talent_tweets(queue_path):
finished_users = set() finished_user_tdates = dict()
ttweets_dict = dict() ttweets_dict = dict()
# Populate structures with existing data from queue.txt # Populate structures with existing data from queue.txt
try: try:
print('Processing existing data in queue.txt...') print('Processing existing data in queue.txt...')
with open(queue_file, 'r') as f: with open(queue_path, 'r') as f:
# Check for finished and incomplete accounts # Check for finished and incomplete accounts
# LINE FORMAT: "# {user_id} {status_num}" # LINE FORMAT: "# {user_id} {status_num}" (TODO: use date of retrieval YYYY-MM-DD)
for line in f: for line in f:
tokens = line.split() tokens = line.split()
if len(tokens) != 3 or tokens[0][0] != '#': if len(tokens) != 3 or tokens[0][0] != '#':
# reached end of accounts list # reached end of accounts list
break break
if tokens[2] == 0:
finished_users.add(tokens[1]) if tokens[2] != '-1':
finished_user_tdates[int(tokens[1])] = tokens[2]
# Add existing serialized TalentTweets into ttweets # Add existing serialized TalentTweets into ttweets
for line in f: for line in f:
@@ -75,7 +84,34 @@ async def get_cross_talent_tweets(queue_file):
except FileNotFoundError: except FileNotFoundError:
print('Couldn\'t find queue.txt.') print('Couldn\'t find queue.txt.')
# TODO: implement ordered cross-company ttweets dict creation using twint # Pull tweets from twint
with open(queue_path, 'w') as f:
# for talent_id in talent_lists.talents:
for talent_id in talent_lists.test_talents:
print('using test_talents')
if talent_id not in finished_user_tdates or \
finished_user_tdates[talent_id] != util.datetime_to_tdate(datetime.datetime.today()):
try:
tweets = get_user_tweets(talent_id, since_date=finished_user_tdates.get(talent_id, None))
for tweet in tweets:
ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet)
if ttweet.is_cross_company():
ttweets_dict[ttweet.tweet_id] = ttweet
except:
print('Error occurred processing tweet data. Traceback:')
print(traceback.format_exc())
write_user_date(user_id=talent_id, file=f, error=True)
else:
write_user_date(user_id=talent_id, file=f)
else:
print(f'Skipping already completed {util.get_username(talent_id)}')
write_user_date(user_id=talent_id, file=f, date_str=finished_user_tdates[talent_id])
f.write('\n')
ttweets_dict = dict(sorted(ttweets_dict.items()))
for ttweet in ttweets_dict.values():
f.write(f'{ttweet.serialize()}\n')
return ttweets_dict
def process_queue(file): def process_queue(file):
print('TODO: implement process_queue') print('TODO: implement process_queue')
@@ -101,5 +137,4 @@ async def run():
queue_path = get_queue_file() queue_path = get_queue_file()
ttweet_dict = await get_cross_talent_tweets(queue_path) ttweet_dict = await get_cross_talent_tweets(queue_path)
for ttweet in ttweet_dict.values(): print(f'got {len(ttweet_dict)} tweets')
print(ttweet)
+7
View File
@@ -6,6 +6,8 @@ niji_en = dict()
niji_exid = dict() niji_exid = dict()
talents = dict() talents = dict()
test_talents = dict()
def __create_dict(file, _dict): def __create_dict(file, _dict):
print(f'Initializing talents\' account list from {file}...') print(f'Initializing talents\' account list from {file}...')
global talents global talents
@@ -23,6 +25,7 @@ def init():
global holo_id global holo_id
global niji_en global niji_en
global niji_exid global niji_exid
global test_talents
# holoEN # holoEN
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en) __create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en)
@@ -33,3 +36,7 @@ def init():
# nijiexID # nijiexID
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid) __create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid)
test_talents = {
1390637197167038464: 'PomuRainpuff'
}
+19 -13
View File
@@ -43,24 +43,30 @@ class TalentTweet:
) )
@staticmethod
async def create_from_twint_tweet(tweet):
    """Build a TalentTweet from a scraped twint tweet object.

    Reads ``id``, ``conversation_id``, ``mentions``, ``reply_to``,
    ``user_id`` and ``datetime`` from ``tweet``. Entries of ``mentions`` and
    ``reply_to`` are indexed by ``'id'``.

    Returns:
        A TalentTweet whose ``mrq`` tuple is ``(mentions, reply_to, None)``;
        the quote-retweet slot is always ``None`` on this path.
    """
    # qrt
    # -- COMMENTED OUT FOR TESTING PURPOSES --
    # TODO: uncomment
    # if tweet.quote_url != '':
    #     api_ttweet = await TalentTweet.create_from_id(tweet.id)
    #     return api_ttweet

    # MRQ (Q is guaranteed to be None)
    reply_to = None
    mentions = {user['id'] for user in tweet.mentions}

    # reply_to/mentions
    # A tweet whose ID differs from its conversation ID is treated as a reply.
    is_reply = tweet.id != int(tweet.conversation_id)
    # FIXME: QRT = is_reply and len(tweet.reply_to) == 0?
    if is_reply and tweet.reply_to:
        # First entry is the account replied to; the rest count as mentions.
        reply_to = tweet.reply_to[0]['id']
        mentions.update(user['id'] for user in tweet.reply_to[1:])
        # The replied-to account must not also be listed as a mention.
        mentions.discard(reply_to)

    # NOTE(review): %Z only parses a limited set of zone names -- confirm
    # it matches what twint puts in tweet.datetime.
    date_time = datetime.strptime(tweet.datetime, '%Y-%m-%d %H:%M:%S %Z')
    return TalentTweet(tweet_id=tweet.id, author_id=tweet.user_id, date_time=date_time, mrq=(mentions, reply_to, None))
@staticmethod @staticmethod
@@ -79,9 +85,9 @@ class TalentTweet:
def __init__(self, tweet_id: int, author_id: int,date_time: datetime, mrq: tuple): def __init__(self, tweet_id: int, author_id: int,date_time: datetime, mrq: tuple):
self.tweet_id, self.author_id = tweet_id, author_id self.tweet_id, self.author_id = tweet_id, author_id
self.date_time = date_time self.date_time = date_time
self.mentions = mrq[0] self.mentions = tuple(int(x) for x in mrq[0])
self.reply_to = mrq[1] self.reply_to = int(mrq[1]) if mrq[1] is not None else None
self.quote_retweeted = mrq[2] self.quote_retweeted = int(mrq[2]) if mrq[2] is not None else None
# all users involved, except for the author # all users involved, except for the author
self.all_parties = {self.reply_to, self.quote_retweeted} self.all_parties = {self.reply_to, self.quote_retweeted}
+7
View File
@@ -1,5 +1,6 @@
## Shared utility functions. ## Shared utility functions.
import datetime
import os import os
import twint import twint
@@ -16,6 +17,12 @@ def get_project_dir():
def clamp(n, smallest, largest): def clamp(n, smallest, largest):
return max(smallest, min(n, largest)) return max(smallest, min(n, largest))
def datetime_to_tdate(date_time: datetime.datetime):
    """Format ``date_time`` as a ``YYYY-MM-DD`` date string."""
    tdate_format = "%Y-%m-%d"
    return date_time.strftime(tdate_format)
def tdate_to_datetime(tdate: str):
    """Parse a ``YYYY-MM-DD`` date string into a ``datetime.datetime``.

    Args:
        tdate: Date string in ``%Y-%m-%d`` form.

    Returns:
        The parsed ``datetime.datetime`` (time fields are zero).

    Raises:
        ValueError: If ``tdate`` does not match the ``%Y-%m-%d`` format.
    """
    # strptime takes the string to parse first, then the format.
    return datetime.datetime.strptime(tdate, "%Y-%m-%d")
async def create_ttweet_image(ttweet): async def create_ttweet_image(ttweet):
tc = TweetCapture() tc = TweetCapture()
filename = 'img.png' filename = 'img.png'