fix code, implement twint stuff
This commit is contained in:
+46
-11
@@ -5,18 +5,25 @@
|
|||||||
# We should post, at the fastest, one tweet per minute.
|
# We should post, at the fastest, one tweet per minute.
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
|
import datetime
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import twint
|
import twint
|
||||||
import tweepy
|
|
||||||
|
|
||||||
from util import *
|
from util import *
|
||||||
from talent_lists import *
|
from talent_lists import *
|
||||||
from twapi import TwAPI
|
from twapi import TwAPI
|
||||||
import talenttweet as tt
|
import talenttweet as tt
|
||||||
|
|
||||||
|
def write_user_date(user_id, file, date_str=None, error=False):
    """Write one account-status header line to ``file``.

    The line has the form ``# {user_id} {date}`` followed by a newline. When
    ``error`` is true the date slot holds ``-1`` instead of a date.

    Args:
        user_id: Account ID written as the second token of the line.
        file: Writable text file object; only ``file.write`` is called.
        date_str: Date string to record. When ``None`` (and ``error`` is
            false), the current date formatted by ``util.datetime_to_tdate``
            is used.
        error: When true, ``-1`` is written in place of the date.
    """
    if error:
        # The date is never written on error, so do not compute it.
        date_field = '-1'
    elif date_str is None:
        date_field = util.datetime_to_tdate(datetime.datetime.now())
    else:
        date_field = date_str

    file.write(f'# {user_id} {date_field}\n')
|
||||||
|
|
||||||
def get_queue_file():
    """Return the path of ``queue.txt`` under the project directory."""
    project_dir = util.get_project_dir()
    return f'{project_dir}/queue.txt'
|
||||||
|
|
||||||
@@ -25,7 +32,7 @@ def get_local_queue():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
## Returns the ID of all tweets (up to limit) from a user ID.
|
## Returns the ID of all tweets (up to limit) from a user ID.
|
||||||
def get_user_tweets(id, limit=None):
|
def get_user_tweets(id, since_date='', limit=None):
|
||||||
tweets = list()
|
tweets = list()
|
||||||
c = twint.Config()
|
c = twint.Config()
|
||||||
c.User_id = id
|
c.User_id = id
|
||||||
@@ -33,6 +40,7 @@ def get_user_tweets(id, limit=None):
|
|||||||
c.Store_object = True
|
c.Store_object = True
|
||||||
c.Store_object_tweets_list = tweets
|
c.Store_object_tweets_list = tweets
|
||||||
c.Hide_output = True
|
c.Hide_output = True
|
||||||
|
c.Since = since_date
|
||||||
|
|
||||||
user_str = f'{util.get_username(id)}'
|
user_str = f'{util.get_username(id)}'
|
||||||
print(f'Scraping tweets from {user_str}...')
|
print(f'Scraping tweets from {user_str}...')
|
||||||
@@ -47,23 +55,24 @@ def get_user_tweets(id, limit=None):
|
|||||||
# If queue.txt doesn't exist, creates and populates it.
|
# If queue.txt doesn't exist, creates and populates it.
|
||||||
# Returns a list of sorted and filtered TalentTweets (should
|
# Returns a list of sorted and filtered TalentTweets (should
|
||||||
# be equivalent to queue.txt)
|
# be equivalent to queue.txt)
|
||||||
async def get_cross_talent_tweets(queue_file):
|
async def get_cross_talent_tweets(queue_path):
|
||||||
finished_users = set()
|
finished_user_tdates = dict()
|
||||||
ttweets_dict = dict()
|
ttweets_dict = dict()
|
||||||
|
|
||||||
# Populate structures with existing data from queue.txt
|
# Populate structures with existing data from queue.txt
|
||||||
try:
|
try:
|
||||||
print('Processing existing data in queue.txt...')
|
print('Processing existing data in queue.txt...')
|
||||||
with open(queue_file, 'r') as f:
|
with open(queue_path, 'r') as f:
|
||||||
# Check for finished and incomplete accounts
|
# Check for finished and incomplete accounts
|
||||||
# LINE FORMAT: "# {user_id} {status_num}"
|
# LINE FORMAT: "# {user_id} {tdate}" -- tdate is the date of retrieval (YYYY-MM-DD), or -1 if an error occurred
|
||||||
for line in f:
|
for line in f:
|
||||||
tokens = line.split()
|
tokens = line.split()
|
||||||
if len(tokens) != 3 or tokens[0][0] != '#':
|
if len(tokens) != 3 or tokens[0][0] != '#':
|
||||||
# reached end of accounts list
|
# reached end of accounts list
|
||||||
break
|
break
|
||||||
if tokens[2] == 0:
|
|
||||||
finished_users.add(tokens[1])
|
if tokens[2] != '-1':
|
||||||
|
finished_user_tdates[int(tokens[1])] = tokens[2]
|
||||||
|
|
||||||
# Add existing serialized TalentTweets into ttweets
|
# Add existing serialized TalentTweets into ttweets
|
||||||
for line in f:
|
for line in f:
|
||||||
@@ -75,7 +84,34 @@ async def get_cross_talent_tweets(queue_file):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print('Couldn\'t find queue.txt.')
|
print('Couldn\'t find queue.txt.')
|
||||||
|
|
||||||
# TODO: implement ordered cross-company ttweets dict creation using twint
|
# Pull tweets from twint
|
||||||
|
with open(queue_path, 'w') as f:
|
||||||
|
# for talent_id in talent_lists.talents:
|
||||||
|
for talent_id in talent_lists.test_talents:
|
||||||
|
print('using test_talents')
|
||||||
|
if talent_id not in finished_user_tdates or \
|
||||||
|
finished_user_tdates[talent_id] != util.datetime_to_tdate(datetime.datetime.today()):
|
||||||
|
try:
|
||||||
|
tweets = get_user_tweets(talent_id, since_date=finished_user_tdates.get(talent_id, None))
|
||||||
|
for tweet in tweets:
|
||||||
|
ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet)
|
||||||
|
if ttweet.is_cross_company():
|
||||||
|
ttweets_dict[ttweet.tweet_id] = ttweet
|
||||||
|
except:
|
||||||
|
print('Error occurred processing tweet data. Traceback:')
|
||||||
|
print(traceback.format_exc())
|
||||||
|
write_user_date(user_id=talent_id, file=f, error=True)
|
||||||
|
else:
|
||||||
|
write_user_date(user_id=talent_id, file=f)
|
||||||
|
else:
|
||||||
|
print(f'Skipping already completed {util.get_username(talent_id)}')
|
||||||
|
write_user_date(user_id=talent_id, file=f, date_str=finished_user_tdates[talent_id])
|
||||||
|
f.write('\n')
|
||||||
|
ttweets_dict = dict(sorted(ttweets_dict.items()))
|
||||||
|
for ttweet in ttweets_dict.values():
|
||||||
|
f.write(f'{ttweet.serialize()}\n')
|
||||||
|
|
||||||
|
return ttweets_dict
|
||||||
|
|
||||||
def process_queue(file):
|
def process_queue(file):
|
||||||
print('TODO: implement process_queue')
|
print('TODO: implement process_queue')
|
||||||
@@ -101,5 +137,4 @@ async def run():
|
|||||||
|
|
||||||
queue_path = get_queue_file()
|
queue_path = get_queue_file()
|
||||||
ttweet_dict = await get_cross_talent_tweets(queue_path)
|
ttweet_dict = await get_cross_talent_tweets(queue_path)
|
||||||
for ttweet in ttweet_dict.values():
|
print(f'got {len(ttweet_dict)} tweets')
|
||||||
print(ttweet)
|
|
||||||
@@ -6,6 +6,8 @@ niji_en = dict()
|
|||||||
niji_exid = dict()
|
niji_exid = dict()
|
||||||
talents = dict()
|
talents = dict()
|
||||||
|
|
||||||
|
test_talents = dict()
|
||||||
|
|
||||||
def __create_dict(file, _dict):
|
def __create_dict(file, _dict):
|
||||||
print(f'Initializing talents\' account list from {file}...')
|
print(f'Initializing talents\' account list from {file}...')
|
||||||
global talents
|
global talents
|
||||||
@@ -23,6 +25,7 @@ def init():
|
|||||||
global holo_id
|
global holo_id
|
||||||
global niji_en
|
global niji_en
|
||||||
global niji_exid
|
global niji_exid
|
||||||
|
global test_talents
|
||||||
|
|
||||||
# holoEN
|
# holoEN
|
||||||
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en)
|
__create_dict(f'{util.get_project_dir()}/lists/holoen.txt', holo_en)
|
||||||
@@ -33,3 +36,7 @@ def init():
|
|||||||
# nijiexID
|
# nijiexID
|
||||||
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid)
|
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid)
|
||||||
|
|
||||||
|
test_talents = {
|
||||||
|
1390637197167038464: 'PomuRainpuff'
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
+19
-13
@@ -43,24 +43,30 @@ class TalentTweet:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
async def create_from_twint_tweet(tweet):
    """Build a TalentTweet from a tweet object scraped by twint.

    Reads ``tweet.id``, ``tweet.conversation_id``, ``tweet.mentions``,
    ``tweet.reply_to``, ``tweet.user_id`` and ``tweet.datetime`` (a string
    parsed with the format ``'%Y-%m-%d %H:%M:%S %Z'``).

    The quote-retweet slot of the resulting ``mrq`` tuple is always ``None``
    here, because the quote-retweet branch below is currently disabled.

    Returns:
        A TalentTweet whose ``mrq`` is ``(mentions, reply_to, None)``.
        ``mentions`` is a set of user IDs that never contains ``reply_to``.
    """
    # qrt
    # -- COMMENTED OUT FOR TESTING PURPOSES --
    # TODO: uncomment
    # if tweet.quote_url != '':
    #     api_ttweet = await TalentTweet.create_from_id(tweet.id)
    #     return api_ttweet

    # MRQ (Q is guaranteed to be None)
    reply_to = None

    # reply_to/mentions
    # A tweet whose ID differs from its conversation ID is treated as a reply.
    is_reply = tweet.id != int(tweet.conversation_id)
    mentions = {x['id'] for x in tweet.mentions}
    if is_reply and len(tweet.reply_to) > 0:  # FIXME: QRT = is_reply and len(tweet.reply_to) == 0?
        # The first reply_to entry is the user replied to; the remaining
        # entries are counted as mentions.
        reply_to = tweet.reply_to[0]['id']
        mentions.update(x['id'] for x in tweet.reply_to[1:])
        # discard() is a no-op when reply_to is absent, so no blanket
        # exception handler is needed.
        mentions.discard(reply_to)

    date_time = datetime.strptime(tweet.datetime, '%Y-%m-%d %H:%M:%S %Z')
    return TalentTweet(
        tweet_id=tweet.id,
        author_id=tweet.user_id,
        date_time=date_time,
        mrq=(mentions, reply_to, None),
    )
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -79,9 +85,9 @@ class TalentTweet:
|
|||||||
def __init__(self, tweet_id: int, author_id: int,date_time: datetime, mrq: tuple):
|
def __init__(self, tweet_id: int, author_id: int,date_time: datetime, mrq: tuple):
|
||||||
self.tweet_id, self.author_id = tweet_id, author_id
|
self.tweet_id, self.author_id = tweet_id, author_id
|
||||||
self.date_time = date_time
|
self.date_time = date_time
|
||||||
self.mentions = mrq[0]
|
self.mentions = tuple(int(x) for x in mrq[0])
|
||||||
self.reply_to = mrq[1]
|
self.reply_to = int(mrq[1]) if mrq[1] is not None else None
|
||||||
self.quote_retweeted = mrq[2]
|
self.quote_retweeted = int(mrq[2]) if mrq[2] is not None else None
|
||||||
|
|
||||||
# all users involved, except for the author
|
# all users involved, except for the author
|
||||||
self.all_parties = {self.reply_to, self.quote_retweeted}
|
self.all_parties = {self.reply_to, self.quote_retweeted}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
## Shared utility functions.
|
## Shared utility functions.
|
||||||
|
|
||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import twint
|
import twint
|
||||||
@@ -16,6 +17,12 @@ def get_project_dir():
|
|||||||
def clamp(n, smallest, largest):
    """Limit ``n`` to the range bounded by ``smallest`` and ``largest``.

    The upper bound is applied first and the lower bound second, so
    ``smallest`` wins if the bounds are inverted.
    """
    capped = min(n, largest)
    return max(smallest, capped)
|
||||||
|
|
||||||
|
def datetime_to_tdate(date_time: datetime.datetime):
    """Format ``date_time`` as a ``YYYY-MM-DD`` date string."""
    tdate_format = "%Y-%m-%d"
    return date_time.strftime(tdate_format)
|
||||||
|
|
||||||
|
def tdate_to_datetime(tdate: str):
    """Parse a ``YYYY-MM-DD`` date string into a ``datetime.datetime``.

    The resulting datetime has its time fields set to midnight.

    Raises:
        ValueError: If ``tdate`` does not match the ``%Y-%m-%d`` format.
    """
    # strptime takes the string to parse first, then the format.
    return datetime.datetime.strptime(tdate, "%Y-%m-%d")
|
||||||
|
|
||||||
async def create_ttweet_image(ttweet):
|
async def create_ttweet_image(ttweet):
|
||||||
tc = TweetCapture()
|
tc = TweetCapture()
|
||||||
filename = 'img.png'
|
filename = 'img.png'
|
||||||
|
|||||||
Reference in New Issue
Block a user