added twint (scraper), restructuring
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
tweepy
|
||||
tweet-capture
|
||||
git+https://github.com/muskit/twint_2022_fix.git
|
||||
+18
-30
@@ -3,7 +3,8 @@ from math import inf
|
||||
from urllib import response
|
||||
import tweepy
|
||||
|
||||
import secrets
|
||||
import api_secrets
|
||||
import talenttweet as tt
|
||||
import util
|
||||
|
||||
class TwAPI:
|
||||
@@ -12,14 +13,6 @@ class TwAPI:
|
||||
TWEET_FIELDS = ['created_at', 'in_reply_to_user_id']
|
||||
TWEET_EXPANSIONS = ['entities.mentions.username', 'referenced_tweets.id.author_id']
|
||||
|
||||
def __init__(self):
|
||||
TwAPI.instance = self
|
||||
self.client = tweepy.Client(
|
||||
bearer_token=secrets.bearer_token(),
|
||||
consumer_key=secrets.api_key(), consumer_secret=secrets.api_secret(),
|
||||
access_token=secrets.access_token(), access_token_secret=secrets.access_secret()
|
||||
)
|
||||
|
||||
# Returns a set of involved parties for a single tweet.
|
||||
#
|
||||
# Tweet must have been queried with these parameters:
|
||||
@@ -48,22 +41,17 @@ class TwAPI:
|
||||
|
||||
return involved_parties
|
||||
|
||||
# Returns a tweet and mention-set pair, given a tweet ID.
|
||||
def get_tweet_mentions(self, id):
|
||||
resp = self.client.get_tweet(id,
|
||||
media_fields=TwAPI.TWEET_MEDIA_FIELDS,
|
||||
tweet_fields=TwAPI.TWEET_FIELDS,
|
||||
expansions=TwAPI.TWEET_EXPANSIONS)
|
||||
def __init__(self):
|
||||
TwAPI.instance = self
|
||||
self.client = tweepy.Client(
|
||||
bearer_token=api_secrets.bearer_token(),
|
||||
consumer_key=api_secrets.api_key(), consumer_secret=api_secrets.api_secret(),
|
||||
access_token=api_secrets.access_token(), access_token_secret=api_secrets.access_secret()
|
||||
)
|
||||
|
||||
tweet = resp.data
|
||||
mentions = TwAPI.get_involved_parties(tweet, resp)
|
||||
return (tweet, mentions)
|
||||
|
||||
# Returns a list (tweet, {mentions}) from a user.
|
||||
# mentions- a set comprised of any other parties involved
|
||||
# in this tweet (reply, mention, qrt)
|
||||
# Returns a list of TalentTweets from a user.
|
||||
def get_users_all_tweets_mentions(self, id: int, count=inf):
|
||||
pairs = list()
|
||||
ttweets = list()
|
||||
|
||||
retrieve_size = util.clamp(count, 5, 100)
|
||||
next_page_token = None
|
||||
@@ -79,7 +67,7 @@ class TwAPI:
|
||||
|
||||
for tweet in resp.data:
|
||||
mentions = TwAPI.get_involved_parties(tweet, resp)
|
||||
pairs.append((tweet, mentions))
|
||||
ttweets.append(tt.TalentTweet(tweet=tweet, other_parties=mentions))
|
||||
|
||||
# update counters and pagination token
|
||||
tweets_retrieved += resp.meta['result_count']
|
||||
@@ -92,15 +80,15 @@ class TwAPI:
|
||||
break # reached end of user's tweets
|
||||
|
||||
print(f'Retrieved {tweets_retrieved} tweets using {tokens_retrieved} tokens.')
|
||||
return pairs
|
||||
return ttweets
|
||||
|
||||
# returns a filtered list (tweet, [mentions]) from a user
|
||||
# Returns a list of cross-company TalentTweets from a user.
|
||||
def get_users_cross_tweets_mentions(self, id):
|
||||
ret = list()
|
||||
pairs = self.get_users_all_tweets_mentions(id)
|
||||
for pair in pairs:
|
||||
if util.is_cross_company(pair):
|
||||
ret.append(pair)
|
||||
ttweets = self.get_users_all_tweets_mentions(id)
|
||||
for ttweet in ttweets:
|
||||
if ttweet.is_cross_company():
|
||||
ret.append(ttweet)
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
import os
|
||||
import configparser
|
||||
|
||||
from util import *
|
||||
import util
|
||||
|
||||
# returns dictionary of the Credentials section.
|
||||
# [NOT TO BE USED OUTSIDE OF THIS FILE.]
|
||||
def __get_ini_credentials():
    """Return the ``Credentials`` section of ``secrets.ini``, or ``None``.

    The file is looked up in the directory returned by
    ``util.get_project_dir()``. ``None`` is returned when the file could not
    be read or when it has no ``Credentials`` section.

    Not to be used outside of this file.
    """
    parser = configparser.RawConfigParser()
    ini_path = os.path.join(util.get_project_dir(), 'secrets.ini')
    # read() returns the list of files it parsed successfully; an empty list
    # therefore means secrets.ini was not found or could not be opened.
    if parser.read(ini_path) and parser.has_section('Credentials'):
        return parser['Credentials']
    return None
|
||||
|
||||
+21
-3
@@ -6,16 +6,34 @@
|
||||
|
||||
import os
|
||||
|
||||
import twint
|
||||
|
||||
from util import *
|
||||
from talent_lists import *
|
||||
from api import TwAPI
|
||||
import talenttweet as tt
|
||||
|
||||
## Returns list of tweets present in queue.txt
|
||||
def get_local_queue():
    """Placeholder for loading the list of queued tweets from ``queue.txt``.

    Not implemented yet: nothing is read and ``None`` is returned.
    """
    # TODO: open os.path.join(get_project_dir(), 'queue.txt') and parse it.
    return None
|
||||
|
||||
## Returns the ID of all tweets (up to limit) from a user ID.
|
||||
def get_user_tweet_ids(id, limit=None):
    """Run a twint search for a user's tweets and return the tweet IDs.

    Args:
        id: Twitter user ID, assigned to ``twint.Config.User_id``.
        limit: assigned unchanged to ``twint.Config.Limit``.

    Returns:
        A list with the ``id`` attribute of every tweet object that twint
        stored during the search.
    """
    collected = []

    config = twint.Config()
    config.User_id = id
    config.Limit = limit
    # Ask twint to store the tweet objects it finds in our own list.
    config.Store_object = True
    config.Store_object_tweets_list = collected

    twint.run.Search(config)
    return [tweet.id for tweet in collected]
|
||||
|
||||
def run():
    """Catch-up mode entry point.

    Currently a hard-coded demonstration: it requests tweet IDs for one user
    (with ``limit=20``) through ``get_user_tweet_ids``, builds a
    ``TalentTweet`` from each ID and prints it.
    """
    # The local queue is loaded but not consumed yet (get_local_queue is a stub).
    queue = get_local_queue()

    tweet_ids = get_user_tweet_ids(1390620618001838086, limit=20)
    for tweet_id in tweet_ids:
        print(tt.TalentTweet(tweet_id))
|
||||
+8
-9
@@ -3,12 +3,10 @@ import argparse
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
import talent_lists
|
||||
import secrets
|
||||
import api_secrets
|
||||
import catchup
|
||||
import listen
|
||||
|
||||
from api import TwAPI
|
||||
from util import is_cross_company, print_tweet
|
||||
|
||||
MODES_HELP_STR = '''mode to run the bot at:
|
||||
l,listen: listen for new tweets from all accounts; will not terminate unless error occurs
|
||||
@@ -21,6 +19,10 @@ def init_argparse():
|
||||
p.add_argument('--show-tokens', action='store_true', help='[DO NOT USE IN PUBLIC SETTING] print stored tokens from secrets.ini')
|
||||
return p
|
||||
|
||||
# TODO: implement command line mode for manually controlling the bot
|
||||
def command_line():
    """Placeholder for a mode that lets the bot be controlled manually.

    Not implemented yet: does nothing and returns ``None``.
    """
    return None
|
||||
|
||||
def main():
|
||||
parser = init_argparse()
|
||||
if len(sys.argv) < 2:
|
||||
@@ -30,7 +32,7 @@ def main():
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.show_tokens:
|
||||
print(secrets.get_all_secrets())
|
||||
print(api_secrets.get_all_secrets())
|
||||
|
||||
if args.mode is None: return
|
||||
|
||||
@@ -42,11 +44,6 @@ def main():
|
||||
# Initialize talent account lists
|
||||
talent_lists.init()
|
||||
|
||||
## TEST CODE ##
|
||||
cross_pairs = twApi.get_users_cross_tweets_mentions(1390620618001838086)
|
||||
for pair in cross_pairs:
|
||||
print_tweet(pair)
|
||||
|
||||
## Determine running mode
|
||||
match args.mode.lower():
|
||||
case 'l' | 'listen':
|
||||
@@ -56,6 +53,8 @@ def main():
|
||||
print('RUNNING IN CATCH-UP MODE\n')
|
||||
catchup.run()
|
||||
case _:
|
||||
command_line()
|
||||
#TODO: remove message
|
||||
print('\ninvalid mode. run with no arguments or "-h" for help page, including mode list.')
|
||||
return
|
||||
|
||||
|
||||
+3
-1
@@ -2,15 +2,17 @@ import util
|
||||
|
||||
niji_en = dict()
|
||||
holo_en = dict()
|
||||
talents = dict()
|
||||
|
||||
def __create_dict(file, _dict):
    """Load ``<name> <id>`` lines from *file* into *_dict* and ``talents``.

    A line is accepted when it consists of exactly two whitespace-separated
    fields and does not start with ``#``. For each accepted line the entry
    ``int(id) -> name`` is stored both in *_dict* and in the module-level
    ``talents`` dictionary. Every other line is ignored.

    Args:
        file: path of the text file to read.
        _dict: dictionary that receives the ``id -> name`` entries.

    Raises:
        ValueError: if the second field of an accepted line is not an integer.
    """
    with open(file, 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) != 2 or line.startswith('#'):
                continue
            name, raw_id = fields
            talent_id = int(raw_id)
            _dict[talent_id] = name
            # `talents` is only mutated here, never rebound, so no `global`
            # declaration is required.
            talents[talent_id] = name
|
||||
def init():
|
||||
global niji_en
|
||||
global holo_en
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
import platform
|
||||
|
||||
import tweepy
|
||||
|
||||
from api import *
|
||||
import talent_lists
|
||||
|
||||
class TalentTweet:
    """A tweet together with the IDs of the other parties it involves.

    Attributes:
        tweet: the tweet object; ``id``, ``author_id``, ``text`` and
            ``created_at`` are read from it.
        other_parties: collection of user IDs of the other accounts involved
            in the tweet.
    """

    def __init__(self, tweet: "tweepy.Tweet | int | str | None" = None,
                 other_parties: "set | None" = None, *, tweet_id=None):
        """Build from an existing tweet object or fetch one by ID.

        Python keeps only the last ``__init__`` defined in a class body, so
        the two construction styles are handled by this single constructor:

        * ``TalentTweet(tweet=obj, other_parties={...})`` wraps data that has
          already been retrieved.
        * ``TalentTweet(some_id)`` or ``TalentTweet(tweet_id=some_id)``
          queries the API for the tweet and its involved parties.

        Args:
            tweet: a tweet object, or (positionally) a tweet ID given as
                ``int`` or ``str``.
            other_parties: IDs of the other involved accounts; defaults to an
                empty set when a tweet object is given without it.
            tweet_id: ID of the tweet to fetch.

        Raises:
            TypeError: if neither a tweet nor a tweet ID is supplied.
        """
        # A bare int/str in the first position is a tweet ID, not a tweet.
        if tweet_id is None and isinstance(tweet, (int, str)):
            tweet_id, tweet = tweet, None

        if tweet_id is not None:
            tweet, other_parties = self._fetch(tweet_id)
        elif tweet is None:
            raise TypeError('TalentTweet requires a tweet object or a tweet ID')

        self.tweet = tweet
        self.other_parties = set() if other_parties is None else other_parties

    @staticmethod
    def _fetch(tweet_id):
        """Query the API for *tweet_id*; return ``(tweet, other_parties)``."""
        # Imported here instead of at module level: api.py imports this
        # module before it defines TwAPI, so a module-level
        # ``from api import *`` can run against a half-initialised module
        # and leave TwAPI unbound.
        from api import TwAPI

        resp = TwAPI.instance.client.get_tweet(
            tweet_id,
            media_fields=TwAPI.TWEET_MEDIA_FIELDS,
            tweet_fields=TwAPI.TWEET_FIELDS,
            expansions=TwAPI.TWEET_EXPANSIONS,
        )
        return resp.data, TwAPI.get_involved_parties(resp.data, resp)

    def __repr__(self) -> str:
        return (
            f'{self.tweet.id} from {talent_lists.talents.get(self.tweet.author_id, "???")}:\n'
            f'{self.tweet.text}\n'
            f'------------------------------------------------------\n'
            f'{self.get_datetime_str()}\n'
            f'{self.get_mentions_usernames()}\n'
            f'Cross-company: {self.is_cross_company()}\n'
            f'======================================================'
        )

    def is_cross_company(self):
        """Return True if the author and any other party are in opposite lists.

        An author listed in ``talent_lists.niji_en`` is cross-company when at
        least one other party is in ``talent_lists.holo_en``, and vice versa.
        Authors in neither list are never cross-company.
        """
        # TODO: update for EN/ID
        author_id = self.tweet.author_id
        if author_id in talent_lists.niji_en:
            other_company = talent_lists.holo_en
        elif author_id in talent_lists.holo_en:
            other_company = talent_lists.niji_en
        else:
            return False
        return any(party_id in other_company for party_id in self.other_parties)

    def get_mentions_usernames(self):
        """Return the other parties' names joined by ``', '``, or ``'none'``.

        IDs missing from ``talent_lists.talents`` are shown as ``???``.
        """
        if len(self.other_parties) > 0:
            return ', '.join(
                f'{talent_lists.talents.get(party_id, "???")}'
                for party_id in self.other_parties
            )
        return 'none'

    def get_datetime_str(self):
        """Format ``tweet.created_at`` like ``Jul 4 2022, 9:05AM (UTC)``."""
        # The strftime flag that strips zero padding is '#' on Windows and
        # '-' on other platforms.
        unpad = '#' if platform.system() == 'Windows' else '-'
        return self.tweet.created_at.strftime(f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)')
|
||||
|
||||
|
||||
class TalentTweets:
    """Holder for a list stored in ``ttweets`` (work in progress).

    NOTE(review): presumably meant to collect ``TalentTweet`` objects;
    nothing populates the list yet.
    """

    def __init__(self):
        self.ttweets = []

    def get_ttweets(self):
        """Not implemented yet; returns ``None``."""
        return None

    def get_ttweet_ids(self):
        """Not implemented yet; returns ``None``."""
        return None
|
||||
+1
-25
@@ -2,39 +2,15 @@
|
||||
|
||||
import os
|
||||
import talent_lists
|
||||
import talenttweet as tt
|
||||
|
||||
# returns system path to this project, which is
|
||||
# up one level from this file's directory (src).
|
||||
def get_project_dir():
    """Return the path of the directory one level above this file's directory.

    The result is built by appending ``os.pardir`` to the directory that
    contains this file; it is not normalised, so it ends in ``..``.
    """
    here = os.path.dirname(__file__)
    return os.path.join(here, os.pardir)
|
||||
|
||||
# determine if tweet involves cross-company interaction
|
||||
def is_cross_company(pair: tuple):
    """Determine whether a tweet involves cross-company interaction.

    Args:
        pair: ``(tweet, mentions)`` where ``tweet`` has an ``author_id``
            attribute and ``mentions`` is an iterable of user IDs.

    Returns:
        True when the author is in ``talent_lists.niji_en`` and at least one
        mention is in ``talent_lists.holo_en``, or the other way round;
        False otherwise.
    """
    author_id, mentions = pair[0].author_id, pair[1]

    # The author's company does not change per mention, so decide once which
    # list a mention must belong to.
    if author_id in talent_lists.niji_en:
        other_company = talent_lists.holo_en
    elif author_id in talent_lists.holo_en:
        other_company = talent_lists.niji_en
    else:
        return False
    return any(mention_id in other_company for mention_id in mentions)
|
||||
|
||||
def tweet_id_to_url(id):
    """Return the ``https://twitter.com/twitter/status/<id>`` URL for *id*."""
    return f'https://twitter.com/twitter/status/{id}'
|
||||
|
||||
def print_tweet(pair: tuple):
    """Print a debug dump of a ``(tweet, mentions)`` pair.

    The output shows the tweet's ``id``, ``created_at``, the mentions, the
    ``text``, and the raw ``entities`` and ``referenced_tweets`` attributes.
    """
    tweet, mentions = pair
    s = (
        f'{tweet.id}: {tweet.created_at}: involves {mentions}\n'
        f'{tweet.text}\n'
        f'-----\n'
        f'{tweet.entities}\n'
        f'{tweet.referenced_tweets}\n'
        f'================================================='
    )
    print(s)
|
||||
|
||||
def clamp(n, smallest, largest):
    """Limit *n* to the range ``[smallest, largest]``.

    Returns *smallest* when ``n`` is below it, *largest* when ``n`` is above
    it, and *n* otherwise. If ``smallest > largest`` the result is *smallest*.
    """
    return max(smallest, min(n, largest))
|
||||
Reference in New Issue
Block a user