added twint (scraper), restructuring
This commit is contained in:
@@ -1,2 +1,3 @@
|
|||||||
tweepy
|
tweepy
|
||||||
tweet-capture
|
tweet-capture
|
||||||
|
git+https://github.com/muskit/twint_2022_fix.git
|
||||||
+18
-30
@@ -3,7 +3,8 @@ from math import inf
|
|||||||
from urllib import response
|
from urllib import response
|
||||||
import tweepy
|
import tweepy
|
||||||
|
|
||||||
import secrets
|
import api_secrets
|
||||||
|
import talenttweet as tt
|
||||||
import util
|
import util
|
||||||
|
|
||||||
class TwAPI:
|
class TwAPI:
|
||||||
@@ -12,14 +13,6 @@ class TwAPI:
|
|||||||
TWEET_FIELDS = ['created_at', 'in_reply_to_user_id']
|
TWEET_FIELDS = ['created_at', 'in_reply_to_user_id']
|
||||||
TWEET_EXPANSIONS = ['entities.mentions.username', 'referenced_tweets.id.author_id']
|
TWEET_EXPANSIONS = ['entities.mentions.username', 'referenced_tweets.id.author_id']
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
TwAPI.instance = self
|
|
||||||
self.client = tweepy.Client(
|
|
||||||
bearer_token=secrets.bearer_token(),
|
|
||||||
consumer_key=secrets.api_key(), consumer_secret=secrets.api_secret(),
|
|
||||||
access_token=secrets.access_token(), access_token_secret=secrets.access_secret()
|
|
||||||
)
|
|
||||||
|
|
||||||
# Returns a set of involved parties for a single tweet.
|
# Returns a set of involved parties for a single tweet.
|
||||||
#
|
#
|
||||||
# Tweet must have been queried with these parameters:
|
# Tweet must have been queried with these parameters:
|
||||||
@@ -48,22 +41,17 @@ class TwAPI:
|
|||||||
|
|
||||||
return involved_parties
|
return involved_parties
|
||||||
|
|
||||||
# Returns a tweet and mention-set pair, given a tweet ID.
|
def __init__(self):
|
||||||
def get_tweet_mentions(self, id):
|
TwAPI.instance = self
|
||||||
resp = self.client.get_tweet(id,
|
self.client = tweepy.Client(
|
||||||
media_fields=TwAPI.TWEET_MEDIA_FIELDS,
|
bearer_token=api_secrets.bearer_token(),
|
||||||
tweet_fields=TwAPI.TWEET_FIELDS,
|
consumer_key=api_secrets.api_key(), consumer_secret=api_secrets.api_secret(),
|
||||||
expansions=TwAPI.TWEET_EXPANSIONS)
|
access_token=api_secrets.access_token(), access_token_secret=api_secrets.access_secret()
|
||||||
|
)
|
||||||
|
|
||||||
tweet = resp.data
|
# Returns a list of TalentTweets from a user.
|
||||||
mentions = TwAPI.get_involved_parties(tweet, resp)
|
|
||||||
return (tweet, mentions)
|
|
||||||
|
|
||||||
# Returns a list (tweet, {mentions}) from a user.
|
|
||||||
# mentions- a set comprised of any other parties involved
|
|
||||||
# in this tweet (reply, mention, qrt)
|
|
||||||
def get_users_all_tweets_mentions(self, id: int, count=inf):
|
def get_users_all_tweets_mentions(self, id: int, count=inf):
|
||||||
pairs = list()
|
ttweets = list()
|
||||||
|
|
||||||
retrieve_size = util.clamp(count, 5, 100)
|
retrieve_size = util.clamp(count, 5, 100)
|
||||||
next_page_token = None
|
next_page_token = None
|
||||||
@@ -79,7 +67,7 @@ class TwAPI:
|
|||||||
|
|
||||||
for tweet in resp.data:
|
for tweet in resp.data:
|
||||||
mentions = TwAPI.get_involved_parties(tweet, resp)
|
mentions = TwAPI.get_involved_parties(tweet, resp)
|
||||||
pairs.append((tweet, mentions))
|
ttweets.append(tt.TalentTweet(tweet=tweet, other_parties=mentions))
|
||||||
|
|
||||||
# update counters and pagination token
|
# update counters and pagination token
|
||||||
tweets_retrieved += resp.meta['result_count']
|
tweets_retrieved += resp.meta['result_count']
|
||||||
@@ -92,15 +80,15 @@ class TwAPI:
|
|||||||
break # reached end of user's tweets
|
break # reached end of user's tweets
|
||||||
|
|
||||||
print(f'Retrieved {tweets_retrieved} tweets using {tokens_retrieved} tokens.')
|
print(f'Retrieved {tweets_retrieved} tweets using {tokens_retrieved} tokens.')
|
||||||
return pairs
|
return ttweets
|
||||||
|
|
||||||
# returns a filtered list (tweet, [mentions]) from a user
|
# Returns a list of cross-company TalentTweets from a user.
|
||||||
def get_users_cross_tweets_mentions(self, id):
|
def get_users_cross_tweets_mentions(self, id):
|
||||||
ret = list()
|
ret = list()
|
||||||
pairs = self.get_users_all_tweets_mentions(id)
|
ttweets = self.get_users_all_tweets_mentions(id)
|
||||||
for pair in pairs:
|
for ttweet in ttweets:
|
||||||
if util.is_cross_company(pair):
|
if ttweet.is_cross_company():
|
||||||
ret.append(pair)
|
ret.append(ttweet)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|||||||
@@ -3,13 +3,13 @@
|
|||||||
import os
|
import os
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
from util import *
|
import util
|
||||||
|
|
||||||
# returns dictionary of the Credentials section.
|
# returns dictionary of the Credentials section.
|
||||||
# [NOT TO BE USED OUTSIDE OF THIS FILE.]
|
# [NOT TO BE USED OUTSIDE OF THIS FILE.]
|
||||||
def __get_ini_credentials():
|
def __get_ini_credentials():
|
||||||
c = configparser.RawConfigParser()
|
c = configparser.RawConfigParser()
|
||||||
if len(c.read(os.path.join(get_project_dir(), 'secrets.ini'))) > 0 and c.has_section('Credentials'):
|
if len(c.read(os.path.join(util.get_project_dir(), 'secrets.ini'))) > 0 and c.has_section('Credentials'):
|
||||||
return c['Credentials']
|
return c['Credentials']
|
||||||
return None
|
return None
|
||||||
|
|
||||||
+21
-3
@@ -6,16 +6,34 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import twint
|
||||||
|
|
||||||
from util import *
|
from util import *
|
||||||
|
from talent_lists import *
|
||||||
from api import TwAPI
|
from api import TwAPI
|
||||||
|
import talenttweet as tt
|
||||||
|
|
||||||
## Returns list of tweets present in queue.txt
|
## Returns list of tweets present in queue.txt
|
||||||
def get_local_queue():
|
def get_local_queue():
|
||||||
# f = open(os.path.join(get_project_dir(), 'queue.txt'))
|
# f = open(os.path.join(get_project_dir(), 'queue.txt'))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_user_tweet_ids(id, limit=None):
    """Return the IDs of a user's tweets, scraped with twint.

    Args:
        id: Twitter user ID whose tweets are searched.
        limit: Value assigned to twint's ``Limit`` setting; ``None`` is
            passed through unchanged.

    Returns:
        A list with the ``id`` attribute of every object twint stored.
    """
    scraped = []

    config = twint.Config()
    config.User_id = id
    config.Limit = limit
    # Ask twint to keep results in memory; presumably it appends each
    # scraped tweet object to the list handed over below.
    config.Store_object = True
    config.Store_object_tweets_list = scraped

    twint.run.Search(config)

    tweet_ids = []
    for entry in scraped:
        tweet_ids.append(entry.id)
    return tweet_ids
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
queue = get_local_queue()
|
queue = get_local_queue()
|
||||||
pairs = TwAPI.instance.get_users_all_tweets_mentions(1390620618001838086, count=5)
|
|
||||||
for (tweet, mentions) in pairs:
|
tweets_ids = get_user_tweet_ids(1390620618001838086, limit=20)
|
||||||
print_tweet(tweet, mentions)
|
for id in tweets_ids:
|
||||||
|
ttweet = tt.TalentTweet(id)
|
||||||
|
print(ttweet)
|
||||||
+8
-9
@@ -3,12 +3,10 @@ import argparse
|
|||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
import talent_lists
|
import talent_lists
|
||||||
import secrets
|
import api_secrets
|
||||||
import catchup
|
import catchup
|
||||||
import listen
|
import listen
|
||||||
|
|
||||||
from api import TwAPI
|
from api import TwAPI
|
||||||
from util import is_cross_company, print_tweet
|
|
||||||
|
|
||||||
MODES_HELP_STR = '''mode to run the bot at:
|
MODES_HELP_STR = '''mode to run the bot at:
|
||||||
l,listen: listen for new tweets from all accounts; will not terminate unless error occurs
|
l,listen: listen for new tweets from all accounts; will not terminate unless error occurs
|
||||||
@@ -21,6 +19,10 @@ def init_argparse():
|
|||||||
p.add_argument('--show-tokens', action='store_true', help='[DO NOT USE IN PUBLIC SETTING] print stored tokens from secrets.ini')
|
p.add_argument('--show-tokens', action='store_true', help='[DO NOT USE IN PUBLIC SETTING] print stored tokens from secrets.ini')
|
||||||
return p
|
return p
|
||||||
|
|
||||||
|
# TODO: implement command line mode for manually controlling the bot
def command_line():
    """Placeholder for a manual command-line mode; currently does nothing."""
    return None
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = init_argparse()
|
parser = init_argparse()
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
@@ -30,7 +32,7 @@ def main():
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.show_tokens:
|
if args.show_tokens:
|
||||||
print(secrets.get_all_secrets())
|
print(api_secrets.get_all_secrets())
|
||||||
|
|
||||||
if args.mode is None: return
|
if args.mode is None: return
|
||||||
|
|
||||||
@@ -42,11 +44,6 @@ def main():
|
|||||||
# Initialize talent account lists
|
# Initialize talent account lists
|
||||||
talent_lists.init()
|
talent_lists.init()
|
||||||
|
|
||||||
## TEST CODE ##
|
|
||||||
cross_pairs = twApi.get_users_cross_tweets_mentions(1390620618001838086)
|
|
||||||
for pair in cross_pairs:
|
|
||||||
print_tweet(pair)
|
|
||||||
|
|
||||||
## Determine running mode
|
## Determine running mode
|
||||||
match args.mode.lower():
|
match args.mode.lower():
|
||||||
case 'l' | 'listen':
|
case 'l' | 'listen':
|
||||||
@@ -56,6 +53,8 @@ def main():
|
|||||||
print('RUNNING IN CATCH-UP MODE\n')
|
print('RUNNING IN CATCH-UP MODE\n')
|
||||||
catchup.run()
|
catchup.run()
|
||||||
case _:
|
case _:
|
||||||
|
command_line()
|
||||||
|
#TODO: remove message
|
||||||
print('\ninvalid mode. run with no arguments or "-h" for help page, including mode list.')
|
print('\ninvalid mode. run with no arguments or "-h" for help page, including mode list.')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
+3
-1
@@ -2,15 +2,17 @@ import util
|
|||||||
|
|
||||||
niji_en = dict()
|
niji_en = dict()
|
||||||
holo_en = dict()
|
holo_en = dict()
|
||||||
|
talents = dict()
|
||||||
|
|
||||||
def __create_dict(file, _dict):
|
def __create_dict(file, _dict):
|
||||||
|
global talents
|
||||||
with open(file, 'r') as f:
|
with open(file, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
words = line.split()
|
words = line.split()
|
||||||
if len(words) == 2 and line[0] != '#':
|
if len(words) == 2 and line[0] != '#':
|
||||||
name, id = line.split()
|
name, id = line.split()
|
||||||
_dict[int(id)] = name
|
_dict[int(id)] = name
|
||||||
|
talents[int(id)] = name
|
||||||
def init():
|
def init():
|
||||||
global niji_en
|
global niji_en
|
||||||
global holo_en
|
global holo_en
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
import platform
|
||||||
|
|
||||||
|
import tweepy
|
||||||
|
|
||||||
|
from api import *
|
||||||
|
import talent_lists
|
||||||
|
|
||||||
|
class TalentTweet:
    """A tweet together with the IDs of the other parties it involves.

    Attributes:
        tweet: The tweet object (``resp.data`` from tweepy when fetched here).
        other_parties: Collection of account IDs involved in the tweet, as
            produced by ``TwAPI.get_involved_parties`` or given by the caller.
    """

    def __init__(self, tweet_id=None, other_parties=None, *, tweet=None):
        """Build a TalentTweet from a tweet ID or from an existing tweet.

        Python keeps only the last ``__init__`` defined in a class body, so
        both construction styles are handled by this single initializer:

        * ``TalentTweet(tweet_id)`` fetches the tweet through
          ``TwAPI.instance.client`` and derives the involved parties.
        * ``TalentTweet(tweet=tweet, other_parties=parties)`` (or the
          positional form ``TalentTweet(tweet, parties)``) wraps data that
          has already been retrieved, without any network call.

        Args:
            tweet_id: ID of the tweet to fetch. When ``other_parties`` is
                also given and ``tweet`` is not, this positional value is
                taken to be the tweet object itself.
            other_parties: Account IDs involved in an already-fetched tweet.
            tweet: An already-fetched tweet object.

        Raises:
            TypeError: If neither a tweet ID nor a tweet object is supplied.
        """
        if tweet is None and other_parties is not None:
            # Positional ``(tweet, other_parties)`` call: the first
            # argument is the tweet object, not an ID.
            tweet, tweet_id = tweet_id, None

        if tweet is not None:
            self.tweet = tweet
            self.other_parties = set() if other_parties is None else other_parties
            return

        if tweet_id is None:
            raise TypeError('TalentTweet requires either a tweet ID or a tweet object')

        resp = TwAPI.instance.client.get_tweet(
            tweet_id,
            media_fields=TwAPI.TWEET_MEDIA_FIELDS,
            tweet_fields=TwAPI.TWEET_FIELDS,
            expansions=TwAPI.TWEET_EXPANSIONS,
        )

        self.tweet = resp.data
        self.other_parties = TwAPI.get_involved_parties(self.tweet, resp)

    def __repr__(self) -> str:
        """Return a multi-line, human-readable summary of the tweet."""
        return (
            f'{self.tweet.id} from {talent_lists.talents.get(self.tweet.author_id, "???")}:\n'
            f'{self.tweet.text}\n'
            f'------------------------------------------------------\n'
            f'{self.get_datetime_str()}\n'
            f'{self.get_mentions_usernames()}\n'
            f'Cross-company: {self.is_cross_company()}\n'
            f'======================================================'
        )

    def is_cross_company(self):
        """Return True if the author and any involved party are in opposite lists.

        An author found in ``talent_lists.niji_en`` is cross-company when an
        involved party is in ``talent_lists.holo_en``, and vice versa. An
        author found in neither list is never cross-company.
        """
        author_id = self.tweet.author_id

        # TODO: update for EN/ID
        # The author's list does not change per mention, so pick the
        # opposing list once instead of re-checking inside the loop.
        if author_id in talent_lists.niji_en:
            opposing = talent_lists.holo_en
        elif author_id in talent_lists.holo_en:
            opposing = talent_lists.niji_en
        else:
            return False

        return any(mention_id in opposing for mention_id in self.other_parties)

    def get_mentions_usernames(self):
        """Return the involved parties' names joined by ', ', or 'none'.

        IDs missing from ``talent_lists.talents`` are shown as "???".
        """
        if not self.other_parties:
            return 'none'

        return ', '.join(
            str(talent_lists.talents.get(party_id, "???"))
            for party_id in self.other_parties
        )

    def get_datetime_str(self):
        """Format ``tweet.created_at`` without zero-padding day and hour."""
        # The strftime flag that strips zero-padding differs by platform:
        # '#' on Windows, '-' elsewhere.
        unpad = '#' if platform.system() == 'Windows' else '-'
        return self.tweet.created_at.strftime(f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)')
|
||||||
|
|
||||||
|
|
||||||
|
class TalentTweets:
    """Container for a collection of TalentTweet objects (accessors are stubs)."""

    def __init__(self):
        """Start with an empty list of talent tweets."""
        self.ttweets = []

    def get_ttweets(self):
        """Not implemented yet; returns None."""
        return None

    def get_ttweet_ids(self):
        """Not implemented yet; returns None."""
        return None
|
||||||
+1
-25
@@ -2,39 +2,15 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import talent_lists
|
import talent_lists
|
||||||
|
import talenttweet as tt
|
||||||
|
|
||||||
# returns system path to this project, which is
|
# returns system path to this project, which is
|
||||||
# up one level from this file's directory (src).
|
# up one level from this file's directory (src).
|
||||||
def get_project_dir():
|
def get_project_dir():
|
||||||
return os.path.join(os.path.dirname(__file__), os.pardir)
|
return os.path.join(os.path.dirname(__file__), os.pardir)
|
||||||
|
|
||||||
# determine if tweet involves cross-company interaction
|
|
||||||
def is_cross_company(pair: tuple):
|
|
||||||
author_id, mentions = pair[0].author_id, pair[1]
|
|
||||||
|
|
||||||
for mention_id in mentions:
|
|
||||||
if author_id in talent_lists.niji_en:
|
|
||||||
if mention_id in talent_lists.holo_en:
|
|
||||||
return True
|
|
||||||
elif author_id in talent_lists.holo_en:
|
|
||||||
if mention_id in talent_lists.niji_en:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def tweet_id_to_url(id):
|
def tweet_id_to_url(id):
|
||||||
return f'https://twitter.com/twitter/status/{id}'
|
return f'https://twitter.com/twitter/status/{id}'
|
||||||
|
|
||||||
def print_tweet(pair: tuple):
|
|
||||||
tweet, mentions = pair
|
|
||||||
s = (
|
|
||||||
f'{tweet.id}: {tweet.created_at}: involves {mentions}\n'
|
|
||||||
f'{tweet.text}\n'
|
|
||||||
f'-----\n'
|
|
||||||
f'{tweet.entities}\n'
|
|
||||||
f'{tweet.referenced_tweets}\n'
|
|
||||||
f'================================================='
|
|
||||||
)
|
|
||||||
print(s)
|
|
||||||
|
|
||||||
def clamp(n, smallest, largest):
|
def clamp(n, smallest, largest):
|
||||||
return max(smallest, min(n, largest))
|
return max(smallest, min(n, largest))
|
||||||
Reference in New Issue
Block a user