go back to passwords, use diff format

This commit is contained in:
muskit
2024-04-19 23:30:28 -07:00
parent 18dfb0a7c9
commit bacc426a6d
3 changed files with 32 additions and 26 deletions
+22 -20
View File
@@ -12,11 +12,17 @@ Setup the `.env` in the project root. Refer to the [`.env`](#env) section for va
Build and run the Docker container: Build and run the Docker container:
```bash ```bash
# to run attached (press CTRL+P, CTRL+Q to detach) # to delete container and built image
sh run.sh sh scripts/delete.sh
# ... or to run headless # to build image
sh run_detached.sh sh scripts/build.sh
# to create container and run attached (press CTRL+P, CTRL+Q to detach)
sh scripts/run.sh
# ... or to run headless/detached
sh scripts/run_detached.sh
``` ```
If attached to a container prepared by Dockerfile, you can run the program from project root (not in `src`). Refer to the following section for options. If attached to a container prepared by Dockerfile, you can run the program from project root (not in `src`). Refer to the following section for options.
@@ -36,17 +42,17 @@ These need to be defined in a `.env` file in the `run` ephemeral directory.
### Scraper Credentials ### Scraper Credentials
To get around rate limitations imposed on users, we scrape with multiple accounts. Each account is defined in the file using the following format: To get around rate limitations imposed on users, we scrape with multiple accounts. Each account is defined in the file using the following format:
``` ```
scraper_username_X=twitter_username scraperX_username=twitter_username
scraper_auth_token_X=twitter_auth_token scraperX_password=twitter_password
``` ```
where `X` is a number starting from 0, increasing by 1 for each account added. For instance: where `X` is a number starting from 0, increasing by 1 for each account added. For instance:
``` ```
scraper_username_0= scraper0_username=
scraper_auth_token_0= scraper0_password=
scraper_username_1= scraper1_username=
scraper_auth_token_1= scraper1_password=
``` ```
The first account (`scraper_username_0` and `scraper_auth_token_0`) **MUST be defined (`scraper_username_` and `scraper_auth_token_` without a number will not work!)** and will be used to attempt scraping private accounts. Make sure this account follows any private accounts that you want to scrape! The first account (`scraper0_username` and `scraper0_password`) **MUST be defined (`scraper_username` and `scraper_password` without a number will not work!)** and will be used to attempt scraping private accounts. Make sure this account follows any private accounts that you want to scrape!
### Twitter API Stuff ### Twitter API Stuff
The following keys/tokens are used for the official API via `tweepy`. We mainly use these to just post tweets. The following keys/tokens are used for the official API via `tweepy`. We mainly use these to just post tweets.
``` ```
@@ -56,20 +62,16 @@ user_token=
user_secret= user_secret=
``` ```
### Screenshot Cookie *(optional)* ### Screenshot Cookie *(optional)*
This is the authentication token obtained from a browser when signed in on the Twitter website. It's only needed if you want to screenshot tweets from private accounts. Make sure the token belongs to an account that follows the desired private accounts! Maybe have it belong to `scraper_username_0`? This is the authentication token obtained from a browser when signed in on the Twitter website. It's only needed if you want to screenshot tweets from private accounts. Make sure the token belongs to an account that follows the desired private accounts! Maybe have it belong to `scraper0`?
``` ```
web_auth_token= web_auth_token=
``` ```
### Example `.env` without values ### Example `.env` without values
``` ```
scraper_username_0= scraper0_username=
scraper_auth_token_0= scraper0_password=
scraper_username_1= scraper1_username=
scraper_auth_token_1= scraper1_password=
scraper_username_2=
scraper_auth_token_2=
scraper_username_3=
scraper_auth_token_3=
web_auth_token= web_auth_token=
app_key= app_key=
app_secret= app_secret=
+3 -2
View File
@@ -11,13 +11,14 @@ class AccountPool:
creds = dotenv_values(working_path(file=".env")) creds = dotenv_values(working_path(file=".env"))
i = 0 i = 0
while True: while True:
if f"scraper_username_{i}" in creds and f"scraper_auth_token_{i}" in creds: if f"scraper{i}_username" in creds and f"scraper{i}_password" in creds:
self.__accounts.append( self.__accounts.append(
(creds[f"scraper_username_{i}"], creds[f"scraper_auth_token_{i}"]) (creds[f"scraper{i}_username"], creds[f"scraper{i}_password"])
) )
i += 1 i += 1
else: else:
break break
print(f"{len(self.__accounts)} scraper credentials found!")
def use_index(self, idx): def use_index(self, idx):
self.__idx = idx self.__idx = idx
+7 -4
View File
@@ -1,6 +1,7 @@
from os.path import exists from os.path import exists
from time import sleep from time import sleep
from datetime import datetime, timedelta from datetime import datetime, timedelta
import traceback
import pytz import pytz
@@ -37,9 +38,9 @@ class Scraper:
try: try:
self.app.connect() self.app.connect()
except: except:
self.app.load_auth_token(acc[1]) self.app.sign_in(*acc)
else: else:
self.app.load_auth_token(acc[1]) self.app.sign_in(*acc)
return True return True
print("exhausted all accounts!") print("exhausted all accounts!")
return False return False
@@ -101,12 +102,14 @@ class Scraper:
if "_Missing" in e.message: # tweet is probably unavailable if "_Missing" in e.message: # tweet is probably unavailable
print(f"tweet {id} seems unavailable; skipping...") print(f"tweet {id} seems unavailable; skipping...")
return None return None
print("treating like RateLimitReached...") print("treating like RateLimitReached and using the next scraper...")
# traceback.print_exc() # traceback.print_exc()
self.login_wait(private_user) self.login_wait(private_user)
except Exception as e: except Exception as e:
if not private_user: if not private_user:
print("Unhandled exception occurred, trying again as private...") print("Unhandled exception occurred getting tweet!")
traceback.print_exc()
print("trying again as pvt-accessible...\n")
return self.get_tweet(id, True) return self.get_tweet(id, True)
else: else:
print( print(