move working files into their own directory

2024-01-25 16:29:01 -08:00
parent 22743c58ef
commit ab3a8fcc89
6 changed files with 176 additions and 129 deletions
@@ -143,9 +143,5 @@ cython_debug/
 .vscode

 # project-specific
-*.png
+run/
 *.json
-queue.txt
-_queue_backup.txt
-finished_ttweets.txt
-_current_ttweet.txt
@@ -53,7 +53,7 @@ This is the authentication token obtained from a browser when signed in on the T
 ```
 web_auth_token=
 ```
-### Example contents of `.env` without values
+### Example `.env` without values
 ```
 scraper_username0=
 scraper_password0=
@@ -19,6 +19,7 @@ PROGRAM_ARGS = None
 safe_to_post_tweets = True
 scraper: Scraper

+
 # Updates TTweetQueue
 async def get_cross_tweets_online():
    global safe_to_post_tweets
@@ -26,51 +27,60 @@ async def get_cross_tweets_online():
    global scraper

    safe_to_post_tweets = True
-    dbg_curr_user = ''
+    dbg_curr_user = ""
    # Begin getting tweets from online
-    print('Pulling tweets from online!')
+    print("Pulling tweets from online!")
    try:
        for i, (talent_id, talent_username) in enumerate(talents.items()):
-            print(f'[{i+1}/{len(talents)}] {talent_username}-----------------------------------')
-            dbg_curr_user = f'{talent_id}: {talent_username}'
+            print(
+                f"[{i+1}/{len(talents)}] {talent_username}-----------------------------------"
+            )
+            dbg_curr_user = f"{talent_id}: {talent_username}"
            try:
                since_date = queue.finished_user_dates.get(talent_id, None)
-                ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
-                print(f'got {len(ttweets)} TalentTweets')
+                ttweets = scraper.get_cross_ttweets_from_user(
+                    talent_username, since_date=since_date
+                )
+                print(f"got {len(ttweets)} TalentTweets")
                for ttweet in ttweets:
-                    if ttweet.tweet_id not in queue.finished_ttweets \
-                        and ttweet.is_cross_company():
+                    if (
+                        ttweet.tweet_id not in queue.finished_ttweets
+                        and ttweet.is_cross_company()
+                    ):
                        queue.add_ttweet(ttweet)
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
-                print('Unhandled error occurred processing tweet data.')
+                print("Unhandled error occurred processing tweet data.")
                safe_to_post_tweets = False
                raise e
            else:
                queue.finished_user_dates[talent_id] = get_current_date()
                queue.save_file()
    except KeyboardInterrupt as e:
-        print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
+        print(
+            "Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!"
+        )
        queue.save_file()
        raise e
    except:
-        print('Unhandled error occurred while pulling tweets.')
+        print("Unhandled error occurred while pulling tweets.")
        traceback.print_exc()
-        with open("error_catchup.txt", "a") as f:
-            f.write(f'Error getting tweets from user {dbg_curr_user}\n')
+        with open(os.path.join(working_path(), "error_catchup.txt"), "a") as f:
+            f.write(f"Error getting tweets from user {dbg_curr_user}\n")
            traceback.print_exc(file=f)
        safe_to_post_tweets = False
    else:
-        print('Successfully saved all tweets from online!')
+        print("Successfully saved all tweets from online!")
        queue.save_file()

+
 # return False = we posted at least one ttweet
 # return True = we didn't post a single ttweet
 async def process_queue() -> bool:
-    '''
+    """
    Go through the queue and post stored TalentTweets.
-    '''
+    """
    global scraper
    global queue

@@ -80,37 +90,38 @@ async def process_queue() -> bool:
    ttweets_posted = 0

    if queued_ttweets_count == 0:
-        print('Posting queue is empty!')
+        print("Posting queue is empty!")
        return True

    try:
        while not queue.is_empty():
            ttweet = queue.get_next_ttweet()
            if ttweet.tweet_id in queue.finished_ttweets:
-                print('skipping finished tweet...')
+                print("skipping finished tweet...")
                queue.good(ttweet.tweet_id)
                continue

            tweet_was_successful = await TwAPI.instance.post_ttweet(ttweet)

-            print('running queue.good()...')
+            print("running queue.good()...")
            queue.good(ttweet.tweet_id)
            if tweet_was_successful:
                ttweets_posted += 1
-                print(f'({ttweets_posted}/{queued_ttweets_count}) done')
+                print(f"({ttweets_posted}/{queued_ttweets_count}) done")
                if not queue.is_empty():
-                    print(f'resting for {WAIT_TIME}s...')
+                    print(f"resting for {WAIT_TIME}s...")
                    await asyncio.sleep(WAIT_TIME - 5)
-                    print('5 second warning!')
+                    print("5 second warning!")
                    await asyncio.sleep(5)
    except Exception as e:
-        print('Unhandled error occurred while posting tweets from queue.')
+        print("Unhandled error occurred while posting tweets from queue.")
        traceback.print_exc()

    if ttweets_posted > 0:
        return False
    return True

+
 # return True = no problems
 # return False = issue occurred where we couldn't post all past tweets properly
 async def run(PROGRAM_ARGS):
@@ -124,63 +135,67 @@ async def run(PROGRAM_ARGS):
    # post tweets given in command line first
    if PROGRAM_ARGS.post_id is not None and len(PROGRAM_ARGS.post_id) > 0:
        PROGRAM_ARGS.post_id.sort()
-        print('Posting specified tweets first.')
+        print("Posting specified tweets first.")
        for id in PROGRAM_ARGS.post_id:
            try:
                i = int(id)
            except ValueError:
-                print(f'Invalid tweet {id}!')
+                print(f"Invalid tweet {id}!")
                continue

            posted = await TwAPI.instance.post_ttweet_by_id(i)
            if posted:
                queue.add_finished_tweet(i)
-                print('Successfully posted tweet. Sleeping for 5 minutes')
+                print("Successfully posted tweet. Sleeping for 5 minutes")
                await asyncio.sleep(60 * 5)
            else:
-                print('Did not post tweet')
-        print('Done processing specified tweets')
+                print("Did not post tweet")
+        print("Done processing specified tweets")
        PROGRAM_ARGS.post_id = None

    # refresh stored queue first
    if PROGRAM_ARGS.refresh_queue:
        PROGRAM_ARGS.refresh_queue = False
-        print('Refreshing queue tweets...')
+        print("Refreshing queue tweets...")
        for id in queue.ttweets_dict:
-            t  = scraper.get_tweet(id, queue.ttweets_dict[id].author_id in privated_accounts)
+            t = scraper.get_tweet(
+                id, queue.ttweets_dict[id].author_id in privated_accounts
+            )
            queue.ttweets_dict[id] = tt.TalentTweet.create_from_tweety(t)
        queue.save_file()

    async def queue_loop():
        while True:
-            print(f'{queue.get_count()} cross-company tweets to announce.')
+            print(f"{queue.get_count()} cross-company tweets to announce.")
            try:
                if safe_to_post_tweets:
                    if await process_queue():
                        print("Finished processing queue")
                        return
                    else:
-                        print('Posted no new tweets; we\'re caught up!')
+                        print("Posted no new tweets; we're caught up!")
                        return
                else:
-                    print('Tweets were not retrieved cleanly. Not processing queue.')
+                    print("Tweets were not retrieved cleanly. Not processing queue.")
                    return
            except KeyboardInterrupt as e:
-                print('Interrupting queue processing...')
+                print("Interrupting queue processing...")
                raise e
            except:
-                print('Unhandled error occurred while running catch up in posting phase.')
+                print(
+                    "Unhandled error occurred while running catch up in posting phase."
+                )
                traceback.print_exc()
            await get_cross_tweets_online()

    try:
        if PROGRAM_ARGS.straight_to_queue:
            PROGRAM_ARGS.straight_to_queue = False
-            print('Processing queue first before fetching tweets...')
+            print("Processing queue first before fetching tweets...")
            await queue_loop()
        else:
            await get_cross_tweets_online()
            await queue_loop()
    except KeyboardInterrupt:
-        print('Interrupt received. Ending catchup mode...')
+        print("Interrupt received. Ending catchup mode...")
        return False
@@ -1,4 +1,4 @@
-from util import get_project_dir
+from util import project_root

 holo_en: dict[int, str] = dict()
 holo_id: dict[int, str] = dict()
@@ -10,22 +10,25 @@ privated_accounts: dict[int, str] = dict()

 test_talents = dict()

+
 # TODO: talents(id) -> (name, company)
 def __create_dict(file, _dict, company):
-    print(f'Initializing talents\' account list from {file}...')
+    print(f"Initializing talents' account list from {file}...")
    global talents
-    with open(file, 'r') as f:
+    with open(file, "r") as f:
        for line in f:
            words = line.split()
-            if len(words) >= 2 and line[0] != '#':
+            if len(words) >= 2 and line[0] != "#":
                t = line.split()
                id, name = int(t[0]), t[1]
                # name = f'{util.get_username_online(id, default=name)}' # attempt to get updated name
                talents[id] = name
                _dict[id] = name
                talents_company[id] = company
-                if len(words) > 2 and words[2] == 'p':
+                if len(words) > 2 and words[2] == "p":
                    privated_accounts[id] = name
+
+
 def init():
    global holo_en
    global holo_id
@@ -34,26 +37,30 @@ def init():
    global test_talents

    # holoEN
-    __create_dict(f'{get_project_dir()}/lists/holoen.txt', holo_en, 'holoEN')
+    __create_dict(f"{project_root()}/lists/holoen.txt", holo_en, "holoEN")
    # holoID
-    __create_dict(f'{get_project_dir()}/lists/holoid.txt', holo_id, 'holoID')
+    __create_dict(f"{project_root()}/lists/holoid.txt", holo_id, "holoID")
    # nijiEN
-    __create_dict(f'{get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
+    __create_dict(f"{project_root()}/lists/nijien.txt", niji_en, "nijiEN")
    # nijiexID
-    __create_dict(f'{get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
+    __create_dict(f"{project_root()}/lists/nijiexid.txt", niji_exid, "nijiex'ID")
    # TODO: nijiex-KR

    test_talents = holo_en

+
 def is_niji(id: int) -> bool:
     """Tell whether ``id`` is a key of ``niji_en`` or ``niji_exid``."""
     # Check the first list, then fall through to the second one.
     if id in niji_en:
         return True
     return id in niji_exid

+
 def is_holo(id: int) -> bool:
     """Tell whether ``id`` is a key of ``holo_en`` or ``holo_id``."""
     # Check the first list, then fall through to the second one.
     if id in holo_en:
         return True
     return id in holo_id

+
 def is_cross_company(id1: int, id2: int):
     """Tell whether one id is niji and the other is holo, in either order."""
     # niji -> holo direction first ...
     if is_niji(id1) and is_holo(id2):
         return True
     # ... otherwise the answer is whatever the holo -> niji direction says.
     return is_holo(id1) and is_niji(id2)

+
 # For filtered stream
 # DEPRECATED: thx elon
 def get_twitter_rules():
@@ -61,12 +68,12 @@ def get_twitter_rules():
    rules = list()

    names = list(talents.values())
-    curr_rule = f'from:{names}'
+    curr_rule = f"from:{names}"
    for name in list(talents.values())[1:]:
-        test_rule = curr_rule +  f' OR from:{name}'
+        test_rule = curr_rule + f" OR from:{name}"
        if len(test_rule) > 512:
            rules.append(curr_rule)
-            curr_rule = f'from:{name}'
+            curr_rule = f"from:{name}"
        else:
            curr_rule = test_rule
    rules.append(curr_rule)
@@ -9,15 +9,20 @@ import talenttweet as tt
 # User timestamps line format:
 # {user_id} {status_num} {UNIX_timestamp}

+
 class TalentTweetQueue:
    instance = None

    def __init__(self):
        TalentTweetQueue.instance = self
-        self.queue_path = util.get_queue_path()
-        self.queue_backup_path = util.get_queue_backup_path()
-        self.current_ttweet_path = f'{util.get_project_dir()}/_current_ttweet.txt'
-        self.finished_ttweets_path = f'{util.get_project_dir()}/finished_ttweets.txt'
+        self.queue_path = os.path.join(util.working_path(), "queue.txt")
+        self.queue_backup_path = os.path.join(util.working_path(), "_queue_backup.txt")
+        self.current_ttweet_path = os.path.join(
+            util.working_path(), "_current_ttweet.txt"
+        )
+        self.finished_ttweets_path = os.path.join(
+            util.working_path(), "finished_ttweets.txt"
+        )
        self.is_good = True
        self.__sorted = False
        self.finished_user_dates: dict[int, str] = dict()
@@ -26,58 +31,62 @@ class TalentTweetQueue:

        ## file check, backup copy
        if os.path.exists(self.queue_backup_path):
-            print('Found backup queue! We errored in the previous run.')
+            print("Found backup queue! We errored in the previous run.")
            shutil.copyfile(self.queue_backup_path, self.queue_path)
        elif os.path.exists(self.queue_path):
-            print('Creating backup queue...')
+            print("Creating backup queue...")
            shutil.copyfile(self.queue_path, self.queue_backup_path)

        ## initialize structures
        # user timestamps
        try:
-            with open(self.queue_path, 'r') as f:
+            with open(self.queue_path, "r") as f:
                for line in f:
                    tokens = line.split()
-                    if len(tokens) == 0: continue
+                    if len(tokens) == 0:
+                        continue

-                    if tokens[0][0] != '#':
-                        print(f'Stopped finding user dates at {line}')
+                    if tokens[0][0] != "#":
+                        print(f"Stopped finding user dates at {line}")
                        # reached end of accounts list
                        break
-                    if tokens[2] != '-1':
+                    if tokens[2] != "-1":
                        self.finished_user_dates[int(tokens[1])] = tokens[2]
-        except: pass
+        except:
+            pass
        # ttweets
        try:
-            with open(self.queue_path, 'r') as f: # reset seek head
+            with open(self.queue_path, "r") as f:  # reset seek head
                # Get existing queued TalentTweets
                for line in f:
                    tokens = line.split()
-                    if len(tokens) == 0 or tokens[0][0] == '#':
+                    if len(tokens) == 0 or tokens[0][0] == "#":
                        continue
                    ttweet = tt.TalentTweet.deserialize(line)
                    # print(f'{ttweet.tweet_id}:\n{ttweet}')
                    self.ttweets_dict[ttweet.tweet_id] = ttweet
-                print(f'Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue.')
+                print(
+                    f"Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue."
+                )
        except:
            traceback.print_exc()
            pass
        # unfinished ttweet
        if os.path.exists(self.current_ttweet_path):
-            with open(self.current_ttweet_path, 'r') as f:
+            with open(self.current_ttweet_path, "r") as f:
                for line in f:
                    if len(line) > 0:
                        ttweet = tt.TalentTweet.deserialize(line)
                        if ttweet.tweet_id in self.ttweets_dict:
                            self.ttweets_dict[ttweet.tweet_id] = ttweet
-                            print(f'adding unfinished tweet {ttweet.tweet_id}')
+                            print(f"adding unfinished tweet {ttweet.tweet_id}")
        # finished ttweets
        try:
-            with open(self.finished_ttweets_path, 'r') as f:
+            with open(self.finished_ttweets_path, "r") as f:
                for line in f:
                    self.finished_ttweets.add(int(line))
-        except: pass
-
+        except:
+            pass

    def is_empty(self):
        return self.get_count() <= 0
@@ -94,7 +103,7 @@ class TalentTweetQueue:
        self.__sort_ttweets_dict()
        key = list(self.ttweets_dict.keys())[0]
        ttweet = self.ttweets_dict.pop(key)
-        with open(self.current_ttweet_path, 'w') as f:
+        with open(self.current_ttweet_path, "w") as f:
            f.write(ttweet.serialize())
        return ttweet

@@ -104,8 +113,10 @@ class TalentTweetQueue:
    ## Call when the TalentTweet retrieved from get_next_ttweet() was
    #  posted successfully.
    def good(self, tweet_id: int):
-        try: os.remove(self.current_ttweet_path)
-        except: pass
+        try:
+            os.remove(self.current_ttweet_path)
+        except:
+            pass

        self.add_finished_tweet(tweet_id)
        self.save_file()
@@ -113,28 +124,28 @@ class TalentTweetQueue:

    # overwrite queue.txt
    def save_file(self, replace_backup=True):
-        print('saving queue...', end='')
+        print("saving queue...", end="")
        if replace_backup:
-            print('overwriting backup...', end='')
+            print("overwriting backup...", end="")
            shutil.copyfile(self.queue_path, self.queue_backup_path)

        self.__sort_ttweets_dict()
-        with open(self.queue_path, 'w') as f:
+        with open(self.queue_path, "w") as f:
            # write dates
-            for (id, date) in self.finished_user_dates.items():
-                f.write(f'# {id} {date}\n')
+            for id, date in self.finished_user_dates.items():
+                f.write(f"# {id} {date}\n")

-            f.write('\n')
+            f.write("\n")

            # write sorted ttweets
            for ttweet in self.ttweets_dict.values():
-                f.write(ttweet.serialize() + '\n')
-        print('done')
+                f.write(ttweet.serialize() + "\n")
+        print("done")

    def add_finished_tweet(self, id):
        self.finished_ttweets.add(id)
-        with open(self.finished_ttweets_path, 'a') as f:
-            f.write(f'{id}\n')
+        with open(self.finished_ttweets_path, "a") as f:
+            f.write(f"{id}\n")

    def __sort_ttweets_dict(self):
        if not self.__sorted:
@@ -144,7 +155,7 @@ class TalentTweetQueue:
    # destructor
    def __del__(self):
        if self.is_good:
-            print('Ended in good state, deleting backup queue...')
+            print("Ended in good state, deleting backup queue...")
            os.remove(self.queue_backup_path)
        else:
-            print('Ended in bad state, keeping backup queue.')
+            print("Ended in bad state, keeping backup queue.")
@@ -13,36 +13,45 @@ import tweepy
 from recrop import fix_aspect_ratio
 import talent_lists

-# returns system path to this project, which is
-# up one level from this file's directory (effective path: ..../src/../).
-def get_project_dir():
+
+def project_root():
+    """Returns the project root."""
    return os.path.join(os.path.dirname(__file__), os.pardir)

-def get_queue_path():
-    return f'{get_project_dir()}/queue.txt'

-def get_queue_backup_path():
-    return f'{get_project_dir()}/_queue_backup.txt'
+def working_path():
+    """Returns the working ephemeral directory (``<project root>/run``).
+
+    The directory is created on first use, so callers can open files inside
+    it without checking for its existence themselves.
+    """
+    path = os.path.join(project_root(), "run")
+    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it does not
+    # fail if the directory appears between the check and the creation.
+    os.makedirs(path, exist_ok=True)
+    return path
+

 def clamp(n, smallest, largest):
     """Limit ``n`` to the range bounded by ``smallest`` and ``largest``."""
     # Apply the upper bound first, then the lower bound.
     capped = min(n, largest)
     return max(smallest, capped)

+
 def datetime_to_tdate(date_time: datetime):
     """Format ``date_time`` as a ``YYYY-MM-DD`` date string."""
     tdate_format = "%Y-%m-%d"
     return date_time.strftime(tdate_format)

+
 def tdate_to_datetime(tdate: str):
     """Parse a ``YYYY-MM-DD`` date string into a ``datetime``.
     Raises ``ValueError`` if ``tdate`` does not match that format.
     """
     # strptime expects the string to parse first and the format second; the
     # previous call passed only the format, so it raised TypeError every time.
     return datetime.strptime(tdate, "%Y-%m-%d")

+
 def timestamp_to_tdate(timestamp=None):
     """Convert a POSIX timestamp to a ``YYYY-MM-DD`` date string in UTC.
     When ``timestamp`` is ``None`` (the default), the current time is used.
     """
     # Identity comparison: None is a singleton, and ``==`` can be overridden
     # by the argument's type (and is flagged by PEP 8).
     if timestamp is None:
         timestamp = datetime.now().timestamp()
     return datetime_to_tdate(datetime.fromtimestamp(timestamp, tz=pytz.utc))

+
 def get_current_timestamp():
     """Return the current local time as a POSIX timestamp."""
     now = datetime.now()
     return now.timestamp()

+
 def get_current_date():
-    return datetime.today().strftime('%Y-%m-%d')
+    return datetime.today().strftime("%Y-%m-%d")
+

 def get_key_from_value(d: dict, val):
    keys = [k for k, v in d.items() if v == val]
@@ -50,38 +59,43 @@ def get_key_from_value(d: dict, val):
        return keys[0]
    return None

+
 # FIXME: web_auth_token under rate-limitation will fail to screenshot
 async def create_ttweet_image(ttweet):
    tc = TweetCapture()
-    auth_token = dotenv_values().get('web_auth_token')
+    auth_token = dotenv_values().get("web_auth_token")
    if auth_token:
-        tc.cookies = [{'name': 'auth_token', 'value': auth_token}]
-    if 'linux' in sys.platform:
+        tc.cookies = [{"name": "auth_token", "value": auth_token}]
+    if "linux" in sys.platform:
        # Linux chromedriver path
-        tc.driver_path = '/usr/bin/chromedriver'
-    filename = f'{get_project_dir()}/img.png'
+        tc.driver_path = "/usr/bin/chromedriver"
+    filename = f"{working_path()}/img.png"
    img = None
-    try: os.remove(filename)
-    except: pass
+    try:
+        os.remove(filename)
+    except:
+        pass
    try:
        img = await tc.screenshot(
            url=ttweet.url(),
            path=filename,
            mode=4,
            night_mode=1,
-            show_parent_tweets=True
+            show_parent_tweets=True,
        )
        img = fix_aspect_ratio(img)
    except:
-        print('unable to create tweet image')
+        print("unable to create tweet image")
        traceback.print_exc()
        return None

-    print(f'successfully saved {img}')
+    print(f"successfully saved {img}")
    return img

+
 def get_tweet_url(id, username):
-    return f'https://www.twitter.com/{username}/status/{id}'
+    return f"https://www.twitter.com/{username}/status/{id}"
+

 ## Attempt to pull username from local; pull from online if it doesn't exist.
 def get_username(id):
@@ -90,22 +104,26 @@ def get_username(id):
        return get_username_online(id)
    return ret

+
 def get_username_with_company(id):
     """Return the username for ``id`` followed by a space and ``(company)``.
     The parenthesised part is empty when ``talents_company`` has no entry
     for ``id``; the separating space is still emitted in that case.
     """
     company = talent_lists.talents_company.get(id)
     suffix = f"({company})" if company is not None else ""
     return f"{get_username(id)} {suffix}"

+
 def get_username_local(id: int):
-    return talent_lists.talents.get(id, f'{id}')
+    return talent_lists.talents.get(id, f"{id}")
+

 # Retrieve username via API v2 (tweepy)
 def get_username_online(id, default=None):
    import twapi
+
    try:
        resp = twapi.TwAPI.instance.client.get_user(id=id)
        return resp.data.username
    except tweepy.TooManyRequests:
-        return str(default) if default is not None else f'id:{id}'
+        return str(default) if default is not None else f"id:{id}"
    except:
-        print(f'Unhandled error retrieving username for {id}!')
+        print(f"Unhandled error retrieving username for {id}!")
        traceback.print_exc()
-        return str(default) if default is not None else f'id:{id}'
+        return str(default) if default is not None else f"id:{id}"