Merge pull request #5 from muskit/no-listen-rewrite

Rewrite bot to not use filtered streams
This commit is contained in:
muskit
2023-08-18 02:29:46 -07:00
committed by GitHub
24 changed files with 714 additions and 3032 deletions
+1 -1
View File
@@ -143,5 +143,5 @@ cython_debug/
.vscode
# project-specific (secret.ini: can't ignore existing file?)
secrets.ini
*.png
*.json
+62 -4
View File
@@ -5,11 +5,69 @@ Twitter bot that tracks cross-company interactions between the non-JP branches o
**This project was created to run [this account](https://twitter.com/NijiHolo_EN_ID).**
## Running modes
## Running
Install dependencies.
```
pip install -r requirements.txt
```
Set up the `.env` file in the project root. Refer to the `.env` section for variables.
Run the program from project root (not in `src`). Refer to the following section for options.
```
python src/main.py
```
## Modes & Options
The bot may run in these modes:
* Catch-up (`c`): intended to run only once, scan all accounts for cross-company tweets and post them. Terminate when done posting all.
- use `--auto-listen` to switch to listen mode when finished
* Listen (`l`): listens for tweets from list, sharing it if it's cross-company
* Pass no argument to run in listen mode, which scrapes all accounts in the *list* folder at an interval.
* Pass `--straight-to-queue` to process the locally-stored queue first before attempting to scrape.
* Command-line (`cmd`): an interactive mode for manual control and debugging (drops into a Python interpreter)
## `.env`
These need to be defined in a `.env` file at the project root (outside of `src`):
### Scraper Credentials
To get around rate limitations imposed on users, we scrape with multiple accounts. Each account is defined in the file using the following format:
```
scraper_usernameX=twitter_username
scraper_passwordX=twitter_password
```
where `X` is a number starting from 0, increasing by 1 for each account added. For instance:
```
scraper_username0=
scraper_password0=
scraper_username1=
scraper_password1=
```
The first account (`scraper_username0` and `scraper_password0`) **MUST be defined (`scraper_username` and `scraper_password` without number will not work!)** and will be used to attempt scraping private accounts. Make sure this account follows any private accounts that you want to scrape!
### Twitter API Stuff
The following keys/tokens are used for the official API via `tweepy`. We mainly use these to just post tweets.
```
app_key=
app_secret=
user_token=
user_secret=
```
### Screenshot Cookie *(optional)*
This is the authentication token obtained from a browser when signed in on the Twitter website. It's only needed if you want to screenshot tweets from private accounts. Make sure the token belongs to an account that follows the desired private accounts! Maybe have it belong to `scraper_username0`?
```
web_auth_token=
```
### Example contents of `.env` without values
```
scraper_username0=
scraper_password0=
scraper_username1=
scraper_password1=
scraper_username2=
scraper_password2=
scraper_username3=
scraper_password3=
web_auth_token=
app_key=
app_secret=
user_token=
user_secret=
```
*Created for the spirit of entertainment and in the name of unity.*
-998
View File
@@ -1,998 +0,0 @@
1576430556933672960
1576489483457134594
1576509602732322819
1576660900668317696
1576716382024015872
1576720425505415170
1576755412824600576
1576995410992041986
1577051131339235328
1577055035577417728
1577068103791366145
1577070764175466499
1577083110805876738
1577098034147573760
1577193525720272897
1577193763252076544
1577100518446403584
1577592730104254464
1577681106971000832
1577751763859521570
1577814530729525248
1577857217352863744
1577858408144936960
1577866370179805184
1577940553081176065
1577996126955122688
1577998007018328064
1577998613615345664
1577998915366162433
1578245815130210304
1578516453434675200
1578638137701838848
1578973277539926017
1577940553081176065
1578245815130210304
1578516453434675200
1578638137701838848
1578973277539926017
1579547332571705345
1579547617717293056
1579548229431365634
1579554638369796102
1579608397468794880
1579628879270281216
1579629421064921088
1579758548355276800
1579758871589302272
1579768241400197120
1579789920960655360
1579790235730575360
1579900565424701441
1579964993243873287
1580181779868430336
1580258975047888896
1580259413297807363
1580259612670177280
1580260690962501632
1580261950604222464
1580262961058263040
1580265275265495040
1580268043279958016
1580269274848849920
1580269426082910215
1580269963679477761
1580276935921655808
1580277066930434049
1580279094205259776
1580283694731776000
1580381402767720448
1580449719511572480
1580449785135640576
1580476694519246850
1580480705733148672
1580516570518159366
1580517097708609538
1580576986145038336
1580664540152606721
1580838449292406784
1580838874200227841
1580842852476915714
1580842860622286848
1580879439193595904
1581315661242171392
1581359254363148288
1581359894082424834
1581512593918685184
1581628130418384896
1581628264434782209
1581639528053321730
1581644625604845568
1581655283448745985
1581732410584371200
1581945888465096704
1581945996095160320
1582035073573888000
1582082039033262082
1582135400147001344
1582135466068828161
1582142861604446209
1582148227943436288
1582167082997014528
1582188441793548290
1582191818430521344
1582191849540026368
1582280269469716481
1582283272024834050
1582371009587740672
1582461770198761472
1582555511949385728
1582617083179044864
1582617435416694784
1582635977373544449
1582636277605617664
1582773035178024961
1583173642166292480
1583176372696252417
1583204758495457280
1583526503492440064
1583563981108629504
1583575990411747328
1583576858133921793
1583668922167234561
1583669374397059072
1583689284594245633
1583863041493413888
1583888933494358017
1583908414102519812
1583909513832574976
1583968857554767872
1584008637134970883
1584441576205320195
1584442364591939584
1584480659199754242
1584528264915083265
1584528264965787649
1584622108025712641
1584628579677126657
1584629534527488001
1584629633735364608
1584649537997533185
1584728511952011266
1584753182156414976
1585073813905375233
1585097154032435200
1585098477557555200
1585112474097176576
1585144962085048321
1585145288767201288
1585145360313618432
1585145559887020032
1585145633530597376
1585161614461849600
1585199425206394880
1585208233010290690
1585277536187064322
1585424318103945216
1585449222635225088
1585511782885380097
1585553749199978497
1585828601727156224
1585841772332302339
1585871064021671936
1585872552513134592
1585872662915559424
1585875080315559936
1586100052623171586
1586179378089582592
1586374470939254785
1586376473119686656
1586392482321158144
1586643720878706689
1586737628338077701
1586840114868629504
1587216332940247040
1587235419241996288
1587255460943511553
1587280116069142528
1587290714487943168
1587336915048894466
1587459847636779008
1587670143840112640
1587670772323155969
1587828606583115776
1587830007753105408
1587830065223684098
1587891436371861504
1587931182284517381
1587936783299321856
1587947549658386433
1587983746543894534
1588044083666231298
1588093544685719552
1588095705070981120
1588100836575440896
1588101073910104066
1588101210329927680
1588128449490612225
1588149163811913731
1588293190322253827
1588366787065675777
1588678173138780160
1588762792521498629
1588796489224294400
1588958260258025472
1589150733659963393
1589439728973217793
1589529660429602816
1589532085144150017
1589532195844411393
1589540519583289344
1589570601567809538
1589641589135413248
1589669189765451776
1589916075705085952
1589916571518242816
1589918062358450176
1589977133857722368
1590885589603479552
1591306004666675201
1591307219169148930
1591336097216622594
1591336705730437120
1591730177310359553
1591839086364741632
1592029314119520256
1592086362508840960
1592189421897797633
1592514737165971456
1592635108510474240
1592743719639592960
1593330237609164800
1593612237217964038
1594025064478801920
1594025676578824193
1594025967428608000
1594305241377284102
1594336607661760512
1594475762614423552
1594499300364779523
1594537154453147649
1594543914186317825
1594581129041915904
1594595823748550657
1594617557696348160
1594648307338739712
1594648462431498242
1594650682220769280
1594705822281789443
1580268846790778881
1581083174305599488
1581083425401806848
1581088983148560384
1581096278469246978
1581513143087681536
1581514105894350849
1583568304576565254
1584600364783144960
1584612840824049684
1587558629489926145
1587669760078188545
1591359889011773440
1592042710722314244
1592082624558034946
1594945258965848066
1595094770539712512
1595144029448339457
1595159086265339904
1595190586537283585
1595209971486306304
1595520675607113728
1595531274474643469
1595605679938707457
1595677291300163584
1596235440700407811
1596630228289605633
1596636538116382722
1596733577076449280
1596797256665292802
1596833246561521664
1596996552887013377
1597029385625665536
1597051834689941504
1597053360393228288
1597265217515581440
1597265603529969664
1597266128665214976
1597270533850427392
1597321808331825153
1597325594341310464
1597426336636669952
1597636961694797829
1597654482758045696
1597698554688335872
1597699060462673920
1597845762322223105
1598567014347542529
1598586430795747328
1598796336270217217
1599022459029966848
1599310792008269824
1599324276540297216
1599350192134033409
1599694863800279041
1599703777665503232
1599996347658051584
1600462906335690753
1600496863278407691
1600499056165404672
1600628466977832961
1600669875503972353
1600671934315786240
1600677118874353665
1600700567948767233
1600705712682053632
1600706014537564160
1600706374857822209
1600709284786053120
1600709816791334913
1600752783845560321
1600772713135554560
1601053614755446785
1595709206627246081
1596251536841400320
1596364591269023744
1596365300026421248
1596500447308939265
1597857498903764995
1598352981967376385
1598477628943794176
1598552959738343425
1599118485363490817
1599279465838018560
1599291560507895810
1599297564633083904
1599337445199253504
1599358415092191234
1599423114018185216
1600363418166648835
1600369195241205760
1601558689529593857
1601595882763616257
1601642608698482688
1601749751745372162
1601754145861484544
1601768148621217792
1601877383522713601
1601878641620967424
1601878682137931776
1601879062560968705
1601881127958253570
1602009414990430208
1602012523708010496
1602205274168852480
1602205385993101313
1602275459697639424
1602506093829099520
1602508255062020096
1602540444365508608
1602658754297843712
1602743376201359360
1602753038909050880
1602766954514767872
1602768627534467073
1602800460288647168
1602804823887421444
1602832095952658434
1602847291362123777
1602851330309357568
1602853324667191297
1602963752101580800
1603073603364233217
1603074898758631424
1603287279975686144
1603297578514546688
1603822421190008833
1603838388347584518
1603840758783815680
1602766894465175552
1603867200204701696
1604185149448634369
1604237041977495553
1604261358039711744
1604403162504232961
1604513581780844551
1604516203908632576
1604519045495234561
1604522344550043648
1604556203903709185
1604726291093233664
1604726760142262272
1605034554594832385
1605065307986735104
1605083978847096832
1605252604874874880
1605264950347399168
1605266135339134976
1605266238443241474
1605268175796445200
1605268292112945153
1605341177791684608
1605354994127241217
1605398103036948484
1605415343358447616
1605552893553016832
1605563286728196098
1605707013589151745
1605763163391156226
1605763733040599040
1605978761127636992
1606359300565237763
1606359416999116813
1606359600680271877
1606359770969014284
1606385110697316371
1607544867357433859
1607925564412932096
1608271072562139136
1608325890097782784
1608536973727367170
1608539465340915715
1608780569676251140
1608826609766891522
1608827143257223169
1608827350376140802
1608827612788576256
1608829041943138305
1608888487436255232
1609051353543761921
1609272099276984320
1609724860590329856
1610129643067310081
1610130224003559424
1610710681975914496
1610710768030486530
1605758391506599936
1606347441795731471
1608806613812678660
1610391363820081152
1610883754435973120
1611004280282157057
1611004684344475650
1611005195936489478
1611144693231779843
1611145010531160064
1611145304161624064
1611160944197132288
1611175600126935041
1611231349939240960
1611594073231720449
1611669467972644864
1611954084751474688
1611957866096889859
1611976715802214401
1611978377568874498
1611978836383522816
1611989935439118336
1612041276584792070
1612222955332984832
1612223088833200132
1612223456724279297
1612486617561985025
1612492903158267906
1612559384906813445
1612828971590193152
1613178801374232578
1613178992894398470
1613179047114346496
1613179141196767233
1613179219256958983
1613179256917352448
1613179328854122502
1613411736790437889
1613760768641572869
1613784820076929025
1613792568772366336
1613799407069323266
1614026676589019138
1614045021606666244
1614052812585066496
1612040906529456130
1613408647937417216
1613822548906774529
1614451292163944448
1614504913299714050
1614514545644666881
1614516595333959680
1614620215467454504
1614654062267617280
1614666713181880325
1614668312809738240
1614677261537673216
1614677382820134914
1614706430682054657
1614712147917041665
1614713259600535552
1614713633707274241
1614713877807398912
1614714298080854017
1614714610170613761
1614714833966104577
1614715700198928386
1614727456782712832
1614744314927779841
1614801067224748033
1614801210724474880
1614801331427966977
1614831641935122434
1614882693657034754
1614892923488960512
1614896348918321153
1614897026814324736
1614910639213871104
1614928185006919680
1615072566439288832
1615160760321343488
1615198160649060355
1615744899655307266
1616432895001772040
1616436672723304453
1616496843705442304
1616501597244387331
1616506713582235648
1616507859944222721
1616509728888324098
1616537220919492608
1616539191462223874
1616564014129070080
1616565464246718466
1616568259515502592
1616582644992548864
1616591801372258309
1616592777718165504
1616593204870287360
1616999858153422851
1617004670865805312
1617136121200377856
1617158328022618117
1617233807194705920
1617272807666372608
1617317197373911040
1617317424969437187
1617318052000940035
1617318239603662848
1617318880489398274
1617318933903671297
1617499924962369537
1617541083508142080
1617719234787962880
1617732292319006722
1617734154870980609
1617737559546941441
1617737633568010241
1617743342670073856
1617746760046346241
1617757228794281985
1617764829892349952
1617862776046899202
1617870239760535553
1617886523063664641
1617930583505801224
1618423435964846081
1618424133347594240
1618542182117539842
1618542328389697539
1618548432167403520
1618554017730396163
1618567686174429184
1618677052806434816
1618830423236448256
1618834876064747520
1618844600894107648
1618845420557590530
1618845486580129792
1618983950336282624
1618988673227436033
1618991676781137921
1619066157964881921
1619099894391738368
1619180071893479424
1619202780224446467
1619305720683585538
1619399152651358210
1619654784725950464
1619687684951384065
1619774044869632001
1619775039435587588
1619778797368389633
1619832253449912321
1619856428969398272
1619924833621671937
1620003405958873092
1620227328566792192
1620346225886396421
1620346703831502848
1620664802241835008
1620674202293653504
1620684007473287169
1620711599593119744
1620849272018333696
1620952817803612160
1620953485851394049
1620954399475658753
1620955271974780929
1620957482465230848
1620958412438929409
1621310782666416128
1621311858329407488
1621312731034030080
1621314869487091712
1621403652991721472
1621425409157140484
1621561581690826752
1621574909116948480
1621591321067622402
1621741476412821504
1621743668045676546
1621744901338841088
1621746092449878016
1621750280693092352
1621750707388055552
1621750740724350977
1621751603970842624
1621755848568733696
1621771326494244865
1621797995678429184
1621798650778206208
1621799269496913920
1622048615672733697
1622519854560595969
1622565675922501632
1622893611955240961
1622953785713913857
1623465800836284416
1623471458696728581
1623472449961529346
1623707075300986880
1623709743264190464
1624643824692908032
1624646092510879745
1624648616446152706
1624648791034044416
1624649128771739648
1624659722954747906
1624853328155779078
1624860275831672834
1625010090326761472
1625058230807277568
1625118424694247424
1625734543016660992
1624520599656624129
1624521241901076485
1627514900703584256
1627522526527062016
1627529002029297664
1627529397296320513
1627563980154671105
1627566950267371520
1627573853823307776
1627574225199591425
1627606044070277120
1627606106720686080
1627608118292287488
1627635743697866752
1627639742312468482
1627654096378384384
1627655361410011136
1627655522840358912
1627861956634746884
1627864965053808640
1627934959846707200
1627953946789838848
1627985560920006657
1627985896279797761
1628003892641566720
1628004446310649856
1628007913032753157
1628008071048925184
1628047652691775488
1628269360111579137
1628283406114230272
1628283727582498816
1628333043961430016
1628335653841944578
1628640208425197569
1628652565088059392
1628680578613780480
1628680986346266624
1628717287233421316
1628786187233099780
1628786323128741888
1628786724317941762
1628786919470407684
1628795062472835073
1628795184044728320
1629272877060046850
1629272892595634178
1629272914330636289
1629335290530676736
1629341302524346368
1629402458689794050
1629454168845893633
1629468461100589056
1629758716751798272
1630026264705896448
1630026923303911425
1630027564344569856
1630032477103222784
1630033474861056000
1630035097461346304
1631048890504867843
1631542496814825473
1631590820678860800
1631676512029282307
1632102469072297986
1632283032207114251
1632285887995449345
1632414814692601856
1632556021049856005
1632560765424259072
1632629409751416835
1632641410230026240
1632641678623522816
1633423730654478337
1633657001749397505
1633659447934611457
1633664285078421505
1633664320792940544
1633911083202117632
1634528563344064512
1634531188034654209
1634539654916751363
1634620065864728581
1634683766235856896
1634746599263043584
1635151666445193217
1635154154497916928
1635154248769093640
1635168017276633088
1635253449913925632
1635253930849599489
1635259184194408453
1635260955562237952
1635263134352162817
1635336166865248257
1635366683052482560
1635462585616314368
1635474518184722432
1635477005226618887
1635511568556507136
1635566086815875073
1635745147181735948
1635797049512951808
1635798410837245953
1635817346664153095
1635891740640174081
1635904748774887426
1635973373384871936
1635976579624095745
1636360081670545411
1636360302207078402
1636370880241754112
1636377928140468227
1636418785006567427
1636576201102721027
1636599574197403648
1636635893149876226
1636729072788045824
1636766609984352256
1636767125640728577
1636984749553254400
1637021438090829825
1637022826472574976
1637022930097049600
1637035673621528576
1637058107233939456
1637061108992442368
1637061432104849408
1637205007891066882
1637210521941442561
1637872529342869504
1637924701573099520
1637925400537108480
1637925419323371522
1638013762165788672
1638017789326663681
1638044945520132096
1638093064064364544
1638094749335707648
1638174096126263303
1638240669256024064
1638250006443352064
1638276318944022547
1638288078056857606
1638496316635357185
1638515168941592581
1638525170330599424
1638564705341001730
1638565298990383104
1638565311057391616
1638565357861609474
1638565800150958081
1638565817280503808
1638694373612298240
1638694824638398465
1638977522862813204
1639137601226522626
1639150016693452801
1639159907168718850
1639159970733359104
1639160062311796737
1639161115199541250
1639170191790596096
1639179598956208129
1639257674704453634
1639347991990636544
1639472078574481408
1639703873286701056
1639705899932557312
1639766858822762503
1639796727753457667
1639796887795511296
1639829562258325504
1640367083598024706
1640438349252141057
1640728560267608067
1640730592303108096
1640733153290592262
1640733678484574215
1640733839935909892
1640924027014332417
1640924197797998592
1641206319397969921
1641206458531323907
1641207736296677376
1641213550248050689
1641214619329916929
1641214980564344833
1641331011186466816
1641527643592949760
1641638401534476293
1641682622626627584
1641775144657117185
1641842256020635657
1641842492587819009
1641843861730230273
1641843911789367296
1641848890776223750
1641863954535178261
1641874376877563905
1641881631232499712
1641905300813803520
1641920742576070658
1641931894211133442
1641931924611448835
1641968426985099264
1642044327445639168
1642052711259701248
1642271925807329280
1642363150442934274
1642366098359328770
1642386091146899457
1642386251453202432
1642386862714294276
1642387403498459139
1642389173641568257
1642390508629807104
1642391144947671043
1642391409058787328
1642392966391287808
1642401063419662336
1642404506716590081
1642407503333044224
1642442692985593857
1642683998026563587
1642874077491892226
1642936452525834251
1642965863111479296
1642974611460505604
1643151747874172930
1643375613665787907
1643383401980870658
1643456057425948672
1643456150652739584
1643514992803696644
1644101531111952384
1644101652398612481
1644228307607486464
1644804036896641024
1644808862627770368
1644825182161256448
1644835718345228288
1644835937266827272
1644836105416474624
1644905879475752960
1644947050709102592
1645279524341563392
1645279596139671552
1645279816734883841
1645693219554852864
1645875484411015169
1645877585795256321
1646006703572283399
1646010597840936960
1646010746344538112
1646011153129021441
1646122631043883009
1646131757320933377
1646133175196991488
1646135538792153089
1646135600570040320
1646136821154467843
1646192373062569990
1646560089539743744
1646566407684132865
1646747817753362433
1646748169055924224
1646751823666073600
1646753010209681409
1646928376093892609
1646928741275439104
1646932746030161921
1647078104823922690
1647103804893167616
1647225532344352770
1647227289950904320
1647276137628696578
1647438699347468288
1647526954575486976
1647527000519884800
1647633398964408323
1647707442921234434
1648048413408399362
1648049825445974017
1648052454364413952
1648053219787161601
1648149958296588296
1648237005309095937
1648241034030370816
1648259276589404161
1648380069210107904
1648411348370444288
1648424997789265920
1648520806639759361
1648623694338486273
1648701720887713793
1648703217771880449
1648726724874752002
1648733920442322944
1648768838614278187
1649308567747407875
1649308879073816576
1649355368361762816
1649476854321913860
1649477152012640283
1649649276077703168
1649774499267747841
1649777168883363841
1649801910659022849
1649804472330387457
1649804636268773378
1649805303737663488
1649821444442845185
1649870014605037568
1649947340558442496
1650001834134720514
1650055872289329153
1650059824422596609
1650060926119460865
1650111617710063621
1650111986506801153
1650260086818832386
1650262244846968835
1650561499893493763
1650561606991097856
1650561615300034561
1650561616940003328
1650573608186441729
1650574520711798805
1650574719639322631
1650575471719899141
1650575762586492956
1650625322914664448
1650626698029703168
1650642367748411392
1650682902362079232
1650684291280699392
1650685691016384512
1650690513169948672
1650812641500098560
1650874430711648259
1650950207087996932
1651032960978567168
1651034395397029890
1651034863351283713
1651035920840245249
1651323872484945922
1651327435076513795
1651355062633918465
+27 -21
View File
@@ -3,36 +3,42 @@
# ----- hololive EN -----
# --- [Myth] ---
gawrgura 1283657064410017793
watsonameliaen 1283656034305769472
moricalliope 1283653858510598144
ninomaeinanis 1283650008835743744
takanashikiara 1283646922406760448
1283657064410017793 gawrgura
1283656034305769472 watsonameliaen
1283653858510598144 moricalliope
1283650008835743744 ninomaeinanis
1283646922406760448 takanashikiara
# --- [HOPE] ---
irys_en 1363705980261855232
1363705980261855232 irys_en
# --- [Council] ---
hakosbaelz 1409783149211443200
ourokronii 1409817096523968513
ceresfauna 1409784760805650436
tsukumosana 1409819816194576394
nanashimumei_en 1409817941705515015
1409783149211443200 hakosbaelz
1409817096523968513 ourokronii
1409784760805650436 ceresfauna
1409819816194576394 tsukumosana
1409817941705515015 nanashimumei_en
# --- [Advent] ---
1656536840240005121 shiorinovella
1656528951303614464 nerissa_en
1656889310472437761 fuwamoco_en
1656531547279982593 kosekibijou
# ----- HOLOSTARS EN -----
# --- [TEMPUS] ---
# Gen 1
noirvesper_en 1536579341332516864
axelsyrios 1536577295632441344
magnidezmond 1536576325296996352
regisaltare 1536575088996524032
1536579341332516864 noirvesper_en
1536577295632441344 axelsyrios
1536576325296996352 magnidezmond
1536575088996524032 regisaltare
# Gen 2
gavisbettel 1582926739684339712
machinaxflayon 1582922712166825986
banzoinhakka 1582927907206631425
josuijishinri 1582925071546732544
1582926739684339712 gavisbettel
1582922712166825986 machinaxflayon
1582927907206631425 banzoinhakka
1582925071546732544 josuijishinri
# --- STAFF ---
omegaalpha_en 1397148959798226945
hololivepro_EN 1540204458042621952
1397148959798226945 omegaalpha_en
1540204458042621952 hololivepro_EN
+9 -9
View File
@@ -3,16 +3,16 @@
# ----- hololive ID -----
# --- [Gen 1] ---
ayunda_risu 1234752200145899520
moonahoshinova 1234753886520393729
airaniiofifteen 1235180878449397764
1234752200145899520 ayunda_risu
1234753886520393729 moonahoshinova
1235180878449397764 airaniiofifteen
# --- [Gen 2] ---
pavoliareine 1328275136575799297
kureijiollie 1328277233492844544
anyamelfissa 1328277750000492545
1328275136575799297 pavoliareine
1328277233492844544 kureijiollie
1328277750000492545 anyamelfissa
# --- [Gen 3] ---
kobokanaeru 1486629076005634049
vestiazeta 1486633489101307907
kaelakovalskia 1486636197908602880
1486629076005634049 kobokanaeru
1486633489101307907 vestiazeta
1486636197908602880 kaelakovalskia
+35 -35
View File
@@ -3,54 +3,54 @@
# ----- [NIJISANJI EN] -----
# --- [Lazulight] ---
PomuRainpuff 1390637197167038464
EliraPendora 1390620618001838086
FinanaRyugu 1390209302120394754
1390637197167038464 PomuRainpuff
1390620618001838086 EliraPendora
1390209302120394754 FinanaRyugu
# --- [Obsydia] ---
Petra_Gurin 1413339084076978179
Selen_Tatsuki 1413318241804439552
Rosemi_Lovelock 1413326894435602434
1413339084076978179 Petra_Gurin
1413318241804439552 Selen_Tatsuki
1413326894435602434 Rosemi_Lovelock
# --- [Ethyria] ---
MillieParfait 1437952405283426310
EnnaAlouette 1437963160544284675
NinaKosaka 1437959162651156484
ReimuEndou 1437961007029227520
1437952405283426310 MillieParfait
1437963160544284675 EnnaAlouette
1437959162651156484 NinaKosaka p
1437961007029227520 ReimuEndou
# --- [Luxiem]---
Vox_Akuma 1465851881180348425
shu_amino 1465850835951357955
ike_eveland 1465851188562345985
Mysta_Rias 1465851243167895554
luca_kaneshiro 1465858739970273281
1465851881180348425 Vox_Akuma
1465850835951357955 shu_amino
1465851188562345985 ike_eveland
1465851243167895554 Mysta_Rias
1465858739970273281 luca_kaneshiro
# --- [Noctyx] ---
alban_knox 1490867613915828224
uki_violeta 1491195742123397124
Yugo_Asuma 1492604168145539072
Fulgur_Ovid 1493392149664219138
sonny_brisko 1493394108014292993
1490867613915828224 alban_knox
1491195742123397124 uki_violeta
1492604168145539072 Yugo_Asuma p
1493392149664219138 Fulgur_Ovid
1493394108014292993 sonny_brisko
# --- [ILUNA] ---
MariaMari0nette 1545351225293426688
AsterArcadia 1545352592884084736
ScarleYonaguni 1545354510515654656
KyoKanek0 1545552756773208066
AiaAmare 1545562635650957312
RenZott0 1546328834559340544
1545351225293426688 MariaMari0nette
1545352592884084736 AsterArcadia
1545354510515654656 ScarleYonaguni
1545552756773208066 KyoKanek0
1545562635650957312 AiaAmare
1546328834559340544 RenZott0
# --- [XSOLEIL] ---
MelocoKyoran 1589536631324692480
HexHaywire 1589524401170833409
D_Dropscythe 1589531775058968576
ZaionLanZa 1589539582399348738
KotokaTorahime 1591995159901663232
Ver_Vermillion 1589791076709171201
1589536631324692480 MelocoKyoran
1589524401170833409 HexHaywire
1589531775058968576 D_Dropscythe
1589539582399348738 ZaionLanZa p
1591995159901663232 KotokaTorahime
1589791076709171201 Ver_Vermillion
# --- [ALTS] ---
3W1W4 1507066475638673422
RyuguFinana 1506863869901168642
1507066475638673422 3W1W4
1506863869901168642 RyuguFinana
# --- [STAFF] ---
NIJISANJI_World 1214737620749578240
1214737620749578240 NIJISANJI_World
+22 -22
View File
@@ -2,31 +2,31 @@
# ----- [NIJISANJI ex-ID] -----
ZEA_Cornelia 1165866976192823297
Hana_Macchia 1165866977472024576
Taka_Radjiman 1165866977715347456
# graduated MiyuOttavia 1205742630467801088
RiksaDhirendra 1205743596785127424
Rai_Galilei 1205744131294654466
AmiciaMichella 1205744430386315265
Azura_Cecillia 1237600624646078464
Nara_Haramaung 1237603606448058371
LaylaAlstro2434 1237613895675596800
# elonmusk'd 1290243278814683137
Bonnivier_2434 1587724357496815616
Etna_Crimson 1290243331629318144
SiskaLeontyne 1290243510193369089
DeremKado 1323147415168323586
NagisaArcinia 1323147843398324225
RezaAvanluna 1323147856828510208
HyonaElatiora 1414845791944937476
Xia_Ekavira 1414845844692504611
MikaMelatika 1414849131655450626
1165866976192823297 ZEA_Cornelia
1165866977472024576 Hana_Macchia
1165866977715347456 Taka_Radjiman
# graduated 1205742630467801088 MiyuOttavia
1205743596785127424 RiksaDhirendra
1205744131294654466 Rai_Galilei
1205744430386315265 AmiciaMichella
1237600624646078464 Azura_Cecillia
1237603606448058371 Nara_Haramaung
1237613895675596800 LaylaAlstro2434
# elonmusk'd 1290243278814683137 bobon_pranaja
1587724357496815616 Bonnivier_2434
1290243331629318144 Etna_Crimson
1290243510193369089 SiskaLeontyne
1323147415168323586 DeremKado
1323147843398324225 NagisaArcinia
1323147856828510208 RezaAvanluna
1414845791944937476 HyonaElatiora
1414845844692504611 Xia_Ekavira
1414849131655450626 MikaMelatika
# --- [ALTS] ---
HanaMacchia2 1405494022446010375
1405494022446010375 HanaMacchia2
# --- [STAFF] ---
NIJISANJI_ID 1152523848060850177
1152523848060850177 NIJISANJI_ID
+27
View File
@@ -0,0 +1,27 @@
[scraper rate limitations]
50 searches/pages every 15 minutes
- max 20 tweets per search
[possible combinations which involve a "target cross-tweeter" B]
A retweets B
- B's tweet may have cross-mentions (B1, B2, etc.)
- rt_author_id=B; rt_mentions=B1,B2,...
A retweets tweet mentioning B
- rt_author_id=...; rt_mentions=B...
A quotes a tweet from B
- B's tweet may have cross-mentions (B1, B2, etc.)
- quote_retweeted=B; rt_mentions=B1,B2,...
A quotes a tweet mentioning B
- quote_retweeted=...; rt_mentions=B...
A replies to B
r = B
A replies to a tweet mentioning B
- r=...; rtm=B1,B2,...
-- NO --
A retweets a tweet that quotes a tweet mentioning B?
[potential code change]
rtm --> tgm (target tweet's mentions)
-88
View File
@@ -1,88 +0,0 @@
# 1283657064410017793 2023-03-20
# 1283656034305769472 2023-03-20
# 1283653858510598144 2023-03-20
# 1283650008835743744 2023-03-20
# 1283646922406760448 2023-03-20
# 1363705980261855232 2023-03-20
# 1409783149211443200 2023-03-20
# 1409817096523968513 2023-03-20
# 1409784760805650436 2023-03-20
# 1409819816194576394 2023-03-20
# 1409817941705515015 2023-03-20
# 1536579341332516864 2023-03-20
# 1536577295632441344 2023-03-20
# 1536576325296996352 2023-03-20
# 1536575088996524032 2023-03-20
# 1397148959798226945 2023-03-20
# 1540204458042621952 2023-03-20
# 1234752200145899520 2023-03-20
# 1234753886520393729 2023-03-20
# 1235180878449397764 2023-03-20
# 1328275136575799297 2023-03-20
# 1328277233492844544 2023-03-20
# 1328277750000492545 2023-03-20
# 1486629076005634049 2023-03-20
# 1486633489101307907 2023-03-20
# 1486636197908602880 2023-03-20
# 1390637197167038464 2023-03-20
# 1390620618001838086 2023-03-20
# 1390209302120394754 2023-03-20
# 1413339084076978179 2023-03-20
# 1413318241804439552 2023-03-20
# 1413326894435602434 2023-03-20
# 1437952405283426310 2023-03-20
# 1437963160544284675 2023-03-20
# 1437959162651156484 2023-03-20
# 1437961007029227520 2023-03-20
# 1465851881180348425 2023-03-20
# 1465850835951357955 2023-03-20
# 1465851188562345985 2023-03-20
# 1465851243167895554 2023-03-20
# 1465858739970273281 2023-03-20
# 1490867613915828224 2023-03-20
# 1491195742123397124 2023-03-20
# 1492604168145539072 2023-03-20
# 1493392149664219138 2023-03-20
# 1493394108014292993 2023-03-20
# 1545351225293426688 2023-03-20
# 1545352592884084736 2023-03-20
# 1545354510515654656 2023-03-20
# 1545552756773208066 2023-03-20
# 1545562635650957312 2023-03-20
# 1546328834559340544 2023-03-20
# 1507066475638673422 2023-03-20
# 1506863869901168642 2023-03-20
# 1214737620749578240 2023-03-20
# 1165866976192823297 2023-03-20
# 1165866977472024576 2023-03-20
# 1165866977715347456 2023-03-20
# 1205742630467801088 2023-01-01
# 1205743596785127424 2023-03-20
# 1205744131294654466 2023-03-20
# 1205744430386315265 2023-03-20
# 1237600624646078464 2023-03-20
# 1237603606448058371 2023-03-20
# 1237613895675596800 2023-03-20
# 1290243278814683137 2023-01-01
# 1290243331629318144 2023-03-20
# 1290243510193369089 2023-03-20
# 1323147415168323586 2023-03-20
# 1323147843398324225 2023-03-20
# 1323147856828510208 2023-03-20
# 1414845791944937476 2023-03-20
# 1414845844692504611 2023-03-20
# 1414849131655450626 2023-03-20
# 1405494022446010375 2023-03-20
# 1152523848060850177 2023-03-20
# 1587724357496815616 2023-03-20
# 1589536631324692480 2023-03-20
# 1589524401170833409 2023-03-20
# 1589531775058968576 2023-03-20
# 1589539582399348738 2023-03-20
# 1591995159901663232 2023-03-20
# 1589791076709171201 2023-03-20
# 1582926739684339712 2023-03-20
# 1582922712166825986 2023-03-20
# 1582927907206631425 2023-03-20
# 1582925071546732544 2023-03-20
File diff suppressed because it is too large Load Diff
+3 -2
View File
@@ -1,6 +1,7 @@
python-dotenv
nest-asyncio
pytz
tweet-capture
git+https://github.com/muskit/tweety.git
tweepy
tweet-capture
opencv-python
git+https://github.com/muskit/twint_2022_fix.git
-23
View File
@@ -1,23 +0,0 @@
## Twitter developer credentials.
# ---->> MAKE SURE YOUR VALUES AREN'T UPLOADED TO THE REPO! <<----
#
# This file should be added to .gitignore as a safeguard.
#
# If Git still wants to commit this file after changing its contents,
# force Git to stop tracking the file's changes:
# git update-index --assume-unchanged [<file> ...]
#
# To resume tracking its changes:
# git update-index --no-assume-unchanged [<file> ...]
#
# https://stackoverflow.com/questions/10755655/git-ignore-tracked-files
# note: api_key/secret = consumer_key/secret
[Credentials]
api_key=xxx
api_secret=yyy
bearer_token=zzz
oauth1_access_token=x
oauth1_access_secret=y
+35
View File
@@ -0,0 +1,35 @@
from dotenv import dotenv_values
## Track multiple accounts in a pool, cycling to the next one when requested.
class AccountPool:
def __init__(self):
self.__accounts: list[tuple[str, str]] = list()
self.__idx = -1
creds = dotenv_values()
i = 0
while True:
if f'scraper_username{i}' in creds \
and f'scraper_password{i}' in creds:
self.__accounts.append((
creds[f'scraper_username{i}'],
creds[f'scraper_password{i}']
))
i += 1
else:
break
def use_index(self, idx):
self.__idx = idx
return self.current()
def current(self):
if 0 <= self.__idx < len(self.__accounts):
return self.__accounts[self.__idx]
return None
def next(self) -> tuple[str, str] | None:
self.__idx += 1
if self.__idx >= len(self.__accounts):
self.__idx = -1
return None
return self.current()
-42
View File
@@ -1,42 +0,0 @@
## Twitter developer credentials management.
import os
import configparser
import util
# returns dictionary of the Credentials section.
# [NOT TO BE USED OUTSIDE OF THIS FILE.]
def __get_ini_credentials():
    """Return the [Credentials] section of <project dir>/secrets.ini.

    Returns None when the file could not be read or has no such section.
    """
    parser = configparser.RawConfigParser()
    ini_path = os.path.join(util.get_project_dir(), 'secrets.ini')
    files_read = parser.read(ini_path)
    if len(files_read) == 0 or not parser.has_section('Credentials'):
        return None
    return parser['Credentials']
# returns the consumer api_key stored in secrets.ini
def api_key():
    """Return the `api_key` option, or the placeholder 'xxx' when unavailable."""
    creds = __get_ini_credentials()
    if creds is None:
        return 'xxx'
    return creds.get(option='api_key', fallback='xxx')
# returns the consumer api_secret stored in secrets.ini
def api_secret():
    """Return the `api_secret` option, or the placeholder 'yyy' when unavailable."""
    creds = __get_ini_credentials()
    if creds is None:
        return 'yyy'
    return creds.get(option='api_secret', fallback='yyy')
# returns the bearer_token stored in secrets.ini
def bearer_token():
    """Return the `bearer_token` option, or the placeholder 'zzz' when unavailable."""
    creds = __get_ini_credentials()
    if creds is None:
        return 'zzz'
    return creds.get(option='bearer_token', fallback='zzz')
# returns the access_token stored in secrets.ini
def access_token():
    """Return the `oauth1_access_token` option, or the placeholder 'aaa'.

    The same placeholder is returned whether secrets.ini is missing or only
    the option is missing; previously the latter case returned 'zzz', the
    bearer-token placeholder.
    """
    c = __get_ini_credentials()
    if c is None:
        return 'aaa'
    return c.get(option='oauth1_access_token', fallback='aaa')
# returns the access_secret stored in secrets.ini
def access_secret():
    """Return the `oauth1_access_secret` option, or the placeholder 'bbb'.

    The same placeholder is returned whether secrets.ini is missing or only
    the option is missing; previously the latter case returned 'zzz', the
    bearer-token placeholder.
    """
    c = __get_ini_credentials()
    if c is None:
        return 'bbb'
    return c.get(option='oauth1_access_secret', fallback='bbb')
def get_all_secrets():
    """Return every credential as newline-separated `name:value` lines."""
    labelled = (
        ('api_key', api_key()),
        ('api_secret', api_secret()),
        ('bearer_token', bearer_token()),
        ('access_token', access_token()),
        ('access_secret', access_secret()),
    )
    return '\n'.join(f'{label}:{value}' for label, value in labelled)
+35 -55
View File
@@ -8,54 +8,24 @@ import traceback
import datetime
import asyncio
import shutil
from datetime import datetime
import twint
from scraper import Scraper
from util import *
from talent_lists import *
from twapi import TwAPI
import talenttweet as tt
import ttweetqueue as ttq
PROGRAM_ARGS = None
safe_to_post_tweets = True
errored = False
## Returns all scraped tweets (up to limit) from a user ID.
def get_user_tweets(id, since_date=None, limit=None):
    """Scrape a user's tweets with twint and return the collected tweet objects.

    id: user ID to scrape.
    since_date: optional date string; ' 00:00:00' is appended to it to form
        twint's Since value. None means no lower bound.
    limit: passed through to twint's Limit setting.

    On a scraping error the module-level flag `safe_to_post_tweets` is set to
    False and whatever was collected so far is returned. KeyboardInterrupt is
    deliberately NOT caught here so that the caller's own interrupt handling
    still runs.
    """
    global safe_to_post_tweets
    tweets = list()

    c = twint.Config()
    c.User_id = id
    c.Limit = limit
    c.Store_object = True
    c.Store_object_tweets_list = tweets  # twint appends results to this list
    c.Hide_output = True
    c.Since = '' if since_date is None else f'{since_date} 00:00:00'

    user_str = f'@{util.get_username_local(id)}'
    print(f'Scraping tweets from {user_str} since {"forever ago" if c.Since == "" else c.Since}...')
    try:
        twint.run.Search(c)
    except Exception:
        print(f'Had trouble getting tweets from {user_str}')
        safe_to_post_tweets = False
        traceback.print_exc()

    # A tweet counts as a quote tweet when it carries a non-empty quote URL.
    qrt_count = sum(
        1 for twt in tweets
        if isinstance(twt.quote_url, str) and twt.quote_url != ''
    )
    print(f'Scraped {len(tweets)} tweets, {qrt_count} of which are quote tweets.')
    return tweets
# Returns a list of sorted and filtered TalentTweets (should
# be equivalent to queue.txt)
async def get_cross_talent_tweets():
async def get_cross_tweets_online():
global safe_to_post_tweets
scraper = Scraper()
queue = ttq.TalentTweetQueue.instance
# Begin getting tweets from online
@@ -64,19 +34,25 @@ async def get_cross_talent_tweets():
for i, (talent_id, talent_username) in enumerate(talent_lists.talents.items()):
print(f'[{i+1}/{len(talent_lists.talents)}] {talent_username}-----------------------------------')
try:
tweets = get_user_tweets(talent_id, since_date=queue.finished_user_dates.get(talent_id, None))
for tweet in tweets:
if tweet.id not in queue.ttweets_dict and tweet.id not in queue.finished_ttweets:
ttweet = await tt.TalentTweet.create_from_twint_tweet(tweet)
if ttweet.is_cross_company():
since_date = queue.finished_user_dates.get(talent_id, None)
ttweets = scraper.get_cross_ttweets_from_user(talent_username, since_date=since_date)
print(f'got {len(ttweets)} TalentTweets')
for ttweet in ttweets:
if ttweet.tweet_id not in queue.finished_ttweets \
and ttweet.is_cross_company():
queue.add_ttweet(ttweet)
except KeyboardInterrupt as e:
raise e
except:
print('Error occurred processing tweet data.')
safe_to_post_tweets = False
print(traceback.format_exc())
queue.finished_user_dates[talent_id] = '2000-01-01'
traceback.print_exc()
else:
queue.finished_user_dates[talent_id] = util.get_current_date()
queue.save_file()
except KeyboardInterrupt:
print('Interrupting tweet pulling... NOTE: remaining dates in queue file will not be updated!')
queue.save_file()
except:
print('Unhandled error occurred while pulling tweets.')
traceback.print_exc()
@@ -88,9 +64,9 @@ async def get_cross_talent_tweets():
# return False = errored or we posted at least one ttweet
# return True = we didn't post a single ttweet
async def process_queue() -> bool:
global PROGRAM_ARGS
global errored
WAIT_TIME = 60*3
WAIT_TIME = 60*15
ttweets_posted = 0
errored = False
@@ -101,13 +77,10 @@ async def process_queue() -> bool:
print('Posting queue is empty!')
return True
if PROGRAM_ARGS.announce_catchup:
TwAPI.instance.post_tweet(text=f'Starting to catch up through {queued_ttweets_count} logged tweets.')
try:
while not queue.is_empty():
ttweet = queue.get_next_ttweet()
tweet_was_successful = await TwAPI.instance.post_ttweet(ttweet, is_catchup=True)
tweet_was_successful = await TwAPI.instance.post_ttweet(ttweet)
print('running queue.good()...')
queue.good()
@@ -123,9 +96,6 @@ async def process_queue() -> bool:
print('Unhandled error occurred while posting tweets from queue.')
errored = True
traceback.print_exc()
else:
if PROGRAM_ARGS.announce_catchup:
await TwAPI.instance.post_tweet('Finished with catch-up tweets!')
if errored or ttweets_posted > 0:
return False
@@ -133,16 +103,14 @@ async def process_queue() -> bool:
# return True = no problems
# return False = issue occurred where we couldn't post all past tweets properly
async def run(program_args):
global PROGRAM_ARGS
async def run(PROGRAM_ARGS):
global errored
global safe_to_post_tweets
PROGRAM_ARGS = program_args
ret = None
queue = ttq.TalentTweetQueue.instance
async def queue_loop():
while True:
await get_cross_talent_tweets()
print(f'{queue.get_count()} cross-company tweets to attempt sharing.')
try:
if safe_to_post_tweets:
@@ -152,6 +120,9 @@ async def run(program_args):
else:
print('Tweets were not retrieved cleanly.')
return False
except KeyboardInterrupt:
print('Interrupting queue processing...')
return False
except:
print('Unhandled error occurred while running catch up in posting phase.')
traceback.print_exc()
@@ -159,3 +130,12 @@ async def run(program_args):
if errored:
return False
await get_cross_tweets_online()
if PROGRAM_ARGS.straight_to_queue:
print('Processing queue first before pulling tweets...')
return await queue_loop()
else:
await get_cross_tweets_online()
return await queue_loop()
+6 -51
View File
@@ -1,66 +1,21 @@
## The bot's listen mode
# Continuously listen for cross-company interactions.
from time import sleep
import asyncio
import traceback
import tweepy
from talenttweet import TalentTweet
from twapi import TwAPI
import ttweetqueue as ttq
import api_secrets
import talent_lists as tl
import util
import catchup
errors_encountered = 0
def on_response(resp):
    """Handle one streamed response: share the tweet if it is cross-company.

    A successfully shared tweet is recorded in the queue's finished list;
    failures and non-cross-company tweets are only logged.
    """
    ttweet = TalentTweet.create_from_v2api_response(resp)
    if ttweet is None:
        print('Couldn\'t create ttweet from the response:')
        print(resp)
        return

    tweet_username = util.get_username(ttweet.author_id)
    if not ttweet.is_cross_company():
        print(f'Tweet {tweet_username}/{ttweet.tweet_id} is not cross-company.')
        return

    print(f'Tweet {ttweet.tweet_id} is cross-company! Creating post...')
    posted = asyncio.run(TwAPI.instance.post_ttweet(ttweet))
    if not posted:
        print(f'[WARNING] Failed to post ttweet for {tweet_username}/{ttweet.tweet_id}!')
        return
    ttq.TalentTweetQueue.instance.add_finished_tweet(ttweet.tweet_id)
def run():
def run(PROGRAM_ARGS):
global errors_encountered
while True:
try:
sc = tweepy.StreamingClient(api_secrets.bearer_token())
# clear rules
print('Clearing streaming rules...')
rules_resp = sc.get_rules()
if rules_resp.data:
print('Deleted a rule!')
sc.delete_rules(rules_resp.data)
# create new rules
print('Creating new streaming rules...')
for rule in tl.get_twitter_rules():
sc.add_rules(tweepy.StreamRule(rule))
print('--------------------------------------------')
print(sc.get_rules().data)
print('--------------------------------------------')
sc.on_response=on_response
print('Starting listening stream...')
sc.filter(
expansions=TwAPI.TWEET_EXPANSIONS,
media_fields=TwAPI.TWEET_MEDIA_FIELDS,
tweet_fields=TwAPI.TWEET_FIELDS
)
asyncio.run(catchup.run(PROGRAM_ARGS))
print('Sleeping for 10 minutes...')
sleep(60*10) # run every 10 minutes
except KeyboardInterrupt:
print('Interrupt signal received. Exiting listen mode.')
print(f'{errors_encountered} errors encountered throughout session.')
+15 -56
View File
@@ -8,7 +8,6 @@ import nest_asyncio
import talent_lists
import ttweetqueue as ttq
import api_secrets
import catchup
import listen
from twapi import TwAPI
@@ -16,69 +15,36 @@ from twapi import TwAPI
PROGRAM_ARGS = None
MODES_HELP_STR = '''mode to run the bot at:
l,listen: listen for new tweets from all accounts; will not terminate unless error occurs
c,catchup: scan all tweets from all accounts; will terminate when done
d,delete-all: delete all tweets on account provided by secrets.ini; make sure the function is uncommented in twapi.py'''
<blank> scrape accounts in lists and post cross-company tweets if relevant
cmd drop into Python interpretor with access to initialized variables'''
def init_argparse():
p = argparse.ArgumentParser(description='Twitter bot that follows interactions between Nijisanji EN/ID and hololive EN/ID members.', formatter_class=RawTextHelpFormatter)
p.add_argument('mode', nargs='?', \
help=MODES_HELP_STR)
p.add_argument('--show-tokens', action='store_true', help='[DO NOT USE IN PUBLIC SETTING] print stored tokens from secrets.ini')
p.add_argument('--announce-catchup', action='store_true', help='In catch-up mode, post a tweet announcing catch-up mode.')
p.add_argument('--auto-listen', action='store_true', help='In catch-up mode, transition to listen mode after successfuly catching up.')
p.add_argument('--no-delay', action='store_true', help='In self-destruct mode, clear tweets without safety waiting.')
p.add_argument('mode', nargs='?', help=MODES_HELP_STR)
p.add_argument('--no-listen', action='store_true', help='Run one scraping-posting cycle without waiting to run again.')
p.add_argument('--straight-to-queue', action='store_true', help='Go through queue first before attempting to pull tweets.')
return p
def command_line():
# TODO (extra): implement command line mode for manually controlling the bot
print('Shell coming soon. For now, here\'s a Python interpretor.')
print('Here\'s a Python interpretor.')
code.interact(local=globals())
pass
async def self_destruct():
    """Delete all tweets via TwAPI, after a 10-second warning unless --no-delay is set."""
    skip_countdown = PROGRAM_ARGS.no_delay
    if not skip_countdown:
        print('\033[31;6m-----DELETING ALL TWEETS IN 10 SECONDS!! PRESS CTRL+C TO CANCEL.-----\033[0m')
        await asyncio.sleep(10)
    await TwAPI.instance.nuke_tweets()
async def async_main():
global PROGRAM_ARGS
## Determine running mode
# match PROGRAM_ARGS.mode.lower():
# case 'l' | 'listen':
# print('RUNNING IN LISTEN MODE\n')
# await listen.run()
# case 'c' | 'catchup':
# print('RUNNING IN CATCH-UP MODE\n')
# if await catchup.run(PROGRAM_ARGS) and PROGRAM_ARGS.auto_listen:
# print('CATCH-UP MODE DONE, GOING INTO LISTEN MODE')
# await listen.run()
# case 'd' | 'delete-all':
# print('WARNING: SELF-DESTRUCT MODE')
# await self_destruct()
# case 'cmd':
# command_line()
# case _:
# print('\ninvalid mode. run with no arguments or "-h" for help page, including mode list.')
# return
if PROGRAM_ARGS.mode == None:
if PROGRAM_ARGS.no_listen:
await catchup.run(PROGRAM_ARGS)
else:
listen.run(PROGRAM_ARGS)
return
mode = PROGRAM_ARGS.mode.lower()
if mode in ['l', 'listen']:
print('RUNNING IN LISTEN MODE')
await listen.run()
elif mode in ['c', 'catchup']:
print('RUNNING IN CATCH UP MODE')
if await catchup.run(PROGRAM_ARGS) and PROGRAM_ARGS.auto_listen:
print('CATCH UP MODE DONE, GOING INTO LISTEN MODE')
listen.run()
elif mode in ['d', 'delete-all']:
print('WARNING: SELF-DESTRUCT MODE')
await self_destruct()
elif mode == 'cmd':
if mode == 'cmd':
command_line()
else:
print('\ninvalid mode. run with no arguments or -h for help and modes')
print('\nunknown mode. run with no arguments or -h for help and modes')
def main():
global PROGRAM_ARGS
@@ -90,13 +56,6 @@ def main():
PROGRAM_ARGS = parser.parse_args()
if PROGRAM_ARGS.show_tokens:
print(api_secrets.get_all_secrets())
if PROGRAM_ARGS.mode is None: return
## We expect to run in some mode now.
# Initialize shared API instance
TwAPI()
+156
View File
@@ -0,0 +1,156 @@
from os.path import exists
from time import sleep
from datetime import datetime, timedelta
import pytz
from tweety import Twitter
from tweety.types import *
from tweety.exceptions_ import *
from tweety.filters import SearchFilters
from account_pool import AccountPool
from tweety_utils import *
from talenttweet import *
import talent_lists
class Scraper:
    """Scrapes tweets through tweety, rotating logins taken from an AccountPool."""

    def __init__(self):
        # Keep a class-level handle to the most recently created scraper.
        Scraper.instance = self
        self.__account = AccountPool()
        self.try_login()

    def try_login(self, account_idx: int = None) -> bool:
        """Log in with a pooled account.

        account_idx: when given, that specific pool entry is used; otherwise
            the pool advances to its next account.

        Returns True after a connect/sign-in was attempted, False when the
        pool had no account to offer.
        """
        if account_idx is not None:
            acc = self.__account.use_index(account_idx)
        else:
            acc = self.__account.next()

        if acc is None:
            print('exhausted all accounts!')
            return False

        name = acc[0]
        print(f"using {name}")
        self.app = Twitter(name)
        # presumably "<name>.json" is a saved tweety session -- TODO confirm
        if exists(f"{name}.json"):
            try:
                self.app.connect()
            except Exception:
                # Reconnecting failed: fall back to a fresh sign-in.
                # (KeyboardInterrupt is no longer swallowed here.)
                self.app.sign_in(*acc)
        else:
            self.app.sign_in(*acc)
        return True

    # since MUST BE TIMEZONE AWARE
    # usage example: since=datetime(2023, 8, 1).replace(tzinfo=pytz.utc)
    def get_tweets_from_user(self, username: str, since: datetime = None) -> "list[Tweet]":
        """Collect a user's tweets, newest first, back to ``since``.

        username: account to search with the query ``from:<username>``.
        since: timezone-aware lower bound; defaults to 7 days before now (UTC).

        Pages are fetched until a tweet authored by the user and dated at or
        before ``since`` is seen, or a page comes back empty. On UnknownError
        another account is tried (sleeping 2 minutes when needed) and the
        same page is requested again. Returns the tweets sorted by id.
        """
        reached_backdate = False
        tweets: list[Tweet] = []
        cur = None

        if since is None:
            since = datetime.now(tz=pytz.utc) - timedelta(days=7)
            print(f'falling back to grabbing tweets since 7 days ago ({since.date()})')
        else:
            print(f'grabbing tweets since {since.date()}')

        uid = self.app._get_user_id(username)
        print(f"{username} = {uid}")

        def add_tweet(tweet: Tweet):
            nonlocal reached_backdate
            # malformed tweet check: accessing the author id must not fail
            try:
                tweet.author.id
            except Exception:
                print(f"skipping malformed tweet: {tweet}")
                return

            # recover lost info
            if tweet.is_retweet:
                if tweet.retweeted_tweet is None:
                    print(f'{tweet.author.username}/{tweet.id} is missing the RT! It\'s probably nothing...')
                    # tweet.retweeted_tweet = self.app.tweet_detail(str(tweet.id)).retweeted_tweet
                    tweet.is_retweet = False
                elif tweet.retweeted_tweet.author is None:
                    print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the RT author! Recovering details...')
                    tweet.retweeted_tweet = self.app.tweet_detail(tweet.retweeted_tweet.id)
            if tweet.is_quoted:
                if tweet.quoted_tweet is None:  # quoted tweet is deleted
                    # print(f'{tweet.author.username}/{tweet.id} is missing the QRT! Recovering...')
                    # tweet.quoted_tweet = self.app.tweet_detail(str(tweet.id)).quoted_tweet
                    tweet.is_quoted = False
                elif tweet.quoted_tweet.author is None:
                    print(f'WARNING: {tweet.author.username}/{tweet.id} is missing the QRT author! Recovering details...')
                    tweet.quoted_tweet = self.app.tweet_detail(tweet.quoted_tweet.id)

            # fix reply if it exists
            # if tweet.is_reply and tweet.replied_to is None:
            #     tweet.replied_to = self.app.tweet_detail(tweet.original_tweet['in_reply_to_status_id_str'])

            tweets.append(tweet)
            # Only the user's own tweets can end the scan.
            if not reached_backdate and int(tweet.author.id) == uid and tweet.date <= since:
                print("reached backdate")
                reached_backdate = True

        is_private = uid in talent_lists.privated_accounts
        if is_private:
            # NOTE(review): account 0 appears to be the one able to see
            # privated accounts -- confirm against the .env setup.
            self.try_login(0)

        while not reached_backdate:
            try:
                # uts = self.app.get_tweets(uid, replies=True, cursor=cur)
                search = self.app.search(f'from:{username}', filter_=SearchFilters.Latest(), cursor=cur)
                cur_page = search.tweets
                print(f'obtained {len(cur_page)} tweets')
                if len(cur_page) == 0:
                    break

                for e in cur_page:
                    if isinstance(e, Tweet):
                        add_tweet(e)
                    elif isinstance(e, TweetThread):
                        # FIXME: rework when replied_to is fixed (currently populates user_mentions)
                        # latest tweet in thread = og author's reply
                        for t in e:
                            add_tweet(t)
                cur = search.cursor
            except UnknownError:
                print("UnknownError occurred, probably rate-limited")
                if is_private:
                    print("sticking pvt-accessible account. sleeping for 2 minutes...")
                    sleep(120)
                    print()
                    self.try_login(0)
                else:
                    logged_in = self.try_login()
                    if not logged_in:
                        # Pool exhausted: wait, then start over from the first account.
                        print("sleeping for 2 minutes...")
                        sleep(120)
                        print()
                        self.try_login()

        tweets.sort(key=lambda t: t.id)
        return tweets

    def get_cross_ttweets_from_user(self, username: str, since_date: str = None) -> "list[TalentTweet]":
        """Return the user's cross-company tweets as TalentTweets.

        since_date: optional dash-separated date string (e.g. '2023-07-30'),
            interpreted as UTC midnight. None lets get_tweets_from_user apply
            its default window.
        """
        if since_date is not None:
            d = since_date.split('-')
            since = datetime(*[int(x) for x in d]).replace(tzinfo=pytz.utc)
        else:
            since = None

        tweets = self.get_tweets_from_user(username, since)
        # print_tweets(tweets)

        ret: list[TalentTweet] = []
        for t in tweets:
            ttweet = TalentTweet.create_from_tweety(t)
            if ttweet.is_cross_company():
                ret.append(ttweet)
        return ret
# Manual smoke test: print one account's recent cross-company tweets.
if __name__ == '__main__':
    talent_lists.init()
    s = Scraper()
    # get_cross_ttweets_from_user takes `since_date` as a 'YYYY-MM-DD' string,
    # not a `since` datetime (the old keyword raised TypeError).
    ttweets = s.get_cross_ttweets_from_user("pomurainpuff", since_date='2023-07-30')
    print("\n".join([x.__repr__() for x in ttweets]))
+28 -13
View File
@@ -1,11 +1,12 @@
import util
holo_en = dict()
holo_id = dict()
niji_en = dict()
niji_exid = dict()
talents = dict()
talents_company = dict()
holo_en: dict[int, str] = dict()
holo_id: dict[int, str] = dict()
niji_en: dict[int, str] = dict()
niji_exid: dict[int, str] = dict()
talents: dict[int, str] = dict()
talents_company: dict[int, str] = dict()
privated_accounts: dict[int, str] = dict()
test_talents = dict()
@@ -16,12 +17,15 @@ def __create_dict(file, _dict, company):
with open(file, 'r') as f:
for line in f:
words = line.split()
if len(words) == 2 and line[0] != '#':
name, id = line.split()
name = f'{util.get_username_online(id, default=name)}' # attempt to get updated name
talents[int(id)] = name
_dict[int(id)] = name
talents_company[int(id)] = company
if len(words) >= 2 and line[0] != '#':
t = line.split()
id, name = int(t[0]), t[1]
# name = f'{util.get_username_online(id, default=name)}' # attempt to get updated name
talents[id] = name
_dict[id] = name
talents_company[id] = company
if len(words) > 2 and words[2] == 'p':
privated_accounts[id] = name
def init():
global holo_en
global holo_id
@@ -36,11 +40,22 @@ def init():
# nijiEN
__create_dict(f'{util.get_project_dir()}/lists/nijien.txt', niji_en, 'nijiEN')
# nijiexID
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex-ID')
__create_dict(f'{util.get_project_dir()}/lists/nijiexid.txt', niji_exid, 'nijiex\'ID')
# TODO: nijiex-KR
test_talents = holo_en
def is_niji(id: int) -> bool:
    """Return True when the ID is listed in niji_en or niji_exid."""
    return any(id in roster for roster in (niji_en, niji_exid))
def is_holo(id: int) -> bool:
    """Return True when the ID is listed in holo_en or holo_id."""
    return any(id in roster for roster in (holo_en, holo_id))
def is_cross_company(id1: int, id2: int):
    """Return True when one ID is a niji talent and the other a holo talent."""
    if is_niji(id1) and is_holo(id2):
        return True
    return is_holo(id1) and is_niji(id2)
# For filtered stream
# DEPRECATED: thx elon
def get_twitter_rules():
global talents
rules = list()
+162 -112
View File
@@ -1,26 +1,58 @@
import datetime
from datetime import datetime
from zoneinfo import ZoneInfo
import platform
import pytz
from tweety.types import *
import twapi
import talent_lists
# from talent_lists import is_cross_company, talents
import talent_lists as tl
import util
class TalentTweet:
# Serialized one-liner format:
# {tweet} {author} {time in seconds since epoch UTC} m {mention set} r {reply to author} q {quote tweet author} rt {retweeted user's id} rtm {mentions in retweet}
def serialize(self):
    """Return this tweet as one space-separated line in the format above.

    A retweet line ends right after the `rt` field; the m/r/q fields are
    only written for non-retweets.
    """
    fields = [f'{self.tweet_id}', f'{self.author_id}', f'{int(self.date_time.timestamp())}']
    if self.date_time.tzinfo is None:
        print(f'warning: serialized tweet {self.tweet_id} has a NAIVE timestamp!')

    if len(self.rt_mentions) > 0:
        fields.append('rtm')
        fields.extend(f'{n}' for n in self.rt_mentions)
    if self.rt_author_id != None:
        fields.extend(('rt', f'{self.rt_author_id}'))
        return ' '.join(fields)  # stop here since retweets can't have other info

    if len(self.mentions) > 0:
        fields.append('m')
        fields.extend(f'{id}' for id in self.mentions)
    if self.reply_to:
        fields.extend(('r', f'{self.reply_to}'))
    if self.quote_tweeted:
        fields.extend(('q', f'{self.quote_tweeted}'))
    return ' '.join(fields)
@staticmethod
def deserialize(serialized_str: str):
tokens = serialized_str.split()
if len(tokens) < 3:
token_check = serialized_str.split('#')[0]
if len(token_check) < 3:
raise ValueError('not enough tokens to reconstruct a TalentTweet')
tweet_id, author_id = int(tokens[0]), int(tokens[1])
date_time = datetime.datetime.fromtimestamp(float(tokens[2]), tz=pytz.utc)
tokens = serialized_str.split()
mentions = set()
tweet_id, author_id = int(tokens[0]), int(tokens[1])
date_time = datetime.fromtimestamp(float(tokens[2]), tz=pytz.utc)
mentions = list()
reply_to = None
quote_retweeted = None
rt = None
rtm = list()
mode = ''
for i in range(3, len(tokens)):
@@ -30,144 +62,99 @@ class TalentTweet:
if tokens[i].isnumeric():
if mode == 'm': # mentions
mentions.add(int(tokens[i]))
mentions.append(int(tokens[i]))
continue
if mode == 'r': # reply_to
reply_to = int(tokens[i])
continue
if mode == 'q': # quote_retweeted
quote_retweeted = int(tokens[i])
if mode == 'rt': # retweeted user
rt = int(tokens[i])
if mode == 'rtm': # retweet/qrt mentions
rtm.append(int(tokens[i]))
return TalentTweet(
tweet_id=tweet_id, author_id=author_id,
date_time=date_time, mrq=(mentions, reply_to, quote_retweeted)
date_time=date_time, mrq=(mentions, reply_to, quote_retweeted),
rt_author_id=rt, rt_mentions=rtm
)
## Creates a TalentTweet from a Tweety-library Tweet.
@staticmethod
async def create_from_twint_tweet(tweet):
    """Build a TalentTweet from a twint tweet object.

    Derives the (mentions, reply_to, quoted) triple from the tweet's
    `mentions`, `reply_to` and `quote_url` fields and attaches the system's
    local timezone to the parsed timestamp.
    """
    # --- reply_to / mentions ---
    mentions = {x['id'] for x in tweet.mentions}
    reply_to = None
    is_reply = tweet.id != int(tweet.conversation_id)
    if is_reply and len(tweet.reply_to) > 0:
        reply_to = tweet.reply_to[0]['id']  # FIXME: QRT = is_reply and len(tweet.reply_to) == 0?
        # Everyone else in the reply chain counts as a mention.
        mentions.update(x['id'] for x in tweet.reply_to[1:])
        # The direct reply target is reported separately, not as a mention.
        mentions.discard(reply_to)

    # --- quote tweet ---
    quoted_id = None
    if isinstance(tweet.quote_url, str):
        # NOTE(review): assumes the quoted username is the second-to-last
        # '/'-separated segment of quote_url -- confirm against twint's format.
        quote_tokens = tweet.quote_url.split('/')
        if len(quote_tokens) >= 2:
            quoted_username = quote_tokens[-2]
            quoted_id = util.get_user_id_local(quoted_username)
            if quoted_id == -1:  # not known locally: look it up online
                quoted_id = util.get_user_id_online(quoted_username)

    # NOTE: strptime doesn't attach timezone info.
    # tweet's datetime will be in local time
    date_time = datetime.datetime.strptime(tweet.datetime, '%Y-%m-%d %H:%M:%S %Z')
    LOCAL_TIMEZONE = datetime.datetime.now().astimezone().tzinfo
    date_time = date_time.replace(tzinfo=LOCAL_TIMEZONE)  # attach system local timezone

    return TalentTweet(tweet_id=tweet.id, author_id=tweet.user_id, date_time=date_time, mrq=(mentions, reply_to, quoted_id))
@staticmethod
def create_from_v2api_response(resp):
tweet = resp.data
if tweet is None: return None
mrq = twapi.TwAPI.get_mrq(resp)
rt_target = None
rt_author_id = None
# check if is RT
if tweet.referenced_tweets is not None and len(tweet.referenced_tweets) > 0:
for ref in tweet.referenced_tweets:
if ref.type == 'retweeted':
rt_target = ref.id
for incl_tweet in resp.includes['tweets']:
if incl_tweet.id == ref.id:
rt_author_id = incl_tweet.author_id
def create_from_tweety(tweety: Tweet):
if tweety.is_retweet:
rtm = [int(x.id) for x in tweety.retweeted_tweet.user_mentions]
elif tweety.is_quoted:
rtm = [int(x.id) for x in tweety.quoted_tweet.user_mentions]
else:
rtm = list()
return TalentTweet(
tweet_id=tweet.id,
author_id=tweet.author_id,
date_time=tweet.created_at,
mrq=mrq,
rt_target=rt_target,
rt_author_id=rt_author_id
tweet_id=int(tweety.id), author_id=int(tweety.author.id),
date_time=tweety.date, text=tweety.text,
mrq=(
[int(x.id) for x in tweety.user_mentions],
int(tweety.original_tweet['in_reply_to_user_id_str']) if tweety.is_reply else None,
int(tweety.quoted_tweet.author.id) if tweety.quoted_tweet is not None else None
),
rt_author_id=tweety.retweeted_tweet.author.id if tweety.is_retweet else None,
rt_mentions=rtm
)
@staticmethod
async def create_from_id(id):
    """Fetch the tweet with this ID through TwAPI and build a TalentTweet from the response."""
    api = twapi.TwAPI.instance
    response = await api.get_tweet_response(id)
    return TalentTweet.create_from_v2api_response(response)
def __init__(self, tweet_id: int, author_id: int, date_time: datetime.datetime, mrq: tuple, rt_target: int=None, rt_author_id: int=None):
def __init__(self, tweet_id: int, author_id: int, date_time: datetime, text: str = None, mrq: tuple[list[int], int|None, int|None]=None, rt_author_id: int=None, rt_mentions: list[int]=None):
# basic information
self.tweet_id, self.author_id = tweet_id, author_id
self.username = util.get_username_local(self.author_id)
self.date_time = date_time
self.mentions = tuple(int(x) for x in mrq[0])
self.reply_to = int(mrq[1]) if mrq[1] is not None else None
self.quote_retweeted = int(mrq[2]) if mrq[2] is not None else None
self.rt_target, self.rt_author_id = rt_target, rt_author_id
self.text = text
# all users involved, except for the author
self.all_parties = {self.reply_to, self.quote_retweeted}
self.all_parties.update(self.mentions)
try:
self.all_parties.remove(None)
# filter users to only be talents
self.mentions = {x for x in mrq[0] if x in tl.talents}
self.rt_mentions = {x for x in rt_mentions if x in tl.talents}
self.reply_to = mrq[1]
self.quote_tweeted = mrq[2]
self.rt_author_id = rt_author_id
try: self.mentions.remove(self.reply_to)
except: pass
try:
self.all_parties.remove(self.author_id)
# -1 if user is not in company
self.reply_to = self.reply_to if self.reply_to is None or self.reply_to in tl.talents else -1
self.quote_tweeted = self.quote_tweeted if self.quote_tweeted is None or self.quote_tweeted in tl.talents else -1
self.rt_author_id = self.rt_author_id if self.rt_author_id is None or self.rt_author_id in tl.talents else -1
# all users involved except for the author
self.all_parties = {self.reply_to, self.quote_tweeted, rt_author_id}
self.all_parties.update(self.mentions, self.rt_mentions)
try: self.all_parties.remove(None)
except: pass
try: self.all_parties.remove(self.author_id)
except: pass
def __repr__(self) -> str:
return (
f'{self.tweet_id} from {util.get_username_local(self.author_id)}):\n'
f'======================================================\n'
f'{self.tweet_id} from {self.username}:\n'
f'{self.get_datetime_str()}\n'
f'{self.get_all_parties_usernames()}\n'
f'parties: {self.get_all_parties_usernames()}\n'
f'mentions: {self.mentions}\n'
f'reply_to: {self.reply_to}\n'
f'quote_retweeted: {self.quote_retweeted}\n'
f'Cross-company: {self.is_cross_company()}\n'
f'quote_retweeted: {self.quote_tweeted}\n'
f'cross-company? {self.is_cross_company()}\n'
f'{self.serialize()}\n'
f'======================================================'
f'----\n{self.announce_text()}\n----\n'
f'{self.url()}'
)
# Serialized one-liner format:
# {tweet} {author} {time in seconds since epoch} m {mention_set} r {reply_to_author} q {quote_retweet_author}
def serialize(self):
    """Return this tweet as one space-separated line in the format above."""
    fields = [f'{self.tweet_id}', f'{self.author_id}', f'{self.date_time.timestamp()}']
    if len(self.mentions) > 0:
        fields.append('m')
        fields.extend(f'{id}' for id in self.mentions)
    if self.reply_to:
        fields.extend(('r', f'{self.reply_to}'))
    if self.quote_retweeted:
        fields.extend(('q', f'{self.quote_retweeted}'))
    return ' '.join(fields)
def url(self):
    """Return this tweet's URL as built by util.get_tweet_url from its ID and author username."""
    tweet_id, username = self.tweet_id, self.username
    return util.get_tweet_url(tweet_id, username)
def is_cross_company(self):
for other_id in self.all_parties:
if self.author_id in talent_lists.holo_en:
if other_id in talent_lists.niji_en or other_id in talent_lists.niji_exid:
return True
if self.author_id in talent_lists.niji_en:
if other_id in talent_lists.holo_en or other_id in talent_lists.holo_id:
return True
if self.author_id in talent_lists.holo_id:
if other_id in talent_lists.niji_en or other_id in talent_lists.niji_exid:
return True
if self.author_id in talent_lists.niji_exid:
if other_id in talent_lists.holo_en or other_id in talent_lists.holo_id:
if tl.is_cross_company(self.author_id, other_id):
return True
return False
@@ -183,3 +170,66 @@ class TalentTweet:
def get_datetime_str(self):
    """Format the tweet's timestamp like 'Aug 5 2023, 3:07AM (UTC)'."""
    # strftime's "no zero padding" flag is '#' on Windows and '-' elsewhere.
    if platform.system() == 'Windows':
        unpad = '#'
    else:
        unpad = '-'
    pattern = f'%b %{unpad}d %Y, %{unpad}I:%M%p (%Z)'
    return self.date_time.strftime(pattern)
def announce_text(self):
    """Compose the announcement text for this tweet.

    The headline depends on the tweet type, checked in this order: retweet,
    reply, quote tweet, standalone mention. For the first three a
    "Mentioning ..." line (when there are mentions) and the formatted
    timestamp are appended. A standalone mention returns the headline alone.

    Raises ValueError when the tweet involves no other party.
    """
    # templates ("_B" ones take a middle "between" fragment as {1})
    TWEET = '{0} tweeted mentioning {1}!'
    REPLY = '{0} replied to {1}!'
    REPLY_TO_MENTION_B = '{0} replied to a tweet{1}mentioning {2}!'
    RETWEET = '{0} retweeted {1}!'
    RETWEET_MENTIONS_B = '{0} shared a tweet{1}mentioning {2}!'
    QUOTE_TWEET = '{0} quote tweeted {1}!'
    QUOTED_TWEET_MENTIONS_B = '{0} quoted a tweet{1}mentioning {2}!'

    def handle(user_id):
        # '@/' prefix as used throughout the announcement text
        return f'@/{util.get_username_with_company(user_id)}'

    author_username = handle(self.author_id)

    print_mention_ids = set(self.mentions)
    print_mention_ids.discard(None)
    mention_usernames = [handle(x) for x in print_mention_ids]

    def rtm_msg(template: str, rtm_author_username):
        # Headline for a tweet that references another tweet with mentions.
        # Returns the text instead of appending to the enclosing `ret`:
        # `ret += ...` in here raised UnboundLocalError.
        if rtm_author_username is not None:
            # referenced tweet is from a talent; name it and list every mention
            rtm_names = [handle(x) for x in self.rt_mentions]
            between = f' from {rtm_author_username} '
        else:
            # referenced tweet is not from a talent; list cross-company mentions only
            rtm_names = [handle(x) for x in self.rt_mentions
                         if tl.is_cross_company(self.author_id, x)]
            between = ' '
        return template.format(author_username, between, ", ".join(rtm_names))

    # Tweet types (-1 marks a referenced user who is not a tracked talent)
    if self.rt_author_id is not None:  # retweet
        rt_username = handle(self.rt_author_id) if self.rt_author_id != -1 else None
        if len(self.rt_mentions) > 0:
            ret = rtm_msg(RETWEET_MENTIONS_B, rt_username)
        else:
            ret = RETWEET.format(author_username, rt_username)
    elif self.reply_to is not None:  # reply
        reply_username = handle(self.reply_to) if self.reply_to != -1 else None
        if len(self.rt_mentions) > 0:
            ret = rtm_msg(REPLY_TO_MENTION_B, reply_username)
        else:
            ret = REPLY.format(author_username, reply_username)
    elif self.quote_tweeted is not None:  # qrt
        quoted_username = handle(self.quote_tweeted) if self.quote_tweeted != -1 else None
        if len(self.rt_mentions) > 0:
            ret = rtm_msg(QUOTED_TWEET_MENTIONS_B, quoted_username)
        else:
            ret = QUOTE_TWEET.format(author_username, quoted_username)
    elif len(self.mentions) > 0:  # standalone tweet
        # NOTE(review): the original had a discarded f'[<datetime>]\n'
        # expression here, so the timestamp may have been intended for this
        # branch too -- confirm. Existing behaviour (headline only) is kept.
        return TWEET.format(author_username, ", ".join(mention_usernames))
    else:
        raise ValueError(f'TalentTweet {self.tweet_id} has insufficient other parties')

    # mention line
    if len(print_mention_ids) > 0:
        ret += (
            '\nMentioning '
            f'{", ".join(mention_usernames)}'
        )

    ret += f'\n\n{self.get_datetime_str()}'
    return ret
+12 -3
View File
@@ -1,6 +1,7 @@
# TODO: move queue structures and file handling here
import os
import shutil
import traceback
import util
import talenttweet as tt
@@ -55,9 +56,12 @@ class TalentTweetQueue:
if len(tokens) == 0 or tokens[0][0] == '#':
continue
ttweet = tt.TalentTweet.deserialize(line)
# print(f'{ttweet.tweet_id}:\n{ttweet}')
self.ttweets_dict[ttweet.tweet_id] = ttweet
print(f'Found {len(self.finished_user_dates)} scraped accounts and {len(self.ttweets_dict)} tweets in queue.')
except: pass
except:
traceback.print_exc()
pass
# finished ttweets
try:
with open(self.finished_ttweets_path, 'r') as f:
@@ -70,8 +74,8 @@ class TalentTweetQueue:
return self.get_count() <= 0
def add_ttweet(self, ttweet):
self.__sorted = False
self.ttweets_dict[ttweet.tweet_id] = ttweet
self.__sorted = False
def get_ttweet(self, id):
    """Return the queued TalentTweet stored under tweet ID `id`.

    Raises:
        KeyError: if `id` is not a key of `self.ttweets_dict`.
    """
    queued = self.ttweets_dict
    return queued[id]
@@ -80,7 +84,10 @@ class TalentTweetQueue:
self.is_good = False
if os.path.exists(self.current_ttweet_path):
with open(self.current_ttweet_path, 'r') as f:
return tt.TalentTweet.deserialize(f.readline())
ttweet = tt.TalentTweet.deserialize(f.readline())
if ttweet.tweet_id in self.ttweets_dict:
self.ttweets_dict.pop(ttweet.tweet_id)
return ttweet
self.__sort_ttweets_dict()
key = list(self.ttweets_dict.keys())[0]
@@ -105,6 +112,7 @@ class TalentTweetQueue:
# overwrite queue.txt
def save_file(self):
print('saving file...', end='')
shutil.copyfile(self.queue_path, self.queue_backup_path)
self.__sort_ttweets_dict()
with open(self.queue_path, 'w') as f:
@@ -117,6 +125,7 @@ class TalentTweetQueue:
# write sorted ttweets
for ttweet in self.ttweets_dict.values():
f.write(ttweet.serialize() + '\n')
print('done')
def add_finished_tweet(self, id):
self.finished_ttweets.append(id)
+16 -158
View File
@@ -2,9 +2,9 @@ import datetime
import traceback
import asyncio
from dotenv import dotenv_values
import tweepy
import api_secrets
import talenttweet as tt
import talent_lists as tl
import ttweetqueue as ttq
@@ -70,104 +70,24 @@ class TwAPI:
def __init__(self):
creds = dotenv_values()
TwAPI.instance = self
self.client = tweepy.Client(
bearer_token=api_secrets.bearer_token(),
consumer_key=api_secrets.api_key(), consumer_secret=api_secrets.api_secret(),
access_token=api_secrets.access_token(), access_token_secret=api_secrets.access_secret()
consumer_key=creds['app_key'], consumer_secret=creds['app_secret'],
access_token=creds['user_token'], access_token_secret=creds['user_secret']
)
self.api = tweepy.API(
auth=tweepy.OAuthHandler(
consumer_key=api_secrets.api_key(), consumer_secret=api_secrets.api_secret(),
access_token=api_secrets.access_token(), access_token_secret=api_secrets.access_secret()
consumer_key=creds['app_key'], consumer_secret=creds['app_secret'],
access_token=creds['user_token'], access_token_secret=creds['user_secret']
)
)
try:
self.me = self.client.get_me().data
except Exception as e:
print('Did you setup secrets.ini?')
raise e
print(f'Assuming the account of @{self.me.data["username"]} ({self.me["id"]})')
## ---[COMMENT OUT WHEN NOT IN USE]---
# async def nuke_tweets(self):
# async def delete_tweet(id):
# try:
# self.client.delete_tweet(id)
# except tweepy.TooManyRequests as e:
# wait_for = float(e.response.headers["x-rate-limit-reset"]) - datetime.datetime.now().timestamp() + 1
# print(f'\thit rate limit deleting {id}, retrying in {wait_for} seconds...')
# await asyncio.sleep(wait_for)
# print('continuing...')
# await delete_tweet(id)
# print(f'Retrieving all of {self.me["username"]}\'s tweets...')
# tweets = self.get_all_tweet_ids_from_user(self.me['id'])
# print(f'Retrieved {len(tweets)} tweets.')
# if not len(tweets) > 0:
# print('No tweets obtained. Make sure the profile is public.')
# return
# print(f'Deleting {len(tweets)} tweets...')
# deleted_count = 0
# try:
# for tweet in tweets:
# print(f'deleted {deleted_count}/{len(tweets)}')
# await delete_tweet(tweet.id)
# await asyncio.sleep(0.5)
# deleted_count += 1
# except:
# print('Unhandled error occurred while trying to delete tweets.')
# traceback.print_exc()
# print('Try running again.')
# else:
# print('Saul Gone')
def get_all_tweet_ids_from_user(self, user_id):
    """Collect every tweet of `user_id` by following pagination tokens.

    Despite the name, the returned list holds the tweet objects found in
    each response's `data`, not bare IDs.

    Args:
        user_id: ID of the account whose timeline is fetched.

    Returns:
        A list of the tweet objects from all pages, in retrieval order.
    """
    next_page_token = None
    tokens_retrieved = 0
    tweets_retrieved = 0
    tweets = []

    while True:
        print(f'Retrieved {tokens_retrieved} tokens so far...')
        resp = self.client.get_users_tweets(
            user_id, max_results=100, pagination_token=next_page_token,
            media_fields=TwAPI.TWEET_MEDIA_FIELDS,
            tweet_fields=TwAPI.TWEET_FIELDS,
            expansions=TwAPI.TWEET_EXPANSIONS
        )
        # tweepy sets `data` to None when a page carries no tweets (e.g. an
        # empty or protected timeline); iterating None would raise TypeError.
        tweets.extend(resp.data or [])

        # update counters and pagination token
        tweets_retrieved += resp.meta['result_count']
        try:
            next_page_token = resp.meta['next_token']
            tokens_retrieved += 1
        except KeyError:
            print("next_token wasn't provided; we've reached the end!")
            break  # reached end of user's tweets

    print(f'Retrieved {tweets_retrieved} tweets using {tokens_retrieved} tokens.')
    return tweets
async def get_tweet_response(self, id, attempt = 0):
    """Fetch tweet `id` through the shared client, retrying on rate limits.

    Args:
        id: ID of the tweet to fetch.
        attempt: retry counter, only used in the rate-limit log line.

    Returns:
        The client's response for the tweet, or None when the request fails
        for any reason other than a rate limit.
    """
    try:
        twt = TwAPI.instance.client.get_tweet(
            id,
            media_fields=TwAPI.TWEET_MEDIA_FIELDS,
            tweet_fields=TwAPI.TWEET_FIELDS,
            expansions=TwAPI.TWEET_EXPANSIONS
        )
        TwAPI.tweets_fetched += 1
        return twt
    except tweepy.TooManyRequests as e:
        # Sleep until the reset time reported by the API (plus one second of slack).
        wait_for = float(e.response.headers["x-rate-limit-reset"]) - datetime.datetime.now().timestamp() + 1
        print(f'[{attempt}]\tget_tweet_response({id}):\n\thit rate limit after {TwAPI.tweets_fetched} fetches -- trying again in {wait_for} seconds...')
        await asyncio.sleep(wait_for)
        return await self.get_tweet_response(id, attempt=attempt+1)
    except Exception:
        # Still best-effort (callers get None), but leave a trace instead of
        # failing silently. `Exception` rather than a bare except so that task
        # cancellation and KeyboardInterrupt are no longer swallowed.
        traceback.print_exc()
        return None
async def post_tweet(self, text='', media_ids: list=None, reply_to_tweet: int=None, quote_tweet_id: int=None):
try:
@@ -186,78 +106,31 @@ class TwAPI:
# return True = successfully posted a single ttweet
# return False = did not post ttweet (duplicate)
async def post_ttweet(self, ttweet: tt.TalentTweet, is_catchup=False, dry_run=False):
async def post_ttweet(self, ttweet: tt.TalentTweet, dry_run=False):
print(f'------{ttweet.tweet_id} ({util.get_username_local(ttweet.author_id)})------')
REPLY = '{0} replied to {1}!\n'
QUOTE_TWEET = '{0} quote tweeted {1}!\n'
TWEET = '{0} tweeted!\n'
RETWEET = '{0} retweeted {1}!\n'
def create_text():
author_username = f'@/{util.get_username_with_company(ttweet.author_id)}'
print_mention_ids = set(ttweet.mentions)
ret = str()
if is_catchup:
ret += f'{ttweet.get_datetime_str()}\n'
pass
# Tweet types
if ttweet.rt_target is not None: # retweet
ret += RETWEET.format(f'{author_username}', f'@/{util.get_username_with_company(ttweet.rt_author_id)}')
elif ttweet.reply_to is not None: # reply
reply_username = f'@/{util.get_username_with_company(ttweet.reply_to)}'
ret += REPLY.format(author_username, reply_username)
# if qrt, push id into mentions
print_mention_ids.add(ttweet.quote_retweeted)
elif ttweet.quote_retweeted is not None: # qrt
quoted_username = f'@/{util.get_username_with_company(ttweet.quote_retweeted)}'
ret += QUOTE_TWEET.format(author_username, quoted_username)
elif len(ttweet.mentions) > 0: # standalone tweet
ret += TWEET.format(author_username)
else:
raise ValueError(f'TalentTweet {ttweet.tweet_id} has insufficient other parties')
try: print_mention_ids.remove(None)
except: pass
# mention line
if len(print_mention_ids) > 0:
mention_usernames = [f'@/{util.get_username_with_company(x)}' for x in print_mention_ids]
ret += (
'mentioning '
f'{", ".join(mention_usernames)}\n'
)
ret += '\n'
# ret += '(this is a missed tweet)\n' if is_catchup else ''
return ret
text = create_text()
ttweet_url = util.ttweet_to_url(ttweet)
text = ttweet.announce_text()
ttweet_url = ttweet.url()
if dry_run: print('-------------------- DRY RUN --------------------')
print(text)
print(ttweet)
if dry_run: return False
# NO DRY-RUN: actually post tweet
# main tweet: text + screenshot
try:
print('creating main QRT w/ screenshot...', end='')
print('creating main QRT w/ screenshot...')
media_ids = [await self.get_ttweet_image_media_id(ttweet)]
twt_resp = await self.post_tweet(text, media_ids=media_ids, quote_tweet_id=ttweet.tweet_id)
print('done')
except:
print('error occurred trying to create main tweet, falling back to URL-main + reply screencap format')
traceback.print_exc()
text += f"\n{ttweet_url}"
try:
print('posting main tweet...', end='')
twt_resp = await self.post_tweet(text)
print('posting main tweet...')
twt_resp = await self.post_tweet(text, quote_tweet_id=ttweet.tweet_id)
print('done')
twt_id = twt_resp.data['id']
# if ttweet.reply_to is not None:
# re_ttweet = tt.TalentTweet(tweet_id=ttweet.reply_to, author_id=)
# media_ids.insert(0, await self.get_ttweet_image_media_id())
try:
print('creating reply img...', end='')
@@ -275,18 +148,3 @@ class TwAPI:
else:
raise e
return True
def post_ttweet_by_id(self, tweet_id, is_catchup=False, dry_run=False):
    """Build a TalentTweet from `tweet_id` and post it if it is cross-company.

    The TalentTweet is created with `TalentTweet.create_from_id`. When it is
    cross-company it is handed to `post_ttweet` and then recorded as finished
    on the `TalentTweetQueue` singleton; otherwise only a message is printed.
    Both coroutines are driven with `asyncio.run`.
    """
    ttweet = asyncio.run(tt.TalentTweet.create_from_id(tweet_id))
    print(f'm({ttweet.mentions}), r({ttweet.reply_to}), q({ttweet.quote_retweeted})')

    if not ttweet.is_cross_company():
        print(f'Tweet {tweet_id} is not cross-company.')
        return

    print(f'Tweet {ttweet.tweet_id} is cross-company! Creating post...')
    asyncio.run(self.post_ttweet(ttweet, is_catchup=is_catchup, dry_run=dry_run))
    ttq.TalentTweetQueue.instance.add_finished_tweet(ttweet.tweet_id)
+24
View File
@@ -0,0 +1,24 @@
from tweety.types import *
def url(t: Tweet):
    """Return the twitter.com status URL for tweet `t`.

    Built from the author's username and the tweet's id.
    """
    handle = t.author.username
    tweet_id = t.id
    return f'https://twitter.com/{handle}/status/{tweet_id}'
def print_tweets(tweets: list[Tweet | TweetThread]):
    """Print a one-line summary of each tweet, recursing into threads.

    A `Tweet` is printed as its date and URL, followed by optional retweet and
    reply markers and the comma-joined usernames it mentions. A `TweetThread`
    is printed as its contained tweets between two delimiter lines.
    """
    print(f'{len(tweets)} tweets:')
    for entry in tweets:
        if isinstance(entry, Tweet):
            # Gather the space-separated segments of the summary line.
            segments = [f'{entry.date} : {url(entry)} :']
            if entry.is_retweet:
                segments.append(f'RT ({entry.retweeted_tweet.author.username})')
            if entry.is_reply:
                segments.append(f'is reply!')
                # The parent tweet may be unavailable even for a reply.
                if entry.replied_to is not None:
                    segments.append(f'reply to {entry.replied_to.author.username}')
            mentioned = [user.username for user in entry.user_mentions]
            segments.append("m=" + ",".join(mentioned))
            print(*segments)
        elif isinstance(entry, TweetThread):
            print('-----------TTd----------')
            print_tweets(entry.tweets)
            print('-----------end----------')
+16 -52
View File
@@ -4,6 +4,7 @@ import os
import sys
import traceback
from datetime import datetime
from dotenv import dotenv_values
import tweepy
import pytz
@@ -45,7 +46,7 @@ def get_current_timestamp():
def get_current_date():
    """Return today's local date formatted as `YYYY-MM-DD`."""
    today = datetime.today()
    return format(today, '%Y-%m-%d')
def get_key_from_value(d, val):
def get_key_from_value(d: dict, val):
keys = [k for k, v in d.items() if v == val]
if keys:
return keys[0]
@@ -53,11 +54,12 @@ def get_key_from_value(d, val):
async def create_ttweet_image(ttweet):
tc = TweetCapture()
tc.cookies = [{'name': 'auth_token', 'value': dotenv_values()['web_auth_token']}]
if 'linux' in sys.platform:
# Linux chromedriver path
tc.driver_path = '/usr/bin/chromedriver'
filename = f'{get_project_dir()}/img.png'
url = ttweet_to_url(ttweet)
url = ttweet.url()
img = None
print(url)
try: os.remove(filename)
@@ -66,7 +68,7 @@ async def create_ttweet_image(ttweet):
img = await tc.screenshot(
url=url,
path=filename,
mode=4,
mode=0,
night_mode=1,
show_parent_tweets=True
)
@@ -80,28 +82,20 @@ async def create_ttweet_image(ttweet):
return img
def get_tweet_url(id, username):
return f'https://twitter.com/{username}/status/{id}'
return f'https://www.twitter.com/{username}/status/{id}'
def ttweet_to_url(ttweet):
    """Return the status URL of `ttweet`.

    The author's username is resolved with `get_username` and combined with
    the tweet ID by `get_tweet_url`.
    """
    author = get_username(ttweet.author_id)
    tweet_id = ttweet.tweet_id
    return get_tweet_url(tweet_id, author)
## Try to get the username from the local talent list; fall back to an online lookup if it isn't there.
def get_username(id):
    """Return the username for user `id`, preferring the local talent list.

    Looks `id` up in `talent_lists.talents`; when there is no entry (or the
    stored value is None) the result of `get_username_online(id)` is returned
    instead.
    """
    username = talent_lists.talents.get(id)
    # Identity check: `== None` can be fooled by a custom __eq__ (PEP 8).
    if username is None:
        return get_username_online(id)
    return username
# twint
# May not work with short user IDs (ie. 1354241437)
# def get_username_online(id, default=None):
# c = twint.Config()
# c.User_id = id
# c.Store_object = True
# c.Hide_output = True
# try:
# twint.output.users_list.clear()
# twint.run.Lookup(c)
# user = twint.output.users_list[0]
# return user.username
# except:
# return str(default) if default is not None else f'{id}'
def get_username_with_company(id):
    """Return `id`'s username followed by its company in parentheses.

    The company comes from `talent_lists.talents_company`. The separating
    space is always emitted, so the result ends with a space when no company
    is known for `id`.
    """
    company = talent_lists.talents_company.get(id, None)
    if company is not None:
        suffix = f"({company})"
    else:
        suffix = ""
    return f'{get_username(id)} {suffix}'
def get_username_local(id):
def get_username_local(id: int):
return talent_lists.talents.get(id, f'{id}')
# Retrieve username via API v2 (tweepy)
@@ -115,33 +109,3 @@ def get_username_online(id, default=None):
print(f'Unhandled error retrieving username for {id}!')
traceback.print_exc()
return str(default) if default is not None else f'id:{id}'
## Try to get the username from the local talent list; fall back to an online lookup if it isn't there.
def get_username(id):
    """Return the username for user `id`, preferring the local talent list.

    Looks `id` up in `talent_lists.talents`; when there is no entry (or the
    stored value is None) the result of `get_username_online(id)` is returned
    instead.
    """
    username = talent_lists.talents.get(id)
    # Identity check: `== None` can be fooled by a custom __eq__ (PEP 8).
    if username is None:
        return get_username_online(id)
    return username
def get_username_with_company(id):
    """Return `id`'s username followed by its company in parentheses.

    The company comes from `talent_lists.talents_company`. The separating
    space is always emitted, so the result ends with a space when no company
    is known for `id`.
    """
    company = talent_lists.talents_company.get(id, None)
    if company is not None:
        suffix = f"({company})"
    else:
        suffix = ""
    return f'{get_username(id)} {suffix}'
def get_user_id_local(username) -> int:
    """Return the ID of the talent whose username matches `username`.

    The comparison is case-insensitive and only consults
    `talent_lists.talents` (ID -> username). The first match in the mapping's
    iteration order wins.

    Returns:
        The matching user ID, or None when no talent has that username.
    """
    wanted = username.lower()
    # Walk IDs and names together instead of indexing two parallel lists.
    for user_id, talent_username in talent_lists.talents.items():
        if talent_username.lower() == wanted:
            return user_id
    return None
def get_user_id_online(username) -> int:
    """Look up the user ID for `username` with a twint `Lookup` run.

    Returns:
        The `id` of the first entry in `twint.output.users_list` after the
        lookup, or -1 if the lookup fails for any reason.
    """
    c = twint.Config()
    c.Username = username
    c.Store_object = True
    c.Hide_output = True

    try:
        # Clear the module-level result list so index 0 is this lookup's
        # result rather than one left over from an earlier run.
        twint.output.users_list.clear()
        twint.run.Lookup(c)
        user = twint.output.users_list[0]
        return user.id
    except Exception:
        # Best-effort: any lookup failure maps to the -1 sentinel, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return -1