You've already forked JapariArchive
102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
import asyncio
|
|
from typing import AsyncIterator
|
|
from tweety import TwitterAsync
|
|
from tweety.types.twDataTypes import Tweet, SelfThread, ConversationThread
|
|
from tweety.types.usertweet import UserMedia
|
|
from tweety.exceptions_ import RateLimitReached
|
|
from config import Global_Config
|
|
|
|
class TweetyApi:
    """Async helper around a tweety ``TwitterAsync`` client.

    ``init`` is the method that creates ``self.app``; await it before
    calling ``get_tweet`` or ``get_tweets``.
    """

    async def init(self, skip_login=False, session_name="session"):
        """Create the underlying client and, unless skipped, load cookies.

        Args:
            skip_login: When true, only construct the client for
                ``session_name``; no cookies are loaded.
            session_name: Session name handed to ``TwitterAsync``.

        Returns:
            This instance, so the call can be chained.

        Raises:
            Exception: If a login is requested but
                ``Global_Config["x_cookies"]`` is ``None``.
        """
        if skip_login:
            self.app = TwitterAsync(session_name)
            return self

        cookies = Global_Config["x_cookies"]
        if cookies is None:
            raise Exception("X cookies are required")

        self.app = TwitterAsync(session_name)
        await self.app.load_cookies(cookies)
        print(self.app.user)
        return self

    async def get_tweet(self, url):
        """Fetch a single tweet, or return ``None`` when the lookup fails.

        This is best-effort: any ordinary error raised by ``tweet_detail``
        is swallowed and reported as ``None``.

        Args:
            url: Value passed straight through to ``self.app.tweet_detail``.

        Returns:
            Whatever ``tweet_detail`` returns, or ``None`` on failure.
        """
        try:
            return await self.app.tweet_detail(url)
        except Exception:
            # Deliberately not a bare ``except``: asyncio.CancelledError and
            # KeyboardInterrupt derive from BaseException and must keep
            # propagating so the surrounding task can be cancelled/stopped.
            return None

    async def get_tweets(self, user_name, bottom_id, all_posts: list) -> "AsyncIterator[Tweet]":
        """Yield a user's tweets page by page until a page has nothing new.

        Pages are pulled through ``UserMedia``. A tweet is considered new
        when its numeric id is not in ``all_posts``. Standalone tweets whose
        id is ``<= bottom_id`` are skipped; members of a conversation/self
        thread are only checked against ``all_posts``. Iteration ends after
        the first page that produced no new tweet.

        Rate limits (``RateLimitReached``) are handled by sleeping via
        ``sleep_wait`` and retrying; any other error propagates.

        Args:
            user_name: Username passed to ``get_user_info``.
            bottom_id: Numeric lower bound; compared against ``int(tweet.id)``.
            all_posts: Already-known ids. Looked up on every tweet, so
                changes made by the caller while iterating are observed.
                NOTE(review): ids are compared as ``int`` - confirm the
                caller stores ints, not strings.

        Yields:
            Tweets that are not yet present in ``all_posts``.
        """

        def validate_tweet(tweet):
            """Return ``(past_bounds, tweet_valid)`` for one tweet."""
            tweet_id_num = int(tweet.id)
            past_bounds = tweet_id_num <= bottom_id
            tweet_valid = tweet_id_num not in all_posts
            return past_bounds, tweet_valid

        sleep_default = 0.125
        sleep_exponent = 1

        # Resolve the user first, backing off for as long as we are rate
        # limited.
        # NOTE(review): if get_user_info ever returns None without raising,
        # this loop retries immediately with no delay - confirm it cannot.
        user = None
        while user is None:
            try:
                user = await self.app.get_user_info(username=user_name)
            except RateLimitReached:
                sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
            except Exception as ex:
                print("User error: " + str(ex))
                raise

        # NOTE(review): the positional arguments 1, 2, None are presumably
        # pages / wait time / cursor - confirm against the tweety signature.
        tweety_api = UserMedia(user.rest_id, self.app, 1, 2, None)
        sleep_exponent = 1

        while True:
            await asyncio.sleep(5)
            old_cursor = tweety_api.cursor

            try:
                tweets = await tweety_api.get_next_page()
            except RateLimitReached:
                sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
                # Put the cursor back so the same page is requested again.
                tweety_api.cursor = old_cursor
                continue
            # A successful page resets the back-off.
            sleep_exponent = 1

            has_valid_tweets = False
            for tweet in tweets:
                if isinstance(tweet, (ConversationThread, SelfThread)):
                    # Thread members ignore the bottom_id bound.
                    for member in tweet.tweets:
                        _, tweet_valid = validate_tweet(member)
                        if tweet_valid:
                            has_valid_tweets = True
                            yield member
                    continue

                past_bounds, tweet_valid = validate_tweet(tweet)
                if past_bounds:
                    continue
                if tweet_valid:
                    has_valid_tweets = True
                    yield tweet

            # An empty page can never set the flag, so this also covers the
            # "no tweets returned" case.
            if not has_valid_tweets:
                break
            await asyncio.sleep(1)

    @staticmethod
    async def sleep_wait(sleep_default, sleep_exponent):
        """Sleep for an exponentially growing number of hours, capped at 2.

        The delay is ``sleep_default * 2 ** sleep_exponent`` hours, limited
        to a maximum of two hours.

        Args:
            sleep_default: Base delay in hours.
            sleep_exponent: Current back-off exponent.

        Returns:
            ``sleep_exponent + 1``, to be passed to the next call.
        """
        sleep_amount = min(sleep_default * 2 ** sleep_exponent, 2)
        print(f"Sleeping for {round(sleep_amount, 2)} hours.")
        await asyncio.sleep(sleep_amount * 60 * 60)
        print("Sleep done")
        return sleep_exponent + 1
|
|
|
|
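# Example usage. get_tweets is an async generator taking
# (user_name, bottom_id, all_posts), so it has to be iterated inside a
# coroutine after init() has created the client:
#
#   async def _demo():
#       api = await TweetyApi().init()
#       async for tweet in api.get_tweets("redhood_depth", 0, []):
#           print(tweet)
#
#   asyncio.run(_demo())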