TwitchDownloader/twitch-archive.py

266 lines
21 KiB
Python
Raw Normal View History

2022-12-09 06:10:02 -05:00
import requests, os, time, json, sys, subprocess, getopt, smtplib, pathlib, glob
2022-12-06 10:52:44 -05:00
from colorama import Fore, Style
2022-12-05 11:21:39 -05:00
from datetime import datetime, timedelta
2022-12-03 08:57:56 -05:00
from pytz import timezone
from dotenv import load_dotenv, find_dotenv
2022-12-05 11:21:39 -05:00
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
2022-12-03 08:57:56 -05:00
# Load the .env file located by find_dotenv() so the os.getenv() lookups below
# (CLIENT-ID, CLIENT-SECRET, SENDER, ...) can be satisfied from it.
load_dotenv(find_dotenv())
class TwitchArchive:
    """Poll one Twitch channel and archive every live stream it broadcasts.

    For each stream the archiver records the live feed with streamlink,
    remuxes it to MP4 and, depending on the configuration flags, saves the
    matching VOD, its metadata and the chat, uploads the result and deletes
    the local copies. External tools are started through the terminal
    returned by get_OS(); credentials are read from environment variables.
    """

    def __init__(self):
        """Set the default configuration (main() may override it from the CLI)."""
        # user configuration
        self.username = "KalathrasLolweapon"  # Twitch streamer username
        self.quality = "best"  # Qualities options: best/source high/720p medium/540p low/360p
        # global configuration
        self.root_path = r"archive"  # Path where this script saves everything (livestream, VODs, chat, metadata)
        self.refresh = 5.0  # Time between checking (5.0 is recommended), avoid less than 1.0
        self.notifications = 1  # 0 - disable email notifications of the current session, 1 - enable them
        self.downloadMETADATA = 1  # 0 - disable metadata downloading, 1 - enable metadata downloading
        self.downloadVOD = 1  # 0 - disable VOD downloading after the stream finished, 1 - enable it (downloads the latest public VOD)
        self.downloadCHAT = 1  # 0 - disable chat downloading and rendering, 1 - enable chat downloading and rendering
        self.uploadCloud = 1  # 0 - disable upload to remote cloud, 1 - enable upload to remote cloud
        self.deleteFiles = 0  # 0 - keep the files of the current session, 1 - delete them after the upload (BE CAREFUL WITH THIS OPTION)
        self.cleanRaw = 1  # 0 - keep raw (.ts) files, 1 - delete raw (.ts) files (they are deleted before the upload)
        self.hls_segments = 3  # 1-10 for live stream, multiple threads can increase the throughput. 2-3 is enough
        self.hls_segmentsVOD = 10  # 1-10 for downloading the VOD, multiple threads can increase the throughput

    def run(self):
        """Print the configuration, authenticate, create the folders and start polling.

        Exits the process with status 1 when the platform is unsupported or
        when the OAuth token / channel id cannot be obtained, because every
        later step depends on them.
        """
        self.terminal = self.get_OS()
        if self.terminal is None:
            # get_OS() already reported the problem; all tools run through the terminal.
            sys.exit(1)

        print('Twitch-Archive')
        print('Configuration:')
        print(f'Root path: {Fore.GREEN}' + str(pathlib.Path(self.root_path).resolve()) + f'{Style.RESET_ALL}')
        print(f'Refresh rate: {Fore.GREEN} {str(self.refresh)}{Style.RESET_ALL}')
        self._print_setting('Email notifications', self.notifications)
        self._print_setting('Metada downloading', self.downloadMETADATA)
        self._print_setting('VOD downloading', self.downloadVOD)
        self._print_setting('Chat downloading', self.downloadCHAT)
        self._print_setting('Upload to cloud storage', self.uploadCloud)
        if self.deleteFiles == 1:
            print(f'{Fore.RED}'+'\033[1m'+f'CAREFUL FILES ARE CONFIGURATED TO BE DELETED{Style.RESET_ALL}')
        else:
            print(f'{Fore.GREEN}'+'\033[1m'+f'Files will NOT be deleted{Style.RESET_ALL}')
        if self.uploadCloud == 0 and self.deleteFiles == 1:
            print(f'{Fore.RED}'+'\033[1m'+f'FILES WILL BE DELETED AND NO UPLOADED {Style.RESET_ALL}{Fore.GREEN}\n"CTRL + C"{Style.RESET_ALL}{Fore.RED}'+'\033[1m'+f' TO STOP AND CHANGED CONFIGURATION{Style.RESET_ALL}')

        self.oauth_token = self.get_oauth_token()
        if self.oauth_token is None:
            print(f'{Fore.RED}Could not obtain a Twitch OAuth token. Check CLIENT-ID and CLIENT-SECRET.{Style.RESET_ALL}')
            sys.exit(1)
        self.channel_id = self.get_channel_id()
        if self.channel_id is None:
            print(f'{Fore.RED}Could not resolve the channel id of {self.username}.{Style.RESET_ALL}')
            sys.exit(1)

        user_root = os.path.join(self.root_path, self.username)
        self.raw_path = str(pathlib.Path(os.path.join(user_root, "video", "raw")).absolute())
        self.video_path = str(pathlib.Path(os.path.join(user_root, "video")).absolute())
        self.chatJSON_path = str(pathlib.Path(os.path.join(user_root, "chat", "json")).absolute())
        self.chatMP4_path = str(pathlib.Path(os.path.join(user_root, "chat")).absolute())
        self.metadata_path = str(pathlib.Path(os.path.join(user_root, "metadata")).absolute())
        for directory in (self.raw_path, self.video_path, self.chatJSON_path, self.chatMP4_path, self.metadata_path):
            os.makedirs(directory, exist_ok=True)
        # Append mode creates the log when missing and never truncates an existing one.
        with open(os.path.join(self.root_path, ".log"), 'a'):
            pass

        print(f"Checking for {Fore.GREEN}{self.username}{Style.RESET_ALL} every {Fore.GREEN}{self.refresh}{Style.RESET_ALL} seconds. Record with {Fore.GREEN}{self.quality}{Style.RESET_ALL} quality.")
        self.sendNotif("TWITCH ARCHIVE", f"Checking for {self.username} every {self.refresh} seconds. Record with {self.quality} quality.")
        self.loopcheck()

    @staticmethod
    def _print_setting(label, enabled):
        """Print one on/off configuration line; only the value 1 counts as enabled."""
        if enabled == 1:
            print(f'{label}: {Fore.GREEN}Enabled{Style.RESET_ALL}')
        else:
            print(f'{label}: {Fore.RED}Disabled{Style.RESET_ALL}')

    def get_OS(self):
        """Return the terminal used to launch external tools, or None if unsupported.

        Returns 'powershell.exe' on Windows and 'bash' on Linux; on any other
        platform a message is printed and None is returned.
        """
        if sys.platform.startswith('win32'):
            return 'powershell.exe'
        if sys.platform.startswith('linux'):
            return 'bash'
        print('OS no supported')
        return None

    def _api_headers(self):
        """Build the headers sent with every Helix API request."""
        return {"Authorization": f"Bearer {self.oauth_token}", "Client-ID": os.getenv('CLIENT-ID')}

    def get_oauth_token(self):
        """Request an app access token with the client-credentials grant.

        Returns the token string, or None when the request fails or the
        response carries no 'access_token'.
        """
        try:
            response = requests.post(
                "https://id.twitch.tv/oauth2/token",
                params={
                    "client_id": os.getenv('CLIENT-ID'),
                    "client_secret": os.getenv('CLIENT-SECRET'),
                    "grant_type": "client_credentials",
                },
                timeout=15,
            )
            return response.json()['access_token']
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
            print("ERROR requesting OAuth token: ", e)
            return None

    def get_channel_id(self):
        """Return the numeric id of self.username, or None if it cannot be resolved."""
        try:
            r = requests.get('https://api.twitch.tv/helix/users', params={"login": self.username}, headers=self._api_headers(), timeout=15)
            r.raise_for_status()
            users = r.json()["data"]
        except (requests.exceptions.RequestException, ValueError, KeyError) as e:
            print(f'\n{e}\n')
            return None
        return users[0]["id"] if users else None

    def check_user(self):
        """Return True when the channel is live, storing the stream in self.live_info.

        Any failure is reported and treated as "not live" so the polling loop
        keeps running. A 401 answer triggers a token refresh for the next poll.
        """
        try:
            live = requests.get('https://api.twitch.tv/helix/streams', params={"user_id": self.channel_id}, headers=self._api_headers(), timeout=30)
            if live.status_code == 401:
                # The app token is no longer accepted: fetch a new one and retry on the next poll.
                new_token = self.get_oauth_token()
                if new_token is not None:
                    self.oauth_token = new_token
                return False
            stream_data = live.json()
            if len(stream_data['data']) == 1:
                self.live_info = stream_data['data'][0]
                return True
            return False
        except Exception as e:  # polling boundary: never let one bad poll stop the archiver
            print("ERROR checking user: ", e)
            return False

    def sendNotif(self, subject, content):
        """Send an email notification through Gmail when notifications are enabled.

        Delivery is best-effort: missing credentials or SMTP/network errors
        are printed and otherwise ignored so they cannot interrupt a recording.
        """
        if self.notifications != 1:
            return
        sender = os.getenv("SENDER")
        receiver = os.getenv("RECEIVER")
        password = os.getenv("PASSWD")
        if not sender or not receiver or not password:
            print("ERROR sending notification: SENDER, RECEIVER or PASSWD is not set")
            return
        msg = MIMEMultipart()
        msg['From'] = sender
        msg['To'] = receiver
        msg['Subject'] = self.username + " _ " + subject
        body = "Current seccion is for " + self.username + "\n\n\n\n" + content
        msg.attach(MIMEText(body, 'plain'))
        try:
            # The context manager closes the connection even when login/sendmail fails.
            with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
                server.starttls()
                server.login(sender, password)
                server.sendmail(sender, receiver, msg.as_string())
        except (smtplib.SMTPException, OSError) as e:
            print("ERROR sending notification: ", e)

    def loopcheck(self):
        """Poll forever, archiving a full session each time the channel is live."""
        while True:
            if self.check_user() is True:
                self._archive_session()
            time.sleep(self.refresh)

    def _archive_session(self):
        """Record the current stream, then archive, clean, upload and delete as configured."""
        stamp = self._local_timestamp(self.live_info["started_at"])
        paths = self._session_paths(stamp)

        if self._log_stream_once():
            self.sendNotif('Stream - ' + stamp, 'Streamer went live: ' + self.live_info["title"])

        subprocess.call([self.terminal, 'streamlink', 'twitch.tv/' + self.username, self.quality, *self._auth_header_args(), '--hls-segment-threads', str(self.hls_segments), '--hls-live-restart', '--retry-streams', str(self.refresh), '--output', paths["live_raw"]])
        self._remux(paths["live_raw"], paths["live_mp4"])

        self._archive_vod(stamp, paths)

        if self.cleanRaw == 1:
            print('Deleting raw files')
            if os.path.exists(paths["live_raw"]):
                os.remove(paths["live_raw"])
            if self.downloadVOD == 1 and os.path.exists(paths["vod_raw"]):
                os.remove(paths["vod_raw"])

        uploaded = True
        if self.uploadCloud == 1:
            uploaded = self._upload(stamp)

        if self.deleteFiles == 1:
            if uploaded:
                self._delete_session_files(stamp, paths)
            else:
                # Never delete the only copy after an upload that reported failure.
                print(f'{Fore.RED}Upload failed, files will NOT be deleted.{Style.RESET_ALL}')
                self.sendNotif('ERROR - ' + stamp, 'Upload failed, files will NOT be deleted.')

        print('CURRENT SECCION HAVE FINISHED GOING BACK TO CHECKING')
        self.sendNotif("SECCION DONE - " + stamp, 'CURRENT SECCION HAVE FINISHED GOING BACK TO CHECKING')

    @staticmethod
    def _local_timestamp(iso_utc):
        """Convert an API UTC timestamp (…T…Z) to a local-time file name stamp."""
        utc_time = datetime.strptime(iso_utc, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone('UTC'))
        local_time = utc_time.astimezone(tz=None).replace(tzinfo=None)
        return datetime.strftime(local_time, '%Y%m%d_%Hh%Mm%Ss')

    def _session_paths(self, stamp):
        """Return every file path belonging to the session identified by stamp."""
        return {
            "live_raw": os.path.join(self.raw_path, "LIVE_" + stamp + ".ts"),
            "live_mp4": os.path.join(self.video_path, "LIVE_" + stamp + ".mp4"),
            "vod_raw": os.path.join(self.raw_path, "VOD_" + stamp + ".ts"),
            "vod_mp4": os.path.join(self.video_path, "VOD_" + stamp + ".mp4"),
            "chat_json": os.path.join(self.chatJSON_path, "CHAT_" + stamp + ".json"),
            "chat_mp4": os.path.join(self.chatMP4_path, "CHAT_" + stamp + ".mp4"),
            "metadata": os.path.join(self.metadata_path, "METADA_" + stamp + ".json"),
        }

    def _log_stream_once(self):
        """Append the current stream to the .log file unless it is already listed.

        Returns True when a new entry was written, False when it was present.
        """
        log_id = self.live_info["started_at"] + " - " + self.username + " - " + self.live_info["title"]
        # UTF-8 so titles with emoji cannot fail on a narrow locale encoding;
        # errors='replace' keeps older logs written in another encoding readable.
        with open(os.path.join(self.root_path, ".log"), "a+", encoding="utf-8", errors="replace") as logs:
            logs.seek(0)
            if any(log_id in line for line in logs):
                return False
            logs.write(log_id + "\n")
        return True

    @staticmethod
    def _auth_header_args():
        """Return the streamlink authorization arguments, or [] when no token is set."""
        token = os.getenv('OAUTH-PRIVATE-TOKEN')
        if not token:
            return []
        return ['--http-header', '"Authorization=OAuth ' + token + '"']

    @staticmethod
    def _tool_path(name):
        """Return the path of a helper located in the bin folder next to this script."""
        return str(pathlib.Path(__file__).parent.resolve()) + '/bin/' + name

    def _remux(self, raw_path, proc_path):
        """Copy the streams of a raw .ts recording into an .mp4 using bin/ffmpeg."""
        if os.path.exists(raw_path):
            subprocess.call([self.terminal, self._tool_path('ffmpeg'), '-y', '-i', raw_path, '-analyzeduration', '2147483647', '-probesize', '2147483647', '-c:v', 'copy', '-c:a', 'copy', '-start_at_zero', '-copyts', proc_path], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
        else:
            print("Skip fixing. File not found.")

    def _archive_vod(self, stamp, paths):
        """Save metadata, VOD and chat of the latest VOD if it belongs to this stream.

        Every failure is reported (console + notification) and swallowed so
        the rest of the session (cleanup, upload) still runs.
        """
        try:
            vodurl = f'https://api.twitch.tv/helix/videos?user_id={str(self.channel_id)}&period=day&type=archive'
            vods = requests.get(vodurl, headers=self._api_headers(), timeout=30)
            vodsinfo = json.loads(vods.text)
            if not vodsinfo["data"]:
                message = 'No VOD has been published for this livestream.\nThe VOD and chat will not be downloaded and rendered.'
                print(message)
                self.sendNotif('ERROR - ' + stamp, message)
                return
            current_vod = vodsinfo["data"][0]
            if self.live_info["id"] != current_vod["stream_id"]:
                vod_raw_filename = self._local_timestamp(current_vod["created_at"])
                message = 'A ERROR has ocurred, the latest VOD doesnt match with the livestream, the VOD is not published\nThe VOD and chat will not be downloaded and rendered.\nThe current livestream date: ' + stamp + '\nThe VOD date: ' + vod_raw_filename
                print(message)
                self.sendNotif('ERROR - ' + stamp, message)
                return

            if self.downloadMETADATA == 1:
                self.sendNotif('Metadata - ' + stamp, 'Downloading and saving metadata:\n' + json.dumps(current_vod, indent=4))
                with open(paths["metadata"], 'w', encoding='utf-8') as f:
                    json.dump(current_vod, f, ensure_ascii=False, indent=4)

            if self.downloadVOD == 1:
                print('Downloading VOD: ' + current_vod["title"])
                self.sendNotif('VOD - ' + stamp, 'Downloading VOD: ' + current_vod["title"])
                try:
                    subprocess.call([self.terminal, 'streamlink', 'twitch.tv/videos/' + current_vod["id"], self.quality, *self._auth_header_args(), "--hls-segment-threads", str(self.hls_segmentsVOD), "-o", paths["vod_raw"]])
                    self._remux(paths["vod_raw"], paths["vod_mp4"])
                except (OSError, ValueError, subprocess.SubprocessError) as e:
                    print('Error', 'A ERROR has ocurred and the VOD will not be downloaded.\n', e)
                    self.sendNotif('ERROR - ' + stamp, 'A ERROR has ocurred and the VOD will not be downloaded.\n')

            if self.downloadCHAT == 1:
                print('Downloading and rendering CHAT: ' + current_vod["title"])
                self.sendNotif('CHAT - ' + stamp, 'Downloading JSON and rendering chat logs from VOD:\n' + current_vod["title"])
                try:
                    subprocess.call([self.terminal, self._tool_path('chat'), current_vod["id"], paths["chat_json"], paths["chat_mp4"]])
                except (OSError, ValueError, subprocess.SubprocessError) as e:
                    self.sendNotif('ERROR - ' + stamp, "A ERROR has ocurred and chat will need to be downloaded and rendered manually.\n")
                    print("A ERROR has ocurred and chat will need to be downloaded and rendered manually\n", e)
        except Exception as e:  # session boundary: report and continue with cleanup/upload
            print('An error has occurred. VOD and chat will not be downloaded. Please check them manually.\n', e)
            self.sendNotif('ERROR - ' + stamp, 'An error has occurred. VOD and chat will not be downloaded. Please check them manually.\n')

    def _list_archive_tree(self):
        """Return a printable listing of the channel's archive folder (best-effort)."""
        try:
            if self.terminal == 'powershell.exe':
                output = subprocess.run([self.terminal, "tree", f"'{self.root_path}/{self.username}'", "/f"], capture_output=True, text=True).stdout
                # Drop the two header lines printed by the Windows tree command when present.
                parts = output.split("\n", 2)
                return parts[2] if len(parts) > 2 else output
            raw = subprocess.check_output([self.terminal, "tree", str(pathlib.Path(self.root_path).resolve()) + "/" + self.username])
            return raw.decode(sys.stdout.encoding or 'utf-8', errors='replace')
        except (OSError, TypeError, subprocess.SubprocessError) as e:
            return f'(file listing unavailable: {e})'

    def _upload(self, stamp):
        """Run bin/upload for this channel; return True when it exits with status 0."""
        tree = self._list_archive_tree()
        print('Uploading the following files:\n' + tree)
        self.sendNotif("UPLOADING - " + stamp, 'Uploading the following files: \n' + tree)
        try:
            status = subprocess.call([self.terminal, self._tool_path('upload'), str(pathlib.Path(self.root_path).resolve()), self.username])
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print("ERROR uploading: ", e)
            return False
        return status == 0

    def _delete_session_files(self, stamp, paths):
        """Delete the local files of the session for every enabled download type."""
        self.sendNotif("DELETING - " + stamp, "Deleting the files from current seccion.")
        print(f'{Fore.RED}DELETING FILES{Style.RESET_ALL}')
        targets = [paths["live_raw"], paths["live_mp4"]]
        if self.downloadVOD == 1:
            targets += [paths["vod_raw"], paths["vod_mp4"]]
        if self.downloadCHAT == 1:
            targets += [paths["chat_json"], paths["chat_mp4"]]
        if self.downloadMETADATA == 1:
            targets.append(paths["metadata"])
        for target in targets:
            # Raw files may already be gone (cleanRaw) and any step may have been skipped.
            if os.path.exists(target):
                print(f'{Fore.RED}Deleting ' + target + f'{Style.RESET_ALL}')
                os.remove(target)
def main(argv):
    """Parse the command line, apply the overrides and start the archiver.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 (after printing the help text) on an unknown option,
    a missing option value or a non-integer value for an on/off switch, and
    with status 0 after printing the help text for -h/--help.
    """
    help_msg = 'Twitch-Archive\nPython script to record twitch live stream, download the VOD, metadata, chat and render it, and uploads them to any cloud storage.\n -h, --help Display this information\n -u, --username <username> Twitch channel username\n -q, --quality <quality> best/source high/720p medium/480p worst/360p\n -v, --vod <1/0> Download vod\n -c, --chat <1/0> Download chat and render it\n -m, --metadata <1/0> Download metadata\n -r, --upload <1/0> Upload to cloud storage\n -d, --delete <1/0> Delete all files after upload (CAREFUL with this arg)\n -n, --notifications <1/0> Receive email notification of the proccess through gmail\n'
    # Each on/off switch maps to the TwitchArchive attribute it controls.
    switches = {
        "-v": "downloadVOD", "--vod": "downloadVOD",
        "-c": "downloadCHAT", "--chat": "downloadCHAT",
        "-m": "downloadMETADATA", "--metadata": "downloadMETADATA",
        "-r": "uploadCloud", "--upload": "uploadCloud",
        "-d": "deleteFiles", "--delete": "deleteFiles",
        "-n": "notifications", "--notifications": "notifications",
    }
    try:
        # "h" takes no value; every other option requires one (trailing colon).
        opts, args = getopt.getopt(argv, "hu:q:v:c:m:r:d:n:", ["help", "username=", "quality=", "vod=", "chat=", "metadata=", "upload=", "delete=", "notifications="])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    overrides = {}
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(help_msg)
            sys.exit()
        elif opt in ("-u", "--username"):
            overrides["username"] = arg
        elif opt in ("-q", "--quality"):
            overrides["quality"] = arg
        elif opt in switches:
            try:
                overrides[switches[opt]] = int(arg)
            except ValueError:
                print(help_msg)
                sys.exit(2)

    # Built only after parsing succeeded, so --help and usage errors need no archiver.
    twitch_archive = TwitchArchive()
    for attribute, value in overrides.items():
        setattr(twitch_archive, attribute, value)
    twitch_archive.run()
2022-12-03 08:57:56 -05:00
# Run the archiver only when executed as a script, forwarding the CLI
# arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])