feat: add standalone chat downloader script and batch file for testing

This commit is contained in:
MaddoScientisto 2026-02-15 09:38:58 +01:00
commit 22a1f5b600
3 changed files with 279 additions and 8 deletions

View file

@ -8,6 +8,8 @@ import subprocess
import json
import threading
import time
import socket
import re
from typing import Dict, Any, Optional
from colorama import Fore, Style
@ -325,6 +327,118 @@ class ContentDownloader:
except Exception as e:
print(f'{Fore.RED}✗ Failed to start live chat download: {str(e)}{Style.RESET_ALL}')
return None
def _download_live_chat_via_irc(self, username: str, json_path: str,
                                max_messages: Optional[int] = None,
                                timeout: Optional[float] = None,
                                shutdown_check: Optional[callable] = None,
                                stream_monitor = None,
                                verbose: bool = False) -> bool:
    """
    Simple IRC-based fallback to capture Twitch chat when GraphQL methods fail.

    Connects to ``irc.chat.twitch.tv:6667`` (plain-text IRC) with a
    ``justinfan`` nick, joins ``#<username>`` and streams every PRIVMSG to
    ``json_path`` as newline-delimited JSON objects with at least:
    ``timestamp`` (local receive time in ms), ``author`` (dict with
    ``name``), and ``message``.

    Args:
        username: Channel to join. It is stripped and lowercased before use.
        json_path: Output file; it is truncated once the connection is up.
        max_messages: Stop after this many messages (falsy = unlimited).
        timeout: Stop after this many seconds (falsy = unlimited).
        shutdown_check: Optional zero-argument callable; a truthy result
            stops the capture.
        stream_monitor: Optional object exposing ``is_user_live()``; the
            capture stops when it returns a falsy value.
        verbose: When True, echo every 10th captured message to stdout.

    Returns:
        True if at least one message was written, False otherwise
        (including on any error, which is printed rather than raised).
    """
    # The host part is restricted to non-space characters so that a chat
    # message which itself contains " PRIVMSG #x :..." cannot be mistaken
    # for the command section of the line.
    privmsg_re = re.compile(r'(?:@[^ ]+ )?:([^! ]+)![^ ]+ PRIVMSG #[^ ]+ :(.+)')
    messages_written = 0
    try:
        # Twitch IRC only accepts lowercase channel names.
        channel = username.strip().lower()

        # The context manager closes the socket on every exit path,
        # including exceptions raised while preparing the output file.
        with socket.socket() as sock:
            # Bound the connect phase, then switch to a short timeout so the
            # loop below can poll its stop conditions about once per second.
            sock.settimeout(10.0)
            sock.connect(('irc.chat.twitch.tv', 6667))
            sock.settimeout(1.0)

            # Request tags & capabilities
            sock.sendall(b'CAP REQ :twitch.tv/tags twitch.tv/commands twitch.tv/membership\r\n')
            sock.sendall(b'PASS SCHMOOPIIE\r\n')
            sock.sendall(b'NICK justinfan67420\r\n')
            sock.sendall(f'JOIN #{channel}\r\n'.encode('utf-8'))

            start_time = time.time()

            # os.makedirs('') raises, so only create a directory when the
            # path actually has a directory component.
            out_dir = os.path.dirname(json_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

            # Open file for streaming newline-delimited JSON
            with open(json_path, 'w', encoding='utf-8') as out_f:
                # Buffer raw bytes and decode one complete line at a time so
                # a multi-byte UTF-8 character split across two recv() calls
                # is not dropped by the 'ignore' error handler.
                pending = b''
                while True:
                    # Shutdown/timeouts
                    if shutdown_check and shutdown_check():
                        break
                    if timeout and (time.time() - start_time) > timeout:
                        break
                    if stream_monitor:
                        try:
                            if not stream_monitor.is_user_live():
                                break
                        except Exception:
                            # Best effort: a failing liveness probe must not
                            # abort an otherwise healthy capture.
                            pass

                    try:
                        chunk = sock.recv(4096)
                    except socket.timeout:
                        continue
                    except Exception as e:
                        print(f'{Fore.YELLOW}⚠ IRC recv error: {e}{Style.RESET_ALL}')
                        break

                    if not chunk:
                        # An empty read means the server closed the
                        # connection; retrying would spin without progress.
                        print(f'{Fore.YELLOW}⚠ IRC connection closed by server{Style.RESET_ALL}')
                        break

                    pending += chunk
                    *raw_lines, pending = pending.split(b'\r\n')  # last item is the remainder

                    for raw_line in raw_lines:
                        if not raw_line:
                            continue
                        line = raw_line.decode('utf-8', 'ignore')

                        # Respond to PINGs
                        if line.startswith('PING'):
                            try:
                                sock.sendall(b'PONG :tmi.twitch.tv\r\n')
                            except OSError:
                                # If the send failed the connection is gone;
                                # the next recv() reports it and ends the loop.
                                pass
                            continue

                        # Extract PRIVMSG lines
                        m = privmsg_re.match(line)
                        if not m:
                            continue

                        author = m.group(1)
                        msg_text = m.group(2)
                        item = {
                            'timestamp': int(time.time() * 1000),
                            'author': {'name': author},
                            'message': msg_text
                        }
                        out_f.write(json.dumps(item, ensure_ascii=False) + '\n')
                        # Flush per message so a concurrent reader (or a
                        # crash) sees everything captured so far.
                        out_f.flush()
                        messages_written += 1

                        if verbose and (messages_written % 10 == 0):
                            print(f'\n{Fore.GREEN}💬 {author}: {Fore.WHITE}{msg_text}{Style.RESET_ALL}')

                        if max_messages and messages_written >= max_messages:
                            break

                    # Propagate the inner break out of the receive loop.
                    if max_messages and messages_written >= max_messages:
                        break

        if messages_written > 0:
            print(f'\n{Fore.GREEN}✓ IRC fallback captured {messages_written} messages{Style.RESET_ALL}')
            return True
        else:
            print(f'\n{Fore.RED}✗ IRC fallback captured no messages{Style.RESET_ALL}')
            return False
    except Exception as e:
        print(f'{Fore.RED}✗ IRC fallback failed: {e}{Style.RESET_ALL}')
        import traceback
        traceback.print_exc()
        return False
def wait_for_chat_download(self, process: Optional[subprocess.Popen],
json_path: str, timeout: int = 300) -> bool:
@ -403,15 +517,45 @@ class ContentDownloader:
print(f'{Fore.MAGENTA}[VERBOSE] Timeout: {timeout}s (None = unlimited){Style.RESET_ALL}')
print(f'{Fore.MAGENTA}[VERBOSE] Max messages: {max_messages} (None = unlimited){Style.RESET_ALL}')
# Get chat messages
# Get chat messages with a small retry loop to handle transient GQL/network issues
print(f'{Fore.CYAN}Connecting to Twitch chat...{Style.RESET_ALL}')
chat = self.chat_downloader.get_chat(
stream_url,
message_types=['text_message'], # Basic text messages
output=json_path,
timeout=timeout,
max_messages=max_messages
)
chat = None
max_attempts = 3
for attempt in range(1, max_attempts + 1):
try:
chat = self.chat_downloader.get_chat(
stream_url,
message_types=['text_message'], # Basic text messages
output=json_path,
timeout=timeout,
max_messages=max_messages
)
break
except Exception as e:
# Provide a clearer, user-facing message for common failures
print(f"{Fore.YELLOW}⚠ chat_downloader attempt {attempt}/{max_attempts} failed: {str(e)}{Style.RESET_ALL}")
# On final attempt, dump traceback to help diagnose library internals
if attempt >= max_attempts:
print(f"{Fore.RED}✗ chat_downloader failed after {max_attempts} attempts. This may be caused by Twitch GraphQL changes or rate-limiting.{Style.RESET_ALL}")
print(f"{Fore.YELLOW} Try upgrading the chat-downloader package: pip install -U chat-downloader{Style.RESET_ALL}")
import traceback
traceback.print_exc()
# Try IRC fallback before giving up
print(f"{Fore.MAGENTA}[VERBOSE] Attempting IRC fallback for chat capture...{Style.RESET_ALL}")
try:
return self._download_live_chat_via_irc(username, json_path,
max_messages=max_messages,
timeout=timeout,
shutdown_check=shutdown_check,
stream_monitor=stream_monitor,
verbose=verbose)
except Exception as fallback_err:
print(f"{Fore.RED}✗ IRC fallback failed: {fallback_err}{Style.RESET_ALL}")
traceback.print_exc()
return False
else:
time.sleep(1)
continue
# The get_chat with output parameter writes to file automatically
# We just need to iterate to trigger the download