import sys import re import os filepath = r"db/dump-pg-202604211927.sql" filesize = os.path.getsize(filepath) # Pattern for MySQL-style INSERT INTO `table` pattern = re.compile(rb"^INSERT INTO `([^`]+)`", re.IGNORECASE) stats = {} with open(filepath, "rb") as f: for line in f: line_len = len(line) match = pattern.search(line) if match: table_name = match.group(1).decode("utf-8", errors="ignore") if table_name not in stats: stats[table_name] = {"bytes": 0, "count": 0} stats[table_name]["bytes"] += line_len stats[table_name]["count"] += 1 results = [] for table, data in stats.items(): results.append({ "table": table, "payload_mb": round(data["bytes"] / (1024 * 1024), 2), "payload_pct": round((data["bytes"] / filesize) * 100, 2), "insert_statements": data["count"], "bytes": data["bytes"] }) results.sort(key=lambda x: x["bytes"], reverse=True) top_20 = results[:20] top_20_names = {r["table"] for r in top_20} special_names = {"log_foto", "clifor", "users", "news_users", "coda_messaggi", "clifor_log", "banner_stats", "access_log"} special_substrings = ["log", "queue", "messaggi", "session", "temp", "stats", "foto"] def is_special(name): if name in special_names: return True name_lower = name.lower() for sub in special_substrings: if sub in name_lower: return True return False # Collect all that match special criteria special_rows = [r for r in results if is_special(r["table"])] special_names_found = {r["table"] for r in special_rows} # Combine top 20 and special rows output_rows_set = {r["table"]: r for r in top_20} for r in special_rows: output_rows_set[r["table"]] = r output_rows = list(output_rows_set.values()) output_rows.sort(key=lambda x: x["bytes"], reverse=True) print(f"{'table':<30} | {'payload_mb':>10} | {'payload_pct':>11} | {'insert_statements':>17}") print("-" * 75) for r in output_rows: print(f"{r['table']:<30} | {r['payload_mb']:>10.2f} | {r['payload_pct']:>10.2f}% | {r['insert_statements']:>17}")