Regalamiunsorriso/analyze_dump.py

64 lines
2.1 KiB
Python
Raw Normal View History

2026-04-22 18:41:37 +02:00
import sys
import re
import os
filepath = r"db/dump-pg-202604211927.sql"
filesize = os.path.getsize(filepath)
# Pattern for MySQL-style INSERT INTO `table`
pattern = re.compile(rb"^INSERT INTO `([^`]+)`", re.IGNORECASE)
stats = {}
with open(filepath, "rb") as f:
for line in f:
line_len = len(line)
match = pattern.search(line)
if match:
table_name = match.group(1).decode("utf-8", errors="ignore")
if table_name not in stats:
stats[table_name] = {"bytes": 0, "count": 0}
stats[table_name]["bytes"] += line_len
stats[table_name]["count"] += 1
results = []
for table, data in stats.items():
results.append({
"table": table,
"payload_mb": round(data["bytes"] / (1024 * 1024), 2),
"payload_pct": round((data["bytes"] / filesize) * 100, 2),
"insert_statements": data["count"],
"bytes": data["bytes"]
})
results.sort(key=lambda x: x["bytes"], reverse=True)
top_20 = results[:20]
top_20_names = {r["table"] for r in top_20}
special_names = {"log_foto", "clifor", "users", "news_users", "coda_messaggi", "clifor_log", "banner_stats", "access_log"}
special_substrings = ["log", "queue", "messaggi", "session", "temp", "stats", "foto"]
def is_special(name):
if name in special_names: return True
name_lower = name.lower()
for sub in special_substrings:
if sub in name_lower: return True
return False
# Collect all that match special criteria
special_rows = [r for r in results if is_special(r["table"])]
special_names_found = {r["table"] for r in special_rows}
# Combine top 20 and special rows
output_rows_set = {r["table"]: r for r in top_20}
for r in special_rows:
output_rows_set[r["table"]] = r
output_rows = list(output_rows_set.values())
output_rows.sort(key=lambda x: x["bytes"], reverse=True)
print(f"{'table':<30} | {'payload_mb':>10} | {'payload_pct':>11} | {'insert_statements':>17}")
print("-" * 75)
for r in output_rows:
print(f"{r['table']:<30} | {r['payload_mb']:>10.2f} | {r['payload_pct']:>10.2f}% | {r['insert_statements']:>17}")