64 lines
2.1 KiB
Python
64 lines
2.1 KiB
Python
|
|
import sys
|
||
|
|
import re
|
||
|
|
import os
|
||
|
|
|
||
|
|
filepath = r"db/dump-pg-202604211927.sql"
|
||
|
|
filesize = os.path.getsize(filepath)
|
||
|
|
|
||
|
|
# Pattern for MySQL-style INSERT INTO `table`
|
||
|
|
pattern = re.compile(rb"^INSERT INTO `([^`]+)`", re.IGNORECASE)
|
||
|
|
|
||
|
|
stats = {}
|
||
|
|
|
||
|
|
with open(filepath, "rb") as f:
|
||
|
|
for line in f:
|
||
|
|
line_len = len(line)
|
||
|
|
match = pattern.search(line)
|
||
|
|
if match:
|
||
|
|
table_name = match.group(1).decode("utf-8", errors="ignore")
|
||
|
|
if table_name not in stats:
|
||
|
|
stats[table_name] = {"bytes": 0, "count": 0}
|
||
|
|
stats[table_name]["bytes"] += line_len
|
||
|
|
stats[table_name]["count"] += 1
|
||
|
|
|
||
|
|
results = []
|
||
|
|
for table, data in stats.items():
|
||
|
|
results.append({
|
||
|
|
"table": table,
|
||
|
|
"payload_mb": round(data["bytes"] / (1024 * 1024), 2),
|
||
|
|
"payload_pct": round((data["bytes"] / filesize) * 100, 2),
|
||
|
|
"insert_statements": data["count"],
|
||
|
|
"bytes": data["bytes"]
|
||
|
|
})
|
||
|
|
|
||
|
|
results.sort(key=lambda x: x["bytes"], reverse=True)
|
||
|
|
|
||
|
|
top_20 = results[:20]
|
||
|
|
top_20_names = {r["table"] for r in top_20}
|
||
|
|
|
||
|
|
special_names = {"log_foto", "clifor", "users", "news_users", "coda_messaggi", "clifor_log", "banner_stats", "access_log"}
|
||
|
|
special_substrings = ["log", "queue", "messaggi", "session", "temp", "stats", "foto"]
|
||
|
|
|
||
|
|
def is_special(name):
|
||
|
|
if name in special_names: return True
|
||
|
|
name_lower = name.lower()
|
||
|
|
for sub in special_substrings:
|
||
|
|
if sub in name_lower: return True
|
||
|
|
return False
|
||
|
|
|
||
|
|
# Collect all that match special criteria
|
||
|
|
special_rows = [r for r in results if is_special(r["table"])]
|
||
|
|
special_names_found = {r["table"] for r in special_rows}
|
||
|
|
|
||
|
|
# Combine top 20 and special rows
|
||
|
|
output_rows_set = {r["table"]: r for r in top_20}
|
||
|
|
for r in special_rows:
|
||
|
|
output_rows_set[r["table"]] = r
|
||
|
|
|
||
|
|
output_rows = list(output_rows_set.values())
|
||
|
|
output_rows.sort(key=lambda x: x["bytes"], reverse=True)
|
||
|
|
|
||
|
|
print(f"{'table':<30} | {'payload_mb':>10} | {'payload_pct':>11} | {'insert_statements':>17}")
|
||
|
|
print("-" * 75)
|
||
|
|
for r in output_rows:
|
||
|
|
print(f"{r['table']:<30} | {r['payload_mb']:>10.2f} | {r['payload_pct']:>10.2f}% | {r['insert_statements']:>17}")
|