import os, zipfile, gnupg, hashlib, datetime, json
from pathlib import Path
from ai_models import metadata_cleaner, redactor, verifier, summarizer
def process_upload(raw_dir: Path) -> Path:
    """Sanitise, index, archive and sign every file under *raw_dir*.

    The steps run in this order:

    1. ``metadata_cleaner.strip`` is called on every file (strip metadata).
    2. ``redactor.apply_rules`` is called on every file with the
       ``"global_pp"`` ruleset (redact PII).
    3. ``verifier.check_batch`` is called once on the directory.
    4. Files ending in ``.txt``, ``.pdf`` or ``.docx`` are summarised
       (at most 200 words each) into ``README.txt``.
    5. ``manifest.json`` is written, mapping each file's POSIX-style
       relative path to its SHA-256 hex digest.
    6. The directory is zipped with DEFLATE compression.
    7. A detached GPG signature is requested for the zip.

    Args:
        raw_dir: Directory containing the uploaded files. It is walked
            recursively and modified in place: ``README.txt`` and
            ``manifest.json`` are written into it.

    Returns:
        Path of the zip archive, ``<raw_dir.name>.zip``, relative to the
        current working directory. The detached signature is requested
        at the same path with ``.sig`` appended.
    """
    # 1. Strip metadata
    for path in raw_dir.rglob("*"):
        if path.is_file():
            metadata_cleaner.strip(path)

    # 2. Redact PII (separate pass so every file is stripped first)
    for path in raw_dir.rglob("*"):
        if path.is_file():
            redactor.apply_rules(path, ruleset="global_pp")

    # 3. Verify content
    # NOTE(review): the returned report is not consulted anywhere in this
    # function -- confirm whether a failed verification should abort.
    verifier.check_batch(raw_dir)

    # 4. Summarise each doc
    summarised_suffixes = {".txt", ".pdf", ".docx"}
    index_lines = []
    for path in raw_dir.rglob("*"):
        if path.is_file() and path.suffix.lower() in summarised_suffixes:
            summary = summarizer.summarise(path, max_words=200)
            index_lines.append(f"{path.name}: {summary}\n")

    # 5. Write README & manifest
    # UTF-8 is pinned so summaries with non-ASCII text do not depend on
    # the platform's default encoding.
    (raw_dir / "README.txt").write_text(
        "=== Document Index ===\n" + "".join(index_lines),
        encoding="utf-8",
    )
    # Built after README.txt exists (so it is hashed) and before
    # manifest.json is written (so the manifest does not hash itself).
    manifest = {
        path.relative_to(raw_dir).as_posix(): hashlib.sha256(
            path.read_bytes()
        ).hexdigest()
        for path in raw_dir.rglob("*")
        if path.is_file()
    }
    (raw_dir / "manifest.json").write_text(json.dumps(manifest, indent=2))

    # 6. Zip the folder
    zip_path = Path(f"{raw_dir.name}.zip")
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for path in raw_dir.rglob("*"):
            if path.is_file():
                # Store entries relative to raw_dir, not the full path.
                archive.write(path, path.relative_to(raw_dir))

    # 7. Sign the zip
    # NOTE(review): the signing result is not checked, so a failed
    # signature still returns the zip path -- confirm desired handling.
    gpg = gnupg.GPG()
    with zip_path.open("rb") as zip_file:
        gpg.sign_file(
            zip_file,
            keyid="YOUR_KEY_ID",
            detach=True,
            output=str(zip_path) + ".sig",
        )

    # 8. Return signed zip path
    return zip_path
Note: All AI models used here can be run on a modest CPU/GPU; you can swap in open‑source alternatives (e.g., spaCy for redaction, HuggingFace’s
distilbert-base-uncased-squad for summarisation) to keep costs low.
Document Classification: TLP:AMBER // REL LIMITED
Subject: Analysis of NWOLeaks.com-Tec-zip1.zip
Date of Analysis: [Insert Current Date]
Analyst: [Insert Your Name/ID] NWOLeaks.com-Tec-zip1.zip
(Conducted in an isolated environment such as Any.Run, Cuckoo, or a local FLARE VM) import os, zipfile, gnupg, hashlib, datetime, json from
README.txt and a manifest.json (file‑hash list) are added.