Python-LLM
Reading files (PDF or MD) and using an LLM API to analyze them:
# pip install openai pymupdf
# export OPENAI_API_KEY=...
import os
from pathlib import Path
from typing import Iterable
import pymupdf
from openai import OpenAI
client = OpenAI()
ROOT = Path("markdown")
OUT = Path("findings")
OUT.mkdir(parents=True, exist_ok=True)
MODEL = "gpt-4o"
PROMPT_TEMPLATE = """You are analyzing project notes and papers.
Extract 3–5 bullet points with concrete findings or action items.
Respond in Markdown only (no preamble), using:
- **Summary**: one sentence
- **Key Points**: bullets
- **Evidence**: short quotes if available
"""
# -------- helpers --------
def read_text_from_md(path: Path) -> str:
return path.read_text(encoding="utf-8", errors="replace")
def read_text_from_pdf(path: Path) -> str:
text_chunks = []
with pymupdf.open(path) as doc:
for page in doc:
text_chunks.append(page.get_text("text"))
return "\n".join(text_chunks)
def iter_corpus(root: Path) -> Iterable[Path]:
exts = {".md", ".pdf"}
for p in root.rglob("*"):
if p.is_file() and p.suffix.lower() in exts:
yield p
def run_llm(model: str, system: str, user: str) -> str:
# Chat Completions API (widely supported)
resp = client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user},
],
temperature=0.2,
)
return resp.choices[0].message.content or ""
def main():
files = list(iter_corpus(ROOT))
if not files:
print(f"No .md or .pdf files found under {ROOT.resolve()}")
return
for path in files:
try:
if path.suffix.lower() == ".md":
raw_text = read_text_from_md(path)
elif path.suffix.lower() == ".pdf":
raw_text = read_text_from_pdf(path)
else:
continue
# keep input small for demo; trim if huge
snippet = raw_text[:20000]
prompt = f"{PROMPT_TEMPLATE}\n\n# File: {path.name}\n\n<Content Start>\n{snippet}\n<Content End>"
findings_md = run_llm(MODEL, "You are a concise research assistant.", prompt)
out_path = OUT / f"{path.stem}.md"
out_path.write_text(findings_md.strip() + "\n", encoding="utf-8")
print(f"✓ Saved: {out_path.relative_to(Path.cwd())}")
except Exception as e:
print(f"⚠️ {path}: {e}")
if __name__ == "__main__":
main()