"""Integration tests for ``void_workers.handlers.pdf.handle``.

Each test rebuilds the database schema from scratch, runs the Node-side
migrations, seeds a single ``refs`` row pointing at a fixture PDF, invokes
the handler, and then checks both the handler's return value and the
``body_text`` stored on the row.
"""

import subprocess
from pathlib import Path

from void_workers.handlers.pdf import handle as handle_pdf

FIXTURES = Path(__file__).parent / "fixtures"

# Directory of the Node project whose migration script creates the schema.
_NODE_PROJECT_DIR = "/project/src/void-v2"


def _run_node_migrations():
    """Apply all migrations by running the Node migration script with ``up``.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero
            (``check=True``).
    """
    subprocess.run(
        ["node", "lib/db/migrate.js", "up"],
        cwd=_NODE_PROJECT_DIR,
        check=True,
    )


def _reset_void_schema(conn):
    """Mirror tests/helpers/db.js::resetDb on the Node side.

    Drops and recreates the ``public`` schema, then re-creates the
    ``pgcrypto`` and ``vector`` extensions.

    NOTE(review): the migrations that follow run in a separate process, so
    these statements must be visible to it before it starts. This presumably
    relies on ``conn`` being in autocommit mode -- confirm in the fixture.
    """
    statements = (
        "DROP SCHEMA IF EXISTS public CASCADE",
        "CREATE SCHEMA public",
        "CREATE EXTENSION IF NOT EXISTS pgcrypto",
        "CREATE EXTENSION IF NOT EXISTS vector",
    )
    for statement in statements:
        conn.execute(statement)


def _seed_space_and_ref(conn, blob_path, kind="pdf"):
    """Upsert the test space and insert one ref row for ``blob_path``.

    Args:
        conn: database connection exposing ``execute(...).fetchone()``.
        blob_path: path of the fixture file; stored as ``str(blob_path)``.
        kind: value for the ``refs.kind`` column.

    Returns:
        A ``(space_id, ref_id)`` tuple.
    """
    space_id = conn.execute(
        "INSERT INTO spaces(slug, name) VALUES('plan4-tests', 'P4') "
        "ON CONFLICT (slug) DO UPDATE SET name=EXCLUDED.name RETURNING id"
    ).fetchone()[0]
    ref_id = conn.execute(
        "INSERT INTO refs(space_id, kind, source_url, title, blob_path) "
        "VALUES(%s, %s, NULL, 'fixture', %s) RETURNING id",
        (space_id, kind, str(blob_path)),
    ).fetchone()[0]
    return space_id, ref_id


def _fresh_db(conn):
    """Reset the schema and re-apply the Node migrations."""
    _reset_void_schema(conn)
    _run_node_migrations()


def _prepare_ref(conn, fixture_name):
    """Rebuild the database and seed a ref for the named fixture file.

    Returns:
        A ``(blob_path, ref_id)`` tuple.
    """
    _fresh_db(conn)
    blob = FIXTURES / fixture_name
    _, ref = _seed_space_and_ref(conn, blob)
    return blob, ref


def _stored_body_text(conn, ref):
    """Return the lower-cased ``body_text`` of the ref (``""`` when NULL)."""
    row = conn.execute(
        "SELECT body_text FROM refs WHERE id=%s", (ref,)
    ).fetchone()
    return (row[0] or "").lower()


def test_pdf_born_digital_uses_pdftotext(conn):
    """A born-digital PDF is reported as extracted with ``pdftotext``."""
    blob, ref = _prepare_ref(conn, "born_digital.pdf")

    out = handle_pdf({"ref_id": str(ref), "blob_path": str(blob)})

    assert out["ref_id"] == str(ref)
    assert out["method"] == "pdftotext"
    assert out["chars"] > 0
    assert "void-workers" in _stored_body_text(conn, ref)


def test_pdf_scanned_falls_back_to_tesseract(conn):
    """A scanned PDF is reported as extracted with ``tesseract``."""
    blob, ref = _prepare_ref(conn, "scanned.pdf")

    out = handle_pdf({"ref_id": str(ref), "blob_path": str(blob)})

    assert out["method"] == "tesseract"
    assert "blackflame" in _stored_body_text(conn, ref)