Files
Void-Homelab/workers/void_workers/handlers/image.py
2026-06-01 05:00:21 +10:00

19 lines
518 B
Python

from PIL import Image
import pytesseract
from .. import repo
# Identifier exported by this handler module.
# NOTE(review): presumably the job type the worker dispatcher uses to route
# jobs to `handle` — confirm against the handler registry.
NAME = "extract.image"
def handle(job_data: dict) -> dict:
    """Extract text from an image with Tesseract and record it on the ref.

    Args:
        job_data: Job payload. Must contain ``"ref_id"`` (passed through to
            ``repo.update_ref``) and ``"blob_path"`` (path of the image file
            to open).

    Returns:
        A dict with the ``ref_id`` and ``chars``, the length of the stored
        (stripped and truncated) text.

    Raises:
        KeyError: If ``"ref_id"`` or ``"blob_path"`` is missing from
            ``job_data``.

    Errors raised while opening the image, running OCR, or updating the ref
    are not caught here and propagate to the caller.
    """
    ref_id = job_data["ref_id"]
    blob_path = job_data["blob_path"]

    # Upper bound on the number of characters kept from the OCR output.
    max_chars = 200_000

    # Open the image in a context manager so the underlying file handle is
    # released as soon as OCR finishes, rather than whenever the Image object
    # happens to be garbage-collected.
    with Image.open(blob_path) as image:
        text = pytesseract.image_to_string(image, lang="eng").strip()

    body_text = text[:max_chars]
    char_count = len(body_text)

    repo.update_ref(
        ref_id,
        body_text=body_text,
        metadata_patch={"extract": {"method": "tesseract", "chars": char_count}},
    )
    return {"ref_id": ref_id, "chars": char_count}