diff --git a/workers/.gitignore b/workers/.gitignore new file mode 100644 index 0000000..21d0b89 --- /dev/null +++ b/workers/.gitignore @@ -0,0 +1 @@ +.venv/ diff --git a/workers/README.md b/workers/README.md new file mode 100644 index 0000000..609a768 --- /dev/null +++ b/workers/README.md @@ -0,0 +1,23 @@ +# void-workers + +Python ML ingest service alongside `void-server` (Node). Sibling of `lib/` in the void-v2 repo. + +## Local dev + +```bash +cd workers +python3.12 -m venv .venv +. .venv/bin/activate +pip install -e ".[all]" +export DATABASE_URL="postgres://..." +python -m void_workers.runner +``` + +## Tests + +```bash +pip install -e ".[test,all]" +DATABASE_URL="postgres://..." pytest -v +``` + +See `../docs/superpowers/plans/2026-06-01-void-v2-plan4-workers.md` for the full plan and `../docs/superpowers/specs/2026-06-01-void-v2-plan4-workers.md` for the design. diff --git a/workers/pyproject.toml b/workers/pyproject.toml new file mode 100644 index 0000000..b2e2a8c --- /dev/null +++ b/workers/pyproject.toml @@ -0,0 +1,23 @@ +[project] +name = "void-workers" +version = "0.1.0" +requires-python = ">=3.12" +dependencies = [ + "psycopg[binary,pool]>=3.2", + "structlog>=24.1", +] + +[project.optional-dependencies] +pdf = ["pdfplumber>=0.11", "pytesseract>=0.3.13", "pillow>=10.3"] +image = ["pytesseract>=0.3.13", "pillow>=10.3"] +video = ["yt-dlp>=2024.10.0", "faster-whisper>=1.0.3"] +test = ["pytest>=8.0", "pytest-asyncio>=0.23"] +all = ["void-workers[pdf,image,video,test]"] + +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["."] +include = ["void_workers*"] diff --git a/workers/void_workers/__init__.py b/workers/void_workers/__init__.py new file mode 100644 index 0000000..3dc1f76 --- /dev/null +++ b/workers/void_workers/__init__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/workers/void_workers/config.py b/workers/void_workers/config.py new file mode 100644 index 
0000000..945d958
--- /dev/null
+++ b/workers/void_workers/config.py
@@ -0,0 +1,59 @@
+"""Environment-driven configuration for void-workers.
+
+Every value is read once, at import time. Importing this module raises
+RuntimeError when DATABASE_URL is unset or empty.
+"""
+
+import os
+
+
+def env(name: str, default: str | None = None, required: bool = False) -> str | None:
+    """Return environment variable *name*, or *default* when it is unset.
+
+    Raises:
+        RuntimeError: if *required* is true and the resolved value is
+            missing or empty.
+    """
+    value = os.environ.get(name, default)
+    # An empty value (e.g. "DATABASE_URL=" in an env file) is as unusable as
+    # an unset one, so reject it here rather than at first use.
+    if required and not value:
+        raise RuntimeError(f"env {name} is required")
+    return value
+
+
+def env_int(name: str, default: int) -> int:
+    """Return environment variable *name* parsed as an integer.
+
+    An unset or blank variable yields *default*.
+
+    Raises:
+        ValueError: if the variable holds a non-integer value; the message
+            names the offending variable.
+    """
+    raw = os.environ.get(name)
+    if raw is None or not raw.strip():
+        return int(default)
+    try:
+        return int(raw)
+    except ValueError as err:
+        raise ValueError(f"env {name} must be an integer, got {raw!r}") from err
+
+
+DATABASE_URL = env("DATABASE_URL", required=True)
+BLOB_ROOT = env("BLOB_ROOT", "/var/lib/void/blobs")
+WHISPER_MODEL = env("WHISPER_MODEL", "small.en")
+WHISPER_CACHE = env("WHISPER_CACHE", "/var/lib/void/whisper-models")
+# Enabled only by the exact string "true".
+ALLOW_PRIVATE = env("VOID_INGEST_ALLOW_PRIVATE", "false") == "true"
+
+# One concurrency value per key, each overridable via its own variable.
+CONCURRENCY = {
+    "extract.pdf": env_int("VOID_CONCURRENCY_EXTRACT_PDF", 2),
+    "extract.image": env_int("VOID_CONCURRENCY_EXTRACT_IMAGE", 2),
+    "ingest.video": env_int("VOID_CONCURRENCY_INGEST_VIDEO", 1),
+    "sync.source_doc": env_int("VOID_CONCURRENCY_SYNC_SOURCE_DOC", 1),
+    "echo": env_int("VOID_CONCURRENCY_ECHO", 1),
+}
+
+POLL_INTERVAL_MS = env_int("VOID_POLL_INTERVAL_MS", 1000)
diff --git a/workers/void_workers/log.py b/workers/void_workers/log.py
new file mode 100644
index 0000000..dc11fe6
--- /dev/null
+++ b/workers/void_workers/log.py
@@ -0,0 +1,28 @@
+"""Structured JSON logging setup for void-workers."""
+
+import logging
+
+import structlog
+
+
+def init():
+    """Configure structlog and return a logger.
+
+    The processor chain merges context variables, adds the log level and
+    an ISO-format timestamp, and renders each event as JSON. The bound
+    logger class filters at ``logging.INFO``.
+    """
+    structlog.configure(
+        processors=[
+            structlog.contextvars.merge_contextvars,
+            structlog.processors.add_log_level,
+            structlog.processors.TimeStamper(fmt="iso"),
+            structlog.processors.JSONRenderer(),
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
+    )
+    return structlog.get_logger()
+
+
+# Module-level logger, configured when this module is imported.
+log = init()