feat(workers): Python skeleton + config + structlog

Plan 4 Phase A scaffolding. void-workers package at /workers/, sibling
of /lib/. pyproject.toml requires Python >= 3.12 and defines separate
extras for pdf / image / video / test, plus an aggregate `all` extra.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 04:41:33 +10:00
parent c4663992ec
commit 6e3798f6d1
6 changed files with 90 additions and 0 deletions

1
workers/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.venv/

23
workers/README.md Normal file
View File

@@ -0,0 +1,23 @@
# void-workers
Python ML ingest service alongside `void-server` (Node). Sibling of `lib/` in the void-v2 repo.
## Local dev
```bash
cd workers
python3.12 -m venv .venv
. .venv/bin/activate
pip install -e ".[all]"
export DATABASE_URL="postgres://..."
python -m void_workers.runner
```
## Tests
```bash
pip install -e ".[test,all]"
DATABASE_URL="postgres://..." pytest -v
```
See `../docs/superpowers/plans/2026-06-01-void-v2-plan4-workers.md` for the full plan and `../docs/superpowers/specs/2026-06-01-void-v2-plan4-workers.md` for the design.

23
workers/pyproject.toml Normal file
View File

@@ -0,0 +1,23 @@
[project]
name = "void-workers"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
"psycopg[binary,pool]>=3.2",
"structlog>=24.1",
]
[project.optional-dependencies]
pdf = ["pdfplumber>=0.11", "pytesseract>=0.3.13", "pillow>=10.3"]
image = ["pytesseract>=0.3.13", "pillow>=10.3"]
video = ["yt-dlp>=2024.10.0", "faster-whisper>=1.0.3"]
test = ["pytest>=8.0", "pytest-asyncio>=0.23"]
all = ["void-workers[pdf,image,video,test]"]
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["."]
include = ["void_workers*"]

View File

@@ -0,0 +1 @@
# Package version string; same value as `version` in pyproject.toml.
__version__ = "0.1.0"

View File

@@ -0,0 +1,26 @@
import os
def env(name, default=None, required=False):
    """Read the environment variable ``name``.

    Args:
        name: Name of the environment variable to look up.
        default: Value returned when the variable is not set.
        required: When true, a missing or empty value is treated as an error.

    Returns:
        The variable's string value, or ``default`` when it is not set.

    Raises:
        RuntimeError: If ``required`` is true and the resolved value is
            ``None`` or the empty string.
    """
    value = os.environ.get(name, default)
    # A variable that is exported but empty (``export NAME=``) is just as
    # unusable as an unset one for a required setting, so reject it here
    # instead of letting the empty string reach whatever consumes it.
    if required and (value is None or value == ""):
        raise RuntimeError(f"env {name} is required")
    return value
def env_int(name, default):
    """Read the environment variable ``name`` as an integer.

    Args:
        name: Name of the environment variable to look up.
        default: Value used when the variable is not set; it is passed
            through ``int()`` as well.

    Returns:
        The integer value of the variable, or of ``default`` when unset.

    Raises:
        ValueError: If the value cannot be parsed as an integer. The message
            names the variable and shows the offending value.
    """
    raw = os.environ.get(name, default)
    try:
        return int(raw)
    except ValueError as exc:
        # The bare int() error only shows the bad literal; include the
        # variable name so a misconfiguration can be traced to its source.
        raise ValueError(
            f"env {name} must be an integer, got {raw!r}"
        ) from exc
# --- Required settings ------------------------------------------------------
# Read first: env() raises RuntimeError here when DATABASE_URL is not set.
DATABASE_URL = env("DATABASE_URL", required=True)

# --- Optional settings with defaults ----------------------------------------
BLOB_ROOT = env("BLOB_ROOT", "/var/lib/void/blobs")
WHISPER_MODEL = env("WHISPER_MODEL", "small.en")
WHISPER_CACHE = env("WHISPER_CACHE", "/var/lib/void/whisper-models")

# Only the exact string "true" switches this on; any other value leaves it off.
ALLOW_PRIVATE = "true" == env("VOID_INGEST_ALLOW_PRIVATE", "false")

# --- Concurrency per job kind -----------------------------------------------
# (job kind, environment variable, default) triples, kept in table form so the
# variable names stay greppable.
_CONCURRENCY_SETTINGS = (
    ("extract.pdf", "VOID_CONCURRENCY_EXTRACT_PDF", 2),
    ("extract.image", "VOID_CONCURRENCY_EXTRACT_IMAGE", 2),
    ("ingest.video", "VOID_CONCURRENCY_INGEST_VIDEO", 1),
    ("sync.source_doc", "VOID_CONCURRENCY_SYNC_SOURCE_DOC", 1),
    ("echo", "VOID_CONCURRENCY_ECHO", 1),
)
CONCURRENCY = {
    kind: env_int(variable, fallback)
    for kind, variable, fallback in _CONCURRENCY_SETTINGS
}

POLL_INTERVAL_MS = env_int("VOID_POLL_INTERVAL_MS", 1000)

View File

@@ -0,0 +1,16 @@
import logging
import structlog
def init():
    """Configure structlog globally and return a logger.

    The processor chain merges context variables, adds the log level, stamps
    an ISO-format timestamp and renders each event as JSON. The bound-logger
    wrapper is built with ``logging.INFO`` as its minimum level.

    Returns:
        The logger obtained from ``structlog.get_logger()`` after
        configuration.
    """
    pipeline = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer(),
    ]
    info_and_above = structlog.make_filtering_bound_logger(logging.INFO)
    structlog.configure(processors=pipeline, wrapper_class=info_and_above)
    return structlog.get_logger()
# Shared module-level logger. Calling init() here means structlog is
# configured as a side effect of importing this module.
log = init()