feat(workers): pgboss claim/complete/fail via psycopg
Adds the Boss class — SELECT … FOR UPDATE SKIP LOCKED to atomically claim, UPDATE state on completion. Retry semantics match pg-boss: exponential backoff via retry_count / retry_delay / retry_backoff. Forces client_encoding=UTF8 on every connection. The void2-db cluster was initialized as SQL_ASCII so psycopg refuses to decode text by default; UTF8 client_encoding works because the data is already UTF-8. Node's pg lib is more forgiving and didn't surface this. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
43
workers/tests/conftest.py
Normal file
43
workers/tests/conftest.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
import psycopg
|
||||||
|
|
||||||
|
DB_URL = os.environ["DATABASE_URL"]
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def conn():
|
||||||
|
with psycopg.connect(DB_URL, autocommit=True, client_encoding='UTF8') as c:
|
||||||
|
yield c
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def reset_pgboss(conn):
|
||||||
|
"""Drop the pgboss schema before each test. Tests that need it call ensure_pgboss(conn)."""
|
||||||
|
conn.execute("DROP SCHEMA IF EXISTS pgboss CASCADE")
|
||||||
|
yield
|
||||||
|
|
||||||
|
def ensure_pgboss(conn):
|
||||||
|
"""Bring up a minimal pgboss schema matching the columns boss.py reads/writes."""
|
||||||
|
conn.execute("CREATE SCHEMA IF NOT EXISTS pgboss")
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS pgboss.job (
|
||||||
|
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
name text NOT NULL,
|
||||||
|
priority int NOT NULL DEFAULT 0,
|
||||||
|
data jsonb,
|
||||||
|
state text NOT NULL DEFAULT 'created',
|
||||||
|
retry_limit int NOT NULL DEFAULT 5,
|
||||||
|
retry_count int NOT NULL DEFAULT 0,
|
||||||
|
retry_delay int NOT NULL DEFAULT 10,
|
||||||
|
retry_backoff boolean NOT NULL DEFAULT true,
|
||||||
|
start_after timestamptz NOT NULL DEFAULT now(),
|
||||||
|
started_on timestamptz,
|
||||||
|
completed_on timestamptz,
|
||||||
|
created_on timestamptz NOT NULL DEFAULT now(),
|
||||||
|
output jsonb
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def boss_ready(conn):
|
||||||
|
ensure_pgboss(conn)
|
||||||
|
yield conn
|
||||||
48
workers/tests/test_boss.py
Normal file
48
workers/tests/test_boss.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import json
|
||||||
|
from void_workers.boss import Boss
|
||||||
|
from void_workers.config import DATABASE_URL
|
||||||
|
|
||||||
|
def test_claim_returns_none_when_no_jobs(boss_ready):
|
||||||
|
boss = Boss(dsn=DATABASE_URL)
|
||||||
|
assert boss.claim("echo") is None
|
||||||
|
|
||||||
|
def test_claim_then_complete_round_trip(boss_ready):
|
||||||
|
boss_ready.execute(
|
||||||
|
"INSERT INTO pgboss.job(name, data) VALUES('echo', %s)",
|
||||||
|
(json.dumps({"ping": 1}),)
|
||||||
|
)
|
||||||
|
boss = Boss(dsn=DATABASE_URL)
|
||||||
|
job = boss.claim("echo")
|
||||||
|
assert job is not None
|
||||||
|
assert job["data"] == {"ping": 1}
|
||||||
|
boss.complete(job["id"], {"pong": 1})
|
||||||
|
row = boss_ready.execute(
|
||||||
|
"SELECT state, output FROM pgboss.job WHERE id=%s", (job["id"],)
|
||||||
|
).fetchone()
|
||||||
|
assert row[0] == "completed"
|
||||||
|
assert row[1] == {"pong": 1}
|
||||||
|
|
||||||
|
def test_fail_records_output_and_state(boss_ready):
|
||||||
|
boss_ready.execute("INSERT INTO pgboss.job(name) VALUES('echo')")
|
||||||
|
boss = Boss(dsn=DATABASE_URL)
|
||||||
|
job = boss.claim("echo")
|
||||||
|
boss.fail(job["id"], {"name": "BoomError", "message": "boom"})
|
||||||
|
row = boss_ready.execute(
|
||||||
|
"SELECT state, output, retry_count FROM pgboss.job WHERE id=%s",
|
||||||
|
(job["id"],)
|
||||||
|
).fetchone()
|
||||||
|
# retry_limit defaults to 5 and retry_count was 0 → should move to 'retry'
|
||||||
|
assert row[0] == "retry"
|
||||||
|
assert row[2] == 1
|
||||||
|
|
||||||
|
def test_fail_past_retry_limit_marks_failed(boss_ready):
|
||||||
|
boss_ready.execute(
|
||||||
|
"INSERT INTO pgboss.job(name, retry_count, retry_limit) VALUES('echo', 5, 5)"
|
||||||
|
)
|
||||||
|
boss = Boss(dsn=DATABASE_URL)
|
||||||
|
job = boss.claim("echo")
|
||||||
|
boss.fail(job["id"], {"name": "Done"})
|
||||||
|
row = boss_ready.execute(
|
||||||
|
"SELECT state FROM pgboss.job WHERE id=%s", (job["id"],)
|
||||||
|
).fetchone()
|
||||||
|
assert row[0] == "failed"
|
||||||
88
workers/void_workers/boss.py
Normal file
88
workers/void_workers/boss.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import json
|
||||||
|
import psycopg
|
||||||
|
from psycopg.rows import dict_row
|
||||||
|
|
||||||
|
|
||||||
|
class Boss:
|
||||||
|
"""psycopg-based pg-boss-compatible job claimer.
|
||||||
|
|
||||||
|
Matches the SQL semantics of pg-boss v10 close enough that the same
|
||||||
|
pgboss.job partition can be operated on by either side. We use
|
||||||
|
SELECT ... FOR UPDATE SKIP LOCKED to atomically claim work, then
|
||||||
|
UPDATE state on completion/failure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, dsn):
|
||||||
|
self.dsn = dsn
|
||||||
|
|
||||||
|
def _conn(self):
|
||||||
|
return psycopg.connect(
|
||||||
|
self.dsn, autocommit=False, row_factory=dict_row, client_encoding='UTF8'
|
||||||
|
)
|
||||||
|
|
||||||
|
def claim(self, queue):
|
||||||
|
"""Atomically claim one job for the given queue. Returns dict or None."""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT id, data, retry_count, retry_limit
|
||||||
|
FROM pgboss.job
|
||||||
|
WHERE name=%s
|
||||||
|
AND state IN ('created','retry')
|
||||||
|
AND start_after <= now()
|
||||||
|
ORDER BY priority DESC, created_on ASC
|
||||||
|
FOR UPDATE SKIP LOCKED
|
||||||
|
LIMIT 1
|
||||||
|
""", (queue,))
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
cur.execute(
|
||||||
|
"UPDATE pgboss.job SET state='active', started_on=now() WHERE id=%s",
|
||||||
|
(row["id"],)
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
return row
|
||||||
|
|
||||||
|
def complete(self, job_id, output):
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""
|
||||||
|
UPDATE pgboss.job
|
||||||
|
SET state='completed', completed_on=now(), output=%s
|
||||||
|
WHERE id=%s
|
||||||
|
""", (json.dumps(output), job_id))
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
def fail(self, job_id, output):
|
||||||
|
"""Mark the job retry (if budget left) or failed (otherwise)."""
|
||||||
|
with self._conn() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(
|
||||||
|
"SELECT retry_count, retry_limit, retry_delay, retry_backoff "
|
||||||
|
"FROM pgboss.job WHERE id=%s FOR UPDATE",
|
||||||
|
(job_id,)
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return
|
||||||
|
new_count = row["retry_count"] + 1
|
||||||
|
if new_count > row["retry_limit"]:
|
||||||
|
cur.execute(
|
||||||
|
"UPDATE pgboss.job SET state='failed', output=%s, "
|
||||||
|
"completed_on=now() WHERE id=%s",
|
||||||
|
(json.dumps(output), job_id)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
delay = row["retry_delay"]
|
||||||
|
if row["retry_backoff"]:
|
||||||
|
delay = delay * (2 ** (new_count - 1))
|
||||||
|
cur.execute("""
|
||||||
|
UPDATE pgboss.job
|
||||||
|
SET state='retry',
|
||||||
|
retry_count=%s,
|
||||||
|
start_after=now() + (%s || ' seconds')::interval,
|
||||||
|
output=%s
|
||||||
|
WHERE id=%s
|
||||||
|
""", (new_count, str(delay), json.dumps(output), job_id))
|
||||||
|
conn.commit()
|
||||||
Reference in New Issue
Block a user