feat(workers): safe_fetch Python port
Mirrors lib/ingest/safe_fetch.js. Same scheme + IP-range checks and VOID_INGEST_ALLOW_PRIVATE env gate. Used by sync.source_doc and any future Python workers that fetch user-controlled URLs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
27
workers/tests/test_safe_fetch.py
Normal file
27
workers/tests/test_safe_fetch.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
from void_workers.safe_fetch import safe_fetch, SafeFetchError
|
||||
|
||||
|
||||
def test_rejects_file_scheme():
    """safe_fetch must raise SafeFetchError for a file:// URL."""
    target = "file:///etc/passwd"
    with pytest.raises(SafeFetchError):
        safe_fetch(target)
|
||||
|
||||
|
||||
def test_rejects_loopback():
    """safe_fetch must raise SafeFetchError for a literal loopback address."""
    target = "http://127.0.0.1/x"
    with pytest.raises(SafeFetchError):
        safe_fetch(target)
|
||||
|
||||
|
||||
def test_rejects_rfc1918():
    """safe_fetch must raise SafeFetchError for a literal 192.168.0.0/16 address."""
    target = "http://192.168.1.1/x"
    with pytest.raises(SafeFetchError):
        safe_fetch(target)
|
||||
|
||||
|
||||
def test_rejects_metadata_endpoint():
    """safe_fetch must raise SafeFetchError for the link-local 169.254.169.254 address."""
    target = "http://169.254.169.254/latest/"
    with pytest.raises(SafeFetchError):
        safe_fetch(target)
|
||||
|
||||
|
||||
def test_rejects_cgnat():
    """safe_fetch must raise SafeFetchError for a literal 100.64.0.0/10 (CGNAT) address."""
    target = "http://100.64.0.1/x"
    with pytest.raises(SafeFetchError):
        safe_fetch(target)
|
||||
82
workers/void_workers/safe_fetch.py
Normal file
82
workers/void_workers/safe_fetch.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Python port of lib/ingest/safe_fetch.js.
|
||||
|
||||
Same SSRF mitigations the Node side ships:
|
||||
- http/https only
|
||||
- DNS-resolved hostnames checked against loopback / RFC1918 /
|
||||
link-local / CGNAT / IPv6 ULA + link-local
|
||||
- Redirects followed manually with the same checks on each hop
|
||||
- VOID_INGEST_ALLOW_PRIVATE=true gate for offline-fixture tests
|
||||
"""
|
||||
import socket
|
||||
import ipaddress
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# IPv4 networks that a fetch target is never allowed to fall inside.
BLOCK_V4_NETS = list(map(ipaddress.ip_network, (
    "0.0.0.0/8",        # "this network"
    "127.0.0.0/8",      # loopback
    "10.0.0.0/8",       # RFC 1918 private
    "172.16.0.0/12",    # RFC 1918 private
    "192.168.0.0/16",   # RFC 1918 private
    "169.254.0.0/16",   # link-local
    "100.64.0.0/10",    # CGNAT shared address space
)))
|
||||
|
||||
|
||||
class SafeFetchError(Exception):
    """Raised when a URL is refused by the safe-fetch checks or cannot be resolved."""
|
||||
|
||||
|
||||
def _is_blocked(addr):
|
||||
if os.environ.get("VOID_INGEST_ALLOW_PRIVATE") == "true":
|
||||
return False
|
||||
try:
|
||||
ip = ipaddress.ip_address(addr)
|
||||
except ValueError:
|
||||
return True
|
||||
if ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_unspecified:
|
||||
return True
|
||||
if isinstance(ip, ipaddress.IPv4Address):
|
||||
return any(ip in n for n in BLOCK_V4_NETS)
|
||||
# IPv6: ULA + link-local
|
||||
if ip in ipaddress.ip_network("fc00::/7") or ip in ipaddress.ip_network("fe80::/10"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _resolve(host):
    """Resolve *host* and refuse it if any resolved address is blocked.

    Args:
        host: Hostname to look up via socket.getaddrinfo.

    Returns:
        The first distinct address returned by the resolver.

    Raises:
        SafeFetchError: if the lookup fails, yields no addresses, or any
            resolved address is rejected by _is_blocked.
    """
    try:
        infos = socket.getaddrinfo(host, None)
    except (socket.gaierror, UnicodeError) as e:
        # UnicodeError: getaddrinfo IDNA-encodes the name and fails on an
        # empty or over-long label; report that like any other failed lookup.
        raise SafeFetchError(f"no DNS for {host}: {e}") from e
    # De-duplicate while keeping resolver order, so the returned address is
    # deterministic rather than depending on set iteration order.
    addrs = list(dict.fromkeys(info[4][0] for info in infos))
    if not addrs:
        raise SafeFetchError(f"no addresses for {host}")
    # Every address must pass: one blocked record is enough to refuse the host.
    for a in addrs:
        if _is_blocked(a):
            raise SafeFetchError(f"{host} resolves to blocked address {a}")
    return addrs[0]
|
||||
|
||||
|
||||
class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that declines every redirect so 3xx surfaces as HTTPError."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Returning None tells urllib not to follow; the default error
        # handler then raises HTTPError, which safe_fetch handles itself.
        return None


def safe_fetch(url, *, headers=None, timeout=15, max_hops=5):
    """Fetch *url* over http/https, validating every hop before requesting it.

    Each hop (the initial URL and every redirect target) must use the http
    or https scheme, and its host must either be a literal IP that
    _is_blocked accepts or a hostname that _resolve accepts.

    Args:
        url: The URL to fetch.
        headers: Optional mapping of request headers, sent on every hop.
        timeout: Per-request timeout in seconds passed to the opener.
        max_hops: Maximum number of redirects to follow.

    Returns:
        The response body as bytes.

    Raises:
        SafeFetchError: unsupported scheme, missing host, blocked or
            unresolvable host, or more than *max_hops* redirects.
        urllib.error.HTTPError: any HTTP error response that is not a
            followable redirect.

    NOTE(review): the address validated here is not pinned for the actual
    connection; urllib resolves the hostname again when it connects, so a
    DNS answer that changes between the check and the request is not caught.
    """
    # Function-scope import: the module header only imports urlparse.
    from urllib.parse import urljoin

    # The default opener follows redirects on its own, which would skip the
    # per-hop checks below; install a handler that refuses to do so.
    opener = urllib.request.build_opener(_NoRedirectHandler)
    redirect_codes = (301, 302, 303, 307, 308)

    current = url
    for _hop in range(max_hops + 1):
        u = urlparse(current)
        if u.scheme not in ("http", "https"):
            raise SafeFetchError(f"unsupported scheme {u.scheme}")
        host = u.hostname
        if not host:
            raise SafeFetchError(f"no host in URL {current}")
        try:
            ipaddress.ip_address(host)
        except ValueError:
            # Not a literal IP: resolve it and vet every resolved address.
            _resolve(host)
        else:
            if _is_blocked(host):
                raise SafeFetchError(f"blocked literal IP {host}")
        req = urllib.request.Request(current, headers=headers or {})
        try:
            with opener.open(req, timeout=timeout) as r:
                return r.read()
        except urllib.error.HTTPError as e:
            location = e.headers.get("Location") if e.headers is not None else None
            if e.code not in redirect_codes or not location:
                raise
            # The error object wraps the open response; release it before
            # moving on to the next hop.
            e.close()
            # Location may be relative; resolve it against the current URL.
            current = urljoin(current, location)
    raise SafeFetchError(f"too many redirects ({max_hops})")
|
||||
Reference in New Issue
Block a user