#!/usr/bin/env python3
"""
CRLF-permissive front-end proxy for CVE-2026-24880.

This stands in for the "documented precondition": a reverse proxy that forwards
client traffic to a back-end Tomcat over HTTP/1.1 and does NOT validate or
normalize the chunked request body, so raw CRLF embedded inside an HTTP/1.1
chunk extension is passed through to the back-end verbatim.

It also enforces a real front-end -> back-end authorization boundary:

  * It parses ONLY the outer request line + headers (the head, up to the blank
    line). It applies a deny-list policy on the outer request target:
        - targets under /internal -> REJECTED with 403 (never forwarded)
        - all other targets (e.g. /public/*) -> forwarded to the back-end
  * It then streams the request BODY byte-for-byte to the back-end without
    parsing, re-chunking, or sanitizing it. Chunk extensions (and any raw CRLF
    inside them) are forwarded unchanged.

Because the proxy refuses to forward any *outer* request to /internal/*, the
only way a request reaches the back-end's /internal/arrival endpoint is for a
vulnerable ChunkedInputFilter to absorb the embedded CRLF + request bytes from
a chunk extension and re-parse them as a second, independent request. That is
the smuggling boundary the criterion requires.

Stdlib only. Binds inside the container; published to 127.0.0.1 by compose.
"""

import os
import socket
import sys
import threading
import urllib.parse

# Wire-level byte sequences: the HTTP line terminator, and the blank line
# (two terminators in a row) that ends a request head.
CRLF = b"\r\n"
HEAD_TERM = CRLF + CRLF

# Listening socket: all interfaces; port overridable through FRONTEND_PORT.
LISTEN_HOST = "0.0.0.0"
LISTEN_PORT = int(os.environ.get("FRONTEND_PORT", "8000"))

# Back-end the proxy connects to, overridable through BACKEND_HOST and
# BACKEND_PORT.
BACKEND_HOST = os.environ.get("BACKEND_HOST", "backend")
BACKEND_PORT = int(os.environ.get("BACKEND_PORT", "8080"))


def log(msg):
    """Write one "[frontend] "-prefixed line to stdout and flush it.

    Args:
        msg: The message to log. Any object is accepted and rendered with
            ``%s`` formatting.
    """
    # Wrap msg in a 1-tuple: a bare tuple on the right of "%" would be
    # unpacked as the format arguments and raise TypeError.
    sys.stdout.write("[frontend] %s\n" % (msg,))
    # Flush right away so each line is emitted as soon as it is logged.
    sys.stdout.flush()


def read_head(sock):
    """Read from *sock* until the end of the HTTP request head (blank line).

    Data is received in chunks rather than one byte at a time, so bytes that
    follow the head may be read as well; they are handed back to the caller.

    Args:
        sock: A connected socket to receive from.

    Returns:
        A ``(head, leftover)`` tuple of ``bytes``. ``head`` includes the
        terminating blank line and ``leftover`` is whatever was received after
        it. ``(None, b"")`` is returned when the peer closes the connection
        before the head is complete, and ``(None, <bytes read so far>)`` when
        the head would be longer than 65536 bytes.

    Exceptions raised by ``sock.recv`` (for example a timeout) propagate to the
    caller.
    """
    max_head = 65536
    buf = bytearray()
    while True:
        chunk = sock.recv(4096)
        if not chunk:
            return None, b""
        # The terminator may straddle the boundary between the bytes already
        # buffered and the new chunk, so back up by len(HEAD_TERM) - 1 and
        # search only the part that has not been scanned yet.
        search_from = max(0, len(buf) - (len(HEAD_TERM) - 1))
        buf += chunk
        pos = buf.find(HEAD_TERM, search_from)
        if pos != -1:
            end = pos + len(HEAD_TERM)
            if end > max_head:
                return None, bytes(buf)  # head too large; refuse
            return bytes(buf[:end]), bytes(buf[end:])
        if len(buf) > max_head:
            return None, bytes(buf)  # head too large; refuse


def parse_request_line_target(head):
    """Return the request target from the first line of *head*, or None.

    The request line is accepted only in the strict shape
    ``METHOD SP target [SP version]``: two or three tokens separated by exactly
    one space, none of them empty, and none containing a control character
    (tab, bare CR/LF, ...). Anything else yields None.

    The strictness is deliberate. The caller bases an access decision on the
    returned target, so this parser must not pick a different token than a
    more lenient downstream parser would. For example, with a lenient split
    ``GET\\t/internal/x HTTP/1.1`` would yield ``HTTP/1.1`` as the target.
    NOTE(review): which forms the back-end actually tolerates is not visible
    here; rejecting all non-canonical request lines avoids depending on it.

    Args:
        head: Raw request head as bytes.

    Returns:
        The target as a str (decoded as latin-1), or None when the request
        line is missing or malformed.
    """
    try:
        request_line = head.split(CRLF, 1)[0].decode("latin-1")
    except (AttributeError, TypeError, ValueError):
        # Not a bytes-like head; treat it like any other unparsable input.
        return None

    tokens = request_line.split(" ")
    if len(tokens) not in (2, 3):
        return None
    for token in tokens:
        # An empty token means a leading, trailing or doubled space.
        if not token:
            return None
        if any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in token):
            return None
    return tokens[1]


def header_value(head, name):
    """Look up a header in a raw request head.

    Header names are compared case-insensitively, and surrounding whitespace is
    stripped from both name and value. The first line of *head* (the request
    line) is skipped, and lines without a colon are ignored.

    Args:
        head: Raw request head as bytes.
        name: Header name to look for, as a str.

    Returns:
        The value of the first matching header decoded as latin-1, or None
        when no header matches.
    """
    wanted = name.lower()
    header_lines = head.split(CRLF)[1:]
    for raw_line in header_lines:
        key, colon, value = raw_line.partition(b":")
        if not colon:
            continue
        if key.strip().lower().decode("latin-1") == wanted:
            return value.strip().decode("latin-1")
    return None


def send_simple_response(client, status, body):
    """Send a minimal plain-text HTTP/1.1 response, ignoring send failures.

    The response carries Content-Type, a Content-Length computed from the
    encoded body, and ``Connection: close``.

    Args:
        client: Object with a ``sendall`` method (the client socket).
        status: Status code and reason phrase, e.g. "403 Forbidden".
        body: Response body text.
    """
    header_lines = [
        "HTTP/1.1 %s" % (status,),
        "Content-Type: text/plain; charset=utf-8",
        "Content-Length: %d" % (len(body.encode()),),
        "Connection: close",
    ]
    text = "\r\n".join(header_lines) + "\r\n\r\n" + "%s" % (body,)
    message = text.encode()
    try:
        client.sendall(message)
    except Exception:
        # Best effort only: the peer may already be gone.
        pass


def pump(src, dst):
    """Relay bytes from *src* to *dst* until *src* reports end-of-stream.

    Any exception raised while receiving or sending ends the relay silently.
    In every case the write side of *dst* is then shut down; a failure of that
    shutdown is ignored as well.

    Args:
        src: Socket to receive from.
        dst: Socket to send to.
    """
    try:
        # recv() returning b"" marks end-of-stream and stops the iteration.
        for block in iter(lambda: src.recv(65536), b""):
            dst.sendall(block)
    except Exception:
        pass
    finally:
        try:
            # Half-close so the peer sees EOF on its read side.
            dst.shutdown(socket.SHUT_WR)
        except Exception:
            pass


def _policy_path(target):
    """Reduce a request target to a canonical absolute path for the deny check.

    Steps: keep only the path of an absolute-form target
    (``scheme://authority/path``), drop the query string and fragment, strip
    ``;param`` suffixes from each segment, percent-decode once, treat ``\\`` as
    ``/``, and resolve empty, ``.`` and ``..`` segments.

    NOTE(review): this approximates the normalization a back-end may apply
    before routing; the back-end's exact rules are not visible in this file.
    The result is only ever used to deny more requests, never fewer.
    """
    path = target
    scheme_end = path.find("://")
    if scheme_end > 0 and not path.startswith("/"):
        path_start = path.find("/", scheme_end + 3)
        path = path[path_start:] if path_start != -1 else "/"
    for delimiter in ("?", "#"):
        path = path.split(delimiter, 1)[0]
    without_params = "/".join(seg.split(";", 1)[0] for seg in path.split("/"))
    decoded = urllib.parse.unquote(without_params).replace("\\", "/")
    resolved = []
    for seg in decoded.split("/"):
        if seg in ("", "."):
            continue
        if seg == "..":
            if resolved:
                resolved.pop()
            continue
        resolved.append(seg)
    return "/" + "/".join(resolved)


def handle(client, addr):
    """Serve one client connection: policy-check the head, then relay bytes.

    Reads the request head from *client* and answers 400 itself when the head
    or the request line cannot be parsed, or 403 when the request target falls
    under /internal (checked both as sent and after normalization). Otherwise
    it opens a new back-end connection, forwards the head and any bytes already
    read past it unchanged, and relays bytes in both directions until both
    pump threads have finished.

    Any exception is logged and answered with a best-effort 502. The client
    socket is always closed before returning, and the back-end socket is
    closed on every path once it has been opened.

    Args:
        client: Connected client socket.
        addr: Peer address; used only in log messages.
    """
    client.settimeout(15)
    try:
        head, leftover = read_head(client)
        if head is None:
            send_simple_response(client, "400 Bad Request", "bad head\n")
            return

        target = parse_request_line_target(head)
        if target is None:
            send_simple_response(client, "400 Bad Request", "bad request line\n")
            return

        # ---- Authorization boundary enforced by the front end ----
        # The front end will not forward any *outer* request to /internal/*.
        # The normalized form is checked as well, so that absolute-form,
        # percent-encoded, dot-segment, duplicate-slash and ";param" spellings
        # of an /internal path are refused too.
        if target.startswith("/internal") or _policy_path(target).startswith("/internal"):
            log("DENY outer request to %s from %s" % (target, addr))
            send_simple_response(client, "403 Forbidden", "front-end: /internal is not exposed\n")
            return

        # NOTE(review): only the first request head on a client connection is
        # policy-checked. Everything after it is relayed unparsed, so a second
        # request pipelined on the same connection is not subject to the
        # /internal check. Confirm whether that is acceptable here.

        # Open a fresh back-end connection. The timeout passed here also stays
        # in effect for later operations on the socket, and the with-block
        # closes the socket even when forwarding fails part-way.
        with socket.create_connection((BACKEND_HOST, BACKEND_PORT), timeout=15) as backend:
            # Forward the head verbatim, then any already-buffered body bytes
            # verbatim.
            # NOTE: the proxy does not parse, validate, or re-chunk the body.
            # Raw CRLF inside chunk extensions is forwarded unchanged -- the
            # CVE precondition.
            backend.sendall(head)
            if leftover:
                backend.sendall(leftover)

            # Bidirectional byte pump: client body -> backend,
            # backend response -> client.
            t1 = threading.Thread(target=pump, args=(client, backend), daemon=True)
            t2 = threading.Thread(target=pump, args=(backend, client), daemon=True)
            t1.start()
            t2.start()
            t1.join()
            t2.join()
    except Exception as e:
        log("error handling %s: %r" % (addr, e))
        # send_simple_response ignores send failures itself.
        send_simple_response(client, "502 Bad Gateway", "proxy error\n")
    finally:
        try:
            client.close()
        except Exception:
            pass


def main():
    """Listen on LISTEN_HOST:LISTEN_PORT and serve clients forever.

    Each accepted connection is handed to ``handle`` on its own daemon thread.
    The function does not return.
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((LISTEN_HOST, LISTEN_PORT))
    listener.listen(128)

    endpoints = (LISTEN_HOST, LISTEN_PORT, BACKEND_HOST, BACKEND_PORT)
    log("listening on %s:%d -> backend %s:%d" % endpoints)

    while True:
        conn, peer = listener.accept()
        worker = threading.Thread(target=handle, args=(conn, peer), daemon=True)
        worker.start()


# Start the proxy only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
