# CVE-2024-47611 — XZ Utils Windows argument-injection / Unicode best-fit lab.
#
# A single Debian container that:
#   1. Cross-compiles xz.exe for native Windows (MinGW-w64) from the official
#      release tarballs at TWO pins:
#         - 5.6.2  -> VULNERABLE  (no UTF-8 application manifest)
#         - 5.6.3  -> PATCHED     (embeds activeCodePage=UTF-8 manifest)
#      The xz source is built unmodified (tarball extracted, ./configure, make).
#   2. Installs Wine to run those .exe files under a LEGACY code page (CP1252,
#      Windows-1252) so the Windows CRT "best-fit" argv conversion is in force.
#
# The container does NOT trigger the exploit. It only stands up the two binaries
# and the Wine runtime. The exploiter/verifier drive it via `docker exec`.

# Pinned to linux/amd64: the vulnerable/patched xz.exe are native x86_64 Windows
# PE binaries, and Wine must provide x86_64 Windows semantics to run them. On an
# arm64 host this runs under qemu user emulation (provided by Docker Desktop).
FROM --platform=linux/amd64 debian:bookworm-slim

# xz release versions to build. Defaults are the vulnerable/patched pins; they
# can be overridden at build time with --build-arg.
ARG VULN_VERSION=5.6.2
ARG PATCHED_VERSION=5.6.3

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this stage. Declared as ARG (not ENV) so it is exported to every RUN that
# follows but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# --- OS packages: MinGW-w64 cross toolchain, Wine (32/64), build deps --------
# Registers i386 as a foreign dpkg architecture before refreshing the package
# index, then installs everything in a single layer. The package list is kept
# in alphabetical order for easy diffing, and the apt index is deleted in the
# same layer so it does not persist in the image.
RUN dpkg --add-architecture i386 \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        binutils-mingw-w64-x86-64 \
        ca-certificates \
        curl \
        file \
        gcc \
        gcc-mingw-w64-x86-64 \
        gettext \
        libtool \
        locales \
        make \
        po4a \
        winbind \
        wine \
        wine32 \
        wine64 \
        xz-utils \
    && rm -rf /var/lib/apt/lists/*

# --- generate the legacy CP1252 locale -------------------------------------
# Compiles en_US.CP1252 so that a Windows-1252 locale exists inside the image.
# It is NOT the container's default locale: LANG/LC_ALL are set to C.UTF-8 in
# the Wine runtime configuration, and this locale is exported there as
# WINE_INIT_LOCALE, which (per that section) the entrypoint's one-time wineboot
# uses so Wine records ACP=1252 -- the legacy best-fit condition this CVE
# requires.
#
# localedef exits non-zero for warnings even when -c makes it write the locale,
# so its own status is tolerated. The follow-up check then fails the build if
# the locale is not actually usable, instead of silently shipping a lab that
# cannot reproduce the issue.
RUN (localedef -c -i en_US -f CP1252 en_US.CP1252 || true) \
    && test "$(LC_ALL=en_US.CP1252 locale charmap 2>/dev/null)" = "CP1252"

# --- cross-compile xz.exe at both pins, from unmodified release tarballs -----
# build-xz.sh is invoked once per release with the xz version as its first
# argument and a per-build directory under /opt as its second (presumably the
# output location -- confirm against build-xz.sh). VULN_VERSION is paired with
# /opt/xz-vuln and PATCHED_VERSION with /opt/xz-patched. `set -e` aborts the
# layer at the first failing command.
WORKDIR /build
COPY config/build-xz.sh /usr/local/bin/build-xz.sh
RUN set -e; \
    chmod +x /usr/local/bin/build-xz.sh; \
    /usr/local/bin/build-xz.sh "${VULN_VERSION}" /opt/xz-vuln; \
    /usr/local/bin/build-xz.sh "${PATCHED_VERSION}" /opt/xz-patched

# --- Wine runtime configuration ----------------------------------------------
# Two locales are in play here, each with its own job; keep them separate.
#
#  1. WINE_INIT_LOCALE=en_US.CP1252 -- prefix-initialisation locale. The
#     entrypoint's one-time wineboot runs under it so that Wine bakes ACP=1252
#     (Windows-1252) into the prefix registry. That ACP is what puts the legacy
#     best-fit mapping (U+2215 -> '/') in force when the binary's CRT converts
#     the command line to narrow argv.
#
#  2. LANG / LC_ALL=C.UTF-8 -- launch locale. This is the locale `wine` itself
#     runs under when the exploiter/verifier start xz.exe. It has to be UTF-8
#     so that Wine decodes the incoming Linux argv bytes correctly and the
#     Windows WIDE command line carries the real U+2215 codepoint. (Under
#     CP1252 the UTF-8 bytes would be mis-decoded before they ever reach the
#     wide cmdline.)
#
# Combined: a real U+2215 in the wide cmdline plus ACP=1252 best-fit in the
# narrow conversion yields the differential -- the vulnerable build rewrites it
# to '/', the patched build does not.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    WINE_INIT_LOCALE=en_US.CP1252 \
    WINEARCH=win64 \
    WINEDEBUG=-all \
    WINEPREFIX=/root/.wine

# Install the container entrypoint script and make it executable.
# NOTE(review): per the Wine runtime notes above, this script is expected to run
# the one-time wineboot under WINE_INIT_LOCALE; its contents are not visible
# from this file.
COPY config/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

# Exec-form ENTRYPOINT with a default CMD that simply idles, so the container
# stays up and the xz.exe binaries can be driven from outside via `docker exec`.
# NOTE(review): presumably entrypoint.sh finishes with `exec "$@"` so that the
# CMD actually runs -- confirm.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["sleep", "infinity"]
