Initial test framework proposal

2026-03-15 20:10:16 +01:00 · 2026-03-15 20:10:16 +01:00 · d242f523dd
parent f0a8f403e8
commit d242f523dd
6 changed files with 1387 additions and 0 deletions
--- a/src/test/issue3808/CMakeLists.txt
+++ b/src/test/issue3808/CMakeLists.txt
@ -0,0 +1,62 @@
+cmake_minimum_required(VERSION 3.15)
+project(issue3808 LANGUAGES CXX)
+
+if(MSVC)
+    add_link_options(/DEBUG:FULL /INCREMENTAL:NO /OPT:REF /OPT:ICF)
+endif()
+
+get_filename_component(REPO_ROOT "${CMAKE_CURRENT_LIST_DIR}/../../.." ABSOLUTE)
+set(PLUGINSDK_DIR "${REPO_ROOT}/release/pluginsdk")
+
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(X64DBG_ARCH x64)
+    set(X64DBG_PLUGIN_SUFFIX .dp64)
+else()
+    set(X64DBG_ARCH x32)
+    set(X64DBG_PLUGIN_SUFFIX .dp32)
+endif()
+
+set(X64DBG_DBG_LIB "")
+set(X64DBG_BRIDGE_LIB "")
+foreach(X64DBG_LIB_DIR
+    "${REPO_ROOT}/bin/${X64DBG_ARCH}"
+    "${REPO_ROOT}/bin/${X64DBG_ARCH}d"
+    "${PLUGINSDK_DIR}")
+    if(NOT X64DBG_DBG_LIB AND EXISTS "${X64DBG_LIB_DIR}/${X64DBG_ARCH}dbg.lib")
+        set(X64DBG_DBG_LIB "${X64DBG_LIB_DIR}/${X64DBG_ARCH}dbg.lib")
+    endif()
+    if(NOT X64DBG_BRIDGE_LIB AND EXISTS "${X64DBG_LIB_DIR}/${X64DBG_ARCH}bridge.lib")
+        set(X64DBG_BRIDGE_LIB "${X64DBG_LIB_DIR}/${X64DBG_ARCH}bridge.lib")
+    endif()
+endforeach()
+
+if(NOT X64DBG_DBG_LIB OR NOT X64DBG_BRIDGE_LIB)
+    message(FATAL_ERROR "Could not find x64dbg import libraries for ${X64DBG_ARCH}")
+endif()
+
+add_executable(Issue3808Target target.cpp)
+set_target_properties(Issue3808Target PROPERTIES
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED YES
+)
+
+target_compile_definitions(Issue3808Target PRIVATE WIN32_LEAN_AND_MEAN NOMINMAX)
+
+add_library(FlagRepro3808 SHARED flagrepro.cpp)
+target_include_directories(FlagRepro3808 PRIVATE
+    "${REPO_ROOT}/src/dbg"
+    "${REPO_ROOT}/src/bridge"
+    "${PLUGINSDK_DIR}"
+)
+target_link_libraries(FlagRepro3808 PRIVATE
+    "${X64DBG_DBG_LIB}"
+    "${X64DBG_BRIDGE_LIB}"
+)
+set_target_properties(FlagRepro3808 PROPERTIES
+    PREFIX ""
+    SUFFIX "${X64DBG_PLUGIN_SUFFIX}"
+    OUTPUT_NAME "FlagRepro3808"
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED YES
+)
+target_compile_definitions(FlagRepro3808 PRIVATE WIN32_LEAN_AND_MEAN NOMINMAX)
--- a/src/test/issue3808/README.md
+++ b/src/test/issue3808/README.md
@ -0,0 +1,92 @@
+# Issue #3808 reproduction (`Script::Flag::Set*`)
+
+This directory contains a minimal, headless reproduction for:
+
+- `gh issue view 3808`
+- https://github.com/x64dbg/x64dbg/issues/3808
+
+## Contents
+
+- `flagrepro.cpp` - minimal plugin command callback that exercises `Script::Flag::SetZF/SetCF`
+- `target.cpp` - tiny debuggee used by the reproduction
+- `run_repro.py` - minimal headless runner that creates a clean sandbox, loads only the repro plugin, runs the command, and parses the result
+- `CMakeLists.txt` - standalone build for the plugin and target
+
+## Why the runner creates a sandbox
+
+`headless.exe` auto-loads every plugin in its local `plugins` directory.
+In a normal x64dbg build tree that can pull in unrelated plugins, which makes a
+minimal repro noisy or unreliable.
+
+The runner copies a small headless runtime into a temporary directory and places
+only `FlagRepro3808.dp32/.dp64` in `plugins/`.
+
+## Build
+
+Example x64 build:
+
+```powershell
+cmake -S src/test/issue3808 -B src/test/issue3808/build64 -A x64
+cmake --build src/test/issue3808/build64 --config Release
+```
+
+Example x86 build:
+
+```powershell
+cmake -S src/test/issue3808 -B src/test/issue3808/build32 -A Win32
+cmake --build src/test/issue3808/build32 --config Release
+```
+
+## Run
+
+Inspect the issue with `gh` if needed:
+
+```powershell
+gh issue view 3808
+```
+
+Run the reproduction against an existing headless build:
+
+### x64
+
+```powershell
+py src/test/issue3808/run_repro.py `
+  --headless bin/x64/headless.exe `
+  --build-dir src/test/issue3808/build64 `
+  --config Release
+```
+
+### x86
+
+```powershell
+py src/test/issue3808/run_repro.py `
+  --headless bin/x32/headless.exe `
+  --build-dir src/test/issue3808/build32 `
+  --config Release
+```
+
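+Add `--keep-sandbox` if you want to keep the isolated headless directory and full log,
+or `--log-file <path>` if you only want to preserve the captured output.
+The sandbox is also kept automatically whenever the run fails or does not produce the expected result.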
+
+## Expected output
+
+When the bug is present, the runner prints:
+
+```text
+result:   REPRODUCED
+```
+
+The parsed result line includes:
+
+- `broken=1` when `Script::Flag::Set*` failed to apply the new CPU flag state
+- `exact_issue=1` when the setters also returned success, matching the issue report exactly
+
+`exact_issue` can vary by build; `broken=1` is the core regression the runner checks by default.
+
+If the bug is fixed, run with:
+
+```powershell
+py src/test/issue3808/run_repro.py ... --expect fixed
+```
+
+That mode expects the final state to become `ZF=0` and `CF=1`.
--- a/src/test/issue3808/flagrepro.cpp
+++ b/src/test/issue3808/flagrepro.cpp
@ -0,0 +1,139 @@
+#include <Windows.h>
+
+#include <cstring>
+
+#include "_plugins.h"
+#include "_scriptapi_flag.h"
+#include "_scriptapi_misc.h"
+#include "bridgemain.h"
+
+namespace
+{
+    int gPluginHandle = 0;
+
+    struct FlagSnapshot
+    {
+        bool regdumpOk = false;
+        REGDUMP regdump{};
+        bool zfExprOk = false;
+        duint zfExpr = 0;
+        bool cfExprOk = false;
+        duint cfExpr = 0;
+        bool zfApi = false;
+        bool cfApi = false;
+    };
+
+    // Captures the current ZF/CF state through three independent read paths:
+    // the register dump, the expression evaluator ("_ZF"/"_CF") and the
+    // Script::Flag getters, so the views can be compared with each other.
+    FlagSnapshot TakeSnapshot()
+    {
+        FlagSnapshot snapshot;
+        // NOTE(review): a REGDUMP buffer is handed over as REGDUMP_AVX512* together with
+        // sizeof(REGDUMP); presumably DbgGetRegDumpEx uses the size to limit what it writes - confirm.
+        snapshot.regdumpOk = DbgGetRegDumpEx(reinterpret_cast<REGDUMP_AVX512*>(&snapshot.regdump), sizeof(snapshot.regdump));
+        snapshot.zfExprOk = Script::Misc::ParseExpression("_ZF", &snapshot.zfExpr);
+        snapshot.cfExprOk = Script::Misc::ParseExpression("_CF", &snapshot.cfExpr);
+        snapshot.zfApi = Script::Flag::GetZF();
+        snapshot.cfApi = Script::Flag::GetCF();
+        return snapshot;
+    }
+
+    void LogSnapshot(const char* tag, const FlagSnapshot & snapshot)
+    {
+        _plugin_logprintf(
+            "[issue3808] %s regdump_ok=%d eflags=0x%llX reg_zf=%d reg_cf=%d expr_zf_ok=%d expr_zf=%llu expr_cf_ok=%d expr_cf=%llu api_zf=%d api_cf=%d\n",
+            tag,
+            snapshot.regdumpOk ? 1 : 0,
+            snapshot.regdumpOk ? static_cast<unsigned long long>(snapshot.regdump.regcontext.eflags) : 0ULL,
+            snapshot.regdumpOk && snapshot.regdump.flags.z ? 1 : 0,
+            snapshot.regdumpOk && snapshot.regdump.flags.c ? 1 : 0,
+            snapshot.zfExprOk ? 1 : 0,
+            static_cast<unsigned long long>(snapshot.zfExpr),
+            snapshot.cfExprOk ? 1 : 0,
+            static_cast<unsigned long long>(snapshot.cfExpr),
+            snapshot.zfApi ? 1 : 0,
+            snapshot.cfApi ? 1 : 0
+        );
+    }
+
+    bool IsPrimed(const FlagSnapshot & snapshot)
+    {
+        return snapshot.regdumpOk
+               && snapshot.zfExprOk
+               && snapshot.cfExprOk
+               && snapshot.regdump.flags.z
+               && !snapshot.regdump.flags.c
+               && snapshot.zfExpr == 1
+               && snapshot.cfExpr == 0
+               && snapshot.zfApi
+               && !snapshot.cfApi;
+    }
+
+    // Returns true when the snapshot taken after SetZF(false)/SetCF(true) still
+    // shows the primed ZF=1 / CF=0 state on every read path, i.e. the setters
+    // changed nothing. That is exactly the condition IsPrimed checks, so the
+    // predicate is shared instead of being duplicated.
+    bool IsBrokenAfterSetters(const FlagSnapshot & snapshot)
+    {
+        return IsPrimed(snapshot);
+    }
+
+    bool cbFlagRepro3808(int, char**)
+    {
+        _plugin_logputs("[issue3808] Priming ZF=1 CF=0 through the command path");
+        if(!DbgCmdExecDirect("_ZF=1") || !DbgCmdExecDirect("_CF=0"))
+        {
+            _plugin_logputs("[issue3808] ERROR failed to prime flags with command path");
+            return false;
+        }
+
+        const auto primed = TakeSnapshot();
+        LogSnapshot("after_command_path", primed);
+
+        const bool setZfOk = Script::Flag::SetZF(false);
+        const auto afterSetZf = TakeSnapshot();
+        LogSnapshot("after_SetZF_false", afterSetZf);
+
+        const bool setCfOk = Script::Flag::SetCF(true);
+        const auto finalSnapshot = TakeSnapshot();
+        LogSnapshot("after_SetCF_true", finalSnapshot);
+
+        const bool primedOk = IsPrimed(primed);
+        const bool broken = primedOk && IsBrokenAfterSetters(finalSnapshot);
+        const bool exactIssue = broken && setZfOk && setCfOk;
+
+        _plugin_logprintf(
+            "[issue3808] RESULT broken=%d exact_issue=%d primed=%d setzf_ok=%d setcf_ok=%d final_reg_zf=%d final_reg_cf=%d final_expr_zf=%llu final_expr_cf=%llu final_api_zf=%d final_api_cf=%d\n",
+            broken ? 1 : 0,
+            exactIssue ? 1 : 0,
+            primedOk ? 1 : 0,
+            setZfOk ? 1 : 0,
+            setCfOk ? 1 : 0,
+            finalSnapshot.regdumpOk && finalSnapshot.regdump.flags.z ? 1 : 0,
+            finalSnapshot.regdumpOk && finalSnapshot.regdump.flags.c ? 1 : 0,
+            static_cast<unsigned long long>(finalSnapshot.zfExpr),
+            static_cast<unsigned long long>(finalSnapshot.cfExpr),
+            finalSnapshot.zfApi ? 1 : 0,
+            finalSnapshot.cfApi ? 1 : 0
+        );
+        return true;
+    }
+}
+
+// Plugin entry point: fills in the plugin metadata and registers the
+// "flagrepro3808" command. Returns false when the registration fails, so the
+// problem surfaces at load time instead of as a missing command later on.
+extern "C" __declspec(dllexport) bool pluginit(PLUG_INITSTRUCT* initStruct)
+{
+    initStruct->pluginVersion = 1;
+    initStruct->sdkVersion = PLUG_SDKVERSION;
+    strncpy_s(initStruct->pluginName, sizeof(initStruct->pluginName), "FlagRepro3808", _TRUNCATE);
+    gPluginHandle = initStruct->pluginHandle;
+    if(!_plugin_registercommand(gPluginHandle, "flagrepro3808", cbFlagRepro3808, true))
+    {
+        _plugin_logputs("[issue3808] ERROR failed to register the flagrepro3808 command");
+        return false;
+    }
+    return true;
+}
+
+extern "C" __declspec(dllexport) void plugstop()
+{
+}
+
+extern "C" __declspec(dllexport) void plugsetup(PLUG_SETUPSTRUCT*)
+{
+}
--- a/src/test/issue3808/run_repro.py
+++ b/src/test/issue3808/run_repro.py
@ -0,0 +1,247 @@
+from __future__ import annotations
+
+import argparse
+import os
+import queue
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+from pathlib import Path
+from typing import Dict
+
+PLUGIN_COMMAND = "flagrepro3808"
+PLUGIN_BASENAME = "FlagRepro3808"
+TARGET_BASENAME = "Issue3808Target.exe"
+RUNTIME_FILES = [
+    "headless.exe",
+    "x64bridge.dll",
+    "x64dbg.dll",
+    "DeviceNameResolver.dll",
+    "LLVMDemangle.dll",
+    "XEDParse.dll",
+    "dbghelp.dll",
+    "jansson.dll",
+    "lz4.dll",
+    "asmjit.dll",
+    "TitanEngine.dll",
+    "loaddll.exe",
+    "msvcp140.dll",
+    "vcruntime140.dll",
+    "vcruntime140_1.dll",
+]
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the issue #3808 headless reproduction.")
+    parser.add_argument("--headless", required=True, help="Path to headless.exe from an x64dbg build.")
+    parser.add_argument("--build-dir", help="Build directory for src/test/issue3808.")
+    parser.add_argument("--plugin", help="Path to FlagRepro3808.dp32/.dp64.")
+    parser.add_argument("--target", help="Path to Issue3808Target.exe.")
+    parser.add_argument("--config", default="Release", help="Build configuration for multi-config generators. Default: Release")
+    parser.add_argument("--sandbox", help="Optional sandbox directory. Defaults to a temporary directory.")
+    parser.add_argument("--log-file", help="Where to write the captured headless log.")
+    parser.add_argument("--timeout", type=int, default=90, help="Timeout in seconds. Default: 90")
+    parser.add_argument("--expect", choices=["bug", "fixed"], default="bug", help="Expected outcome. Default: bug")
+    parser.add_argument("--keep-sandbox", action="store_true", help="Do not delete the sandbox directory.")
+    return parser.parse_args()
+
+
+def resolve_plugin(build_dir: Path, config: str) -> Path:
+    candidates = []
+    for base in (build_dir / config, build_dir):
+        candidates.extend(sorted(base.glob(f"{PLUGIN_BASENAME}.dp*")))
+    if len(candidates) != 1:
+        raise FileNotFoundError(f"Expected exactly one {PLUGIN_BASENAME}.dp32/.dp64 under {build_dir}, found: {candidates}")
+    return candidates[0]
+
+
+def resolve_target(build_dir: Path, config: str) -> Path:
+    candidates = [build_dir / config / TARGET_BASENAME, build_dir / TARGET_BASENAME]
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+    raise FileNotFoundError(f"Could not find {TARGET_BASENAME} under {build_dir}")
+
+
+def ensure_file(path: Path, description: str) -> Path:
+    if not path.exists() or not path.is_file():
+        raise FileNotFoundError(f"Missing {description}: {path}")
+    return path.resolve()
+
+
+def prepare_sandbox(headless: Path, plugin: Path, sandbox: Path) -> Path:
+    """Create a clean sandbox directory containing the headless runtime and only the repro plugin.
+
+    Args:
+        headless: Path to the source headless.exe; its directory supplies the runtime files.
+        plugin: Path to the repro plugin, copied into ``<sandbox>/plugins``.
+        sandbox: Directory to (re)create. Any existing content is removed first.
+
+    Returns:
+        Path to the headless.exe copy inside the sandbox.
+
+    Raises:
+        FileNotFoundError: A required runtime file is missing next to ``headless``.
+    """
+    source_dir = headless.parent
+    # RUNTIME_FILES spells the debugger DLLs with the x64 prefix. A 32-bit build
+    # directory ships x32dbg.dll/x32bridge.dll instead and has no
+    # vcruntime140_1.dll, so translate the list when such a directory is detected.
+    is_x32 = (source_dir / "x32dbg.dll").is_file() and not (source_dir / "x64dbg.dll").is_file()
+
+    if sandbox.exists():
+        shutil.rmtree(sandbox, ignore_errors=True)
+    sandbox.mkdir(parents=True, exist_ok=True)
+    (sandbox / "plugins").mkdir(exist_ok=True)
+
+    for filename in RUNTIME_FILES:
+        if is_x32:
+            if filename == "vcruntime140_1.dll":
+                continue
+            if filename.startswith("x64"):
+                filename = "x32" + filename[len("x64"):]
+        source = source_dir / filename
+        ensure_file(source, f"runtime file {filename}")
+        shutil.copy2(source, sandbox / filename)
+
+    shutil.copy2(plugin, sandbox / "plugins" / plugin.name)
+    return sandbox / "headless.exe"
+
+
+def run_headless(headless: Path, target: Path, timeout: int) -> subprocess.CompletedProcess[str]:
+    """Run headless against the target, execute the repro command and capture the combined output.
+
+    As soon as the "[issue3808] RESULT" line is seen, "exit" is written to the
+    child's stdin so headless can shut down on its own.
+
+    Args:
+        headless: Path to the sandboxed headless.exe; its directory is used as cwd.
+        target: Path to the debuggee executable.
+        timeout: Overall time budget in seconds.
+
+    Returns:
+        A CompletedProcess whose stdout holds the captured output (stderr is merged into it).
+
+    Raises:
+        RuntimeError: The time budget ran out before the output stream ended.
+    """
+    command = [str(headless), str(target), "-c", PLUGIN_COMMAND]
+    process = subprocess.Popen(
+        command,
+        cwd=headless.parent,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        bufsize=1,
+    )
+
+    output_lines = []
+    line_queue: queue.Queue[str | None] = queue.Queue()
+
+    def reader() -> None:
+        assert process.stdout is not None
+        for line in process.stdout:
+            line_queue.put(line)
+        line_queue.put(None)
+
+    def drain_queue() -> None:
+        # Collect whatever the reader queued after the main loop stopped polling.
+        while True:
+            try:
+                pending = line_queue.get_nowait()
+            except queue.Empty:
+                return
+            if pending is not None:
+                output_lines.append(pending)
+
+    thread = threading.Thread(target=reader, daemon=True)
+    thread.start()
+
+    deadline = time.monotonic() + timeout
+    saw_result = False
+
+    while True:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            process.kill()
+            thread.join(timeout=1)
+            drain_queue()
+            stdout = "".join(output_lines)
+            raise RuntimeError(f"Timed out after {timeout}s running: {' '.join(command)}\n\n{stdout}")
+
+        try:
+            line = line_queue.get(timeout=min(0.5, remaining))
+        except queue.Empty:
+            # The pipe may stay open after headless exits, so EOF alone is not
+            # a reliable end-of-run signal; poll the process as well.
+            if process.poll() is not None:
+                break
+            continue
+
+        if line is None:
+            break
+
+        output_lines.append(line)
+        if "[issue3808] RESULT" in line and not saw_result:
+            saw_result = True
+            if process.stdin is not None:
+                try:
+                    process.stdin.write("exit\n")
+                    process.stdin.flush()
+                    process.stdin.close()
+                except OSError:
+                    # headless already exited and closed its end of the pipe.
+                    # The result line is captured, so this must not fail the run.
+                    pass
+
+    try:
+        returncode = process.wait(timeout=max(1, int(deadline - time.monotonic())))
+    except subprocess.TimeoutExpired:
+        process.kill()
+        returncode = process.wait(timeout=5)
+
+    thread.join(timeout=1)
+    drain_queue()
+    stdout = "".join(output_lines)
+    return subprocess.CompletedProcess(command, returncode, stdout, None)
+
+
+def parse_result(stdout: str) -> Dict[str, str]:
+    """Return the key=value pairs of the last "[issue3808] RESULT" line in ``stdout``.
+
+    Raises:
+        RuntimeError: No RESULT line is present in the output.
+    """
+    matching = [text for text in stdout.splitlines() if "[issue3808] RESULT" in text]
+    if not matching:
+        raise RuntimeError("Did not find an [issue3808] RESULT line in headless output.")
+    return dict(re.findall(r"(\w+)=([^\s]+)", matching[-1]))
+
+
+def main() -> int:
+    if os.name != "nt":
+        print("This reproduction is Windows-only.", file=sys.stderr)
+        return 2
+
+    args = parse_args()
+    headless = ensure_file(Path(args.headless), "headless executable")
+
+    if args.plugin:
+        plugin = ensure_file(Path(args.plugin), "repro plugin")
+    else:
+        if not args.build_dir:
+            raise SystemExit("Either --plugin or --build-dir is required.")
+        plugin = ensure_file(resolve_plugin(Path(args.build_dir), args.config), "repro plugin")
+
+    if args.target:
+        target = ensure_file(Path(args.target), "repro target")
+    else:
+        if not args.build_dir:
+            raise SystemExit("Either --target or --build-dir is required.")
+        target = ensure_file(resolve_target(Path(args.build_dir), args.config), "repro target")
+
+    sandbox_path = Path(args.sandbox) if args.sandbox else Path(tempfile.mkdtemp(prefix="issue3808-headless-"))
+    sandbox_created = not args.sandbox
+    log_file = Path(args.log_file) if args.log_file else sandbox_path / "issue3808.log"
+    cleanup_sandbox = sandbox_created and not args.keep_sandbox
+
+    try:
+        sandbox_headless = prepare_sandbox(headless, plugin, sandbox_path)
+        completed = run_headless(sandbox_headless, target, args.timeout)
+        log_file.write_text(completed.stdout, encoding="utf-8", errors="replace")
+
+        result = parse_result(completed.stdout)
+        broken = result.get("broken") == "1"
+        fixed = (
+            not broken
+            and result.get("final_expr_zf") == "0"
+            and result.get("final_expr_cf") == "1"
+            and result.get("final_api_zf") == "0"
+            and result.get("final_api_cf") == "1"
+        )
+
+        if args.expect == "bug":
+            success = broken
+            summary = "REPRODUCED" if success else "NOT REPRODUCED"
+        else:
+            success = fixed
+            summary = "FIX VERIFIED" if success else "FIX NOT VERIFIED"
+
+        if not success:
+            cleanup_sandbox = False
+
+        sandbox_display = str(sandbox_path)
+        log_display = str(log_file)
+        if cleanup_sandbox:
+            sandbox_display += " (temporary; use --keep-sandbox to preserve)"
+            log_display += " (temporary; use --keep-sandbox or --log-file to preserve)"
+
+        print(f"headless: {headless}")
+        print(f"plugin:   {plugin}")
+        print(f"target:   {target}")
+        print(f"sandbox:  {sandbox_display}")
+        print(f"log:      {log_display}")
+        print(f"result:   {summary}")
+        print(f"details:  {result}")
+
+        if completed.returncode != 0:
+            print(f"warning: headless exited with code {completed.returncode}", file=sys.stderr)
+
+        return 0 if success else 1
+    except Exception:
+        cleanup_sandbox = False
+        raise
+    finally:
+        if cleanup_sandbox and sandbox_path.exists():
+            shutil.rmtree(sandbox_path, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/src/test/issue3808/target.cpp
+++ b/src/test/issue3808/target.cpp
@ -0,0 +1,11 @@
+#include <Windows.h>
+
+#include <cstdio>
+
+int main()
+{
+    std::puts("issue3808 target started");
+    std::fflush(stdout);
+    Sleep(60000);
+    return 0;
+}
--- a/test-framework-proposal.md
+++ b/test-framework-proposal.md
@ -0,0 +1,836 @@
+# Test framework proposal
+
+## Summary
+
+Create a small debugger test framework around:
+
+- `src/tests/...` in the source tree
+- `bin/x32/tests/...` and `bin/x64/tests/...` in the runtime tree
+- `headless.exe` as the default per-test host
+- a small Python orchestrator that discovers tests and launches one process per test
+- self-asserting tests through:
+  - a script command: `testassert`
+  - an internal test-plugin API: `DbgTestAssert(...)`
+
+No manifests. No extra test metadata files. No explicit “pass” command.
+
+The Python runner only orchestrates discovery and process launching.
+The actual test is run by x64dbg/headless itself.
+
+---
+
+## Goals
+
+- Headless is the default automated host.
+- The same test script should also be runnable in the GUI.
+- Tests should be isolated from the user’s normal settings/plugins.
+- Tests should be discoverable from folder structure alone.
+- Script tests, plugin tests, and mixed tests should all use the same model.
+
+## Non-goals
+
+- No manifest system.
+- No build orchestration in the Python runner.
+- No “reusable test plugin” framework.
+- No flush command.
+
+---
+
+## Source tree
+
+New automated test tree:
+
+- `src/tests`
+
+A directory under `src/tests/...` is treated as an automated test if it contains:
+
+- `test.txt`
+
+Optional files:
+
+- `README.md`
+- `check.py`
+
+If `test.txt` is absent, the directory is not auto-discovered.
+
+The old `src/test` tree can remain temporarily for legacy/manual content.
+
+---
+
+## Build integration
+
+Add `src/tests` as a subdirectory of the main build, and build test artifacts into a known runtime layout.
+
+### Proposed build-side structure
+
+- add `src/tests/cmake.toml`
+- add `src/tests` as a subdir from root `cmake.toml`
+- build/copy test artifacts into:
+  - `bin/x32/tests/...`
+  - `bin/x64/tests/...`
+
+The Python runner still does **not** build anything.
+It only assumes the runtime tree already exists.
+
+---
+
+## Source tree -> binary tree mapping
+
+Let `<rel>` be the relative path of a test directory under `src/tests`.
+
+Examples:
+
+- `src/tests/issue3808` -> `<rel> = issue3808`
+- `src/tests/breakpoints/random_dll` -> `<rel> = breakpoints/random_dll`
+
+### Runtime mapping
+
+For each test `<rel>`:
+
+- main debuggee convention:
+  - `bin/<arch>/tests/<rel>.exe`
+- per-test runtime directory:
+  - `bin/<arch>/tests/<rel>/`
+- copied script file:
+  - `bin/<arch>/tests/<rel>/test.txt`
+- optional in-tree test plugins:
+  - `bin/<arch>/tests/<rel>/*.dp32` or `*.dp64`
+- optional extra runtime assets:
+  - `bin/<arch>/tests/<rel>/...`
+
+### Example
+
+Source:
+
+```text
+src/tests/issue3808/
+  test.txt
+  README.md
+  target.cpp
+  flagrepro.cpp
+```
+
+Runtime:
+
+```text
+bin/x64/tests/
+  issue3808.exe
+  issue3808/
+    test.txt
+    FlagRepro3808.dp64
+```
+
+This allows the test script to simply say:
+
+```text
+init tests/issue3808.exe
+```
+
+and load/use test-local files from:
+
+```text
+tests/issue3808/...
+```
+
+---
+
+## Python orchestrator
+
+Suggested entry point:
+
+- `src/tests/run.py`
+
+Suggested commands:
+
+```powershell
+py src/tests/run.py list --arch x64
+py src/tests/run.py run issue3808 --arch x64
+py src/tests/run.py run issue3808 --arch x64 --host gui
+py src/tests/run.py run-all --arch x64
+```
+
+### Responsibilities
+
+For each discovered test `<rel>`, the runner should:
+
+1. locate `src/tests/<rel>/test.txt`
+2. locate `bin/<arch>/tests/<rel>/test.txt`
+3. locate test-local plugins in `bin/<arch>/tests/<rel>/*.dpXX`
+4. create an isolated userdir for the run unless one is explicitly supplied
+5. choose a log file path for the run
+6. launch one debugger process for the test
+7. wait for completion / timeout
+8. decide pass/fail
+9. preserve the userdir/log on failure
+
+### Important non-responsibility
+
+The runner should **not** drive the test interactively over stdin.
+
+The test should be driven by debugger startup arguments, especially:
+
+- `-c`
+- `-cf`
+- `-plugin`
+- `-testing`
+
+So the debugger host itself is the thing running the individual test.
+
+---
+
+## Canonical test launch model
+
+The canonical automated launch should be something like:
+
+```powershell
+bin/x64/headless.exe `
+  -testing `
+  -userdir <temp-userdir> `
+  -plugin tests/issue3808/FlagRepro3808.dp64 `
+  -c "RedirectLog \"<logfile>\"" `
+  -cf tests/issue3808/test.txt
+```
+
+The GUI/manual equivalent should be:
+
+```powershell
+bin/x64/x64dbg.exe `
+  -testing `
+  -userdir <temp-userdir> `
+  -plugin tests/issue3808/FlagRepro3808.dp64 `
+  -c "RedirectLog \"<logfile>\"" `
+  -cf tests/issue3808/test.txt
+```
+
+### Log redirection contract
+
+The log redirection story must be explicit.
+
+The runner should always provide a log file by passing:
+
+```text
+-c "RedirectLog \"<logfile>\""
+```
+
+before `-cf`.
+
+That gives us one canonical log source for both:
+
+- headless
+- GUI
+
+and it captures:
+
+- `log ...` output from scripts
+- plugin log output
+- assertion failure lines
+- normal debugger diagnostics
+
+The runner can still capture stdout/stderr, but the redirected debugger log should be the primary test artifact.
+
+---
+
+## What `-cf` needs in testing mode
+
+This is the key behavior that needs implementation support.
+
+### Current issue
+
+`-cf` currently ends up enqueueing `scriptexec` through the normal debugger command path.
+That is useful because it preserves command ordering, but by itself it is not enough for one-shot testing:
+
+- the host still needs to stay alive while the script runs
+- headless still needs a clean automatic shutdown point after the script finishes
+- GUI should keep its normal message pump behavior
+
+### Important observation
+
+The command queue ordering is actually useful here.
+
+Once `scriptexec` begins executing, the command loop is blocked until `ScriptExecAwait(...)` completes.
+That means any command queued **after** `scriptexec` will naturally run only after the test script has finished (or aborted).
+
+So we do **not** need to make the whole startup path strictly synchronous in a way that fights the GUI/message-pump model.
+
+### Proposed `-testing` semantics
+
+Introduce a startup flag:
+
+- `-testing`
+
+This is the dedicated test-run mode.
+
+In `-testing` mode:
+
+1. default plugin autoload is disabled
+2. test assertion/failure state is enabled
+3. after processing the normal startup arguments, if `-cf` was supplied, x64dbg automatically enqueues a final test command after `scriptexec`
+4. headless uses that final command as the graceful one-shot shutdown point
+
+### Proposed final queued command
+
+Add a single debugger command for test finalization, for example:
+
+- `testfinalize`
+
+The runner does **not** need to add this manually if `-testing` does it automatically after `-cf`.
+
+That command ordering would effectively become:
+
+1. `RedirectLog ...`
+2. `scriptexec "tests/.../test.txt"`
+3. `testfinalize`
+
+Because `scriptexec` blocks until the script is done, `testfinalize` runs at exactly the right time.
+
+### Why this is a better fit
+
+This preserves the existing queued execution model and fits naturally with the GUI message pump.
+
+---
+
+## Startup flags needed for the framework
+
+### 1. `-testing`
+
+Dedicated test-run mode.
+
+Recommended semantics:
+
+- disables normal plugin autoload
+- enables test result tracking/assertion counting
+- if `-cf` is supplied, automatically queues a final `testfinalize` command after the script command
+- enables headless auto-exit from `testfinalize`
+- if no `-userdir` is supplied, may create a temporary one automatically for manual/direct use
+
+### 2. `-userdir <abs-path>`
+
+Explicit isolated userdir.
+
+Purpose:
+
+- test settings/database isolation
+- reproducible multi-run scenarios when needed
+
+The Python runner will usually create the temp userdir itself and pass it explicitly.
+
+### 3. `-plugin <path>` (repeatable)
+
+Preload test-local plugins before the script/debuggee starts.
+
+Requirements:
+
+- repeatable
+- load order is CLI order
+- accepts direct `.dp32/.dp64` paths
+
+Optional convenience:
+
+- if a directory is passed, resolve only the conventional single plugin path:
+  - `C:\myplugin` -> `C:\myplugin\myplugin.dp64` / `.dp32`
+- **not** “load all plugins in that directory”
+
+---
+
+## Plugin loading by path
+
+`plugload` should also be extended to accept direct plugin file paths.
+
+Suggested behavior:
+
+- existing name-based behavior stays
+- if the argument looks like a path or ends with `.dp32/.dp64`, load by exact path
+
+This is useful for both manual debugging and tests.
+
+---
+
+## Assertions
+
+We only need an explicit **failure** mechanism.
+Success is the happy path.
+
+### Script command: `testassert`
+
+Syntax:
+
+```text
+testassert expression
+testassert expression, formatstring
+```
+
+Because x64dbg commands are comma-separated, this fits naturally.
+
+Semantics:
+
+- increments the global test assertion count
+- if the expression is true: continue
+- if the expression is false:
+  - emit a standardized failure line to the redirected debugger log
+  - mark the test as failed
+  - abort the current script gracefully
+
+Suggested log prefix:
+
+```text
+[x64dbg-test] ASSERT FAIL ...
+```
+
+### Command: `testfinalize`
+
+This is the framework-owned finalization command.
+
+Tests do not need to write it manually when `-testing` auto-appends it after `-cf`.
+
+Responsibilities:
+
+- evaluate the final test state
+- fail the run if zero assertions executed
+- emit one standardized final summary line to the redirected log
+- in headless, trigger graceful host shutdown after logging the summary
+- in GUI, do **not** auto-close by default
+
+Suggested final log line (exactly one of the following, depending on the outcome):
+
+```text
+[x64dbg-test] FINAL status=pass asserts=3
+[x64dbg-test] FINAL status=fail asserts=0 reason=no_asserts
+[x64dbg-test] FINAL status=fail asserts=2 reason=assert_failed
+```
+
+This gives the runner a simple, deterministic thing to parse.
+
+### Internal plugin API: `DbgTestAssert(...)`
+
+Needed for in-tree test plugins.
+
+Suggested shape:
+
+```cpp
+bool DbgTestAssert(bool condition, const char* fmt = nullptr, ...);
+```
+
+Semantics:
+
+- increments the same global assertion count used by `testassert`
+- if `condition` is true:
+  - return `true`
+- if `condition` is false:
+  - emit the same standardized failure line
+  - mark the test as failed
+  - request graceful test termination
+  - return `false`
+
+### Why `DbgTestAssert` is needed
+
+A plugin command or callback should not have to reimplement test failure plumbing itself.
+It should be able to do:
+
+```cpp
+if(!DbgTestAssert(hitCount == 2, "expected 2 hits, got %d", hitCount))
+    return false;
+```
+
+---
+
+## Graceful termination on assert failure
+
+This is the tricky part and should be designed explicitly.
+
+### Required shared test state
+
+When `-testing` is active, maintain per-process test state such as:
+
+- assertion count
+- failure flag
+- optional first failure message / failure count
+
+### On `testassert` failure
+
+The script-side command should:
+
+1. increment assertion count
+2. set failure flag
+3. log the standardized failure line
+4. abort the current script gracefully
+
+Existing `DbgScriptAbort()` / script abort plumbing should be reused for this.
+After the script returns, the already-queued `testfinalize` command will run and complete shutdown/final result handling.
+
+### On `DbgTestAssert` failure
+
+The plugin-side API should:
+
+1. increment assertion count
+2. set failure flag
+3. log the same failure line
+4. request script abort if a script is active
+5. if needed, request a graceful stop of the debuggee so execution unwinds to a stable state
+
+The important part is that `DbgTestAssert(...)` should not try to tear down the whole host immediately.
+It should mark failure and trigger graceful unwinding, then let `testfinalize` perform the final completion logic.
+
+A reasonable implementation path is to reuse existing script abort / stop-debug mechanisms rather than inventing a separate test-kill path.
+
+### Finalization flow
+
+The intended ordered flow in `-testing` mode is:
+
+1. `RedirectLog ...`
+2. `scriptexec ".../test.txt"`
+3. `testfinalize`
+
+If the script aborts or fails, `testfinalize` should still run because it is a separate queued command.
+That is important because it centralizes:
+
+- zero-assert detection
+- final pass/fail summary logging
+- headless auto-exit
+
+---
+
+## Tests must actually assert something
+
+For safety, a test should fail if **no assertions were executed at all**.
+
+That means:
+
+- if neither `testassert` nor `DbgTestAssert` was ever called during the run,
+- the run is considered invalid/failing
+
+Reason:
+
+- a script that only launches a target and exits has not verified anything
+- “green by doing nothing” is dangerous
+
+This should be part of the final test result evaluation in `-testing` mode.
+
+---
+
+## Settings manipulation
+
+Keep this minimal.
+
+### Command: `settingset`
+
+Syntax:
+
+```text
+settingset section, key, value
+settingset section, key
+```
+
+Semantics:
+
+- with `value`: set/update the setting
+- without `value`: unset/remove the key
+
+Behavior:
+
+- call `BridgeSettingSet(...)`
+- call `DbgSettingsUpdated()` after a successful change
+
+Examples:
+
+```text
+settingset Events, SystemBreakpoint, 1
+settingset Engine, InitializeScript, ""
+settingset Gui, SomeTransientKey
+```
+
+### No flush command
+
+Drop it entirely.
+Not needed.
+
+---
+
+## Headless testing behavior
+
+In `-testing` mode, `headless.exe` should act as a one-shot host.
+
+Recommended behavior:
+
+1. parse testing-related startup flags before full startup
+2. initialize with isolated userdir
+3. disable default plugin autoload
+4. preload explicit `-plugin` entries
+5. queue startup commands normally (`-c`, then `-cf`)
+6. if `-cf` was supplied, automatically queue `testfinalize` after it
+7. let the normal command/script machinery run
+8. when `testfinalize` executes, emit the final summary line and exit automatically
+
+### How headless should exit
+
+`testfinalize` should not hard-kill the process.
+
+Preferred behavior:
+
+- `testfinalize` logs the final `[x64dbg-test] FINAL ...` line
+- if running under the headless host, it requests graceful application shutdown
+- headless then follows its normal clean shutdown path
+
+A reasonable implementation is for headless to map the existing close-application path onto its normal shutdown request.
+That keeps finalization host-aware without inventing a second shutdown mechanism just for tests.
+
+### Suggested final outcome rules in headless testing mode
+
+A test passes only if:
+
+- the process did not crash or time out
+- no assertion failed
+- at least one assertion executed
+- the startup test script completed successfully
+
+A test fails if:
+
+- any assertion failed
+- zero assertions executed
+- script execution failed
+- the process timed out or crashed
+
+Optionally, headless testing mode can later expose this via dedicated exit codes.
+
+---
+
+## GUI testing behavior
+
+GUI should support the same startup contract:
+
+- `-testing`
+- `-userdir`
+- `-plugin`
+- `-c "RedirectLog ..."`
+- `-cf test.txt`
+
+Initial purpose:
+
+- launch the same prepared test scenario for manual inspection/reproduction
+
+Phase 1 does **not** need unattended GUI auto-exit.
+
+In GUI, `testfinalize` should still emit the final summary line, but it should not close the application by default.
+That way:
+
+- the same test script runs
+- the same log/result plumbing is active
+- the user can inspect the GUI state after the scripted setup finishes
+
+---
+
+## Optional fallback checker
+
+Keep optional fallback Python checking.
+
+Convention:
+
+- `src/tests/<rel>/check.py`
+
+Use only when the final validation is awkward to express with `testassert` / `DbgTestAssert`.
+
+The runner can invoke it with the redirected log path and userdir.
+
+---
+
+## Example: `issue3808`
+
+### Source
+
+```text
+src/tests/issue3808/
+  test.txt
+  README.md
+  target.cpp
+  flagrepro.cpp
+```
+
+### Runtime
+
+```text
+bin/x64/tests/
+  issue3808.exe
+  issue3808/
+    test.txt
+    FlagRepro3808.dp64
+```
+
+### `test.txt`
+
+```text
+init tests/issue3808.exe
+flagrepro3808
+```
+
+### Automated launch
+
+```text
+headless.exe \
+  -testing \
+  -userdir <tmp-userdir> \
+  -plugin tests/issue3808/FlagRepro3808.dp64 \
+  -c "RedirectLog \"<logfile>\"" \
+  -cf tests/issue3808/test.txt
+```
+
+The plugin performs the actual assertions via `DbgTestAssert(...)`.
+
+---
+
+## Exact implementation responsibilities
+
+### 1. Build/layout
+
+- add `src/tests/cmake.toml`
+- add `src/tests` from the root `cmake.toml`
+- standardize outputs into:
+  - `bin/<arch>/tests/<rel>.exe`
+  - `bin/<arch>/tests/<rel>/...`
+
+### 2. Startup argument handling
+
+Likely in/around the existing startup argument parsing path:
+
+- add `-testing`
+- add `-userdir`
+- add repeatable `-plugin`
+- when `-testing` and `-cf` are present:
+  - queue `scriptexec ...`
+  - then queue `testfinalize`
+- preserve normal command ordering for `-c` before `-cf`
+
+### 3. Shared test-state plumbing in the debugger
+
+Add per-process test state, enabled by `-testing`, including at least:
+
+- assertion count
+- failure flag
+- optional failure reason / first failure message
+- optional script completion flag if needed
+
+This state should be shared by:
+
+- `testassert`
+- `DbgTestAssert(...)`
+- `testfinalize`
+
+### 4. New debugger commands / APIs
+
+Add:
+
+- `testassert`
+- `testfinalize`
+- `settingset`
+- internal `DbgTestAssert(...)`
+
+Behavior split:
+
+- `testassert` / `DbgTestAssert(...)`
+  - update state
+  - log failures
+  - initiate graceful unwinding
+- `testfinalize`
+  - compute final status
+  - fail on zero assertions
+  - emit final summary line
+  - trigger headless shutdown if needed
+
+### 5. Headless host work
+
+Headless needs explicit support for test finalization, not just stdin EOF.
+
+Recommended responsibilities:
+
+- honor `-testing`
+- allow the normal queued `-c`/`-cf` model to run
+- when debugger-side finalization requests application close, perform the existing graceful shutdown path
+- keep stdout/stderr behavior as diagnostics only; redirected log remains canonical
+
+### 6. Python runner
+
+Keep it intentionally small:
+
+- discover tests from `src/tests/**/test.txt`
+- derive runtime paths from the `<rel>` convention
+- create temp userdir
+- choose redirected log path
+- find test-local plugins in `bin/<arch>/tests/<rel>/*.dpXX`
+- launch one process per test
+- wait/timeout
+- parse the final `[x64dbg-test] FINAL ...` line
+- treat a missing final line as failure
+- preserve artifacts on failure
+
+---
+
+## Recommended rollout
+
+### Phase 1: layout and runner
+
+- add `src/tests`
+- add `src/tests/cmake.toml`
+- wire it into the main build
+- build/copy artifacts into `bin/<arch>/tests/...`
+- add `src/tests/run.py`
+- discover tests from `src/tests/**/test.txt`
+
+### Phase 2: test startup mode
+
+- add `-testing`
+- add `-userdir`
+- add repeatable `-plugin`
+- ensure the runner can prepend `RedirectLog` through startup `-c`
+- make `-testing` auto-queue `testfinalize` after `-cf`
+
+### Phase 3: assertion/finalization plumbing
+
+- add shared test state
+- add `testassert`
+- add `testfinalize`
+- add internal `DbgTestAssert(...)`
+- add `settingset`
+- fail test runs with zero assertions executed
+- emit a final standardized summary line from `testfinalize`
+- extend `plugload` to accept direct paths
+
+### Phase 4: headless integration
+
+- make headless honor debugger-side close/finalize requests in `-testing`
+- make headless auto-exit cleanly after `testfinalize`
+
+### Phase 5: migrate representative tests
+
+- migrate `issue3808`
+- add one script-only regression test
+- add one mixed script/plugin test involving callbacks
+
+---
+
+## Final recommendation
+
+Use this minimal framework contract:
+
+- source tree: `src/tests/**/test.txt`
+- runtime tree:
+  - main debuggee: `bin/<arch>/tests/<rel>.exe`
+  - per-test dir: `bin/<arch>/tests/<rel>/`
+  - test-local plugins: `bin/<arch>/tests/<rel>/*.dpXX`
+- runner: small Python orchestrator only
+- default host: `headless.exe`
+- startup mode: `-testing`
+- isolation: explicit `-userdir` from the runner
+- plugin preload: repeatable `-plugin`
+- canonical output: redirected debugger log via `RedirectLog`
+- assertions:
+  - script: `testassert`
+  - plugin: `DbgTestAssert(...)`
+- finalization:
+  - framework-owned `testfinalize`
+  - emits `[x64dbg-test] FINAL ...`
+- failure if:
+  - any assert fails
+  - script fails
+  - process crashes/times out
+  - **zero assertions executed**
+- GUI: same startup contract, initially for manual inspection rather than unattended automation
+
+This keeps the framework small, explicit, and close to the existing x64dbg execution model.