From 24a254a9144965a05f64cd4f117dcc9c3b22d6bf Mon Sep 17 00:00:00 2001 From: "Lance R. Vick" Date: Wed, 30 Apr 2025 17:30:18 -0700 Subject: [PATCH] fix: overhaul socket buffer handling for far higher stability and blocking boot --- src/host/rootfs/usr/local/bin/netvm | 323 ++++++++++++++++------------ 1 file changed, 183 insertions(+), 140 deletions(-) diff --git a/src/host/rootfs/usr/local/bin/netvm b/src/host/rootfs/usr/local/bin/netvm index 8757168..5bbfbcc 100755 --- a/src/host/rootfs/usr/local/bin/netvm +++ b/src/host/rootfs/usr/local/bin/netvm @@ -2,59 +2,72 @@ set -eu COMMAND=($@) -QGA_SOCKET=/var/run/netvm_qga.sock LOCKFILE=/var/run/netvm.pid +QGA_SOCK_PATH=/var/run/netvm_qga.sock +QGA_SOCK_FDS=false -qemu_execute() { - local COMMAND ARGS - COMMAND="$1" - ARGS="${2-}" - - jq \ - -ncM \ - --arg cmd "$COMMAND" \ - --argjson args "$ARGS" \ - '{"execute": $cmd, "arguments": $args}' \ - >&$FD_SOCKET_OUT - - local LINE - read -t 5 -r -u $FD_SOCKET_IN LINE - - local ERROR=$(jq -r '.error.desc // empty' <<< "$LINE") - if [[ -n "$ERROR" ]]; then - echo "$ERROR" >&2 - return 1 - fi - - GA_RETURN=$(jq -cM .return <<< "$LINE") +qga_connect() { + [[ -v "QGA_SOCK_FDS_PID" ]] && return 0 + coproc QGA_SOCK_FDS ( + exec socat - UNIX-CONNECT:"${QGA_SOCK_PATH}" + ) || return 1 + QGA_SOCK_IN=${QGA_SOCK_FDS[0]} + QGA_SOCK_OUT=${QGA_SOCK_FDS[1]} } -qemu_ga() { - local COMMAND ARGS - COMMAND="$1" - ARGS="$2" +qga_execute() { + local cmd args + cmd="$1" + args="${2-}" + GA_RETURN="" - coproc FDS ( - exec socat - UNIX-CONNECT:"${QGA_SOCKET}" - ) + jq \ + -ncM \ + --arg cmd "$cmd" \ + --argjson args "$args" \ + '{"execute": $cmd, "arguments": $args}' \ + >&$QGA_SOCK_OUT - FD_SOCKET_IN=${FDS[0]} - FD_SOCKET_OUT=${FDS[1]} + local LINE + read -t 5 -r -u $QGA_SOCK_IN LINE || return 1 - local PID=$$ - qemu_execute guest-sync "$(jq -ncM --argjson pid "$PID" '{"id": $pid}')" - [[ "$(jq -re . <<< "$GA_RETURN")" = "$$" ]] \ - || (echo "guest-sync mismatch" >&2 && return 1) + local ERROR=$(jq -r '.error.desc // empty' <<< "$LINE") + if [[ -n "$ERROR" ]]; then + echo "$ERROR" >&2 + return 1 + fi - qemu_execute "$COMMAND" "$ARGS" - echo "$GA_RETURN" 2>&1 + GA_RETURN=$(jq -cM .return <<< "$LINE") +} - local RETURN - kill -INT "$FDS_PID" 2>/dev/null - wait "$FDS_PID" || RETURN=$? - if [[ $RETURN != 130 ]]; then - return $RETURN - fi +qga_flush() { + #Docs say this should work, but it just get parse errors + #LC_ALL= LC_CTYPE=en_US.UTF-8 printf '%b' "\uff" >&$QGA_SOCK_OUT + #read -t 5 -r -u $QGA_SOCK_IN LINE + until ! read -t 1 -r -u $QGA_SOCK_IN LINE; do sleep 0.1; done +} + +qga() { + local cmd args + cmd="$1" + args="$2" + + qga_connect + + local id=$((1 + $RANDOM % 10000000)) + qga_execute guest-sync "$(jq -ncM --argjson id "$id" '{"id": $id}')"; + [[ "$(jq -re . <<< "$GA_RETURN")" = "$id" ]] || (echo "Error: guest-sync mismatch" >&2 && return 1) + + unset GA_RETURN + qga_execute "$cmd" "$args" + echo "$GA_RETURN" 2>&1 + + local RETURN + kill -INT "$QGA_SOCK_FDS_PID" 2>/dev/null + wait "$QGA_SOCK_FDS_PID" || RETURN=$? + if [[ $RETURN != 130 ]]; then + return $RETURN + fi } function cmd_start(){ @@ -71,120 +84,150 @@ function cmd_start(){ if [[ -n "$net_args" ]]; then echo Y > /sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts fi - echo "Starting netvm"; + printf "Starting netvm..."; qemu-system-x86_64 \ -m 512M \ - --machine q35 \ - -nographic \ - -serial none \ - -monitor none \ - -net none \ - -cdrom /guest.img \ - -boot order=d \ - -chardev socket,path=${QGA_SOCKET},server=on,wait=off,id=qga0 \ + --machine q35 \ + -nographic \ + -serial none \ + -monitor none \ + -net none \ + -cdrom /guest.img \ + -boot order=d \ + -chardev socket,path=/var/run/netvm_qga.sock,server=on,wait=off,id=qga0 \ $net_args \ -device qemu-xhci \ - -device virtio-serial \ - -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0 & - pid=$! + -device virtio-serial \ + -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0 \ + >/dev/null 2>&1 &! + pid=$! + printf "done\n" echo "$pid" > "${LOCKFILE}" - until [ -S "${QGA_SOCKET}" ]; do sleep 1; done - until qemu_ga guest-ping "{}"; do - ps -p $pid >/dev/null || { - echo "Error: netvm exited unexpectedly"; - exit 1; - rm "${LOCKFILE}" - } - sleep 1 - done - echo "NetVM is booted" + printf "QGA Socket starting... " + until [ -S "${QGA_SOCK_PATH}" ]; do sleep 1; done + printf "done\n" + printf "Connecting to QGA socket... " + until qga_connect; do sleep 1; done + printf "done\n" + [ -f "/proc/${pid}/status" ] || { + echo "Error: netvm exited unexpectedly"; + rm "${LOCKFILE}" + exit 1; + } + + local id; + local spin='-\|/' + local i=0; + while true; do + i=$(( (i+1) %4 )); + printf "\rConnecting to QGA agent... ${spin:$i:1}" + qga_execute guest-ping "{}" > /dev/null || continue && break + sleep 1 + done; + printf "\rConnecting to guest agent... done\n" + printf "Flushing buffers..." + qga_flush + printf "done\n" + local i=0; + while true; do + i=$(( (i+1) %4 )); + printf "\rSyncing with guest... ${spin:$i:1}" + id=$((1 + $RANDOM % 10000000)) + qga_execute guest-sync "$(jq -ncM --argjson id "$id" '{"id": $id}')" || continue + if [[ "$(jq -re . <<< "$GA_RETURN")" = "$id" ]]; then + printf "\rSyncing with guest... done\n" + break + fi; + sleep 1 + done; + echo "NetVM boot complete" } function cmd_stop(){ - pkill $(cat "${LOCKFILE}") + kill $(cat "${LOCKFILE}") rm "${LOCKFILE}" } function cmd_status(){ - qemu_ga guest-get-host-name "{}" | jq -r '."host-name"' - pid=$(qemu_ga guest-exec '{"path": "uptime", "capture-output": true}' | jq -r '.pid') - out=$(qemu_ga guest-exec-status "$(jq -n --argjson pid "$pid" '{pid: $pid }')" \ - | jq -r '."out-data"' \ - | base64 -d \ - ) - echo $out + qga guest-get-host-name "{}" | jq -r '."host-name"' + pid=$(qga guest-exec '{"path": "uptime", "capture-output": true}' | jq -r '.pid') + out=$(qga guest-exec-status "$(jq -n --argjson pid "$pid" '{pid: $pid }')" \ + | jq -r '."out-data"' \ + | base64 -d \ + ) + echo $out } function cmd_push(){ - local source="${COMMAND[1]}" - local dest="${COMMAND[2]}" - fo_request=$(jq -n --arg dest "$dest" '{"path": $dest, "mode": "w" }') - handle=$(qemu_ga guest-file-open "$fo_request") - bufb64=$(base64 "$source") - count=$(cat "$source" | wc -c) - fw_request=$(jq -n \ - --argjson handle $handle \ - --argjson count $count \ - --arg bufb64 "$bufb64" \ - '{handle: $handle, "buf-b64": $bufb64, count: $count }' \ - ) - qemu_ga guest-file-write "$fw_request" - fh_request=$(jq -n --argjson handle $handle '{handle: $handle}' ) - qemu_ga guest-file-flush "$fh_request" - qemu_ga guest-file-close "$fh_request" + local source="${COMMAND[1]}" + local dest="${COMMAND[2]}" + fo_request=$(jq -n --arg dest "$dest" '{"path": $dest, "mode": "w" }') + handle=$(qga guest-file-open "$fo_request") + bufb64=$(base64 "$source") + count=$(cat "$source" | wc -c) + fw_request=$(jq -n \ + --argjson handle $handle \ + --argjson count $count \ + --arg bufb64 "$bufb64" \ + '{handle: $handle, "buf-b64": $bufb64, count: $count }' \ + ) + qga guest-file-write "$fw_request" + fh_request=$(jq -n --argjson handle $handle '{handle: $handle}' ) + qga guest-file-flush "$fh_request" + qga guest-file-close "$fh_request" } function cmd_pull(){ - local source="${COMMAND[1]}" - local dest="${COMMAND[2]}" - fo_request=$(jq -n --arg source "$source" '{"path": $source}') - handle=$(qemu_ga guest-file-open "$fo_request") - fr_request=$(jq -n \ - --argjson handle $handle \ - '{handle: $handle, count: 48000000 }' \ - ) - out=$(qemu_ga guest-file-read "$fr_request") - echo $out | jq -r '."buf-b64"' | base64 -d > $dest + local source="${COMMAND[1]}" + local dest="${COMMAND[2]}" + fo_request=$(jq -n --arg source "$source" '{"path": $source}') + handle=$(qga guest-file-open "$fo_request") + fr_request=$(jq -n \ + --argjson handle $handle \ + '{handle: $handle, count: 48000000 }' \ + ) + out=$(qga guest-file-read "$fr_request") + echo $out | jq -r '."buf-b64"' | base64 -d > $dest } function cmd_run(){ - [ -z "${COMMAND[1]}" ] && { echo "Error: missing command"; exit 1; } - [ -f "${LOCKFILE}" ] || { echo "Error: Netvm is not running"; exit 1; } - [ -S "${QGA_SOCKET}" ] || { echo "Error: Netvm QGA socket is missing"; exit 1; } - local cmd="${COMMAND[1]}" - local args="${COMMAND[@]:2}" - local args_json="[]" - if [[ -n "$args" ]]; then - args_json=$(printf '%s\n' "$args" | jq -R . | jq -s .) - fi - local request - request=$( \ - jq -n \ - --arg path "$cmd" \ - --argjson args "$args_json" \ - '{ - path: $path, - arg: $args, - "capture-output": true - }' \ - ) - pid=$(qemu_ga guest-exec "$request" | jq -r '.pid') - local exited=false - until [ "$exited" == "true" ]; do \ - out=$(qemu_ga guest-exec-status "$(jq -n --argjson pid "$pid" '{pid: $pid }')" ) - exited=$(echo $out | jq -r '.exited') - if $exited && jq -r 'has("out-data")' >/dev/null < <(echo $out); then - echo "$out" | jq -r '."out-data"' | base64 -d - break - fi - sleep 1 - done + [ -z "${COMMAND[1]}" ] && { echo "Error: missing command"; exit 1; } + [ -f "${LOCKFILE}" ] || { echo "Error: Netvm is not running"; exit 1; } + [ -S "${QGA_SOCK_PATH}" ] || { echo "Error: Netvm QGA socket is missing"; exit 1; } + local cmd="${COMMAND[1]}" + local args="${COMMAND[@]:2}" + local args_json="[]" + if [[ -n "$args" ]]; then + args_json=$(printf '%s\n' "$args" | jq -R . | jq -s .) + fi + local request + request=$( \ + jq -n \ + --arg path "$cmd" \ + --argjson args "$args_json" \ + '{ + path: $path, + arg: $args, + "capture-output": true + }' \ + ) + pid=$(qga guest-exec "$request" | jq -r '.pid') + local exited=false + until [ "$exited" == "true" ]; do \ + out=$(qga guest-exec-status "$(jq -n --argjson pid "$pid" '{pid: $pid }')" ) + exited=$(echo $out | jq -r '.exited') + if $exited && jq -r 'has("out-data")' >/dev/null < <(echo $out); then + echo "$out" | jq -r '."out-data"' | base64 -d + break + fi + sleep 1 + done } cmd_usage() { cat <<-_EOF netvm - + Control network vm headlessly via QMP protocol Usage: @@ -204,12 +247,12 @@ cmd_usage() { } case "$1" in - status) shift; cmd_status $@ ;; - start) shift; cmd_start $@ ;; - stop) shift; cmd_stop $@ ;; - push) shift; cmd_push $@ ;; - pull) shift; cmd_pull $@ ;; - run) shift; cmd_run $@ ;; - help) shift; cmd_usage $@ ;; - *) cmd_usage $@ ;; + status) shift; cmd_status $@ ;; + start) shift; cmd_start $@ ;; + stop) shift; cmd_stop $@ ;; + push) shift; cmd_push $@ ;; + pull) shift; cmd_pull $@ ;; + run) shift; cmd_run $@ ;; + help) shift; cmd_usage $@ ;; + *) cmd_usage $@ ;; esac