fix: backups resilience improvements (#5555)

* fix: backups

* fix: stability
This commit is contained in:
Calum H.
2026-03-13 19:10:13 +00:00
committed by GitHub
parent 455a4f527d
commit c44cc38b3a
9 changed files with 60 additions and 144 deletions

View File

@@ -483,7 +483,6 @@ const isServerRunning = computed(() => serverPowerState.value === 'running')
// Reactive power state of the server, initialised to 'stopped'.
const serverPowerState = ref<Archon.Websocket.v0.PowerState>('stopped')
// Optional extra detail about the power state; both fields may be absent.
// NOTE(review): presumably populated from power-state websocket events — confirm against the handler.
const powerStateDetails = ref<{ oom_killed?: boolean; exit_code?: number }>()
// Reactive map looked up by backup id; each entry is an object indexed by task name
// whose values expose `state` and `progress` (see handleBackupProgress).
const backupsState = reactive(new Map())
// Holds `${backupId}:${task}` keys; handleBackupProgress returns early for keys already present,
// and adds a key once that task reports 'done', 'failed' or 'cancelled'.
const completedBackupTasks = new Set<string>()
// Backup ids for which handleBackupProgress drops incoming progress events.
const cancelledBackups = new Set<string>()
const markBackupCancelled = (backupId: string) => {
@@ -853,28 +852,22 @@ const handleAuthIncorrect = () => {
}
const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) => {
// Ignore 'file' task events - these are per-file progress updates sent continuously
if (data.task === 'file') {
return
}
if (data.task === 'file') return
const backupId = data.id
const taskKey = `${backupId}:${data.task}`
if (completedBackupTasks.has(taskKey)) {
return
}
if (cancelledBackups.has(backupId)) {
return
}
if (cancelledBackups.has(backupId)) return
const current = backupsState.get(backupId) ?? {}
const previousState = current[data.task]?.state
const previousProgress = current[data.task]?.progress
const currentTaskState = current[data.task]?.state
const isIncomingTerminal =
data.state === 'done' || data.state === 'failed' || data.state === 'cancelled'
if (previousState !== data.state || previousProgress !== data.progress) {
// (mutating same reference doesn't work)
// Skip duplicate terminal events, but allow retries (terminal → ongoing)
if (currentTaskState === data.state && isIncomingTerminal) return
const previousProgress = current[data.task]?.progress
if (currentTaskState !== data.state || previousProgress !== data.progress) {
backupsState.set(backupId, {
...current,
[data.task]: {
@@ -884,11 +877,7 @@ const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) =
})
}
const isTerminalState =
data.state === 'done' || data.state === 'failed' || data.state === 'cancelled'
if (isTerminalState) {
completedBackupTasks.add(taskKey)
if (isIncomingTerminal) {
const attemptCleanup = (attempt: number = 1) => {
queryClient.invalidateQueries({ queryKey: ['backups', 'list', serverId] }).then(() => {
const backupData = queryClient.getQueryData<Archon.Backups.v1.Backup[]>([
@@ -897,12 +886,31 @@ const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) =
serverId,
])
const backup = backupData?.find((b) => b.id === backupId)
const isStillActive =
backup && (backup.status === 'in_progress' || backup.status === 'pending')
if (backup?.ongoing && attempt < 3) {
// retry 3 times max, archon is slow compared to ws state
setTimeout(() => attemptCleanup(attempt + 1), 1000)
if (isStillActive && attempt < 6) {
setTimeout(() => attemptCleanup(attempt + 1), 1000 * Math.pow(2, attempt - 1))
return
}
if (isStillActive) {
queryClient.setQueryData<Archon.Backups.v1.Backup[]>(
['backups', 'list', serverId],
(old) =>
old?.map((b) => {
if (b.id !== backupId) return b
return {
...b,
status: data.state === 'done' ? ('done' as const) : ('error' as const),
ongoing: false,
interrupted: data.state === 'failed',
}
}),
)
}
backupsState.delete(backupId)
})
}
@@ -1325,7 +1333,6 @@ const cleanup = () => {
isReconnecting.value = false
isLoading.value = true
completedBackupTasks.clear()
cancelledBackups.clear()
clearNodeAuthState()

View File

@@ -426,6 +426,7 @@ export namespace Archon {
export namespace v1 {
/** State value reported for a single backup task. */
export type BackupState = 'ongoing' | 'done' | 'failed' | 'cancelled' | 'unchanged'
/** Kind of backup task; 'file' is in addition to the whole-backup 'create' and 'restore' tasks. */
export type BackupTask = 'file' | 'create' | 'restore'
/**
 * Overall status of a backup record.
 * Consumers in this change treat 'pending' and 'in_progress' as still active,
 * and 'error' and 'timed_out' as a failed backup.
 */
export type BackupStatus = 'pending' | 'in_progress' | 'timed_out' | 'error' | 'done'
export type BackupTaskProgress = {
progress: number // 0.0 to 1.0
@@ -438,6 +439,7 @@ export namespace Archon {
name: string
created_at: string
automated: boolean
status: BackupStatus
interrupted: boolean
ongoing: boolean
locked: boolean

View File

@@ -50,11 +50,13 @@ const props = withDefaults(
const backupQueued = computed(
() =>
props.backup.status === 'pending' ||
props.backup.task?.create?.progress === 0 ||
(props.backup.ongoing && !props.backup.task?.create),
(props.backup.status === 'in_progress' && !props.backup.task?.create),
)
const failedToCreate = computed(
() => props.backup.status === 'error' || props.backup.status === 'timed_out',
)
// const automated = computed(() => props.backup.automated)
const failedToCreate = computed(() => props.backup.interrupted)
const inactiveStates = ['failed', 'cancelled', 'done']
@@ -64,11 +66,11 @@ const creating = computed(() => {
return task
}
if (props.backup.ongoing && !props.backup.task?.restore) {
return {
progress: 0,
state: 'ongoing',
}
if (
(props.backup.status === 'in_progress' || props.backup.status === 'pending') &&
!props.backup.task?.restore
) {
return { progress: 0, state: 'ongoing' as const }
}
return undefined
})
@@ -78,13 +80,6 @@ const restoring = computed(() => {
if (task && task.progress < 1 && !inactiveStates.includes(task.state)) {
return task
}
if (props.backup.ongoing && props.backup.task?.restore) {
return {
progress: 0,
state: 'ongoing',
}
}
return undefined
})

View File

@@ -86,13 +86,16 @@ export function useInlineBackup(backupName: string | (() => string)) {
if (!entry?.create) return
if (entry.create.state === 'done') {
stopPolling()
isBackingUp.value = false
backupComplete.value = true
} else if (entry.create.state === 'cancelled') {
stopPolling()
isBackingUp.value = false
isCancelling.value = false
backupCancelled.value = true
} else if (entry.create.state === 'failed') {
stopPolling()
isBackingUp.value = false
backupFailed.value = true
}
@@ -118,11 +121,13 @@ export function useInlineBackup(backupName: string | (() => string)) {
try {
const backup = await client.archon.backups_v1.get(serverId, worldId.value!, backupId)
const isTerminal =
backup.status === 'done' || backup.status === 'error' || backup.status === 'timed_out'
if (!backup.ongoing) {
if (isTerminal) {
stopPolling()
if (backup.interrupted) {
if (!isBackingUp.value) return
if (backup.status === 'error' || backup.status === 'timed_out') {
isBackingUp.value = false
backupFailed.value = true
} else {

View File

@@ -221,7 +221,11 @@ const backups = computed(() => {
...backup.task,
...progressState,
},
status: hasOngoingTask
? ('in_progress' as const)
: hasCompletedTask
? ('done' as const)
: backup.status,
ongoing: hasOngoingTask || (backup.ongoing && !hasCompletedTask),
}
}
@@ -304,8 +308,8 @@ const backupCreationDisabled = computed(() => {
if (busyReasons.value.length > 0) {
return formatMessage(busyReasons.value[0].reason)
}
// also check API data for ongoing backups (before ws fires)
if (backupsData.value?.some((backup) => backup.ongoing)) {
// also check for active backups, combining REST data with WS overlay
if (backups.value.some((b) => b.status === 'in_progress' || b.status === 'pending')) {
return 'A backup is already in progress'
}
return undefined

View File

@@ -1,27 +0,0 @@
import type { WSBackupState, WSBackupTask } from './websocket'
/**
 * Shape of a single backup record, including optional per-task progress.
 */
export interface Backup {
id: string
name: string
// NOTE(review): typed as a plain string; presumably an ISO timestamp — confirm against the API.
created_at: string
automated: boolean
interrupted: boolean
ongoing: boolean
// Optional entry per websocket backup task ('create' / 'restore'), each with its own progress and state.
task: {
[K in WSBackupTask]?: {
progress: number
state: WSBackupState
}
}
}
/** Settings for automatic backups: an on/off flag plus a numeric interval. */
export interface AutoBackupSettings {
enabled: boolean
// NOTE(review): the unit of the interval is not visible here — confirm before relying on it.
interval: number
}
/** Minimal backup summary (id, name, creation time) used as the element type of `Server.backups`. */
export interface ServerBackup {
id: string
name: string
// NOTE(review): typed as a plain string; presumably an ISO timestamp — confirm against the API.
created_at: string
}

View File

@@ -1,8 +1,3 @@
import type { Project } from '../../types'
import type { ServerBackup } from './backup'
import type { Mod } from './content'
import type { Allocation } from './server'
export type ServerNotice = {
id: number
message: string
@@ -21,42 +16,3 @@ export type ServerNotice = {
dismissed_on: string
}[]
}
/**
 * Shape of a single server record: identity, network details, game/loader
 * information, backup quota and backups, installed mods, and assorted flags.
 */
export interface Server {
server_id: string
name: string
status: string
// Network details for the server, including its list of allocations.
net: {
ip: string
port: number
domain: string
allocations: Allocation[]
}
game: string
// Loader and version fields are nullable.
loader: string | null
loader_version: string | null
mc_version: string | null
// NOTE(review): units of the quota fields are not visible here — confirm.
backup_quota: number
used_backup_quota: number
backups: ServerBackup[]
mods: Mod[]
project: Project | null
suspension_reason: string | null
image: string | null
// Optional upstream reference; the only kind declared here is 'modpack'.
upstream?: {
kind: 'modpack'
project_id: string
version_id: string
}
motd: string
flows: {
intro?: boolean
}
is_medal?: boolean
medal_expires?: string
}
/** Wrapper object holding a list of `Server` records. */
export interface Servers {
servers: Server[]
}

View File

@@ -1,5 +1,4 @@
export * from './api'
export * from './backup'
export * from './common'
export * from './content'
export * from './filesystem'

View File

@@ -59,18 +59,6 @@ export interface WSNewModEvent {
event: 'new-mod'
}
/** Task names carried by backup-progress websocket events. */
export type WSBackupTask = 'create' | 'restore'
/** State values carried by backup-progress websocket events. */
export type WSBackupState = 'ongoing' | 'done' | 'failed' | 'cancelled' | 'unchanged'
/** Websocket event reporting the progress and state of one task for one backup. */
export interface WSBackupProgressEvent {
// Literal tag identifying this event type.
event: 'backup-progress'
task: WSBackupTask
// NOTE(review): presumably the id of the backup the event refers to — confirm against the sender.
id: string
progress: number // percentage
state: WSBackupState
ready: boolean
}
export type FSQueuedOpUnarchive = {
op: 'unarchive'
src: string
@@ -109,16 +97,3 @@ export interface WSFilesystemOpsEvent {
event: 'filesystem-ops'
all: FilesystemOp[]
}
/**
 * Union of the websocket event types listed below.
 * NOTE(review): the members visible in this view carry a literal `event` tag;
 * presumably all members do, which would make this a discriminated union — confirm.
 */
export type WSEvent =
| WSLogEvent
| WSStatsEvent
| WSPowerStateEvent
| WSAuthExpiringEvent
| WSAuthIncorrectEvent
| WSInstallationResultEvent
| WSAuthOkEvent
| WSUptimeEvent
| WSNewModEvent
| WSBackupProgressEvent
| WSFilesystemOpsEvent