fix: backups resilience improvements (#5555)
* fix: backups
* fix: stability
This commit is contained in:
@@ -483,7 +483,6 @@ const isServerRunning = computed(() => serverPowerState.value === 'running')
|
|||||||
const serverPowerState = ref<Archon.Websocket.v0.PowerState>('stopped')
|
const serverPowerState = ref<Archon.Websocket.v0.PowerState>('stopped')
|
||||||
const powerStateDetails = ref<{ oom_killed?: boolean; exit_code?: number }>()
|
const powerStateDetails = ref<{ oom_killed?: boolean; exit_code?: number }>()
|
||||||
const backupsState = reactive(new Map())
|
const backupsState = reactive(new Map())
|
||||||
const completedBackupTasks = new Set<string>()
|
|
||||||
const cancelledBackups = new Set<string>()
|
const cancelledBackups = new Set<string>()
|
||||||
|
|
||||||
const markBackupCancelled = (backupId: string) => {
|
const markBackupCancelled = (backupId: string) => {
|
||||||
@@ -853,28 +852,22 @@ const handleAuthIncorrect = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) => {
|
const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) => {
|
||||||
// Ignore 'file' task events - these are per-file progress updates sent continuously
|
if (data.task === 'file') return
|
||||||
if (data.task === 'file') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const backupId = data.id
|
const backupId = data.id
|
||||||
const taskKey = `${backupId}:${data.task}`
|
|
||||||
|
|
||||||
if (completedBackupTasks.has(taskKey)) {
|
if (cancelledBackups.has(backupId)) return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cancelledBackups.has(backupId)) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const current = backupsState.get(backupId) ?? {}
|
const current = backupsState.get(backupId) ?? {}
|
||||||
const previousState = current[data.task]?.state
|
const currentTaskState = current[data.task]?.state
|
||||||
const previousProgress = current[data.task]?.progress
|
const isIncomingTerminal =
|
||||||
|
data.state === 'done' || data.state === 'failed' || data.state === 'cancelled'
|
||||||
|
|
||||||
if (previousState !== data.state || previousProgress !== data.progress) {
|
// Skip duplicate terminal events, but allow retries (terminal → ongoing)
|
||||||
// (mutating same reference doesn't work)
|
if (currentTaskState === data.state && isIncomingTerminal) return
|
||||||
|
|
||||||
|
const previousProgress = current[data.task]?.progress
|
||||||
|
if (currentTaskState !== data.state || previousProgress !== data.progress) {
|
||||||
backupsState.set(backupId, {
|
backupsState.set(backupId, {
|
||||||
...current,
|
...current,
|
||||||
[data.task]: {
|
[data.task]: {
|
||||||
@@ -884,11 +877,7 @@ const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) =
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const isTerminalState =
|
if (isIncomingTerminal) {
|
||||||
data.state === 'done' || data.state === 'failed' || data.state === 'cancelled'
|
|
||||||
if (isTerminalState) {
|
|
||||||
completedBackupTasks.add(taskKey)
|
|
||||||
|
|
||||||
const attemptCleanup = (attempt: number = 1) => {
|
const attemptCleanup = (attempt: number = 1) => {
|
||||||
queryClient.invalidateQueries({ queryKey: ['backups', 'list', serverId] }).then(() => {
|
queryClient.invalidateQueries({ queryKey: ['backups', 'list', serverId] }).then(() => {
|
||||||
const backupData = queryClient.getQueryData<Archon.Backups.v1.Backup[]>([
|
const backupData = queryClient.getQueryData<Archon.Backups.v1.Backup[]>([
|
||||||
@@ -897,12 +886,31 @@ const handleBackupProgress = (data: Archon.Websocket.v0.WSBackupProgressEvent) =
|
|||||||
serverId,
|
serverId,
|
||||||
])
|
])
|
||||||
const backup = backupData?.find((b) => b.id === backupId)
|
const backup = backupData?.find((b) => b.id === backupId)
|
||||||
|
const isStillActive =
|
||||||
|
backup && (backup.status === 'in_progress' || backup.status === 'pending')
|
||||||
|
|
||||||
if (backup?.ongoing && attempt < 3) {
|
if (isStillActive && attempt < 6) {
|
||||||
// retry 3 times max, archon is slow compared to ws state
|
setTimeout(() => attemptCleanup(attempt + 1), 1000 * Math.pow(2, attempt - 1))
|
||||||
setTimeout(() => attemptCleanup(attempt + 1), 1000)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isStillActive) {
|
||||||
|
queryClient.setQueryData<Archon.Backups.v1.Backup[]>(
|
||||||
|
['backups', 'list', serverId],
|
||||||
|
(old) =>
|
||||||
|
old?.map((b) => {
|
||||||
|
if (b.id !== backupId) return b
|
||||||
|
return {
|
||||||
|
...b,
|
||||||
|
status: data.state === 'done' ? ('done' as const) : ('error' as const),
|
||||||
|
ongoing: false,
|
||||||
|
interrupted: data.state === 'failed',
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
backupsState.delete(backupId)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1325,7 +1333,6 @@ const cleanup = () => {
|
|||||||
isReconnecting.value = false
|
isReconnecting.value = false
|
||||||
isLoading.value = true
|
isLoading.value = true
|
||||||
|
|
||||||
completedBackupTasks.clear()
|
|
||||||
cancelledBackups.clear()
|
cancelledBackups.clear()
|
||||||
|
|
||||||
clearNodeAuthState()
|
clearNodeAuthState()
|
||||||
|
|||||||
@@ -426,6 +426,7 @@ export namespace Archon {
|
|||||||
export namespace v1 {
|
export namespace v1 {
|
||||||
export type BackupState = 'ongoing' | 'done' | 'failed' | 'cancelled' | 'unchanged'
|
export type BackupState = 'ongoing' | 'done' | 'failed' | 'cancelled' | 'unchanged'
|
||||||
export type BackupTask = 'file' | 'create' | 'restore'
|
export type BackupTask = 'file' | 'create' | 'restore'
|
||||||
|
export type BackupStatus = 'pending' | 'in_progress' | 'timed_out' | 'error' | 'done'
|
||||||
|
|
||||||
export type BackupTaskProgress = {
|
export type BackupTaskProgress = {
|
||||||
progress: number // 0.0 to 1.0
|
progress: number // 0.0 to 1.0
|
||||||
@@ -438,6 +439,7 @@ export namespace Archon {
|
|||||||
name: string
|
name: string
|
||||||
created_at: string
|
created_at: string
|
||||||
automated: boolean
|
automated: boolean
|
||||||
|
status: BackupStatus
|
||||||
interrupted: boolean
|
interrupted: boolean
|
||||||
ongoing: boolean
|
ongoing: boolean
|
||||||
locked: boolean
|
locked: boolean
|
||||||
|
|||||||
@@ -50,11 +50,13 @@ const props = withDefaults(
|
|||||||
|
|
||||||
const backupQueued = computed(
|
const backupQueued = computed(
|
||||||
() =>
|
() =>
|
||||||
|
props.backup.status === 'pending' ||
|
||||||
props.backup.task?.create?.progress === 0 ||
|
props.backup.task?.create?.progress === 0 ||
|
||||||
(props.backup.ongoing && !props.backup.task?.create),
|
(props.backup.status === 'in_progress' && !props.backup.task?.create),
|
||||||
|
)
|
||||||
|
const failedToCreate = computed(
|
||||||
|
() => props.backup.status === 'error' || props.backup.status === 'timed_out',
|
||||||
)
|
)
|
||||||
// const automated = computed(() => props.backup.automated)
|
|
||||||
const failedToCreate = computed(() => props.backup.interrupted)
|
|
||||||
|
|
||||||
const inactiveStates = ['failed', 'cancelled', 'done']
|
const inactiveStates = ['failed', 'cancelled', 'done']
|
||||||
|
|
||||||
@@ -64,11 +66,11 @@ const creating = computed(() => {
|
|||||||
return task
|
return task
|
||||||
}
|
}
|
||||||
|
|
||||||
if (props.backup.ongoing && !props.backup.task?.restore) {
|
if (
|
||||||
return {
|
(props.backup.status === 'in_progress' || props.backup.status === 'pending') &&
|
||||||
progress: 0,
|
!props.backup.task?.restore
|
||||||
state: 'ongoing',
|
) {
|
||||||
}
|
return { progress: 0, state: 'ongoing' as const }
|
||||||
}
|
}
|
||||||
return undefined
|
return undefined
|
||||||
})
|
})
|
||||||
@@ -78,13 +80,6 @@ const restoring = computed(() => {
|
|||||||
if (task && task.progress < 1 && !inactiveStates.includes(task.state)) {
|
if (task && task.progress < 1 && !inactiveStates.includes(task.state)) {
|
||||||
return task
|
return task
|
||||||
}
|
}
|
||||||
|
|
||||||
if (props.backup.ongoing && props.backup.task?.restore) {
|
|
||||||
return {
|
|
||||||
progress: 0,
|
|
||||||
state: 'ongoing',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return undefined
|
return undefined
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -86,13 +86,16 @@ export function useInlineBackup(backupName: string | (() => string)) {
|
|||||||
if (!entry?.create) return
|
if (!entry?.create) return
|
||||||
|
|
||||||
if (entry.create.state === 'done') {
|
if (entry.create.state === 'done') {
|
||||||
|
stopPolling()
|
||||||
isBackingUp.value = false
|
isBackingUp.value = false
|
||||||
backupComplete.value = true
|
backupComplete.value = true
|
||||||
} else if (entry.create.state === 'cancelled') {
|
} else if (entry.create.state === 'cancelled') {
|
||||||
|
stopPolling()
|
||||||
isBackingUp.value = false
|
isBackingUp.value = false
|
||||||
isCancelling.value = false
|
isCancelling.value = false
|
||||||
backupCancelled.value = true
|
backupCancelled.value = true
|
||||||
} else if (entry.create.state === 'failed') {
|
} else if (entry.create.state === 'failed') {
|
||||||
|
stopPolling()
|
||||||
isBackingUp.value = false
|
isBackingUp.value = false
|
||||||
backupFailed.value = true
|
backupFailed.value = true
|
||||||
}
|
}
|
||||||
@@ -118,11 +121,13 @@ export function useInlineBackup(backupName: string | (() => string)) {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const backup = await client.archon.backups_v1.get(serverId, worldId.value!, backupId)
|
const backup = await client.archon.backups_v1.get(serverId, worldId.value!, backupId)
|
||||||
|
const isTerminal =
|
||||||
|
backup.status === 'done' || backup.status === 'error' || backup.status === 'timed_out'
|
||||||
|
|
||||||
if (!backup.ongoing) {
|
if (isTerminal) {
|
||||||
stopPolling()
|
stopPolling()
|
||||||
|
if (!isBackingUp.value) return
|
||||||
if (backup.interrupted) {
|
if (backup.status === 'error' || backup.status === 'timed_out') {
|
||||||
isBackingUp.value = false
|
isBackingUp.value = false
|
||||||
backupFailed.value = true
|
backupFailed.value = true
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -221,7 +221,11 @@ const backups = computed(() => {
|
|||||||
...backup.task,
|
...backup.task,
|
||||||
...progressState,
|
...progressState,
|
||||||
},
|
},
|
||||||
|
status: hasOngoingTask
|
||||||
|
? ('in_progress' as const)
|
||||||
|
: hasCompletedTask
|
||||||
|
? ('done' as const)
|
||||||
|
: backup.status,
|
||||||
ongoing: hasOngoingTask || (backup.ongoing && !hasCompletedTask),
|
ongoing: hasOngoingTask || (backup.ongoing && !hasCompletedTask),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -304,8 +308,8 @@ const backupCreationDisabled = computed(() => {
|
|||||||
if (busyReasons.value.length > 0) {
|
if (busyReasons.value.length > 0) {
|
||||||
return formatMessage(busyReasons.value[0].reason)
|
return formatMessage(busyReasons.value[0].reason)
|
||||||
}
|
}
|
||||||
// also check API data for ongoing backups (before ws fires)
|
// also check for active backups, combining REST data with WS overlay
|
||||||
if (backupsData.value?.some((backup) => backup.ongoing)) {
|
if (backups.value.some((b) => b.status === 'in_progress' || b.status === 'pending')) {
|
||||||
return 'A backup is already in progress'
|
return 'A backup is already in progress'
|
||||||
}
|
}
|
||||||
return undefined
|
return undefined
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
import type { WSBackupState, WSBackupTask } from './websocket'
|
|
||||||
|
|
||||||
export interface Backup {
|
|
||||||
id: string
|
|
||||||
name: string
|
|
||||||
created_at: string
|
|
||||||
automated: boolean
|
|
||||||
interrupted: boolean
|
|
||||||
ongoing: boolean
|
|
||||||
task: {
|
|
||||||
[K in WSBackupTask]?: {
|
|
||||||
progress: number
|
|
||||||
state: WSBackupState
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface AutoBackupSettings {
|
|
||||||
enabled: boolean
|
|
||||||
interval: number
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface ServerBackup {
|
|
||||||
id: string
|
|
||||||
name: string
|
|
||||||
created_at: string
|
|
||||||
}
|
|
||||||
@@ -1,8 +1,3 @@
|
|||||||
import type { Project } from '../../types'
|
|
||||||
import type { ServerBackup } from './backup'
|
|
||||||
import type { Mod } from './content'
|
|
||||||
import type { Allocation } from './server'
|
|
||||||
|
|
||||||
export type ServerNotice = {
|
export type ServerNotice = {
|
||||||
id: number
|
id: number
|
||||||
message: string
|
message: string
|
||||||
@@ -21,42 +16,3 @@ export type ServerNotice = {
|
|||||||
dismissed_on: string
|
dismissed_on: string
|
||||||
}[]
|
}[]
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Server {
|
|
||||||
server_id: string
|
|
||||||
name: string
|
|
||||||
status: string
|
|
||||||
net: {
|
|
||||||
ip: string
|
|
||||||
port: number
|
|
||||||
domain: string
|
|
||||||
allocations: Allocation[]
|
|
||||||
}
|
|
||||||
game: string
|
|
||||||
loader: string | null
|
|
||||||
loader_version: string | null
|
|
||||||
mc_version: string | null
|
|
||||||
backup_quota: number
|
|
||||||
used_backup_quota: number
|
|
||||||
backups: ServerBackup[]
|
|
||||||
mods: Mod[]
|
|
||||||
project: Project | null
|
|
||||||
suspension_reason: string | null
|
|
||||||
image: string | null
|
|
||||||
upstream?: {
|
|
||||||
kind: 'modpack'
|
|
||||||
project_id: string
|
|
||||||
version_id: string
|
|
||||||
}
|
|
||||||
motd: string
|
|
||||||
flows: {
|
|
||||||
intro?: boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
is_medal?: boolean
|
|
||||||
medal_expires?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface Servers {
|
|
||||||
servers: Server[]
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
export * from './api'
|
export * from './api'
|
||||||
export * from './backup'
|
|
||||||
export * from './common'
|
export * from './common'
|
||||||
export * from './content'
|
export * from './content'
|
||||||
export * from './filesystem'
|
export * from './filesystem'
|
||||||
|
|||||||
@@ -59,18 +59,6 @@ export interface WSNewModEvent {
|
|||||||
event: 'new-mod'
|
event: 'new-mod'
|
||||||
}
|
}
|
||||||
|
|
||||||
export type WSBackupTask = 'create' | 'restore'
|
|
||||||
export type WSBackupState = 'ongoing' | 'done' | 'failed' | 'cancelled' | 'unchanged'
|
|
||||||
|
|
||||||
export interface WSBackupProgressEvent {
|
|
||||||
event: 'backup-progress'
|
|
||||||
task: WSBackupTask
|
|
||||||
id: string
|
|
||||||
progress: number // percentage
|
|
||||||
state: WSBackupState
|
|
||||||
ready: boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
export type FSQueuedOpUnarchive = {
|
export type FSQueuedOpUnarchive = {
|
||||||
op: 'unarchive'
|
op: 'unarchive'
|
||||||
src: string
|
src: string
|
||||||
@@ -109,16 +97,3 @@ export interface WSFilesystemOpsEvent {
|
|||||||
event: 'filesystem-ops'
|
event: 'filesystem-ops'
|
||||||
all: FilesystemOp[]
|
all: FilesystemOp[]
|
||||||
}
|
}
|
||||||
|
|
||||||
export type WSEvent =
|
|
||||||
| WSLogEvent
|
|
||||||
| WSStatsEvent
|
|
||||||
| WSPowerStateEvent
|
|
||||||
| WSAuthExpiringEvent
|
|
||||||
| WSAuthIncorrectEvent
|
|
||||||
| WSInstallationResultEvent
|
|
||||||
| WSAuthOkEvent
|
|
||||||
| WSUptimeEvent
|
|
||||||
| WSNewModEvent
|
|
||||||
| WSBackupProgressEvent
|
|
||||||
| WSFilesystemOpsEvent
|
|
||||||
|
|||||||
Reference in New Issue
Block a user