Only mark servers as offline if they fail pings 3+ times (#5664)

* wip: online status fix

* use INCR

* properly clear cache
This commit is contained in:
aecsocket
2026-03-26 06:34:20 +00:00
committed by GitHub
parent bf24ed8d12
commit da48a12551
8 changed files with 83 additions and 25 deletions

View File

@@ -29,7 +29,8 @@
"low",
"medium",
"high",
"severe"
"severe",
"malware"
]
}
}
@@ -45,7 +46,7 @@
false,
true,
false,
false
true
]
},
"hash": "10e2a3b31ba94b93ed2d6c9753a5aabf13190a0b336089e6521022069813cf17"

View File

@@ -44,7 +44,8 @@
"low",
"medium",
"high",
"severe"
"severe",
"malware"
]
}
}
@@ -79,7 +80,7 @@
true,
false,
false,
false,
true,
null
]
},

View File

@@ -25,7 +25,8 @@
"low",
"medium",
"high",
"severe"
"severe",
"malware"
]
}
}

View File

@@ -22,7 +22,8 @@
"low",
"medium",
"high",
"severe"
"severe",
"malware"
]
}
}

View File

@@ -8,7 +8,7 @@ When the user refers to "perform[ing] pre-PR checks", do the following:
- Run `cargo clippy -p labrinth --all-targets` — there must be ZERO warnings, otherwise CI will fail
- DO NOT run tests unless explicitly requested (they take a long time)
- Prepare the sqlx cache: cd into `apps/labrinth` and run `cargo sqlx prepare`
- Prepare the sqlx cache: cd into `apps/labrinth` and run `cargo sqlx prepare -- --tests`
- NEVER run `cargo sqlx prepare --workspace`
## Testing

View File

@@ -788,6 +788,20 @@ impl RedisConnection {
.await?;
Ok(values)
}
/// Increments the counter stored under the key
/// `{meta_namespace}_{namespace}:{id}` by issuing a Redis `INCR` command.
///
/// The reply is decoded as `Option<u64>` and returned as-is; Redis documents
/// `INCR` as replying with the value of the key after the increment.
///
/// # Errors
///
/// Any error produced while running the command is propagated via `?` as a
/// `DatabaseError`.
#[tracing::instrument(skip(self))]
pub async fn incr(
    &mut self,
    namespace: &str,
    id: &str,
) -> Result<Option<u64>, DatabaseError> {
    // Build the command in two steps: first the verb, then the fully
    // qualified key as its only argument.
    let mut command = cmd("INCR");
    command.arg(format!("{}_{namespace}:{id}", self.meta_namespace));

    let count: Option<u64> =
        command.query_async(&mut self.connection).await?;
    Ok(count)
}
}
#[derive(Serialize, Deserialize)]

View File

@@ -293,4 +293,5 @@ vars! {
SERVER_PING_RETRIES: usize = 3usize;
SERVER_PING_MIN_INTERVAL_SEC: u64 = 30u64 * 60;
SERVER_PING_TIMEOUT_MS: u64 = 3u64 * 1000;
SERVER_PING_MAX_FAIL_COUNT: u64 = 3u64;
}

View File

@@ -24,6 +24,7 @@ pub struct ServerPingQueue {
}
pub const REDIS_NAMESPACE: &str = "minecraft_java_server_ping";
pub const REDIS_FAILURE_NAMESPACE: &str = "minecraft_java_server_ping_failures";
pub const CLICKHOUSE_TABLE: &str = "minecraft_java_server_pings";
impl ServerPingQueue {
@@ -118,27 +119,65 @@ impl ServerPingQueue {
.await
.wrap_err("failed to write ping record")?;
redis
.set_serialized_to_json(
REDIS_NAMESPACE,
project_id,
ping,
let mut updated_project = false;
if data.is_some() {
// ping succeeded; immediately update its online status in redis
redis
.set_serialized_to_json(
REDIS_NAMESPACE,
project_id,
ping,
None,
)
.await
.wrap_err("failed to set redis key")?;
updated_project = true;
redis
.delete(REDIS_FAILURE_NAMESPACE, project_id)
.await
.wrap_err("failed to delete failure count")?;
} else {
// ping failed; if it's failed too many times, mark it as offline in redis
// otherwise, just add to the fail counter
let failure_count = redis
.incr(REDIS_FAILURE_NAMESPACE, &project_id.to_string())
.await
.wrap_err("failed to increment failure count")?;
if let Some(count) = failure_count
&& count >= ENV.SERVER_PING_MAX_FAIL_COUNT
{
redis
.set_serialized_to_json(
REDIS_NAMESPACE,
project_id,
ping,
None,
)
.await
.wrap_err(
"failed to set failed ping record in redis",
)?;
updated_project = true;
}
}
if updated_project {
DBProject::clear_cache(
(*project_id).into(),
None,
None,
&self.redis,
)
.await
.wrap_err("failed to set redis key")?;
DBProject::clear_cache(
(*project_id).into(),
None,
None,
&self.redis,
)
.await
.inspect_err(|err| {
warn!("failed to clear project cache: {err:#}")
})
.ok();
.inspect_err(|err| {
warn!("failed to clear project cache: {err:#}")
})
.ok();
}
}
ch.end()