Fix binding token extraction and harden startup concurrency

This commit is contained in:
2026-03-05 14:40:27 +08:00
parent feb99faaf3
commit 7ed6f70bab
9 changed files with 96 additions and 17 deletions

View File

@@ -5,9 +5,9 @@ from datetime import UTC, datetime, timedelta
from typing import Callable
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from sqlalchemy import delete, select
from sqlalchemy import delete, select, text
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine, AsyncSession, async_sessionmaker
from app.config import RuntimeSettings, Settings
from app.models.token_binding import TokenBinding
@@ -20,33 +20,45 @@ class ArchiveService:
def __init__(
    self,
    settings: Settings,
    engine: AsyncEngine,
    session_factory: async_sessionmaker[AsyncSession],
    binding_service: BindingService,
    runtime_settings_getter: Callable[[], RuntimeSettings],
) -> None:
    """Store the service's collaborators and create an unstarted scheduler.

    Args:
        settings: Static settings; this class reads
            ``archive_job_interval_minutes`` and
            ``archive_scheduler_lock_key`` from it.
        engine: Async engine used to open the dedicated connection on
            which the leader lock is taken.
        session_factory: Factory for async sessions.
        binding_service: Binding service collaborator.
        runtime_settings_getter: Zero-argument callable returning the
            current ``RuntimeSettings``.
    """
    self.settings = settings
    self.engine = engine
    self.session_factory = session_factory
    self.binding_service = binding_service
    self.runtime_settings_getter = runtime_settings_getter
    # The scheduler is only constructed here; ``start`` decides whether to run it.
    self.scheduler = AsyncIOScheduler(timezone="UTC")
    # Connection holding the leader lock; ``None`` while this worker is not leader.
    self._leader_connection: AsyncConnection | None = None
async def start(self) -> None:
    """Start the archive scheduler on this worker if it wins the leader lock.

    Does nothing when the scheduler is already running. Otherwise the
    leader lock is requested through ``_acquire_leader_lock``; when it is
    not obtained, an info message is logged and the scheduler is left
    stopped. When it is obtained, the ``archive-inactive-bindings``
    interval job is registered and the scheduler is started.

    Raises:
        Exception: Any error raised while registering the job or starting
            the scheduler is re-raised after the leader lock has been
            released.
    """
    if self.scheduler.running:
        return

    # The lock must be decided before the scheduler is touched: registering
    # the job and starting the scheduler ahead of this check would let every
    # worker run the archive job, which is what the lock exists to prevent.
    if not await self._acquire_leader_lock():
        logger.info("Archive scheduler leader lock not acquired; skipping local scheduler start.")
        return

    try:
        self.scheduler.add_job(
            self.archive_inactive_bindings,
            trigger="interval",
            minutes=self.settings.archive_job_interval_minutes,
            id="archive-inactive-bindings",
            replace_existing=True,
            max_instances=1,
            coalesce=True,
        )
        self.scheduler.start()
    except Exception:
        # Do not keep the lock if this worker ends up without a running scheduler.
        await self._release_leader_lock()
        raise

    logger.info("Archive scheduler started on current worker.")
async def stop(self) -> None:
    """Shut the scheduler down (without waiting) and release the leader lock.

    The lock release runs in a ``finally`` clause so that an error raised
    by ``scheduler.shutdown`` cannot leave this worker holding the leader
    lock and its connection. Such an error still propagates to the caller.
    """
    try:
        if self.scheduler.running:
            self.scheduler.shutdown(wait=False)
    finally:
        await self._release_leader_lock()
async def archive_inactive_bindings(self) -> int:
runtime_settings = self.runtime_settings_getter()
@@ -82,3 +94,43 @@ class ArchiveService:
if total_archived:
logger.info("Archived inactive bindings.", extra={"count": total_archived})
return total_archived
async def _acquire_leader_lock(self) -> bool:
    """Try to take the archive scheduler's PostgreSQL advisory lock.

    Opens a dedicated connection from ``self.engine`` and runs
    ``pg_try_advisory_lock`` with ``settings.archive_scheduler_lock_key``.
    On success the connection is kept in ``self._leader_connection``,
    because the lock belongs to that database session.

    Returns:
        ``True`` when this instance already holds the lock or has just
        obtained it; ``False`` when the lock is held elsewhere or the
        attempt failed (the failure is logged).

    Raises:
        Exception: Errors from ``engine.connect()`` are not handled here
            and propagate to the caller.
    """
    if self._leader_connection is not None:
        return True

    connection = await self.engine.connect()
    try:
        acquired = bool(
            await connection.scalar(
                text("SELECT pg_try_advisory_lock(:lock_key)"),
                {"lock_key": self.settings.archive_scheduler_lock_key},
            )
        )
        # Executing the statement implicitly began a transaction. End it now
        # so the long-lived leader connection does not sit "idle in
        # transaction"; a session-level advisory lock survives the commit.
        await connection.commit()
    except Exception:
        # Log first so the original error is recorded even if cleanup fails.
        logger.exception("Failed to acquire archive scheduler leader lock.")
        try:
            # The session may already hold the lock (e.g. the commit failed).
            # Drop the physical connection rather than returning it to the pool.
            await connection.invalidate()
        finally:
            await connection.close()
        return False

    if not acquired:
        await connection.close()
        return False

    self._leader_connection = connection
    return True
async def _release_leader_lock(self) -> None:
    """Release the leader advisory lock and close its connection.

    Does nothing when no leader connection is held. The reference in
    ``self._leader_connection`` is cleared before any I/O, so a second
    call is a no-op even if this one fails part-way.

    Releasing is best effort: a failing ``pg_advisory_unlock`` is logged
    as a warning and not raised. In that case the connection is
    invalidated before being closed, so a session that may still own the
    lock is discarded instead of being returned to the pool.
    """
    if self._leader_connection is None:
        return

    connection = self._leader_connection
    self._leader_connection = None
    try:
        await connection.execute(
            text("SELECT pg_advisory_unlock(:lock_key)"),
            {"lock_key": self.settings.archive_scheduler_lock_key},
        )
    except Exception:
        logger.warning("Failed to release archive scheduler leader lock cleanly.", exc_info=True)
        try:
            # The lock is session-scoped: only ending the session frees it.
            await connection.invalidate()
        except Exception:
            logger.warning("Failed to invalidate archive scheduler leader connection.", exc_info=True)
    finally:
        await connection.close()