Introduce authentication framework with AuthConfig, AuthGate, and Authenticator classes, alongside comprehensive tests for rules, modes, and schemes.

2026-05-29 13:22:31 +02:00
parent d9b639a539
commit bcf5572aeb
39 changed files with 2629 additions and 326 deletions
@@ -51,6 +51,7 @@ public final class FixedWindowLimiter implements RateLimiter {
     *
     * @param olderThanNanos maximum age in nanoseconds before a window is removed
     */
+    @Override
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        windows.entrySet().removeIf(e -> now - e.getValue().windowStart.get() > olderThanNanos);
@@ -80,23 +81,30 @@ public final class FixedWindowLimiter implements RateLimiter {
         * Rolls the window over if it has expired, then counts this request and decides whether
         * it stays within the limit.
         *
+         * <p>The roll-over (resetting {@code windowStart} and {@code count}) and the subsequent
+         * increment must happen atomically together: the previous lock-free version reset the
+         * count in one thread while another was incrementing it, so increments were silently lost
+         * and the window admitted more than {@code limit} requests around a boundary. Guarding the
+         * whole operation with the window's monitor keeps the count exact; contention is per key
+         * only, so throughput is unaffected in practice.</p>
+         *
         * @param now         the current time in nanoseconds
         * @param limit       the per-window request limit
         * @param windowNanos the window length in nanoseconds
         * @return an allow result with the remaining quota, or a deny result with the time until
         *         the window resets
         */
-        Result tryAcquire(long now, long limit, long windowNanos) {
+        synchronized Result tryAcquire(long now, long limit, long windowNanos) {
            long start = windowStart.get();
            if (now - start >= windowNanos) {
-                if (windowStart.compareAndSet(start, now)) {
-                    count.set(0);
-                }
+                windowStart.set(now);
+                count.set(0);
+                start = now;
            }

            long current = count.incrementAndGet();
            if (current > limit) {
-                long retryMs = (windowNanos - (now - windowStart.get())) / 1_000_000L;
+                long retryMs = (windowNanos - (now - start)) / 1_000_000L;
                return Result.deny(limit, Math.max(1, retryMs));
            }
            return Result.allow(limit - current, limit);
@@ -1,14 +1,22 @@
 package dev.coph.nextusweb.server.ratelimit;

-import io.netty.handler.codec.http.HttpRequest;
+import dev.coph.nextusweb.server.auth.Principal;
+import dev.coph.nextusweb.server.router.Request;

 /**
 * Strategy for deriving the logical key under which a request is rate limited. The key
- * determines which bucket a request counts against — for example one bucket per client IP, or
- * one per authenticated user.
+ * determines which bucket a request counts against — for example one bucket per client IP, one
+ * per API key, one per session cookie, or one per authenticated user.
 *
- * <p>Two ready-made resolvers are provided as factory methods: {@link #clientIp()} and
- * {@link #userOrIp()}.</p>
+ * <p>Resolvers receive the framework {@link Request} together with the already-resolved client
+ * IP (the pipeline computes it once, honouring the configured trusted proxies — see
+ * {@link dev.coph.nextusweb.server.net.ClientIp}). They are therefore <strong>not</strong> tied
+ * to bearer tokens: pick whichever request facet identifies the caller for your API.</p>
+ *
+ * <p>Built-in resolvers: {@link #clientIp()}, {@link #header(String)}, {@link #cookie(String)}
+ * and {@link #principal()}. The header/cookie/principal resolvers fall back to the client IP when
+ * their facet is absent, so an anonymous caller is still bucketed rather than sharing one global
+ * bucket. Each key is additionally namespaced by the rule, so different rules never collide.</p>
 */
@FunctionalInterface
 public interface KeyResolver {
@@ -16,45 +24,65 @@ public interface KeyResolver {
    /**
     * Resolves the rate-limit key for a request.
     *
-     * @param req           the incoming HTTP request, used to inspect headers
-     * @param remoteAddress the transport-level remote address, used as a fallback
-     * @return the key the request should be counted against
+     * @param request  the incoming request (headers, cookies, attached principal, ...)
+     * @param clientIp the resolved client IP, honouring trusted proxies
+     * @return the key the request should be counted against; never {@code null}
     */
-    String resolve(HttpRequest req, String remoteAddress);
+    String resolve(Request request, String clientIp);

    /**
-     * Returns a resolver that keys on the client IP address. It prefers the first entry of the
-     * {@code X-Forwarded-For} header (so it works behind a reverse proxy) and falls back to the
-     * transport-level remote address when that header is absent.
+     * Returns a resolver that keys purely on the resolved client IP. This is the spoofing-safe
+     * replacement for the old header-trusting behaviour: the IP has already been derived through
+     * the trusted-proxy policy, so a directly connected client cannot forge it.
     *
     * @return a client-IP key resolver
     */
    static KeyResolver clientIp() {
-        return (req, remote) -> {
-            String forwarded = req.headers().get("X-Forwarded-For");
-            if (forwarded != null && !forwarded.isEmpty()) {
-                int comma = forwarded.indexOf(',');
-                return comma > 0 ? forwarded.substring(0, comma).trim() : forwarded.trim();
-            }
-            return remote;
+        return (request, clientIp) -> clientIp;
+    }
+
+    /**
+     * Returns a resolver that keys on the value of a request header (for example an API key in
+     * {@code X-API-Key}), falling back to the client IP when the header is absent or empty.
+     *
+     * @param headerName the header to key on
+     * @return a header-value key resolver
+     */
+    static KeyResolver header(String headerName) {
+        return (request, clientIp) -> {
+            String value = request.header(headerName);
+            return (value != null && !value.isEmpty()) ? "h:" + value : "ip:" + clientIp;
        };
    }

    /**
-     * Returns a resolver that keys on the authenticated user when possible, falling back to the
-     * client IP otherwise. A {@code Bearer} token from the {@code Authorization} header yields a
-     * {@code "u:<token>"} key; otherwise the {@code "ip:<address>"} key from {@link #clientIp()}
-     * is used.
+     * Returns a resolver that keys on the value of a request cookie (for example a session id),
+     * falling back to the client IP when the cookie is absent or empty.
     *
-     * @return a user-or-IP key resolver
+     * @param cookieName the cookie to key on
+     * @return a cookie-value key resolver
     */
-    static KeyResolver userOrIp() {
-        return (req, remote) -> {
-            String auth = req.headers().get("Authorization");
-            if (auth != null && auth.startsWith("Bearer ")) {
-                return "u:" + auth.substring(7);
-            }
-            return "ip:" + clientIp().resolve(req, remote);
+    static KeyResolver cookie(String cookieName) {
+        return (request, clientIp) -> {
+            String value = request.cookie(cookieName);
+            return (value != null && !value.isEmpty()) ? "c:" + value : "ip:" + clientIp;
+        };
+    }
+
+    /**
+     * Returns a resolver that keys on the authenticated {@link Principal} attached to the request,
+     * falling back to the client IP for unauthenticated requests.
+     *
+     * <p>For this to key on the principal, the {@link dev.coph.nextusweb.server.auth.AuthGate auth
+     * layer} must have run before rate limiting (configure it to authenticate the relevant paths).
+     * When no principal is present the resolver degrades gracefully to per-IP limiting.</p>
+     *
+     * @return a principal-or-IP key resolver
+     */
+    static KeyResolver principal() {
+        return (request, clientIp) -> {
+            Principal p = request.principal();
+            return p != null ? "p:" + p.id() : "ip:" + clientIp;
        };
    }
 }
@@ -1,7 +1,7 @@
 package dev.coph.nextusweb.server.ratelimit;

 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;

 /**
 * A {@link RateLimiter} implementing the <em>leaky bucket</em> algorithm.
@@ -12,8 +12,10 @@ import java.util.concurrent.atomic.AtomicLong;
 * enough has leaked away. Compared to the token bucket this smooths bursts into a steady
 * outflow rather than allowing them through up front.</p>
 *
- * <p>State is held in {@link AtomicLong}s and updated with a lock-free compare-and-set loop, so
- * the limiter is safe for concurrent use.</p>
+ * <p>Each bucket's water level and last-leak timestamp are held together in a single immutable
+ * {@link LeakyBucket.State} behind one {@link AtomicReference} and advanced with a lock-free
+ * compare-and-set loop, so the level and the timestamp it was leaked to are always published
+ * together and the limiter is safe for concurrent use.</p>
 */
 public final class LeakyBucketLimiter implements RateLimiter {

@@ -51,20 +53,19 @@ public final class LeakyBucketLimiter implements RateLimiter {
     *
     * @param olderThanNanos maximum idle age in nanoseconds before a bucket is removed
     */
+    @Override
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
-        buckets.entrySet().removeIf(e -> now - e.getValue().lastLeakNanos.get() > olderThanNanos);
+        buckets.entrySet().removeIf(e -> now - e.getValue().lastLeak() > olderThanNanos);
    }

    /**
     * A single client's leaky bucket, tracking the current water level and the timestamp up to
-     * which leakage has been accounted for.
+     * which leakage has been accounted for as one atomic unit.
     */
    private static final class LeakyBucket {
-        /** Current water level (number of units in the bucket). */
-        final AtomicLong waterLevel;
-        /** Timestamp, in nanoseconds, up to which leakage has been applied. */
-        final AtomicLong lastLeakNanos;
+        /** Holds the current {@code (waterLevel, lastLeakNanos)} pair as one atomic unit. */
+        private final AtomicReference<State> state;

        /**
         * Creates an empty bucket.
@@ -72,12 +73,23 @@ public final class LeakyBucketLimiter implements RateLimiter {
         * @param now the creation timestamp in nanoseconds
         */
        LeakyBucket(long now) {
-            this.waterLevel = new AtomicLong(0);
-            this.lastLeakNanos = new AtomicLong(now);
+            this.state = new AtomicReference<>(new State(0, now));
        }

        /**
-         * Applies elapsed leakage and, if there is room, adds one unit of water.
+         * Returns the timestamp leakage was last accounted to, used by {@link #cleanup(long)}.
+         *
+         * @return the last-leak timestamp in nanoseconds
+         */
+        long lastLeak() {
+            return state.get().lastLeakNanos();
+        }
+
+        /**
+         * Applies elapsed leakage and, if there is room, adds one unit of water. The new level and
+         * the timestamp it was leaked to are swapped in together, so the previous race where the
+         * level advanced but the timestamp update was lost (drifting the leak accounting) can no
+         * longer occur.
         *
         * @param now               the current time in nanoseconds
         * @param capacity          the bucket capacity
@@ -87,24 +99,33 @@ public final class LeakyBucketLimiter implements RateLimiter {
         */
        Result tryAcquire(long now, long capacity, long leakIntervalNanos) {
            while (true) {
-                long lastLeak = lastLeakNanos.get();
-                long current = waterLevel.get();
+                State current = state.get();

-                long leaked = (now - lastLeak) / leakIntervalNanos;
-                long newLevel = Math.max(0, current - leaked);
+                long leaked = (now - current.lastLeakNanos()) / leakIntervalNanos;
+                long newLevel = Math.max(0, current.waterLevel() - leaked);

                if (newLevel >= capacity) {
                    long retryMs = leakIntervalNanos / 1_000_000L;
                    return Result.deny(capacity, retryMs);
                }

-                long newLastLeak = leaked > 0 ? lastLeak + leaked * leakIntervalNanos : lastLeak;
+                long newLastLeak = leaked > 0
+                        ? current.lastLeakNanos() + leaked * leakIntervalNanos
+                        : current.lastLeakNanos();

-                if (waterLevel.compareAndSet(current, newLevel + 1)) {
-                    lastLeakNanos.compareAndSet(lastLeak, newLastLeak);
+                if (state.compareAndSet(current, new State(newLevel + 1, newLastLeak))) {
                    return Result.allow(capacity - newLevel - 1, capacity);
                }
            }
        }
+
+        /**
+         * Immutable snapshot of a bucket's mutable state.
+         *
+         * @param waterLevel    current water level (number of units in the bucket)
+         * @param lastLeakNanos timestamp leakage has been applied up to, in nanoseconds
+         */
+        private record State(long waterLevel, long lastLeakNanos) {
+        }
    }
 }
@@ -1,9 +1,12 @@
 package dev.coph.nextusweb.server.ratelimit;

 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 /**
 * Immutable mapping from request paths to the {@link Rule rate-limit rules} that apply to them.
@@ -27,6 +30,8 @@ public final class RateLimitConfig {
    private final Map<String, Rule> exactPathRules;
    /** Prefix rules, pre-sorted longest-prefix-first so the most specific match wins. */
    private final List<PrefixRule> prefixRules;
+    /** Every distinct limiter referenced by any rule, by identity; used for periodic cleanup. */
+    private final Set<RateLimiter> allLimiters;

    /**
     * Builds an immutable configuration from a {@link Builder}, copying the exact-path rules
@@ -40,6 +45,15 @@ public final class RateLimitConfig {
        this.prefixRules = b.prefixRules.stream()
                .sorted((a, c) -> Integer.compare(c.prefix.length(), a.prefix.length()))
                .toList();
+
+        // Collect the distinct limiter instances once so the gate's periodic cleanup can iterate
+        // them. Identity-based de-duplication keeps a limiter shared across several rules from
+        // being cleaned multiple times per pass.
+        Set<RateLimiter> limiters = Collections.newSetFromMap(new IdentityHashMap<>());
+        if (globalRule != null) limiters.add(globalRule.limiter());
+        for (Rule r : exactPathRules.values()) limiters.add(r.limiter());
+        for (PrefixRule pr : prefixRules) limiters.add(pr.rule.limiter());
+        this.allLimiters = Collections.unmodifiableSet(limiters);
    }

    /**
@@ -79,6 +93,16 @@ public final class RateLimitConfig {
        return rules;
    }

+    /**
+     * Returns every distinct limiter referenced by this configuration, for periodic state
+     * eviction by {@link RateLimitGate}.
+     *
+     * @return the immutable set of distinct limiters (de-duplicated by identity)
+     */
+    public Set<RateLimiter> allLimiters() {
+        return allLimiters;
+    }
+
    /**
     * A single rate-limit rule: a limiter, the key resolver feeding it, and a name used to
     * namespace keys and aid diagnostics.
@@ -1,7 +1,7 @@
 package dev.coph.nextusweb.server.ratelimit;

+import dev.coph.nextusweb.server.router.Request;
 import dev.coph.nextusweb.server.router.Response;
-import io.netty.handler.codec.http.HttpRequest;

 import java.util.List;
 import java.util.concurrent.Executors;
@@ -22,19 +22,38 @@ import java.util.concurrent.TimeUnit;
 */
 public final class RateLimitGate {

+    /** Default idle age after which per-key limiter state is eligible for eviction. */
+    private static final long DEFAULT_STALE_AFTER_NANOS = 10L * 60 * 1_000_000_000L;
+
    /** The rule set this gate enforces. */
    private final RateLimitConfig config;
+    /** Idle age (nanoseconds) after which a limiter's per-key state may be evicted. */
+    private final long staleAfterNanos;
    /** Single-threaded scheduler driving periodic cleanup of stale buckets. */
    private final ScheduledExecutorService cleanup;

    /**
     * Creates a gate for the given configuration and starts a background cleanup task that runs
-     * every five minutes on a daemon thread.
+     * every five minutes on a daemon thread, evicting per-key state idle for more than ten
+     * minutes.
     *
     * @param config the rate-limit rules to enforce
     */
    public RateLimitGate(RateLimitConfig config) {
+        this(config, DEFAULT_STALE_AFTER_NANOS);
+    }
+
+    /**
+     * Creates a gate with an explicit idle age before per-key limiter state is evicted.
+     *
+     * @param config          the rate-limit rules to enforce
+     * @param staleAfterNanos idle age in nanoseconds after which per-key state is evicted; must
+     *                        be positive
+     */
+    public RateLimitGate(RateLimitConfig config, long staleAfterNanos) {
+        if (staleAfterNanos <= 0) throw new IllegalArgumentException("staleAfterNanos must be > 0");
        this.config = config;
+        this.staleAfterNanos = staleAfterNanos;
        this.cleanup = Executors.newSingleThreadScheduledExecutor(r -> {
            Thread t = new Thread(r, "ratelimit-cleanup");
            t.setDaemon(true);
@@ -52,12 +71,13 @@ public final class RateLimitGate {
     * independent. The first denial short-circuits and is returned immediately; if every rule
     * allows the request, the result with the least remaining quota is returned.</p>
     *
-     * @param req           the incoming request, used by key resolvers
-     * @param path          the request path used to select rules
-     * @param remoteAddress the client's remote address, used as a key-resolver fallback
+     * @param req      the incoming request, used by key resolvers
+     * @param path     the request path used to select rules
+     * @param clientIp the resolved client IP (honouring trusted proxies), used as a key-resolver
+     *                 fallback
     * @return the limiting result, or {@code null} if no rule applies to the path
     */
-    public RateLimiter.Result check(HttpRequest req, String path, String remoteAddress) {
+    public RateLimiter.Result check(Request req, String path, String clientIp) {
        List<RateLimitConfig.Rule> rules = config.rulesFor(path);
        if (rules.isEmpty()) return null;

@@ -65,7 +85,7 @@ public final class RateLimitGate {
        RateLimiter.Result strictest = null;

        for (var rule : rules) {
-            String key = rule.name() + ":" + rule.keyResolver().resolve(req, remoteAddress);
+            String key = rule.name() + ":" + rule.keyResolver().resolve(req, clientIp);
            RateLimiter.Result result = rule.limiter().tryAcquire(key, now);

            if (!result.allowed()) return result;
@@ -97,11 +117,18 @@ public final class RateLimitGate {
    }

    /**
-     * Periodic cleanup hook invoked by the background scheduler to evict limiter state that has
-     * not been touched recently (older than roughly ten minutes).
+     * Periodic cleanup hook invoked by the background scheduler. Asks every configured limiter to
+     * evict per-key state idle for longer than {@link #staleAfterNanos}. A failure cleaning one
+     * limiter must not abort the others or kill the scheduler, so each call is guarded.
     */
    private void doCleanup() {
-        long threshold = 10L * 60 * 1_000_000_000L;
+        for (RateLimiter limiter : config.allLimiters()) {
+            try {
+                limiter.cleanup(staleAfterNanos);
+            } catch (RuntimeException ignored) {
+                // Best-effort eviction; never let one limiter break the cleanup cycle.
+            }
+        }
    }

    /**
@@ -8,6 +8,10 @@ package dev.coph.nextusweb.server.ratelimit;
 * {@link LeakyBucketLimiter}, {@link FixedWindowLimiter} and {@link SlidingWindowLimiter}.
 * Implementations are expected to be thread-safe, since the same limiter is shared across all
 * request-handling threads.</p>
+ *
+ * <p>The interface remains effectively functional ({@link #tryAcquire} is its single abstract
+ * method), so simple stateless limiters can still be written as a lambda; stateful limiters that
+ * keep one entry per key should additionally override {@link #cleanup(long)}.</p>
 */
 public interface RateLimiter {

@@ -21,6 +25,24 @@ public interface RateLimiter {
     */
    Result tryAcquire(String key, long nowNanos);

+    /**
+     * Evicts per-key state that has not been accessed within the given age, bounding the memory
+     * a limiter consumes when it has seen many distinct keys.
+     *
+     * <p>Stateful implementations keep one entry per key seen ({@code clientIp}, API key, ...).
+     * Without periodic eviction those maps grow without bound, which is both a memory leak and a
+     * denial-of-service vector (an attacker who varies the key on every request can exhaust the
+     * heap). {@link RateLimitGate} calls this periodically for every configured limiter.</p>
+     *
+     * <p>The default implementation does nothing, which is correct for stateless limiters; any
+     * limiter that retains per-key state <strong>must</strong> override it to evict stale
+     * entries.</p>
+     *
+     * @param olderThanNanos maximum idle age in nanoseconds before an entry is removed
+     */
+    default void cleanup(long olderThanNanos) {
+    }
+
    /**
     * Immutable outcome of a {@link #tryAcquire(String, long)} call.
     *
@@ -53,6 +53,7 @@ public final class SlidingWindowLimiter implements RateLimiter {
     *
     * @param olderThanNanos maximum age in nanoseconds before a window is removed
     */
+    @Override
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        windows.entrySet().removeIf(e -> now - e.getValue().windowStart.get() > olderThanNanos);
@@ -1,7 +1,7 @@
 package dev.coph.nextusweb.server.ratelimit;

 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;

 /**
 * A {@link RateLimiter} implementing the <em>token bucket</em> algorithm.
@@ -12,8 +12,11 @@ import java.util.concurrent.atomic.AtomicLong;
 * with a retry hint computed from the refill rate. This permits short bursts (up to the bucket
 * capacity) while bounding the sustained rate.</p>
 *
- * <p>Token counts are stored in fixed-point form (scaled by 1e9) inside {@link AtomicLong}s and
- * updated with a lock-free compare-and-set loop, so the limiter is safe for concurrent use.</p>
+ * <p>Token counts are stored in fixed-point form (scaled by 1e9). Each bucket's token count and
+ * last-refill timestamp are held together in a single immutable {@link Bucket.State} behind one
+ * {@link AtomicReference} and advanced with a lock-free compare-and-set loop, so a refill and the
+ * timestamp it is based on are always published as one atomic unit and the limiter is safe for
+ * concurrent use.</p>
 */
 public final class TokenBucketLimiter implements RateLimiter {

@@ -55,6 +58,7 @@ public final class TokenBucketLimiter implements RateLimiter {
     *
     * @param olderThanNanos maximum idle age in nanoseconds before a bucket is removed
     */
+    @Override
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        buckets.entrySet().removeIf(e -> now - e.getValue().lastAccess() > olderThanNanos);
@@ -63,68 +67,77 @@ public final class TokenBucketLimiter implements RateLimiter {

    /**
     * A single client's token bucket. Tokens are stored in fixed-point form (multiplied by
-     * 1e9) to retain sub-token precision while using integer atomics.
-     *
-     * @param tokensFixed      current token count in fixed-point (tokens &times; 1e9)
-     * @param lastRefillNanos  timestamp of the last refill/consume, in nanoseconds
+     * 1e9) to retain sub-token precision; the mutable pair {@code (tokens, timestamp)} lives in a
+     * single {@link AtomicReference} so updates are atomic as a unit.
     */
-    private record Bucket(AtomicLong tokensFixed, AtomicLong lastRefillNanos) {
-            /**
-             * Creates a full bucket.
-             *
-             * @param tokensFixed     initial token count (in whole tokens, scaled internally)
-             * @param lastRefillNanos the creation timestamp in nanoseconds
-             */
-            private Bucket(long tokensFixed, long lastRefillNanos) {
-                this(new AtomicLong(tokensFixed * 1_000_000_000L), new AtomicLong(lastRefillNanos));
-            }
+    private static final class Bucket {
+        /** Holds the current {@code (tokensFixed, lastRefillNanos)} pair as one atomic unit. */
+        private final AtomicReference<State> state;

-            /**
-             * Returns the timestamp of the last access, used by {@link #cleanup(long)}.
-             *
-             * @return the last-refill timestamp in nanoseconds
-             */
-            long lastAccess() {
-                return lastRefillNanos.get();
-            }
+        /**
+         * Creates a full bucket.
+         *
+         * @param tokens          initial token count in whole tokens (scaled internally)
+         * @param lastRefillNanos the creation timestamp in nanoseconds
+         */
+        Bucket(long tokens, long lastRefillNanos) {
+            this.state = new AtomicReference<>(new State(tokens * 1_000_000_000L, lastRefillNanos));
+        }

-            /**
-             * Refills the bucket according to elapsed time and attempts to consume one token,
-             * retrying via compare-and-set on contention.
-             *
-             * @param now              the current time in nanoseconds
-             * @param capacity         the bucket capacity in whole tokens
-             * @param tokensPerNano    the refill rate in tokens per nanosecond
-             * @param refillIntervalNs the nominal nanoseconds per token (unused in the hot path
-             *                         but kept for symmetry/retry computation)
-             * @return an allow result with the remaining tokens, or a deny result with a retry
-             *         hint when fewer than one token is available
-             */
-            Result tryAcquire(long now, long capacity, double tokensPerNano, long refillIntervalNs) {
-                while (true) {
-                    long lastRefill = lastRefillNanos.get();
-                    long currentTokens = tokensFixed.get();
+        /**
+         * Returns the timestamp of the last access, used by {@link #cleanup(long)}.
+         *
+         * @return the last-refill timestamp in nanoseconds
+         */
+        long lastAccess() {
+            return state.get().lastRefillNanos();
+        }

-                    long elapsed = now - lastRefill;
-                    long refilled = currentTokens;
-                    if (elapsed > 0) {
-                        long addedFixed = (long) (elapsed * tokensPerNano * 1_000_000_000.0);
-                        refilled = Math.min(currentTokens + addedFixed, capacity * 1_000_000_000L);
-                    }
+        /**
+         * Refills the bucket according to elapsed time and attempts to consume one token,
+         * retrying via compare-and-set on contention. The token count and the timestamp it was
+         * computed from are swapped in together, so no thread can ever observe refilled tokens
+         * paired with a stale timestamp (or vice versa).
+         *
+         * @param now              the current time in nanoseconds
+         * @param capacity         the bucket capacity in whole tokens
+         * @param tokensPerNano    the refill rate in tokens per nanosecond
+         * @param refillIntervalNs the nominal nanoseconds per token (currently unused by this method)
+         * @return an allow result with the remaining tokens, or a deny result with a retry
+         *         hint when fewer than one token is available
+         */
+        Result tryAcquire(long now, long capacity, double tokensPerNano, long refillIntervalNs) {
+            long oneTokenFixed = 1_000_000_000L;
+            while (true) {
+                State current = state.get();

-                    long oneTokenFixed = 1_000_000_000L;
-                    if (refilled < oneTokenFixed) {
-                        long deficitFixed = oneTokenFixed - refilled;
-                        long retryNs = (long) (deficitFixed / (tokensPerNano * 1_000_000_000.0));
-                        return Result.deny(capacity, Math.max(1, retryNs / 1_000_000));
-                    }
+                long elapsed = now - current.lastRefillNanos();
+                long refilled = current.tokensFixed();
+                if (elapsed > 0) {
+                    long addedFixed = (long) (elapsed * tokensPerNano * 1_000_000_000.0);
+                    refilled = Math.min(current.tokensFixed() + addedFixed, capacity * 1_000_000_000L);
+                }

-                    long newTokens = refilled - oneTokenFixed;
-                    if (tokensFixed.compareAndSet(currentTokens, newTokens)) {
-                        lastRefillNanos.set(now);
-                        return Result.allow(newTokens / 1_000_000_000L, capacity);
-                    }
+                if (refilled < oneTokenFixed) {
+                    long deficitFixed = oneTokenFixed - refilled;
+                    long retryNs = (long) (deficitFixed / (tokensPerNano * 1_000_000_000.0));
+                    return Result.deny(capacity, Math.max(1, retryNs / 1_000_000));
+                }
+
+                long newTokens = refilled - oneTokenFixed;
+                if (state.compareAndSet(current, new State(newTokens, now))) {
+                    return Result.allow(newTokens / 1_000_000_000L, capacity);
                }
            }
        }
+
+        /**
+         * Immutable snapshot of a bucket's mutable state.
+         *
+         * @param tokensFixed     current token count in fixed-point (tokens &times; 1e9)
+         * @param lastRefillNanos timestamp the token count was last advanced to, in nanoseconds
+         */
+        private record State(long tokensFixed, long lastRefillNanos) {
+        }
+    }
 }