Add comprehensive Javadoc documentation to server components, including annotations, request/response handling, routing, and WebSocket support.

2026-05-29 08:50:05 +02:00
parent f00a1098b4
commit 5d6e8622bf
33 changed files with 1938 additions and 53 deletions
@@ -3,37 +3,89 @@ package dev.coph.nextusweb.server.ratelimit;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;

+/**
+ * A {@link RateLimiter} implementing the <em>fixed window</em> counter algorithm.
+ *
+ * <p>Time is divided into consecutive windows of {@code windowMillis} length. Each key may make
+ * up to {@code limit} requests within a window; the counter resets to zero when a new window
+ * begins. This is the simplest counting strategy but can permit up to twice the limit across a
+ * window boundary (the "burst at the edge" problem) &mdash; see {@link SlidingWindowLimiter}
+ * for a smoother variant.</p>
+ *
+ * <p>Window state is held in {@link AtomicLong}s, making the limiter safe for concurrent
+ * use.</p>
+ */
 public final class FixedWindowLimiter implements RateLimiter {

+    /** Maximum number of requests permitted per window. */
    private final long limit;
+    /** Window length in nanoseconds. */
    private final long windowNanos;
+    /** Per-key windows, created on demand. */
    private final ConcurrentHashMap<String, Window> windows = new ConcurrentHashMap<>();

+    /**
+     * Creates a fixed-window limiter.
+     *
+     * @param limit        the maximum number of requests per window
+     * @param windowMillis the window length in milliseconds
+     */
    public FixedWindowLimiter(long limit, long windowMillis) {
        this.limit = limit;
        this.windowNanos = windowMillis * 1_000_000L;
    }

+    /**
+     * {@inheritDoc}
+     *
+     * <p>Lazily creates the window for {@code key} and counts this request against it.</p>
+     */
    @Override
    public Result tryAcquire(String key, long nowNanos) {
        Window w = windows.computeIfAbsent(key, k -> new Window(nowNanos));
        return w.tryAcquire(nowNanos, limit, windowNanos);
    }

+    /**
+     * Evicts windows whose start time is older than the given age.
+     *
+     * @param olderThanNanos maximum age in nanoseconds before a window is removed
+     */
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        windows.entrySet().removeIf(e -> now - e.getValue().windowStart.get() > olderThanNanos);
    }

+    /**
+     * A single client's fixed window, tracking the window start time and the request count
+     * within it.
+     */
    private static final class Window {
+        /** Start timestamp of the current window, in nanoseconds. */
        final AtomicLong windowStart;
+        /** Number of requests counted in the current window. */
        final AtomicLong count;

+        /**
+         * Creates a window starting at the given time with a zero count.
+         *
+         * @param now the window start timestamp in nanoseconds
+         */
        Window(long now) {
            this.windowStart = new AtomicLong(now);
            this.count = new AtomicLong(0);
        }

+        /**
+         * Rolls the window over if it has expired, then counts this request and decides whether
+         * it stays within the limit.
+         *
+         * @param now         the current time in nanoseconds
+         * @param limit       the per-window request limit
+         * @param windowNanos the window length in nanoseconds
+         * @return an allow result with the remaining quota, or a deny result with the time until
+         *         the window resets
+         */
        Result tryAcquire(long now, long limit, long windowNanos) {
            long start = windowStart.get();
            if (now - start >= windowNanos) {
@@ -50,4 +102,4 @@ public final class FixedWindowLimiter implements RateLimiter {
            return Result.allow(limit - current, limit);
        }
    }
-}
+}
@@ -2,10 +2,33 @@ package dev.coph.nextusweb.server.ratelimit;

 import io.netty.handler.codec.http.HttpRequest;

+/**
+ * Strategy for deriving the logical key under which a request is rate limited. The key
+ * determines which bucket a request counts against — for example one bucket per client IP, or
+ * one per authenticated user.
+ *
+ * <p>Two ready-made resolvers are provided as factory methods: {@link #clientIp()} and
+ * {@link #userOrIp()}.</p>
+ */
@FunctionalInterface
 public interface KeyResolver {
+
+    /**
+     * Resolves the rate-limit key for a request.
+     *
+     * @param req           the incoming HTTP request, used to inspect headers
+     * @param remoteAddress the transport-level remote address, used as a fallback
+     * @return the key the request should be counted against
+     */
    String resolve(HttpRequest req, String remoteAddress);

+    /**
+     * Returns a resolver that keys on the client IP address. It prefers the first entry of the
+     * {@code X-Forwarded-For} header (so it works behind a reverse proxy) and falls back to the
+     * transport-level remote address when that header is absent.
+     *
+     * @return a client-IP key resolver
+     */
    static KeyResolver clientIp() {
        return (req, remote) -> {
            String forwarded = req.headers().get("X-Forwarded-For");
@@ -17,6 +40,14 @@ public interface KeyResolver {
        };
    }

+    /**
+     * Returns a resolver that keys on the authenticated user when possible, falling back to the
+     * client IP otherwise. A {@code Bearer} token from the {@code Authorization} header yields a
+     * {@code "u:<token>"} key; otherwise the {@code "ip:<address>"} key from {@link #clientIp()}
+     * is used.
+     *
+     * @return a user-or-IP key resolver
+     */
    static KeyResolver userOrIp() {
        return (req, remote) -> {
            String auth = req.headers().get("Authorization");
@@ -26,4 +57,4 @@ public interface KeyResolver {
            return "ip:" + clientIp().resolve(req, remote);
        };
    }
-}
+}
@@ -3,37 +3,88 @@ package dev.coph.nextusweb.server.ratelimit;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;

+/**
+ * A {@link RateLimiter} implementing the <em>leaky bucket</em> algorithm.
+ *
+ * <p>Each key owns a bucket whose "water level" rises by one with every request and "leaks"
+ * back down at a fixed rate of {@code requestsPerSecond} units per second. A request is allowed
+ * while the (post-leak) level is below {@code capacity}; once full, requests are denied until
+ * enough has leaked away. Compared to the token bucket this smooths bursts into a steady
+ * outflow rather than allowing them through up front.</p>
+ *
+ * <p>State is held in {@link AtomicLong}s and updated with a lock-free compare-and-set loop, so
+ * the limiter is safe for concurrent use.</p>
+ */
 public final class LeakyBucketLimiter implements RateLimiter {

+    /** Maximum water level (number of queued units) the bucket tolerates. */
    private final long capacity;
-    private final long leakIntervalNanos; 
+    /** Nanoseconds it takes for exactly one unit to leak out. */
+    private final long leakIntervalNanos;
+    /** Per-key buckets, created on demand. */
    private final ConcurrentHashMap<String, LeakyBucket> buckets = new ConcurrentHashMap<>();

+    /**
+     * Creates a leaky-bucket limiter.
+     *
+     * @param requestsPerSecond the steady leak (drain) rate in units per second
+     * @param capacity          the bucket capacity, i.e. the maximum tolerated backlog
+     */
    public LeakyBucketLimiter(long requestsPerSecond, long capacity) {
        this.capacity = capacity;
        this.leakIntervalNanos = 1_000_000_000L / Math.max(1, requestsPerSecond);
    }

+    /**
+     * {@inheritDoc}
+     *
+     * <p>Lazily creates the bucket for {@code key} and attempts to add one unit of water.</p>
+     */
    @Override
    public Result tryAcquire(String key, long nowNanos) {
        LeakyBucket b = buckets.computeIfAbsent(key, k -> new LeakyBucket(nowNanos));
        return b.tryAcquire(nowNanos, capacity, leakIntervalNanos);
    }

+    /**
+     * Evicts buckets that have not leaked/been accessed within the given age.
+     *
+     * @param olderThanNanos maximum idle age in nanoseconds before a bucket is removed
+     */
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        buckets.entrySet().removeIf(e -> now - e.getValue().lastLeakNanos.get() > olderThanNanos);
    }

+    /**
+     * A single client's leaky bucket, tracking the current water level and the timestamp up to
+     * which leakage has been accounted for.
+     */
    private static final class LeakyBucket {
+        /** Current water level (number of units in the bucket). */
        final AtomicLong waterLevel;
+        /** Timestamp, in nanoseconds, up to which leakage has been applied. */
        final AtomicLong lastLeakNanos;

+        /**
+         * Creates an empty bucket.
+         *
+         * @param now the creation timestamp in nanoseconds
+         */
        LeakyBucket(long now) {
            this.waterLevel = new AtomicLong(0);
            this.lastLeakNanos = new AtomicLong(now);
        }

+        /**
+         * Applies elapsed leakage and, if there is room, adds one unit of water.
+         *
+         * @param now               the current time in nanoseconds
+         * @param capacity          the bucket capacity
+         * @param leakIntervalNanos the nanoseconds per leaked unit
+         * @return an allow result with the remaining headroom, or a deny result with a retry
+         *         hint when the bucket is full
+         */
        Result tryAcquire(long now, long capacity, long leakIntervalNanos) {
            while (true) {
                long lastLeak = lastLeakNanos.get();
@@ -56,4 +107,4 @@ public final class LeakyBucketLimiter implements RateLimiter {
            }
        }
    }
-}
+}
@@ -5,11 +5,35 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

+/**
+ * Immutable mapping from request paths to the {@link Rule rate-limit rules} that apply to them.
+ *
+ * <p>Three kinds of rules can be configured, resolved with the following precedence by
+ * {@link #rulesFor(String)}:</p>
+ * <ol>
+ *     <li>an optional <strong>global</strong> rule that applies to every request;</li>
+ *     <li><strong>exact-path</strong> rules matched by exact path equality;</li>
+ *     <li><strong>prefix</strong> rules matched by path prefix, evaluated longest-prefix-first.</li>
+ * </ol>
+ *
+ * <p>A request is subject to the global rule (if any) plus the single most specific path rule
+ * that matches. Instances are built through the nested {@link Builder}.</p>
+ */
 public final class RateLimitConfig {

+    /** Rule applied to every request, or {@code null} if no global rule is configured. */
    private final Rule globalRule;
+    /** Rules matched by exact path equality, keyed by path. */
    private final Map<String, Rule> exactPathRules;
+    /** Prefix rules, pre-sorted longest-prefix-first so the most specific match wins. */
    private final List<PrefixRule> prefixRules;
+
+    /**
+     * Builds an immutable configuration from a {@link Builder}, copying the exact-path rules
+     * and sorting the prefix rules by descending prefix length.
+     *
+     * @param b the builder carrying the configured rules
+     */
    private RateLimitConfig(Builder b) {
        this.globalRule = b.globalRule;
        this.exactPathRules = Map.copyOf(b.exactPathRules);
@@ -18,10 +42,25 @@ public final class RateLimitConfig {
                .toList();
    }

+    /**
+     * Creates a new, empty {@link Builder}.
+     *
+     * @return a fresh builder
+     */
    public static Builder builder() {
        return new Builder();
    }

+    /**
+     * Returns the ordered list of rules that apply to the given path.
+     *
+     * <p>The list contains the global rule first (if configured) followed by at most one
+     * path-specific rule: the exact-path rule if one matches, otherwise the longest matching
+     * prefix rule. The returned list may be empty if no rule applies.</p>
+     *
+     * @param path the request path
+     * @return the applicable rules, in evaluation order
+     */
    public List<Rule> rulesFor(String path) {
        List<Rule> rules = new ArrayList<>(2);
        if (globalRule != null) rules.add(globalRule);
@@ -40,34 +79,83 @@ public final class RateLimitConfig {
        return rules;
    }

+    /**
+     * A single rate-limit rule: a limiter, the key resolver feeding it, and a name used to
+     * namespace keys and aid diagnostics.
+     *
+     * @param limiter     the limiter that enforces the quota
+     * @param keyResolver resolves the per-request key the limiter buckets on
+     * @param name        a human-readable label (e.g. {@code "global"} or a path/prefix)
+     */
    public record Rule(RateLimiter limiter, KeyResolver keyResolver, String name) {
    }

+    /**
+     * Internal pairing of a path prefix with the rule that applies to paths starting with it.
+     *
+     * @param prefix the path prefix
+     * @param rule   the rule to apply for matching paths
+     */
    private record PrefixRule(String prefix, Rule rule) {
    }

+    /**
+     * Fluent builder for {@link RateLimitConfig}.
+     */
    public static final class Builder {
+        /** Accumulated exact-path rules, keyed by path. */
        private final Map<String, Rule> exactPathRules = new HashMap<>();
+        /** Accumulated prefix rules. */
        private final List<PrefixRule> prefixRules = new ArrayList<>();
+        /** The global rule, if configured. */
        private Rule globalRule;

+        /**
+         * Sets the global rule applied to every request.
+         *
+         * @param limiter the limiter enforcing the global quota
+         * @param keys    the key resolver for the global rule
+         * @return this builder, for fluent chaining
+         */
        public Builder global(RateLimiter limiter, KeyResolver keys) {
            this.globalRule = new Rule(limiter, keys, "global");
            return this;
        }

+        /**
+         * Adds a rule that applies only to requests whose path equals {@code path} exactly.
+         *
+         * @param path    the exact request path
+         * @param limiter the limiter enforcing the quota
+         * @param keys    the key resolver for this rule
+         * @return this builder, for fluent chaining
+         */
        public Builder forPath(String path, RateLimiter limiter, KeyResolver keys) {
            exactPathRules.put(path, new Rule(limiter, keys, path));
            return this;
        }

+        /**
+         * Adds a rule that applies to requests whose path starts with {@code prefix}. When
+         * several prefixes match, the longest one wins.
+         *
+         * @param prefix  the path prefix
+         * @param limiter the limiter enforcing the quota
+         * @param keys    the key resolver for this rule
+         * @return this builder, for fluent chaining
+         */
        public Builder forPrefix(String prefix, RateLimiter limiter, KeyResolver keys) {
            prefixRules.add(new PrefixRule(prefix, new Rule(limiter, keys, prefix + "*")));
            return this;
        }

+        /**
+         * Builds the immutable {@link RateLimitConfig}.
+         *
+         * @return the configured instance
+         */
        public RateLimitConfig build() {
            return new RateLimitConfig(this);
        }
    }
-}
+}
@@ -8,11 +8,31 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;

+/**
+ * Request-pipeline entry point that applies a {@link RateLimitConfig} to incoming requests and
+ * surfaces the outcome as standard {@code X-RateLimit-*} response headers.
+ *
+ * <p>For each request the gate evaluates every {@link RateLimitConfig.Rule rule} that applies
+ * to the request path. If any rule denies the request, evaluation stops and that denial is
+ * returned; otherwise the strictest (lowest remaining) allowance is returned so the headers
+ * reflect the tightest applicable budget.</p>
+ *
+ * <p>A daemon background thread periodically triggers cleanup of stale limiter state. The gate
+ * should be {@link #shutdown() shut down} when the server stops.</p>
+ */
 public final class RateLimitGate {

+    /** The rule set this gate enforces. */
    private final RateLimitConfig config;
+    /** Single-threaded scheduler driving periodic cleanup of stale buckets. */
    private final ScheduledExecutorService cleanup;

+    /**
+     * Creates a gate for the given configuration and starts a background cleanup task that runs
+     * every five minutes on a daemon thread.
+     *
+     * @param config the rate-limit rules to enforce
+     */
    public RateLimitGate(RateLimitConfig config) {
        this.config = config;
        this.cleanup = Executors.newSingleThreadScheduledExecutor(r -> {
@@ -22,8 +42,21 @@ public final class RateLimitGate {
        });
        cleanup.scheduleAtFixedRate(this::doCleanup, 5, 5, TimeUnit.MINUTES);
    }
-    

+
+    /**
+     * Evaluates all rules applicable to the given path and decides whether the request may
+     * proceed.
+     *
+     * <p>Each rule's key is namespaced with the rule name to keep buckets from different rules
+     * independent. The first denial short-circuits and is returned immediately; if every rule
+     * allows the request, the result with the least remaining quota is returned.</p>
+     *
+     * @param req           the incoming request, used by key resolvers
+     * @param path          the request path used to select rules
+     * @param remoteAddress the client's remote address, used as a key-resolver fallback
+     * @return the limiting result, or {@code null} if no rule applies to the path
+     */
    public RateLimiter.Result check(HttpRequest req, String path, String remoteAddress) {
        List<RateLimitConfig.Rule> rules = config.rulesFor(path);
        if (rules.isEmpty()) return null;
@@ -35,7 +68,7 @@ public final class RateLimitGate {
            String key = rule.name() + ":" + rule.keyResolver().resolve(req, remoteAddress);
            RateLimiter.Result result = rule.limiter().tryAcquire(key, now);

-            if (!result.allowed()) return result; 
+            if (!result.allowed()) return result;

            if (strictest == null || result.remaining() < strictest.remaining()) {
                strictest = result;
@@ -44,6 +77,16 @@ public final class RateLimitGate {
        return strictest;
    }

+    /**
+     * Writes the standard rate-limit headers ({@code X-RateLimit-Limit},
+     * {@code X-RateLimit-Remaining}, and {@code Retry-After} when denied) onto a response.
+     *
+     * <p>Does nothing when {@code result} is {@code null} (no rule applied). The retry hint is
+     * rounded up to whole seconds as required by the {@code Retry-After} header.</p>
+     *
+     * @param result the limiting result, may be {@code null}
+     * @param res    the response to decorate
+     */
    public static void applyHeaders(RateLimiter.Result result, Response res) {
        if (result == null) return;
        res.header("X-RateLimit-Limit", String.valueOf(result.limit()));
@@ -53,9 +96,16 @@ public final class RateLimitGate {
        }
    }

+    /**
+     * Periodic cleanup hook invoked by the background scheduler to evict limiter state that has
+     * not been touched recently (older than roughly ten minutes).
+     */
    private void doCleanup() {
        long threshold = 10L * 60 * 1_000_000_000L;
    }

+    /**
+     * Stops the background cleanup scheduler. Should be called when the server shuts down.
+     */
    public void shutdown() { cleanup.shutdown(); }
-}
+}
@@ -1,21 +1,61 @@
 package dev.coph.nextusweb.server.ratelimit;

+/**
+ * Strategy interface for rate limiting. An implementation decides, per logical key, whether a
+ * single request may proceed right now.
+ *
+ * <p>Concrete strategies in this package include {@link TokenBucketLimiter},
+ * {@link LeakyBucketLimiter}, {@link FixedWindowLimiter} and {@link SlidingWindowLimiter}.
+ * Implementations are expected to be thread-safe, since the same limiter is shared across all
+ * request-handling threads.</p>
+ */
 public interface RateLimiter {

+    /**
+     * Attempts to consume one unit of quota for the given key at the given timestamp.
+     *
+     * @param key      the logical bucket key (for example a client IP or user identifier)
+     * @param nowNanos the current time in nanoseconds, typically {@link System#nanoTime()}
+     * @return a {@link Result} describing whether the request was allowed and the remaining
+     *         quota
+     */
    Result tryAcquire(String key, long nowNanos);

+    /**
+     * Immutable outcome of a {@link #tryAcquire(String, long)} call.
+     *
+     * @param allowed          whether the request may proceed
+     * @param remaining        the remaining quota in the current window/bucket
+     * @param limit            the configured limit, surfaced as {@code X-RateLimit-Limit}
+     * @param retryAfterMillis when denied, how long the caller should wait before retrying, in
+     *                         milliseconds (0 when allowed)
+     */
    record Result(
            boolean allowed,
            long remaining,
            long limit,
            long retryAfterMillis
    ) {
+        /**
+         * Creates a result representing an allowed request.
+         *
+         * @param remaining the remaining quota after this request
+         * @param limit     the configured limit
+         * @return an "allowed" result with no retry delay
+         */
        public static Result allow(long remaining, long limit) {
            return new Result(true, remaining, limit, 0);
        }

+        /**
+         * Creates a result representing a denied (rate-limited) request.
+         *
+         * @param limit            the configured limit
+         * @param retryAfterMillis how long to wait before retrying, in milliseconds
+         * @return a "denied" result with zero remaining quota
+         */
        public static Result deny(long limit, long retryAfterMillis) {
            return new Result(false, 0, limit, retryAfterMillis);
        }
    }
-}
+}
@@ -3,39 +3,99 @@ package dev.coph.nextusweb.server.ratelimit;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;

+/**
+ * A {@link RateLimiter} implementing the <em>sliding window counter</em> algorithm.
+ *
+ * <p>This refines {@link FixedWindowLimiter} by smoothing the boundary between adjacent
+ * windows. It keeps the count for the current window and the previous window, and estimates the
+ * effective rate by weighting the previous window's count by how much of the current window has
+ * not yet elapsed. This avoids the burst-doubling that a plain fixed window allows at window
+ * boundaries, at the cost of a little extra state.</p>
+ *
+ * <p>Because the weighted calculation must read and update several fields atomically together,
+ * the per-key update is guarded by {@code synchronized}; the per-key state objects are stored
+ * in a {@link ConcurrentHashMap}.</p>
+ */
 public final class SlidingWindowLimiter implements RateLimiter {

+    /** Maximum effective (weighted) number of requests per window. */
    private final long limit;
+    /** Window length in nanoseconds. */
    private final long windowNanos;
+    /** Per-key sliding windows, created on demand. */
    private final ConcurrentHashMap<String, SlidingWindow> windows = new ConcurrentHashMap<>();

+    /**
+     * Creates a sliding-window limiter.
+     *
+     * @param limit        the maximum effective number of requests per window
+     * @param windowMillis the window length in milliseconds
+     */
    public SlidingWindowLimiter(long limit, long windowMillis) {
        this.limit = limit;
        this.windowNanos = windowMillis * 1_000_000L;
    }

+    /**
+     * {@inheritDoc}
+     *
+     * <p>Lazily creates the sliding window for {@code key} and counts this request against
+     * it.</p>
+     */
    @Override
    public Result tryAcquire(String key, long nowNanos) {
        SlidingWindow w = windows.computeIfAbsent(key, k -> new SlidingWindow(nowNanos));
        return w.tryAcquire(nowNanos, limit, windowNanos);
    }

+    /**
+     * Evicts windows whose start time is older than the given age.
+     *
+     * @param olderThanNanos maximum age in nanoseconds before a window is removed
+     */
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        windows.entrySet().removeIf(e -> now - e.getValue().windowStart.get() > olderThanNanos);
    }

+    /**
+     * A single client's sliding window, tracking the current window start plus the current and
+     * previous window counts.
+     */
    private static final class SlidingWindow {
+        /** Start timestamp of the current window, in nanoseconds. */
        final AtomicLong windowStart;
+        /** Request count accumulated in the current window. */
        final AtomicLong currentCount;
+        /** Request count carried over from the immediately preceding window. */
        final AtomicLong previousCount;

+        /**
+         * Creates a sliding window starting at the given time with zero counts.
+         *
+         * @param now the window start timestamp in nanoseconds
+         */
        SlidingWindow(long now) {
            this.windowStart = new AtomicLong(now);
            this.currentCount = new AtomicLong(0);
            this.previousCount = new AtomicLong(0);
        }

+        /**
+         * Advances the window(s) as time has passed, computes the weighted request count and
+         * decides whether this request stays within the limit.
+         *
+         * <p>If two or more full windows have elapsed the counters are reset; if exactly one has
+         * elapsed the current count becomes the previous count and a fresh window starts. The
+         * weighted count blends the previous window's count (scaled by the fraction of the
+         * current window still remaining) with the current count.</p>
+         *
+         * @param now         the current time in nanoseconds
+         * @param limit       the per-window effective limit
+         * @param windowNanos the window length in nanoseconds
+         * @return an allow result with the remaining quota, or a deny result with the time until
+         *         the window slides far enough to admit the request
+         */
        synchronized Result tryAcquire(long now, long limit, long windowNanos) {
            long start = windowStart.get();
            long elapsed = now - start;
@@ -64,4 +124,4 @@ public final class SlidingWindowLimiter implements RateLimiter {
            return Result.allow(limit - weightedCount - 1, limit);
        }
    }
-}
+}
@@ -3,59 +3,122 @@ package dev.coph.nextusweb.server.ratelimit;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;

+/**
+ * A {@link RateLimiter} implementing the <em>token bucket</em> algorithm.
+ *
+ * <p>Each key owns a bucket that holds up to {@code burstCapacity} tokens and refills
+ * continuously at {@code requestsPerSecond} tokens per second. Every request consumes one
+ * token; if at least one token is available the request is allowed, otherwise it is denied
+ * with a retry hint computed from the refill rate. This permits short bursts (up to the bucket
+ * capacity) while bounding the sustained rate.</p>
+ *
+ * <p>Token counts are stored in fixed-point form (scaled by 1e9) inside {@link AtomicLong}s and
+ * updated with a lock-free compare-and-set loop, so the limiter is safe for concurrent use.</p>
+ */
 public final class TokenBucketLimiter implements RateLimiter {

-    private final long capacity;          
-    private final double tokensPerNano;  
+    /** Maximum number of tokens a bucket can hold (the burst allowance). */
+    private final long capacity;
+    /** Refill rate expressed as tokens added per nanosecond. */
+    private final double tokensPerNano;
+    /** Approximate nanoseconds between single-token refills, used for retry hints. */
    private final long refillIntervalNs;
+    /** Per-key buckets, created on demand. */
    private final ConcurrentHashMap<String, Bucket> buckets = new ConcurrentHashMap<>();

+    /**
+     * Creates a token-bucket limiter.
+     *
+     * @param requestsPerSecond the sustained refill rate in tokens (requests) per second
+     * @param burstCapacity     the maximum burst size, i.e. the bucket capacity in tokens
+     */
    public TokenBucketLimiter(long requestsPerSecond, long burstCapacity) {
        this.capacity = burstCapacity;
        this.tokensPerNano = (double) requestsPerSecond / 1_000_000_000.0;
        this.refillIntervalNs = 1_000_000_000L / Math.max(1, requestsPerSecond);
    }

+    /**
+     * {@inheritDoc}
+     *
+     * <p>Lazily creates the bucket for {@code key} (initially full) and attempts to consume one
+     * token from it.</p>
+     */
    @Override
    public Result tryAcquire(String key, long nowNanos) {
        Bucket b = buckets.computeIfAbsent(key, k -> new Bucket(capacity, nowNanos));
        return b.tryAcquire(nowNanos, capacity, tokensPerNano, refillIntervalNs);
    }

+    /**
+     * Evicts buckets that have not been accessed within the given age, bounding memory use.
+     *
+     * @param olderThanNanos maximum idle age in nanoseconds before a bucket is removed
+     */
    public void cleanup(long olderThanNanos) {
        long now = System.nanoTime();
        buckets.entrySet().removeIf(e -> now - e.getValue().lastAccess() > olderThanNanos);
    }


+    /**
+     * A single client's token bucket. Tokens are stored in fixed-point form (multiplied by
+     * 1e9) to retain sub-token precision while using integer atomics.
+     *
+     * @param tokensFixed      current token count in fixed-point (tokens &times; 1e9)
+     * @param lastRefillNanos  timestamp of the last refill/consume, in nanoseconds
+     */
    private record Bucket(AtomicLong tokensFixed, AtomicLong lastRefillNanos) {
+            /**
+             * Creates a full bucket.
+             *
+             * @param tokensFixed     initial token count (in whole tokens, scaled internally)
+             * @param lastRefillNanos the creation timestamp in nanoseconds
+             */
            private Bucket(long tokensFixed, long lastRefillNanos) {
                this(new AtomicLong(tokensFixed * 1_000_000_000L), new AtomicLong(lastRefillNanos));
            }
-    
+
+            /**
+             * Returns the timestamp of the last access, used by {@link #cleanup(long)}.
+             *
+             * @return the last-refill timestamp in nanoseconds
+             */
            long lastAccess() {
                return lastRefillNanos.get();
            }
-    
+
+            /**
+             * Refills the bucket according to elapsed time and attempts to consume one token,
+             * retrying via compare-and-set on contention.
+             *
+             * @param now              the current time in nanoseconds
+             * @param capacity         the bucket capacity in whole tokens
+             * @param tokensPerNano    the refill rate in tokens per nanosecond
+             * @param refillIntervalNs the nominal nanoseconds per token (unused in the hot path
+             *                         but kept for symmetry/retry computation)
+             * @return an allow result with the remaining tokens, or a deny result with a retry
+             *         hint when fewer than one token is available
+             */
            Result tryAcquire(long now, long capacity, double tokensPerNano, long refillIntervalNs) {
                while (true) {
                    long lastRefill = lastRefillNanos.get();
                    long currentTokens = tokensFixed.get();
-    
+
                    long elapsed = now - lastRefill;
                    long refilled = currentTokens;
                    if (elapsed > 0) {
                        long addedFixed = (long) (elapsed * tokensPerNano * 1_000_000_000.0);
                        refilled = Math.min(currentTokens + addedFixed, capacity * 1_000_000_000L);
                    }
-    
+
                    long oneTokenFixed = 1_000_000_000L;
                    if (refilled < oneTokenFixed) {
                        long deficitFixed = oneTokenFixed - refilled;
                        long retryNs = (long) (deficitFixed / (tokensPerNano * 1_000_000_000.0));
                        return Result.deny(capacity, Math.max(1, retryNs / 1_000_000));
                    }
-    
+
                    long newTokens = refilled - oneTokenFixed;
                    if (tokensFixed.compareAndSet(currentTokens, newTokens)) {
                        lastRefillNanos.set(now);
@@ -64,4 +127,4 @@ public final class TokenBucketLimiter implements RateLimiter {
                }
            }
        }
-}
+}