ccproxy.plugins.max_tokens.service¶

ccproxy.plugins.max_tokens.service ¶

Service for managing token limits and max_tokens modifications.

TokenLimitsService ¶

TokenLimitsService(config)

Service for managing model token limits and max_tokens modifications.

Source code in ccproxy/plugins/max_tokens/service.py

def __init__(self, config: MaxTokensConfig):
    """Set up the service state and load token limits from both sources.

    Stores ``config``, creates an empty ``TokenLimitsData`` container,
    records the pricing-cache path under the user's home directory, and
    then runs both limit loaders. ``config.prioritize_local_file`` only
    decides the order in which the two loaders run.

    Args:
        config: Plugin configuration; ``prioritize_local_file`` is read here.
    """
    self.config = config
    self.token_limits_data = TokenLimitsData()
    self._pricing_cache_path = Path.home().joinpath(
        ".cache", "ccproxy", "model_pricing.json"
    )

    # Default order: pricing cache first, local file second.
    load_order = [
        self._load_limits_from_pricing_cache,
        self._load_limits_from_local_file,
    ]
    if self.config.prioritize_local_file:
        # Local file goes first when it is prioritized.
        # NOTE(review): the loaders are not visible here; presumably the
        # source loaded first wins on conflicts -- confirm in the loaders.
        load_order.reverse()

    for load_limits in load_order:
        load_limits()

get_max_output_tokens ¶

get_max_output_tokens(model_name)

Get maximum output tokens for a model.

Source code in ccproxy/plugins/max_tokens/service.py

def get_max_output_tokens(self, model_name: str) -> int | None:
    """Get maximum output tokens for a model."""
    return self.token_limits_data.get_max_output_tokens(model_name)

should_modify_max_tokens ¶

should_modify_max_tokens(request_data, model)

Determine if max_tokens should be modified for the request.

Source code in ccproxy/plugins/max_tokens/service.py

def should_modify_max_tokens(
    self, request_data: dict[str, Any], model: str
) -> tuple[bool, str]:
    """Determine if max_tokens should be modified for the request.

    Args:
        request_data: Request payload; only its ``"max_tokens"`` key is read.
        model: Model name used to look up the output-token limit.

    Returns:
        A ``(should_modify, reason_type)`` tuple. ``reason_type`` is one of
        ``"enforced"``, ``"missing"``, ``"invalid"``, ``"exceeded"`` or
        ``"none"`` (the last one paired with ``False``).
    """
    current_max_tokens = request_data.get("max_tokens")

    # Enforce mode: always modify to set max_tokens to model limit
    if self.config.enforce_mode:
        return True, "enforced"

    # Case 1: No max_tokens provided
    if current_max_tokens is None:
        return True, "missing"

    # Case 2: Invalid max_tokens (not a positive integer).
    # bool is a subclass of int, so a JSON `true` would otherwise be
    # accepted as the valid value 1; reject it explicitly.
    if (
        isinstance(current_max_tokens, bool)
        or not isinstance(current_max_tokens, int)
        or current_max_tokens <= 0
    ):
        return True, "invalid"

    # Case 3: Max tokens exceeds model limit (skipped when the limit is
    # unknown or falsy)
    model_limit = self.get_max_output_tokens(model)
    if model_limit and current_max_tokens > model_limit:
        return True, "exceeded"

    # No modification needed
    return False, "none"

modify_max_tokens ¶

modify_max_tokens(request_data, model, provider=None)

Modify max_tokens in request data if needed.

Source code in ccproxy/plugins/max_tokens/service.py

def modify_max_tokens(
    self, request_data: dict[str, Any], model: str, provider: str | None = None
) -> tuple[dict[str, Any], MaxTokensModification | None]:
    """Return request data with ``max_tokens`` adjusted when required.

    Asks ``should_modify_max_tokens`` whether a change is needed. If not,
    the original ``request_data`` object is returned together with ``None``.
    Otherwise a shallow copy is returned whose ``"max_tokens"`` is the
    model's limit, or ``config.fallback_max_tokens`` when no (truthy) limit
    is known. The input mapping itself is not mutated.

    Args:
        request_data: Incoming request payload.
        model: Model name used for the limit lookup.
        provider: Optional provider name; only included in the info log.

    Returns:
        ``(data, modification)`` where ``modification`` describes the
        change, or is ``None`` when nothing was changed.
    """
    needs_change, reason_key = self.should_modify_max_tokens(request_data, model)
    if not needs_change:
        return request_data, None

    previous_value = request_data.get("max_tokens")
    limit = self.get_max_output_tokens(model)

    if not limit:
        # Limit unknown for this model: fall back to the configured default.
        replacement = self.config.fallback_max_tokens
        logger.debug(
            "using_fallback_max_tokens",
            model=model,
            fallback=self.config.fallback_max_tokens,
        )
    else:
        replacement = limit

    # Record what changed and why.
    change = MaxTokensModification(
        original_max_tokens=previous_value,
        new_max_tokens=replacement,
        model=model,
        reason=self.config.get_modification_reason(reason_key),
    )

    # Work on a shallow copy so the caller's mapping stays untouched.
    updated_request = request_data.copy()
    updated_request["max_tokens"] = replacement

    if self.config.log_modifications:
        logger.info(
            "max_tokens_modified",
            model=model,
            provider=provider,
            original=previous_value,
            new=replacement,
            reason=change.reason,
        )

    return updated_request, change

initialize ¶

initialize()

Initialize the service.

Source code in ccproxy/plugins/max_tokens/service.py

def initialize(self) -> None:
    """Emit a debug log entry summarizing the service's current state.

    Logs the number of models in ``token_limits_data``, the pricing-cache
    path, and the fallback / enforce-mode / local-file-priority settings.
    No other work is performed here.
    """
    settings = self.config
    summary = {
        "models_count": len(self.token_limits_data.models),
        "pricing_cache": str(self._pricing_cache_path),
        "fallback": settings.fallback_max_tokens,
        "enforce_mode": settings.enforce_mode,
        "prioritize_local_file": settings.prioritize_local_file,
    }
    logger.debug("token_limits_service_initialized", **summary)