diff --git a/src/together/cli/api/endpoints.py b/src/together/cli/api/endpoints.py
index d46844c..df0227f 100644
--- a/src/together/cli/api/endpoints.py
+++ b/src/together/cli/api/endpoints.py
@@ -45,14 +45,31 @@ def print_endpoint(
 
 def print_api_error(
     e: InvalidRequestError,
+    endpoint_id: str | None = None,
 ) -> None:
+    """Print a user-friendly message for an API error, with hints for known cases."""
     error_details = e.api_response.message
+    error_lower = error_details.lower() if error_details else ""
 
-    if error_details and (
-        "credentials" in error_details.lower()
-        or "authentication" in error_details.lower()
-    ):
+    if "credentials" in error_lower or "authentication" in error_lower:
         click.echo("Error: Invalid API key or authentication failed", err=True)
+    elif "not found" in error_lower and "endpoint" in error_lower:
+        endpoint_display = f" '{endpoint_id}'" if endpoint_id else ""
+        click.echo(f"Error: Endpoint{endpoint_display} not found.", err=True)
+        click.echo(
+            "The endpoint may have been deleted or the ID may be incorrect.",
+            err=True,
+        )
+        click.echo(
+            "Use 'together endpoints list --mine true' to see your endpoints.",
+            err=True,
+        )
+    elif "permission" in error_lower or "forbidden" in error_lower or "unauthorized" in error_lower:
+        click.echo("Error: You don't have permission to access this endpoint.", err=True)
+        click.echo(
+            "This endpoint may belong to another user or organization.",
+            err=True,
+        )
     else:
         click.echo(f"Error: {error_details}", err=True)
 
@@ -65,7 +82,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
         try:
             return f(*args, **kwargs)
         except InvalidRequestError as e:
-            print_api_error(e)
+            # Try to extract endpoint_id from kwargs for better error messages
+            endpoint_id = kwargs.get("endpoint_id")
+            print_api_error(e, endpoint_id=endpoint_id)
             sys.exit(1)
         except Exception as e:
             click.echo(f"Error: An unexpected error occurred - {str(e)}", err=True)
@@ -81,6 +100,92 @@ def endpoints(ctx: click.Context) -> None:
     pass
 
 
+def _print_hardware_error(
+    client: Together,
+    model: str,
+    hardware_id: str,
+    gpu: str,
+    gpu_count: int,
+    *,
+    speculative_decoding_enabled: bool = False,
+) -> None:
+    """Print a detailed error message when hardware selection fails."""
+    click.echo(
+        f"Error: Cannot create endpoint with {gpu_count}x {gpu.upper()} for model '{model}'",
+        err=True,
+    )
+
+    # Fetch hardware options for this model to provide specific guidance
+    try:
+        hardware_options = client.endpoints.list_hardware(model)
+    except Exception:
+        # If we can't fetch hardware options, just show a generic message
+        click.echo(
+            f"\nUse 'together endpoints hardware --model {model}' to see available options.",
+            err=True,
+        )
+        return
+
+    # Check if the requested hardware exists for this model
+    requested_hw = next((hw for hw in hardware_options if hw.id == hardware_id), None)
+
+    if requested_hw is None:
+        # Hardware configuration is not compatible with this model
+        click.echo(
+            f"\nThe hardware configuration '{hardware_id}' is not compatible with this model.",
+            err=True,
+        )
+    elif requested_hw.availability:
+        status = requested_hw.availability.status
+        if status == "unavailable":
+            click.echo(
+                f"\nThe {gpu_count}x {gpu.upper()} configuration is currently unavailable. "
+                "This hardware type has no available capacity at this time.",
+                err=True,
+            )
+        elif status == "insufficient":
+            click.echo(
+                f"\nThe {gpu_count}x {gpu.upper()} configuration has insufficient capacity. "
+                "Not enough GPUs available for the requested number of replicas.",
+                err=True,
+            )
+        elif status == "available":
+            # Hardware is available but request failed - suggest toggling speculative decoding
+            if speculative_decoding_enabled:
+                click.echo(
+                    "\nHardware is available but this configuration is not supported. "
+                    "Try adding --no-speculative-decoding.",
+                    err=True,
+                )
+            else:
+                click.echo(
+                    "\nHardware is available but this configuration is not supported. "
+                    "Try removing --no-speculative-decoding to enable speculative decoding.",
+                    err=True,
+                )
+            return
+
+    # Show available alternatives
+    available_options = [
+        hw
+        for hw in hardware_options
+        if hw.availability is not None and hw.availability.status == "available"
+    ]
+
+    if available_options:
+        click.echo("\nAvailable hardware options for this model:", err=True)
+        click.echo("", err=True)
+        _format_hardware_options(available_options, show_availability=True)
+    else:
+        click.echo(
+            "\nNo hardware is currently available for this model. Please try again later.",
+            err=True,
+        )
+        click.echo("\nAll hardware options for this model:", err=True)
+        click.echo("", err=True)
+        _format_hardware_options(hardware_options, show_availability=True)
+
+
 @endpoints.command()
 @click.option(
     "--model",
@@ -162,6 +267,51 @@ def create(
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
+    # Client-side validation for replicas
+    if min_replicas < 0:
+        click.echo(
+            f"Error: --min-replicas must be non-negative, got {min_replicas}", err=True
+        )
+        sys.exit(1)
+    if max_replicas < 0:
+        click.echo(
+            f"Error: --max-replicas must be non-negative, got {max_replicas}", err=True
+        )
+        sys.exit(1)
+    if min_replicas > max_replicas:
+        click.echo(
+            f"Error: --min-replicas ({min_replicas}) cannot be greater than "
+            f"--max-replicas ({max_replicas})",
+            err=True,
+        )
+        sys.exit(1)
+
+    # Validate GPU count
+    valid_gpu_counts = [1, 2, 4, 8]
+    if gpu_count not in valid_gpu_counts:
+        click.echo(
+            f"Error: --gpu-count must be one of {valid_gpu_counts}, got {gpu_count}",
+            err=True,
+        )
+        sys.exit(1)
+
+    # Validate availability zone if specified
+    if availability_zone:
+        try:
+            valid_zones = client.endpoints.list_avzones()
+            if availability_zone not in valid_zones:
+                click.echo(
+                    f"Error: Invalid availability zone '{availability_zone}'", err=True
+                )
+                if valid_zones:
+                    click.echo("Available zones:", err=True)
+                    for zone in sorted(valid_zones):
+                        click.echo(f" {zone}", err=True)
+                sys.exit(1)
+        except Exception:
+            # If we can't fetch zones, let the API validate it
+            pass
+
     # Map GPU types to their full hardware ID names
     gpu_map = {
         "b200": "nvidia_b200_180gb_sxm",
@@ -189,16 +339,55 @@ def create(
             availability_zone=availability_zone,
         )
     except InvalidRequestError as e:
+        error_msg = str(e.args[0]).lower() if e.args else ""
         if (
-            "check the hardware api" in str(e.args[0]).lower()
-            or "invalid hardware provided" in str(e.args[0]).lower()
-            or "the selected configuration" in str(e.args[0]).lower()
+            "check the hardware api" in error_msg
+            or "invalid hardware provided" in error_msg
+            or "the selected configuration" in error_msg
+        ):
+            # speculative decoding is enabled when --no-speculative-decoding is NOT passed
+            speculative_decoding_enabled = not no_speculative_decoding
+            _print_hardware_error(
+                client,
+                model,
+                hardware_id,
+                gpu,
+                gpu_count,
+                speculative_decoding_enabled=speculative_decoding_enabled,
+            )
+        elif "model" in error_msg and (
+            "not found" in error_msg
+            or "invalid" in error_msg
+            or "does not exist" in error_msg
+            or "not supported" in error_msg
         ):
-            click.secho("Invalid hardware selected.", fg="red", err=True)
-            click.echo("\nAvailable hardware options:")
-            fetch_and_print_hardware_options(
-                client=client, model=model, print_json=False, available=True
+            click.echo(
+                f"Error: Model '{model}' was not found or is not available for "
+                "dedicated endpoints.",
+                err=True,
+            )
+            click.echo(
+                "Please check that the model name is correct and that it supports "
+                "dedicated endpoint deployment.",
+                err=True,
+            )
+            click.echo(
+                "You can browse available models at: https://api.together.ai/models",
+                err=True,
+            )
+        elif "availability" in error_msg and "zone" in error_msg:
+            click.echo(
+                f"Error: Availability zone '{availability_zone}' is not valid.",
+                err=True,
             )
+            try:
+                valid_zones = client.endpoints.list_avzones()
+                if valid_zones:
+                    click.echo("\nAvailable zones:", err=True)
+                    for zone in sorted(valid_zones):
+                        click.echo(f" {zone}", err=True)
+            except Exception:
+                pass
         else:
             print_api_error(e)
         sys.exit(1)