{
  "description": "AIGatewayRoute combines multiple AIServiceBackends and attaching them to Gateway(s) resources.\n\nThis serves as a way to define a \"unified\" AI API for a Gateway which allows downstream\nclients to use a single schema API to interact with multiple AI backends.\n\nEnvoy AI Gateway will generate the following k8s resources corresponding to the AIGatewayRoute:\n\n  - HTTPRoute of the Gateway API as a top-level resource to bind all backends.\n    The name of the HTTPRoute is the same as the AIGatewayRoute.\n  - HTTPRouteFilter of the Envoy Gateway API per namespace for automatic hostname rewrite.\n    The name of the HTTPRouteFilter is `ai-eg-host-rewrite-${AIGatewayRoute.Name}`.\n\nAll of these resources are created in the same namespace as the AIGatewayRoute. Note that this is the implementation\ndetail subject to change. If you want to customize the default behavior of the Envoy AI Gateway, you can use these\nresources as a reference and create your own resources. Alternatively, you can use EnvoyPatchPolicy API of the Envoy\nGateway to patch the generated resources. For example, you can configure the retry fallback behavior by attaching\nBackendTrafficPolicy API of Envoy Gateway to the generated HTTPRoute.",
  "properties": {
    "apiVersion": {
      "description": "APIVersion defines the versioned schema of this representation of an object.\nServers should convert recognized schemas to the latest internal value, and\nmay reject unrecognized values.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",
      "type": "string"
    },
    "kind": {
      "description": "Kind is a string value representing the REST resource this object represents.\nServers may infer this from the endpoint the client submits requests to.\nCannot be updated.\nIn CamelCase.\nMore info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds",
      "type": "string"
    },
    "metadata": {
      "type": "object"
    },
    "spec": {
      "description": "Spec defines the details of the AIGatewayRoute.",
      "properties": {
        "filterConfig": {
          "description": "FilterConfig is the configuration for the AI Gateway filter inserted in the generated HTTPRoute.\n\nAn AI Gateway filter is responsible for the transformation of the request and response\nas well as the routing behavior based on the model name extracted from the request content, etc.\n\nCurrently, the filter is only implemented as an external processor filter, which might be\nextended to other types of filters in the future. See https://github.com/envoyproxy/ai-gateway/issues/90",
          "properties": {
            "externalProcessor": {
              "description": "ExternalProcessor is the configuration for the external processor filter.\nThis is optional, and if not set, the default values of Deployment spec will be used.",
              "properties": {
                "resources": {
                  "description": "Resources required by the external processor container.\nMore info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\n\nDeprecated: Use GatewayConfig for gateway-scoped resource configuration instead.\nConfigure resources using GatewayConfig.spec.extProc.resources and reference it\nfrom the Gateway via the \"aigateway.envoyproxy.io/gateway-config\" annotation.\nThis field will be removed in a future version.\n\nNote: when multiple AIGatewayRoute resources are attached to the same Gateway, and each\nAIGatewayRoute has a different resource configuration, the ai-gateway will pick one of them\nto configure the resource requirements of the external processor container.",
                  "properties": {
                    "claims": {
                      "description": "Claims lists the names of resources, defined in spec.resourceClaims,\nthat are used by this container.\n\nThis field depends on the\nDynamicResourceAllocation feature gate.\n\nThis field is immutable. It can only be set for containers.",
                      "items": {
                        "description": "ResourceClaim references one entry in PodSpec.ResourceClaims.",
                        "properties": {
                          "name": {
                            "description": "Name must match the name of one entry in pod.spec.resourceClaims of\nthe Pod where this field is used. It makes that resource available\ninside a container.",
                            "type": "string"
                          },
                          "request": {
                            "description": "Request is the name chosen for a request in the referenced claim.\nIf empty, everything from the claim is made available, otherwise\nonly the result of this request.",
                            "type": "string"
                          }
                        },
                        "required": [
                          "name"
                        ],
                        "type": "object",
                        "additionalProperties": false
                      },
                      "type": "array",
                      "x-kubernetes-list-map-keys": [
                        "name"
                      ],
                      "x-kubernetes-list-type": "map"
                    },
                    "limits": {
                      "additionalProperties": {
                        "anyOf": [
                          {
                            "type": "integer"
                          },
                          {
                            "type": "string"
                          }
                        ],
                        "pattern": "^(\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))))?$",
                        "x-kubernetes-int-or-string": true
                      },
                      "description": "Limits describes the maximum amount of compute resources allowed.\nMore info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
                      "type": "object"
                    },
                    "requests": {
                      "additionalProperties": {
                        "anyOf": [
                          {
                            "type": "integer"
                          },
                          {
                            "type": "string"
                          }
                        ],
                        "pattern": "^(\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\\+|-)?(([0-9]+(\\.[0-9]*)?)|(\\.[0-9]+))))?$",
                        "x-kubernetes-int-or-string": true
                      },
                      "description": "Requests describes the minimum amount of compute resources required.\nIf Requests is omitted for a container, it defaults to Limits if that is explicitly specified,\notherwise to an implementation-defined value. Requests cannot exceed Limits.\nMore info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
                      "type": "object"
                    }
                  },
                  "type": "object",
                  "additionalProperties": false
                }
              },
              "type": "object",
              "additionalProperties": false
            },
            "type": {
              "default": "ExternalProcessor",
              "description": "Type specifies the type of the filter configuration.\n\nCurrently, only ExternalProcessor is supported, and default is ExternalProcessor.",
              "enum": [
                "ExternalProcessor",
                "DynamicModule"
              ],
              "type": "string"
            }
          },
          "required": [
            "type"
          ],
          "type": "object",
          "additionalProperties": false
        },
        "llmRequestCosts": {
          "description": "LLMRequestCosts specifies how to capture the cost of the LLM-related request, notably the token usage.\nThe AI Gateway filter will capture each specified number and store it in the Envoy's dynamic\nmetadata per HTTP request. The namespaced key is \"io.envoy.ai_gateway\",\n\nFor example, let's say we have the following LLMRequestCosts configuration:\n```yaml\n\tllmRequestCosts:\n\t- metadataKey: llm_input_token\n\t  type: InputToken\n\t- metadataKey: llm_output_token\n\t  type: OutputToken\n\t- metadataKey: llm_total_token\n\t  type: TotalToken\n\t- metadataKey: llm_cached_input_token\n\t  type: CachedInputToken\n- metadataKey: llm_cache_creation_input_token\n   type: CacheCreationInputToken\n```\nThen, with the following BackendTrafficPolicy of Envoy Gateway, you can have three\nrate limit buckets for each unique x-user-id header value. One bucket is for the input token,\nthe other is for the output token, and the last one is for the total token.\nEach bucket will be reduced by the corresponding token usage captured by the AI Gateway filter.\n\n```yaml\n\tapiVersion: gateway.envoyproxy.io/v1alpha1\n\tkind: BackendTrafficPolicy\n\tmetadata:\n\t  name: some-example-token-rate-limit\n\t  namespace: default\n\tspec:\n\t  targetRefs:\n\t  - group: gateway.networking.k8s.io\n\t     kind: HTTPRoute\n\t     name: usage-rate-limit\n\t  rateLimit:\n\t    type: Global\n\t    global:\n\t      rules:\n\t        - clientSelectors:\n\t            # Do the rate limiting based on the x-user-id header.\n\t            - headers:\n\t                - name: x-user-id\n\t                  type: Distinct\n\t          limit:\n\t            # Configures the number of \"tokens\" allowed per hour.\n\t            requests: 10000\n\t            unit: Hour\n\t          cost:\n\t            request:\n\t              from: Number\n\t              # Setting the request cost to zero allows to only check the rate limit budget,\n\t              # and not consume the budget 
on the request path.\n\t              number: 0\n\t            # This specifies the cost of the response retrieved from the dynamic metadata set by the AI Gateway filter.\n\t            # The extracted value will be used to consume the rate limit budget, and subsequent requests will be rate limited\n\t            # if the budget is exhausted.\n\t            response:\n\t              from: Metadata\n\t              metadata:\n\t                namespace: io.envoy.ai_gateway\n\t                key: llm_input_token\n\t        - clientSelectors:\n\t            - headers:\n\t                - name: x-user-id\n\t                  type: Distinct\n\t          limit:\n\t            requests: 10000\n\t            unit: Hour\n\t          cost:\n\t            request:\n\t              from: Number\n\t              number: 0\n\t            response:\n\t              from: Metadata\n\t              metadata:\n\t                namespace: io.envoy.ai_gateway\n\t                key: llm_output_token\n\t        - clientSelectors:\n\t            - headers:\n\t                - name: x-user-id\n\t                  type: Distinct\n\t          limit:\n\t            requests: 10000\n\t            unit: Hour\n\t          cost:\n\t            request:\n\t              from: Number\n\t              number: 0\n\t            response:\n\t              from: Metadata\n\t              metadata:\n\t                namespace: io.envoy.ai_gateway\n\t                key: llm_total_token\n```\n\nNote that when multiple AIGatewayRoute resources are attached to the same Gateway, and\ndifferent costs are configured for the same metadata key, the ai-gateway will pick one of them\nto configure the metadata key in the generated HTTPRoute, and ignore the rest.",
          "items": {
            "description": "LLMRequestCost configures each request cost.",
            "properties": {
              "cel": {
                "description": "CEL is the CEL expression to calculate the cost of the request.\nThe CEL expression must return a signed or unsigned integer. If the\nreturn value is negative, it will be error.\n\nThe expression can use the following variables:\n\n\t* model: the model name extracted from the request content. Type: string.\n\t* backend: the backend name in the form of \"name.namespace\". Type: string.\n\t* input_tokens: the number of input tokens. Type: unsigned integer.\n\t* cached_input_tokens: the number of cached read input tokens. Type: unsigned integer.\n\t* cache_creation_input_tokens: the number of cache creation input tokens. Type: unsigned integer.\n\t* output_tokens: the number of output tokens. Type: unsigned integer.\n\t* total_tokens: the total number of tokens. Type: unsigned integer.\n\nFor example, the following expressions are valid:\n\n\t* \"model == 'llama' ?  input_tokens + output_token * 0.5 : total_tokens\"\n\t* \"backend == 'foo.default' ?  input_tokens + output_tokens : total_tokens\"\n\t* \"backend == 'bar.default' ?  (input_tokens - cached_input_tokens) + cached_input_tokens * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens : total_tokens\"\n\t* \"input_tokens + output_tokens + total_tokens\"\n\t* \"input_tokens * output_tokens\"",
                "type": "string"
              },
              "metadataKey": {
                "description": "MetadataKey is the key of the metadata to store this cost of the request.",
                "type": "string"
              },
              "type": {
                "description": "Type specifies the type of the request cost. The default is \"OutputToken\",\nand it uses \"output token\" as the cost. The other types are \"InputToken\", \"TotalToken\",\n\"CachedInputToken\", \"CacheCreationInputToken\", and \"CEL\".",
                "enum": [
                  "OutputToken",
                  "InputToken",
                  "CachedInputToken",
                  "CacheCreationInputToken",
                  "TotalToken",
                  "CEL"
                ],
                "type": "string"
              }
            },
            "required": [
              "metadataKey",
              "type"
            ],
            "type": "object",
            "additionalProperties": false
          },
          "maxItems": 36,
          "type": "array"
        },
        "parentRefs": {
          "description": "ParentRefs are the names of the Gateway resources this AIGatewayRoute is being attached to.\nCurrently, each reference's Kind must be Gateway.",
          "items": {
            "description": "ParentReference identifies an API object (usually a Gateway) that can be considered\na parent of this resource (usually a route). There are two kinds of parent resources\nwith \"Core\" support:\n\n* Gateway (Gateway conformance profile)\n* Service (Mesh conformance profile, ClusterIP Services only)\n\nThis API may be extended in the future to support additional kinds of parent\nresources.\n\nThe API object must be valid in the cluster; the Group and Kind must\nbe registered in the cluster for this reference to be valid.",
            "properties": {
              "group": {
                "default": "gateway.networking.k8s.io",
                "description": "Group is the group of the referent.\nWhen unspecified, \"gateway.networking.k8s.io\" is inferred.\nTo set the core API group (such as for a \"Service\" kind referent),\nGroup must be explicitly set to \"\" (empty string).\n\nSupport: Core",
                "maxLength": 253,
                "pattern": "^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$",
                "type": "string"
              },
              "kind": {
                "default": "Gateway",
                "description": "Kind is kind of the referent.\n\nThere are two kinds of parent resources with \"Core\" support:\n\n* Gateway (Gateway conformance profile)\n* Service (Mesh conformance profile, ClusterIP Services only)\n\nSupport for other resources is Implementation-Specific.",
                "maxLength": 63,
                "minLength": 1,
                "pattern": "^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$",
                "type": "string"
              },
              "name": {
                "description": "Name is the name of the referent.\n\nSupport: Core",
                "maxLength": 253,
                "minLength": 1,
                "type": "string"
              },
              "namespace": {
                "description": "Namespace is the namespace of the referent. When unspecified, this refers\nto the local namespace of the Route.\n\nNote that there are specific rules for ParentRefs which cross namespace\nboundaries. Cross-namespace references are only valid if they are explicitly\nallowed by something in the namespace they are referring to. For example:\nGateway has the AllowedRoutes field, and ReferenceGrant provides a\ngeneric way to enable any other kind of cross-namespace reference.\n\n<gateway:experimental:description>\nParentRefs from a Route to a Service in the same namespace are \"producer\"\nroutes, which apply default routing rules to inbound connections from\nany namespace to the Service.\n\nParentRefs from a Route to a Service in a different namespace are\n\"consumer\" routes, and these routing rules are only applied to outbound\nconnections originating from the same namespace as the Route, for which\nthe intended destination of the connections are a Service targeted as a\nParentRef of the Route.\n</gateway:experimental:description>\n\nSupport: Core",
                "maxLength": 63,
                "minLength": 1,
                "pattern": "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$",
                "type": "string"
              },
              "port": {
                "description": "Port is the network port this Route targets. It can be interpreted\ndifferently based on the type of parent resource.\n\nWhen the parent resource is a Gateway, this targets all listeners\nlistening on the specified port that also support this kind of Route(and\nselect this Route). It's not recommended to set `Port` unless the\nnetworking behaviors specified in a Route must apply to a specific port\nas opposed to a listener(s) whose port(s) may be changed. When both Port\nand SectionName are specified, the name and port of the selected listener\nmust match both specified values.\n\n<gateway:experimental:description>\nWhen the parent resource is a Service, this targets a specific port in the\nService spec. When both Port (experimental) and SectionName are specified,\nthe name and port of the selected port must match both specified values.\n</gateway:experimental:description>\n\nImplementations MAY choose to support other parent resources.\nImplementations supporting other types of parent resources MUST clearly\ndocument how/if Port is interpreted.\n\nFor the purpose of status, an attachment is considered successful as\nlong as the parent resource accepts it partially. For example, Gateway\nlisteners can restrict which Routes can attach to them by Route kind,\nnamespace, or hostname. If 1 of 2 Gateway listeners accept attachment\nfrom the referencing Route, the Route MUST be considered successfully\nattached. If no Gateway listeners accept attachment from this Route,\nthe Route MUST be considered detached from the Gateway.\n\nSupport: Extended",
                "format": "int32",
                "maximum": 65535,
                "minimum": 1,
                "type": "integer"
              },
              "sectionName": {
                "description": "SectionName is the name of a section within the target resource. In the\nfollowing resources, SectionName is interpreted as the following:\n\n* Gateway: Listener name. When both Port (experimental) and SectionName\nare specified, the name and port of the selected listener must match\nboth specified values.\n* Service: Port name. When both Port (experimental) and SectionName\nare specified, the name and port of the selected listener must match\nboth specified values.\n\nImplementations MAY choose to support attaching Routes to other resources.\nIf that is the case, they MUST clearly document how SectionName is\ninterpreted.\n\nWhen unspecified (empty string), this will reference the entire resource.\nFor the purpose of status, an attachment is considered successful if at\nleast one section in the parent resource accepts it. For example, Gateway\nlisteners can restrict which Routes can attach to them by Route kind,\nnamespace, or hostname. If 1 of 2 Gateway listeners accept attachment from\nthe referencing Route, the Route MUST be considered successfully\nattached. If no Gateway listeners accept attachment from this Route, the\nRoute MUST be considered detached from the Gateway.\n\nSupport: Core",
                "maxLength": 253,
                "minLength": 1,
                "pattern": "^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$",
                "type": "string"
              }
            },
            "required": [
              "name"
            ],
            "type": "object",
            "additionalProperties": false
          },
          "maxItems": 16,
          "type": "array",
          "x-kubernetes-validations": [
            {
              "message": "only Gateway is supported",
              "rule": "self.all(match, match.kind == 'Gateway')"
            }
          ]
        },
        "rules": {
          "description": "Rules is the list of AIGatewayRouteRule that this AIGatewayRoute will match the traffic to.\nEach rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/).\n\nAI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional\nmodifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for\nthe transformation of the request and response, etc.\n\nIn the matching conditions in the AIGatewayRouteRule, `x-ai-eg-model` header is available\nif we want to describe the routing behavior based on the model name. The model name is extracted\nfrom the request content before the routing decision.\n\nHow multiple rules are matched is the same as the Gateway API. See for the details:\nhttps://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRoute",
          "items": {
            "description": "AIGatewayRouteRule is a rule that defines the routing behavior of the AIGatewayRoute.",
            "properties": {
              "backendRefs": {
                "description": "BackendRefs is the list of backends that this rule will route the traffic to.\nEach backend can have a weight that determines the traffic distribution.\n\nThe namespace of each backend defaults to the same namespace as the AIGatewayRoute when not specified.\nCross-namespace references are supported by specifying the namespace field.\nWhen a namespace different than the AIGatewayRoute's namespace is specified,\na ReferenceGrant object is required in the referent namespace to allow that\nnamespace's owner to accept the reference.\n\nBackendRefs can reference either AIServiceBackend resources (default) or InferencePool resources\nfrom the Gateway API Inference Extension. When referencing InferencePool resources:\n- Only one InferencePool backend is allowed per rule\n- Cannot mix InferencePool with AIServiceBackend references in the same rule\n- Fallback behavior is handled by the InferencePool's endpoint picker\n\nFor AIServiceBackend references, you can achieve fallback behavior by configuring multiple backends\ncombined with the BackendTrafficPolicy of Envoy Gateway.\nPlease refer to https://gateway.envoyproxy.io/docs/tasks/traffic/failover/ as well as\nhttps://gateway.envoyproxy.io/docs/tasks/traffic/retry/.",
                "items": {
                  "description": "AIGatewayRouteRuleBackendRef is a reference to a backend with a weight.\nIt can reference either an AIServiceBackend or an InferencePool resource.",
                  "properties": {
                    "bodyMutation": {
                      "description": "BodyMutation defines the request body mutation to be applied to this backend.\nThis allows modification of JSON fields in the request body before sending to the backend.\nWhen both route-level and backend-level BodyMutation are defined,\nroute-level takes precedence over backend-level for conflicting operations.\nThis field is ignored when referencing InferencePool resources.",
                      "properties": {
                        "remove": {
                          "description": "Remove the given JSON field(s) from the HTTP request body before sending to the backend.\nThe value of Remove is a list of top-level field names to remove.\n\nInput:\n  {\n    \"model\": \"gpt-4\",\n    \"service_tier\": \"default\",\n    \"internal_flag\": true\n  }\n\nConfig:\n  remove: [\"service_tier\", \"internal_flag\"]\n\nOutput:\n  {\n    \"model\": \"gpt-4\"\n  }",
                          "items": {
                            "type": "string"
                          },
                          "maxItems": 16,
                          "type": "array",
                          "x-kubernetes-list-type": "set"
                        },
                        "set": {
                          "description": "Set overwrites/adds the request body with the given JSON field (name, value)\nbefore sending to the backend. Only top-level fields are currently supported.\n\nInput:\n  {\n    \"model\": \"gpt-4\",\n    \"service_tier\": \"default\"\n  }\n\nConfig:\n  set:\n  - path: \"service_tier\"\n    value: \"scale\"\n\nOutput:\n  {\n    \"model\": \"gpt-4\",\n    \"service_tier\": \"scale\"\n  }",
                          "items": {
                            "description": "HTTPBodyField represents a JSON field name and value for body mutation",
                            "properties": {
                              "path": {
                                "description": "Path is the top-level field name to set in the request body.\nExamples: \"service_tier\", \"max_tokens\", \"temperature\"",
                                "minLength": 1,
                                "type": "string"
                              },
                              "value": {
                                "description": "Value is the JSON value to set at the specified field. This can be any valid JSON value:\nstring, number, boolean, object, array, or null.\nThe value will be parsed as JSON and inserted at the specified field.\n\nExamples:\n  - \"\\\"scale\\\"\" (string)\n  - \"42\" (number)\n  - \"true\" (boolean)\n  - \"{\\\"key\\\": \\\"value\\\"}\" (object)\n  - \"[1, 2, 3]\" (array)\n  - \"null\" (null)",
                                "type": "string"
                              }
                            },
                            "required": [
                              "path",
                              "value"
                            ],
                            "type": "object",
                            "additionalProperties": false
                          },
                          "maxItems": 16,
                          "type": "array",
                          "x-kubernetes-list-map-keys": [
                            "path"
                          ],
                          "x-kubernetes-list-type": "map"
                        }
                      },
                      "type": "object",
                      "additionalProperties": false
                    },
                    "group": {
                      "description": "Group is the group of the backend resource.\nWhen not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).\nCurrently, only \"inference.networking.k8s.io\" is supported for InferencePool resources.",
                      "maxLength": 253,
                      "pattern": "^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$",
                      "type": "string"
                    },
                    "headerMutation": {
                      "description": "HeaderMutation defines the request header mutation to be applied to this backend.\nWhen both route-level and backend-level HeaderMutation are defined,\nroute-level takes precedence over backend-level for conflicting operations.\nThis field is ignored when referencing InferencePool resources.",
                      "properties": {
                        "remove": {
                          "description": "Remove the given header(s) from the HTTP request before the action. The\nvalue of Remove is a list of HTTP header names. Note that the header\nnames are case-insensitive (see\nhttps://datatracker.ietf.org/doc/html/rfc2616#section-4.2).\n\nInput:\n  GET /foo HTTP/1.1\n  my-header1: foo\n  my-header2: bar\n  my-header3: baz\n\nConfig:\n  remove: [\"my-header1\", \"my-header3\"]\n\nOutput:\n  GET /foo HTTP/1.1\n  my-header2: bar",
                          "items": {
                            "type": "string"
                          },
                          "maxItems": 16,
                          "type": "array",
                          "x-kubernetes-list-type": "set"
                        },
                        "set": {
                          "description": "Set overwrites/adds the request with the given header (name, value)\nbefore the action.\n\nInput:\n  GET /foo HTTP/1.1\n  my-header: foo\n\nConfig:\n  set:\n  - name: \"my-header\"\n    value: \"bar\"\n\nOutput:\n  GET /foo HTTP/1.1\n  my-header: bar",
                          "items": {
                            "description": "HTTPHeader represents an HTTP Header name and value as defined by RFC 7230.",
                            "properties": {
                              "name": {
                                "description": "Name is the name of the HTTP Header to be matched. Name matching MUST be\ncase-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2).\n\nIf multiple entries specify equivalent header names, the first entry with\nan equivalent name MUST be considered for a match. Subsequent entries\nwith an equivalent header name MUST be ignored. Due to the\ncase-insensitivity of header names, \"foo\" and \"Foo\" are considered\nequivalent.",
                                "maxLength": 256,
                                "minLength": 1,
                                "pattern": "^[A-Za-z0-9!#$%&'*+\\-.^_\\x60|~]+$",
                                "type": "string"
                              },
                              "value": {
                                "description": "Value is the value of HTTP Header to be matched.",
                                "maxLength": 4096,
                                "minLength": 1,
                                "type": "string"
                              }
                            },
                            "required": [
                              "name",
                              "value"
                            ],
                            "type": "object",
                            "additionalProperties": false
                          },
                          "maxItems": 16,
                          "type": "array",
                          "x-kubernetes-list-map-keys": [
                            "name"
                          ],
                          "x-kubernetes-list-type": "map"
                        }
                      },
                      "type": "object",
                      "additionalProperties": false
                    },
                    "kind": {
                      "description": "Kind is the kind of the backend resource.\nWhen not specified, defaults to AIServiceBackend.\nCurrently, only \"InferencePool\" is supported when Group is specified.",
                      "maxLength": 63,
                      "pattern": "^$|^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$",
                      "type": "string"
                    },
                    "modelNameOverride": {
                      "description": "Name of the model in the backend. If provided this will override the name provided in the request.\nThis field is ignored when referencing InferencePool resources.",
                      "type": "string"
                    },
                    "name": {
                      "description": "Name is the name of the backend resource.\nWhen Group and Kind are not specified, this refers to an AIServiceBackend.\nWhen Group and Kind are specified, this refers to the resource of the specified type.",
                      "minLength": 1,
                      "type": "string"
                    },
                    "namespace": {
                      "description": "Namespace is the namespace of the backend resource.\nWhen unspecified (or empty string), this refers to the local namespace of the AIGatewayRoute.\n\nNote that when a namespace different than the local namespace is specified,\na ReferenceGrant object is required in the referent namespace to allow that\nnamespace's owner to accept the reference. See the ReferenceGrant\ndocumentation for details.",
                      "maxLength": 63,
                      "minLength": 1,
                      "pattern": "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$",
                      "type": "string"
                    },
                    "priority": {
                      "default": 0,
                      "description": "Priority is the priority of the backend. This sets the priority on the underlying endpoints.\nSee: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority\nNote: This will override the `fallback` property of the underlying Envoy Gateway Backend.\nThis field is ignored when referencing InferencePool resources.\n\nDefault is 0.",
                      "format": "int32",
                      "minimum": 0,
                      "type": "integer"
                    },
                    "weight": {
                      "default": 1,
                      "description": "Weight is the weight of the backend. This is exactly the same as the weight in\nthe BackendRef in the Gateway API. For details, see:\nhttps://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef\n\nDefault is 1.",
                      "format": "int32",
                      "minimum": 0,
                      "type": "integer"
                    }
                  },
                  "required": [
                    "name"
                  ],
                  "type": "object",
                  "x-kubernetes-validations": [
                    {
                      "message": "group and kind must be specified together",
                      "rule": "!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))"
                    },
                    {
                      "message": "only InferencePool from inference.networking.k8s.io group is supported",
                      "rule": "!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')"
                    }
                  ],
                  "additionalProperties": false
                },
                "maxItems": 128,
                "type": "array"
              },
              "matches": {
                "description": "Matches is the list of AIGatewayRouteMatch that this rule will match the traffic to.\nThis is a subset of the HTTPRouteMatch in the Gateway API. For details, see:\nhttps://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRouteMatch",
                "items": {
                  "properties": {
                    "headers": {
                      "description": "Headers specifies HTTP request header matchers. See HeaderMatch in the Gateway API for the details:\nhttps://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPHeaderMatch",
                      "items": {
                        "description": "HTTPHeaderMatch describes how to select a HTTP route by matching HTTP request\nheaders.",
                        "properties": {
                          "name": {
                            "description": "Name is the name of the HTTP Header to be matched. Name matching MUST be\ncase-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2).\n\nIf multiple entries specify equivalent header names, only the first\nentry with an equivalent name MUST be considered for a match. Subsequent\nentries with an equivalent header name MUST be ignored. Due to the\ncase-insensitivity of header names, \"foo\" and \"Foo\" are considered\nequivalent.\n\nWhen a header is repeated in an HTTP request, it is\nimplementation-specific behavior as to how this is represented.\nGenerally, proxies should follow the guidance from the RFC:\nhttps://www.rfc-editor.org/rfc/rfc7230.html#section-3.2.2 regarding\nprocessing a repeated header, with special handling for \"Set-Cookie\".",
                            "maxLength": 256,
                            "minLength": 1,
                            "pattern": "^[A-Za-z0-9!#$%&'*+\\-.^_\\x60|~]+$",
                            "type": "string"
                          },
                          "type": {
                            "default": "Exact",
                            "description": "Type specifies how to match against the value of the header.\n\nSupport: Core (Exact)\n\nSupport: Implementation-specific (RegularExpression)\n\nSince RegularExpression HeaderMatchType has implementation-specific\nconformance, implementations can support POSIX, PCRE or any other dialects\nof regular expressions. Please read the implementation's documentation to\ndetermine the supported dialect.",
                            "enum": [
                              "Exact",
                              "RegularExpression"
                            ],
                            "type": "string"
                          },
                          "value": {
                            "description": "Value is the value of HTTP Header to be matched.",
                            "maxLength": 4096,
                            "minLength": 1,
                            "type": "string"
                          }
                        },
                        "required": [
                          "name",
                          "value"
                        ],
                        "type": "object",
                        "additionalProperties": false
                      },
                      "maxItems": 16,
                      "type": "array",
                      "x-kubernetes-list-map-keys": [
                        "name"
                      ],
                      "x-kubernetes-list-type": "map"
                    }
                  },
                  "type": "object",
                  "additionalProperties": false
                },
                "maxItems": 128,
                "type": "array"
              },
              "modelsCreatedAt": {
                "description": "ModelsCreatedAt represents the creation timestamp of the running models served by the backends,\nwhich will be exported as the field of \"Created\" in openai-compatible API \"/models\".\nIt follows the format of RFC 3339, for example \"2024-05-21T10:00:00Z\".\n\nThis is used only when this rule contains \"x-ai-eg-model\" in its header matching\nwhere the header value will be recognized as a \"model\" in \"/models\" endpoint.\nAll the matched models will share the same creation time.\n\nDefaults to the creation timestamp of the AIGatewayRoute if not set.",
                "format": "date-time",
                "type": "string"
              },
              "modelsOwnedBy": {
                "default": "Envoy AI Gateway",
                "description": "ModelsOwnedBy represents the owner of the running models served by the backends,\nwhich will be exported as the field of \"OwnedBy\" in openai-compatible API \"/models\".\n\nThis is used only when this rule contains \"x-ai-eg-model\" in its header matching\nwhere the header value will be recognized as a \"model\" in \"/models\" endpoint.\nAll the matched models will share the same owner.\n\nDefaults to \"Envoy AI Gateway\" if not set.",
                "type": "string"
              },
              "timeouts": {
                "description": "Timeouts defines the timeouts that can be configured for an HTTP request.\n\nIf this field is not set, or timeouts.request is nil, Envoy AI Gateway defaults to\na 60s request timeout, as opposed to Envoy Gateway's default of 15s.\n\nFor streaming responses (like chat completions with stream=true), consider setting\nlonger timeouts as the response may take time until the completion.",
                "properties": {
                  "backendRequest": {
                    "description": "BackendRequest specifies a timeout for an individual request from the gateway\nto a backend. This covers the time from when the request first starts being\nsent from the gateway to when the full response has been received from the backend.\n\nSetting a timeout to the zero duration (e.g. \"0s\") SHOULD disable the timeout\ncompletely. Implementations that cannot completely disable the timeout MUST\ninstead interpret the zero duration as the longest possible value to which\nthe timeout can be set.\n\nAn entire client HTTP transaction with a gateway, covered by the Request timeout,\nmay result in more than one call from the gateway to the destination backend,\nfor example, if automatic retries are supported.\n\nThe value of BackendRequest must be a Gateway API Duration string as defined by\nGEP-2257.  When this field is unspecified, its behavior is implementation-specific;\nwhen specified, the value of BackendRequest must be no more than the value of the\nRequest timeout (since the Request timeout encompasses the BackendRequest timeout).\n\nSupport: Extended",
                    "pattern": "^([0-9]{1,5}(h|m|s|ms)){1,4}$",
                    "type": "string"
                  },
                  "request": {
                    "description": "Request specifies the maximum duration for a gateway to respond to an HTTP request.\nIf the gateway has not been able to respond before this deadline is met, the gateway\nMUST return a timeout error.\n\nFor example, setting the `rules.timeouts.request` field to the value `10s` in an\n`HTTPRoute` will cause a timeout if a client request is taking longer than 10 seconds\nto complete.\n\nSetting a timeout to the zero duration (e.g. \"0s\") SHOULD disable the timeout\ncompletely. Implementations that cannot completely disable the timeout MUST\ninstead interpret the zero duration as the longest possible value to which\nthe timeout can be set.\n\nThis timeout is intended to cover as close to the whole request-response transaction\nas possible although an implementation MAY choose to start the timeout after the entire\nrequest stream has been received instead of immediately after the transaction is\ninitiated by the client.\n\nThe value of Request is a Gateway API Duration string as defined by GEP-2257. When this\nfield is unspecified, request timeout behavior is implementation-specific.\n\nSupport: Extended",
                    "pattern": "^([0-9]{1,5}(h|m|s|ms)){1,4}$",
                    "type": "string"
                  }
                },
                "type": "object",
                "x-kubernetes-validations": [
                  {
                    "message": "backendRequest timeout cannot be longer than request timeout",
                    "rule": "!(has(self.request) && has(self.backendRequest) && duration(self.request) != duration('0s') && duration(self.backendRequest) > duration(self.request))"
                  }
                ],
                "additionalProperties": false
              }
            },
            "type": "object",
            "x-kubernetes-validations": [
              {
                "message": "cannot mix InferencePool and AIServiceBackend references in the same rule",
                "rule": "!has(self.backendRefs) || size(self.backendRefs) == 0 || (self.backendRefs.all(ref, !has(ref.group) && !has(ref.kind)) || self.backendRefs.all(ref, has(ref.group) && has(ref.kind)))"
              },
              {
                "message": "only one InferencePool backend is allowed per rule",
                "rule": "!has(self.backendRefs) || size(self.backendRefs) == 0 || !self.backendRefs.exists(ref, has(ref.group) && has(ref.kind)) || size(self.backendRefs) == 1"
              }
            ],
            "additionalProperties": false
          },
          "maxItems": 128,
          "type": "array"
        }
      },
      "required": [
        "rules"
      ],
      "type": "object",
      "additionalProperties": false
    },
    "status": {
      "description": "Status defines the status details of the AIGatewayRoute.",
      "properties": {
        "conditions": {
          "description": "Conditions is the list of conditions set by the reconciliation result.\nCurrently, at most one condition is set.\n\nKnown .status.conditions.type are: \"Accepted\", \"NotAccepted\".",
          "items": {
            "description": "Condition contains details for one aspect of the current state of this API Resource.",
            "properties": {
              "lastTransitionTime": {
                "description": "lastTransitionTime is the last time the condition transitioned from one status to another.\nThis should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.",
                "format": "date-time",
                "type": "string"
              },
              "message": {
                "description": "message is a human readable message indicating details about the transition.\nThis may be an empty string.",
                "maxLength": 32768,
                "type": "string"
              },
              "observedGeneration": {
                "description": "observedGeneration represents the .metadata.generation that the condition was set based upon.\nFor instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date\nwith respect to the current state of the instance.",
                "format": "int64",
                "minimum": 0,
                "type": "integer"
              },
              "reason": {
                "description": "reason contains a programmatic identifier indicating the reason for the condition's last transition.\nProducers of specific condition types may define expected values and meanings for this field,\nand whether the values are considered a guaranteed API.\nThe value should be a CamelCase string.\nThis field may not be empty.",
                "maxLength": 1024,
                "minLength": 1,
                "pattern": "^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$",
                "type": "string"
              },
              "status": {
                "description": "status of the condition, one of True, False, Unknown.",
                "enum": [
                  "True",
                  "False",
                  "Unknown"
                ],
                "type": "string"
              },
              "type": {
                "description": "type of condition in CamelCase or in foo.example.com/CamelCase.",
                "maxLength": 316,
                "pattern": "^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$",
                "type": "string"
              }
            },
            "required": [
              "lastTransitionTime",
              "message",
              "reason",
              "status",
              "type"
            ],
            "type": "object",
            "additionalProperties": false
          },
          "type": "array"
        }
      },
      "type": "object",
      "additionalProperties": false
    }
  },
  "type": "object"
}
