From 4ab5c7ca5c053625c4632351392be14cccb6ef71 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Mon, 12 Jul 2021 12:01:10 +0200
Subject: [PATCH 01/42] Initial kubebuilder scaffolding

---
 pkg/crds/PROJECT                              |  11 +-
 .../apis/api/v1alpha1/groupversion_info.go    |  36 ++++++
 .../apis/api/v1alpha1/realtimeapi_types.go    |  54 +++++++++
 .../api/v1alpha1/zz_generated.deepcopy.go     | 114 ++++++++++++++++++
 pkg/crds/config/crd/kustomization.yaml        |   3 +
 .../controllers/api/realtimeapi_controller.go |  63 ++++++++++
 pkg/crds/controllers/api/suite_test.go        |  80 ++++++++++++
 pkg/crds/main.go                              |  11 ++
 8 files changed, 371 insertions(+), 1 deletion(-)
 create mode 100644 pkg/crds/apis/api/v1alpha1/groupversion_info.go
 create mode 100644 pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
 create mode 100644 pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
 create mode 100644 pkg/crds/controllers/api/realtimeapi_controller.go
 create mode 100644 pkg/crds/controllers/api/suite_test.go

diff --git a/pkg/crds/PROJECT b/pkg/crds/PROJECT
index 97b1925b84..01bbb7d1e9 100644
--- a/pkg/crds/PROJECT
+++ b/pkg/crds/PROJECT
@@ -3,7 +3,7 @@ layout:
 - go.kubebuilder.io/v3
 multigroup: true
 projectName: operator
-repo: github.com/cortexlabs/cortex
+repo: github.com/cortexlabs/cortex/pkg/crds
 resources:
 - api:
     crdVersion: v1
@@ -14,4 +14,13 @@ resources:
   kind: BatchJob
   path: github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: cortex.dev
+  group: api
+  kind: RealtimeAPI
+  path: github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/pkg/crds/apis/api/v1alpha1/groupversion_info.go b/pkg/crds/apis/api/v1alpha1/groupversion_info.go
new file mode 100644
index 0000000000..3625dc1527
--- /dev/null
+++ b/pkg/crds/apis/api/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the api v1alpha1 API group
+//+kubebuilder:object:generate=true
+//+groupName=api.cortex.dev
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is group version used to register these objects
+	GroupVersion = schema.GroupVersion{Group: "api.cortex.dev", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
new file mode 100644
index 0000000000..91802889ce
--- /dev/null
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -0,0 +1,54 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// RealtimeAPISpec defines the desired state of RealtimeAPI
+type RealtimeAPISpec struct {
+}
+
+// RealtimeAPIStatus defines the observed state of RealtimeAPI
+type RealtimeAPIStatus struct {
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// RealtimeAPI is the Schema for the realtimeapis API
+type RealtimeAPI struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   RealtimeAPISpec   `json:"spec,omitempty"`
+	Status RealtimeAPIStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// RealtimeAPIList contains a list of RealtimeAPI
+type RealtimeAPIList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []RealtimeAPI `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&RealtimeAPI{}, &RealtimeAPIList{})
+}
diff --git a/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
new file mode 100644
index 0000000000..852f080173
--- /dev/null
+++ b/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
@@ -0,0 +1,114 @@
+// +build !ignore_autogenerated
+
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RealtimeAPI) DeepCopyInto(out *RealtimeAPI) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	out.Spec = in.Spec
+	out.Status = in.Status
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPI.
+func (in *RealtimeAPI) DeepCopy() *RealtimeAPI {
+	if in == nil {
+		return nil
+	}
+	out := new(RealtimeAPI)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *RealtimeAPI) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RealtimeAPIList) DeepCopyInto(out *RealtimeAPIList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]RealtimeAPI, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPIList.
+func (in *RealtimeAPIList) DeepCopy() *RealtimeAPIList {
+	if in == nil {
+		return nil
+	}
+	out := new(RealtimeAPIList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *RealtimeAPIList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RealtimeAPISpec) DeepCopyInto(out *RealtimeAPISpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPISpec.
+func (in *RealtimeAPISpec) DeepCopy() *RealtimeAPISpec {
+	if in == nil {
+		return nil
+	}
+	out := new(RealtimeAPISpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RealtimeAPIStatus) DeepCopyInto(out *RealtimeAPIStatus) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPIStatus.
+func (in *RealtimeAPIStatus) DeepCopy() *RealtimeAPIStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(RealtimeAPIStatus)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/pkg/crds/config/crd/kustomization.yaml b/pkg/crds/config/crd/kustomization.yaml
index 73e33703bb..59e4b92a53 100644
--- a/pkg/crds/config/crd/kustomization.yaml
+++ b/pkg/crds/config/crd/kustomization.yaml
@@ -3,17 +3,20 @@
 # It should be run by config/default
 resources:
 - bases/batch.cortex.dev_batchjobs.yaml
+- bases/api.cortex.dev_realtimeapis.yaml
 #+kubebuilder:scaffold:crdkustomizeresource
 
 patchesStrategicMerge:
 # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
 # patches here are for enabling the conversion webhook for each CRD
 #- patches/webhook_in_batchjobs.yaml
+#- patches/webhook_in_realtimeapis.yaml
 #+kubebuilder:scaffold:crdkustomizewebhookpatch
 
 # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix.
 # patches here are for enabling the CA injection for each CRD
 #- patches/cainjection_in_batchjobs.yaml
+#- patches/cainjection_in_realtimeapis.yaml
 #+kubebuilder:scaffold:crdkustomizecainjectionpatch
 
 # the following config is for teaching kustomize how to do kustomization for CRDs.
diff --git a/pkg/crds/controllers/api/realtimeapi_controller.go b/pkg/crds/controllers/api/realtimeapi_controller.go
new file mode 100644
index 0000000000..f891b922c2
--- /dev/null
+++ b/pkg/crds/controllers/api/realtimeapi_controller.go
@@ -0,0 +1,63 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package api
+
+import (
+	"context"
+
+	"github.com/go-logr/logr"
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+)
+
+// RealtimeAPIReconciler reconciles a RealtimeAPI object
+type RealtimeAPIReconciler struct {
+	client.Client
+	Log    logr.Logger
+	Scheme *runtime.Scheme
+}
+
+//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/finalizers,verbs=update
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+// TODO(user): Modify the Reconcile function to compare the state specified by
+// the RealtimeAPI object against the actual cluster state, and then
+// perform operations to make the cluster state reflect the state specified by
+// the user.
+//
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile
+func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	_ = r.Log.WithValues("realtimeapi", req.NamespacedName)
+
+	// your logic here
+
+	return ctrl.Result{}, nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *RealtimeAPIReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&apiv1alpha1.RealtimeAPI{}).
+		Complete(r)
+}
diff --git a/pkg/crds/controllers/api/suite_test.go b/pkg/crds/controllers/api/suite_test.go
new file mode 100644
index 0000000000..134a7234a8
--- /dev/null
+++ b/pkg/crds/controllers/api/suite_test.go
@@ -0,0 +1,80 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package api
+
+import (
+	"path/filepath"
+	"testing"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/envtest/printer"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+	//+kubebuilder:scaffold:imports
+)
+
+// These tests use Ginkgo (BDD-style Go testing framework). Refer to
+// http://onsi.github.io/ginkgo/ to learn more about Ginkgo.
+
+var cfg *rest.Config
+var k8sClient client.Client
+var testEnv *envtest.Environment
+
+func TestAPIs(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	RunSpecsWithDefaultAndCustomReporters(t,
+		"Controller Suite",
+		[]Reporter{printer.NewlineReporter{}})
+}
+
+var _ = BeforeSuite(func() {
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
+		ErrorIfCRDPathMissing: true,
+	}
+
+	var err error // declared on its own so "=" below assigns the package-level cfg; ":=" would shadow it
+	cfg, err = testEnv.Start()
+	Expect(err).NotTo(HaveOccurred())
+	Expect(cfg).NotTo(BeNil())
+
+	err = apiv1alpha1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	//+kubebuilder:scaffold:scheme
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+}, 60)
+
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	err := testEnv.Stop()
+	Expect(err).NotTo(HaveOccurred())
+})
diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index ee8c0c476b..90502d08bc 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -41,7 +41,9 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
+	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
 	batch "github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1"
+	apicontrollers "github.com/cortexlabs/cortex/pkg/crds/controllers/api"
 	batchcontrollers "github.com/cortexlabs/cortex/pkg/crds/controllers/batch"
 	//+kubebuilder:scaffold:imports
 )
@@ -55,6 +57,7 @@ func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 
 	utilruntime.Must(batch.AddToScheme(scheme))
+	utilruntime.Must(apiv1alpha1.AddToScheme(scheme))
 	//+kubebuilder:scaffold:scheme
 }
 
@@ -160,6 +163,14 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "BatchJob")
 		os.Exit(1)
 	}
+	if err = (&apicontrollers.RealtimeAPIReconciler{
+		Client: mgr.GetClient(),
+		Log:    ctrl.Log.WithName("controllers").WithName("api").WithName("RealtimeAPI"),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "RealtimeAPI")
+		os.Exit(1)
+	}
 	//+kubebuilder:scaffold:builder
 
 	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {

From 20b7a6ffc9fce0a822b521c9432e641bba53af44 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Mon, 12 Jul 2021 17:06:01 +0200
Subject: [PATCH 02/42] Populate RealtimeAPI CRD types and add kubebuilder
 validation annotations

---
 .../apis/api/v1alpha1/realtimeapi_types.go    | 187 ++++++
 .../bases/api.cortex.dev_realtimeapis.yaml    | 615 ++++++++++++++++++
 .../patches/cainjection_in_realtimeapis.yaml  |   7 +
 .../crd/patches/webhook_in_realtimeapis.yaml  |  14 +
 .../config/rbac/realtimeapi_editor_role.yaml  |  24 +
 .../config/rbac/realtimeapi_viewer_role.yaml  |  20 +
 pkg/crds/config/rbac/role.yaml                |  26 +
 .../samples/api_v1alpha1_realtimeapi.yaml     |   7 +
 8 files changed, 900 insertions(+)
 create mode 100644 pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
 create mode 100644 pkg/crds/config/crd/patches/cainjection_in_realtimeapis.yaml
 create mode 100644 pkg/crds/config/crd/patches/webhook_in_realtimeapis.yaml
 create mode 100644 pkg/crds/config/rbac/realtimeapi_editor_role.yaml
 create mode 100644 pkg/crds/config/rbac/realtimeapi_viewer_role.yaml
 create mode 100644 pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml

diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
index 91802889ce..e61255039f 100644
--- a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -17,15 +17,202 @@ limitations under the License.
 package v1alpha1
 
 import (
+	"time"
+
+	"github.com/cortexlabs/cortex/pkg/types/status"
+	kcore "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
 )
 
 // RealtimeAPISpec defines the desired state of RealtimeAPI
 type RealtimeAPISpec struct {
+	// Pod configuration
+	// +kubebuilder:validation:Required
+	Pod PodSpec `json:"pod"`
+
+	// +kubebuilder:validation:Optional
+	// Autoscaling configuration
+	Autoscaling AutoscalingSpec `json:"autoscaling"`
+
+	// +kubebuilder:validation:Optional
+	// List of node groups on which this API can run (default: all node groups are eligible)
+	NodeGroups []string `json:"node_groups,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Deployment strategy to use when replacing existing replicas with new ones
+	UpdateStrategy UpdateStratagySpec `json:"update_strategy"`
+
+	// +kubebuilder:validation:Optional
+	// Networking configuration
+	Networking NetworkingSpec `json:"networking"`
+}
+
+type PodSpec struct {
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=8080
+	// Port to which requests will be sent
+	Port int `json:"port"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=1
+	// Maximum number of requests that will be concurrently sent into the container
+	MaxConcurrency int `json:"max_concurrency"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=100
+	// Maximum number of requests per replica which will be queued
+	// (beyond max_concurrency) before requests are rejected with error code 503
+	MaxQueueLength int `json:"max_queue_length"`
+
+	// +kubebuilder:validation:Required
+	// Configurations for the containers to run
+	Containers []ContainerSpec `json:"containers"`
+}
+
+type ContainerSpec struct {
+	// +kubebuilder:validation:Required
+	// Name of the container
+	Name string `json:"name"`
+
+	// +kubebuilder:validation:Required
+	// Docker image to use for the container
+	Image string `json:"image"`
+
+	// +kubebuilder:validation:Optional
+	// Entrypoint (not executed within a shell)
+	Command []string `json:"command,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Arguments to the entrypoint
+	Args []string `json:"args,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Environment variables to set in the container
+	Env []kcore.EnvVar `json:"env,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Compute resource requests
+	Compute *ComputeSpec `json:"compute,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Periodic probe of container readiness;
+	// traffic will not be sent into the pod unless all containers' readiness probes are succeeding
+	ReadinessProbe *kcore.Probe `json:"readiness_probe,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Periodic probe of container liveness; container will be restarted if the probe fails
+	LivenessProbe *kcore.Probe `json:"liveness_probe,omitempty"`
+}
+
+type ComputeSpec struct {
+	// +kubebuilder:validation:Optional
+	// CPU request for the container; one unit of CPU corresponds to one virtual CPU;
+	// fractional requests are allowed, and can be specified as a floating point number or via the "m" suffix
+	CPU *resource.Quantity `json:"cpu,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// GPU request for the container; one unit of GPU corresponds to one virtual GPU
+	GPU int `json:"gpu,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Memory request for the container;
+	// one unit of memory is one byte and can be expressed as an integer or by using one of these suffixes: K, M, G, T
+	// (or their power-of-two counterparts: Ki, Mi, Gi, Ti)
+	Mem *resource.Quantity `json:"mem,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Size of shared memory (/dev/shm) for sharing data between multiple processes
+	Shm *resource.Quantity `json:"shm,omitempty"`
+}
+
+type AutoscalingSpec struct {
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=1
+	// Minimum number of replicas
+	MinReplicas int `json:"min_replicas,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=100
+	// Maximum number of replicas
+	MaxReplicas int `json:"max_replicas,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=1
+	// Initial number of replicas
+	InitReplicas int `json:"init_replicas,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Desired number of in-flight requests per replica (including requests actively being processed as well as queued),
+	// which the autoscaler tries to maintain
+	TargetInFlight int `json:"target_in_flight,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="60s"
+	// Duration over which to average the API's in-flight requests per replica
+	Window time.Duration `json:"window,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="5m"
+	// The API will not scale below the highest recommendation made during this period
+	DownscaleStabilizationPeriod time.Duration `json:"downscale_stabilization_period,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="1m"
+	// The API will not scale above the lowest recommendation made during this period
+	UpscaleStabilizationPeriod time.Duration `json:"upscale_stabilization_period,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="750m"
+	// Maximum factor by which to scale down the API on a single scaling event
+	MaxDownscaleFactor resource.Quantity `json:"max_downscale_factor,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="1500m"
+	// Maximum factor by which to scale up the API on a single scaling event
+	MaxUpscaleFactor resource.Quantity `json:"max_upscale_factor,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="50m"
+	// Any recommendation falling within this factor below the current number of replicas will not trigger a
+	// scale down event
+	DownscaleTolerance resource.Quantity `json:"downscale_tolerance,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="50m"
+	// Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event
+	UpscaleTolerance resource.Quantity `json:"upscale_tolerance,omitempty"`
+}
+
+type UpdateStratagySpec struct {
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="25%"
+	// Maximum number of replicas that can be scheduled above the desired number of replicas during an update;
+	// can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
+	// (set to 0 to disable rolling updates)
+	MaxSurge intstr.IntOrString `json:"max_surge"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="25%"
+	// Maximum number of replicas that can be unavailable during an update; can be an absolute number,
+	// e.g. 5, or a percentage of desired replicas, e.g. 10%
+	MaxUnavailable intstr.IntOrString `json:"max_unavailable"`
+}
+
+type NetworkingSpec struct {
+	// +kubebuilder:validation:Optional
+	// Endpoint for the API
+	Endpoint string `json:"endpoint,omitempty"`
 }
 
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
+	Status          status.Code `json:"status"`
+	DesiredReplicas int         `json:"desired_replicas"`
+	CurrentReplicas int         `json:"current_replicas"`
+	ReadyReplicas   int         `json:"ready_replicas"`
+	Endpoint        string      `json:"endpoint,omitempty"`
 }
 
 //+kubebuilder:object:root=true
diff --git a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
new file mode 100644
index 0000000000..5fc4fbd7c8
--- /dev/null
+++ b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
@@ -0,0 +1,615 @@
+
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.4.1
+  creationTimestamp: null
+  name: realtimeapis.api.cortex.dev
+spec:
+  group: api.cortex.dev
+  names:
+    kind: RealtimeAPI
+    listKind: RealtimeAPIList
+    plural: realtimeapis
+    singular: realtimeapi
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: RealtimeAPI is the Schema for the realtimeapis API
+        properties:
+          apiVersion:
+            description: 'APIVersion defines the versioned schema of this representation
+              of an object. Servers should convert recognized schemas to the latest
+              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+            type: string
+          kind:
+            description: 'Kind is a string value representing the REST resource this
+              object represents. Servers may infer this from the endpoint the client
+              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: RealtimeAPISpec defines the desired state of RealtimeAPI
+            properties:
+              autoscaling:
+                description: Autoscaling configuration
+                properties:
+                  downscale_stabilization_period:
+                    default: 5m
+                    description: The API will not scale below the highest recommendation
+                      made during this period
+                    format: int64
+                    type: integer
+                  downscale_tolerance:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 50m
+                    description: Any recommendation falling within this factor below
+                      the current number of replicas will not trigger a scale down
+                      event
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
+                  init_replicas:
+                    default: 1
+                    description: Initial number of replicas
+                    type: integer
+                  max_downscale_factor:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 750m
+                    description: Maximum factor by which to scale down the API on
+                      a single scaling event
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
+                  max_replicas:
+                    default: 100
+                    description: Maximum number of replicas
+                    type: integer
+                  max_upscale_factor:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 1500m
+                    description: Maximum factor by which to scale up the API on a
+                      single scaling event
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
+                  min_replicas:
+                    default: 1
+                    description: Minimum number of replicas
+                    type: integer
+                  target_in_flight:
+                    description: Desired number of in-flight requests per replica
+                      (including requests actively being processed as well as queued),
+                      which the autoscaler tries to maintain
+                    type: integer
+                  upscale_stabilization_period:
+                    default: 1m
+                    description: The API will not scale above the lowest recommendation
+                      made during this period
+                    format: int64
+                    type: integer
+                  upscale_tolerance:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 50m
+                    description: Any recommendation falling within this factor above
+                      the current number of replicas will not trigger a scale up event
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
+                  window:
+                    default: 60s
+                    description: Duration over which to average the API's in-flight
+                      requests per replica
+                    format: int64
+                    type: integer
+                type: object
+              networking:
+                description: Networking configuration
+                properties:
+                  endpoint:
+                    description: Endpoint for the API
+                    type: string
+                type: object
+              node_groups:
+                description: 'List of node groups on which this API can run (default:
+                  all node groups are eligible)'
+                items:
+                  type: string
+                type: array
+              pod:
+                description: Pod configuration
+                properties:
+                  containers:
+                    description: Configurations for the containers to run
+                    items:
+                      properties:
+                        args:
+                          description: Arguments to the entrypoint
+                          items:
+                            type: string
+                          type: array
+                        command:
+                          description: Entrypoint (not executed within a shell)
+                          items:
+                            type: string
+                          type: array
+                        compute:
+                          description: Compute resource requests
+                          properties:
+                            cpu:
+                              anyOf:
+                              - type: integer
+                              - type: string
+                              description: CPU request for the container; one unit
+                                of CPU corresponds to one virtual CPU; fractional
+                                requests are allowed, and can be specified as a floating
+                                point number or via the "m" suffix
+                              pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                              x-kubernetes-int-or-string: true
+                            gpu:
+                              description: GPU request for the container; one unit
+                                of GPU corresponds to one virtual GPU
+                              type: integer
+                            mem:
+                              anyOf:
+                              - type: integer
+                              - type: string
+                              description: 'Memory request for the container; one
+                                unit of memory is one byte and can be expressed as
+                                an integer or by using one of these suffixes: K, M,
+                                G, T (or their power-of two counterparts: Ki, Mi,
+                                Gi, Ti)'
+                              pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                              x-kubernetes-int-or-string: true
+                            shm:
+                              anyOf:
+                              - type: integer
+                              - type: string
+                              description: Size of shared memory (/dev/shm) for sharing
+                                data between multiple processes
+                              pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                              x-kubernetes-int-or-string: true
+                          type: object
+                        env:
+                          description: Environment variables to set in the container
+                          items:
+                            description: EnvVar represents an environment variable
+                              present in a Container.
+                            properties:
+                              name:
+                                description: Name of the environment variable. Must
+                                  be a C_IDENTIFIER.
+                                type: string
+                              value:
+                                description: 'Variable references $(VAR_NAME) are
+                                  expanded using the previous defined environment
+                                  variables in the container and any service environment
+                                  variables. If a variable cannot be resolved, the
+                                  reference in the input string will be unchanged.
+                                  The $(VAR_NAME) syntax can be escaped with a double
+                                  $$, ie: $$(VAR_NAME). Escaped references will never
+                                  be expanded, regardless of whether the variable
+                                  exists or not. Defaults to "".'
+                                type: string
+                              valueFrom:
+                                description: Source for the environment variable's
+                                  value. Cannot be used if value is not empty.
+                                properties:
+                                  configMapKeyRef:
+                                    description: Selects a key of a ConfigMap.
+                                    properties:
+                                      key:
+                                        description: The key to select.
+                                        type: string
+                                      name:
+                                        description: 'Name of the referent. More info:
+                                          https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                                          TODO: Add other useful fields. apiVersion,
+                                          kind, uid?'
+                                        type: string
+                                      optional:
+                                        description: Specify whether the ConfigMap
+                                          or its key must be defined
+                                        type: boolean
+                                    required:
+                                    - key
+                                    type: object
+                                  fieldRef:
+                                    description: 'Selects a field of the pod: supports
+                                      metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
+                                      `metadata.annotations[''<KEY>'']`, spec.nodeName,
+                                      spec.serviceAccountName, status.hostIP, status.podIP,
+                                      status.podIPs.'
+                                    properties:
+                                      apiVersion:
+                                        description: Version of the schema the FieldPath
+                                          is written in terms of, defaults to "v1".
+                                        type: string
+                                      fieldPath:
+                                        description: Path of the field to select in
+                                          the specified API version.
+                                        type: string
+                                    required:
+                                    - fieldPath
+                                    type: object
+                                  resourceFieldRef:
+                                    description: 'Selects a resource of the container:
+                                      only resources limits and requests (limits.cpu,
+                                      limits.memory, limits.ephemeral-storage, requests.cpu,
+                                      requests.memory and requests.ephemeral-storage)
+                                      are currently supported.'
+                                    properties:
+                                      containerName:
+                                        description: 'Container name: required for
+                                          volumes, optional for env vars'
+                                        type: string
+                                      divisor:
+                                        anyOf:
+                                        - type: integer
+                                        - type: string
+                                        description: Specifies the output format of
+                                          the exposed resources, defaults to "1"
+                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                        x-kubernetes-int-or-string: true
+                                      resource:
+                                        description: 'Required: resource to select'
+                                        type: string
+                                    required:
+                                    - resource
+                                    type: object
+                                  secretKeyRef:
+                                    description: Selects a key of a secret in the
+                                      pod's namespace
+                                    properties:
+                                      key:
+                                        description: The key of the secret to select
+                                          from.  Must be a valid secret key.
+                                        type: string
+                                      name:
+                                        description: 'Name of the referent. More info:
+                                          https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                                          TODO: Add other useful fields. apiVersion,
+                                          kind, uid?'
+                                        type: string
+                                      optional:
+                                        description: Specify whether the Secret or
+                                          its key must be defined
+                                        type: boolean
+                                    required:
+                                    - key
+                                    type: object
+                                type: object
+                            required:
+                            - name
+                            type: object
+                          type: array
+                        image:
+                          description: Docker image to use for the container
+                          type: string
+                        liveness_probe:
+                          description: Periodic probe of container liveness; container
+                            will be restarted if the probe fails
+                          properties:
+                            exec:
+                              description: One and only one of the following should
+                                be specified. Exec specifies the action to take.
+                              properties:
+                                command:
+                                  description: Command is the command line to execute
+                                    inside the container, the working directory for
+                                    the command  is root ('/') in the container's
+                                    filesystem. The command is simply exec'd, it is
+                                    not run inside a shell, so traditional shell instructions
+                                    ('|', etc) won't work. To use a shell, you need
+                                    to explicitly call out to that shell. Exit status
+                                    of 0 is treated as live/healthy and non-zero is
+                                    unhealthy.
+                                  items:
+                                    type: string
+                                  type: array
+                              type: object
+                            failureThreshold:
+                              description: Minimum consecutive failures for the probe
+                                to be considered failed after having succeeded. Defaults
+                                to 3. Minimum value is 1.
+                              format: int32
+                              type: integer
+                            httpGet:
+                              description: HTTPGet specifies the http request to perform.
+                              properties:
+                                host:
+                                  description: Host name to connect to, defaults to
+                                    the pod IP. You probably want to set "Host" in
+                                    httpHeaders instead.
+                                  type: string
+                                httpHeaders:
+                                  description: Custom headers to set in the request.
+                                    HTTP allows repeated headers.
+                                  items:
+                                    description: HTTPHeader describes a custom header
+                                      to be used in HTTP probes
+                                    properties:
+                                      name:
+                                        description: The header field name
+                                        type: string
+                                      value:
+                                        description: The header field value
+                                        type: string
+                                    required:
+                                    - name
+                                    - value
+                                    type: object
+                                  type: array
+                                path:
+                                  description: Path to access on the HTTP server.
+                                  type: string
+                                port:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  description: Name or number of the port to access
+                                    on the container. Number must be in the range
+                                    1 to 65535. Name must be an IANA_SVC_NAME.
+                                  x-kubernetes-int-or-string: true
+                                scheme:
+                                  description: Scheme to use for connecting to the
+                                    host. Defaults to HTTP.
+                                  type: string
+                              required:
+                              - port
+                              type: object
+                            initialDelaySeconds:
+                              description: 'Number of seconds after the container
+                                has started before liveness probes are initiated.
+                                More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
+                              format: int32
+                              type: integer
+                            periodSeconds:
+                              description: How often (in seconds) to perform the probe.
+                                Default to 10 seconds. Minimum value is 1.
+                              format: int32
+                              type: integer
+                            successThreshold:
+                              description: Minimum consecutive successes for the probe
+                                to be considered successful after having failed. Defaults
+                                to 1. Must be 1 for liveness and startup. Minimum
+                                value is 1.
+                              format: int32
+                              type: integer
+                            tcpSocket:
+                              description: 'TCPSocket specifies an action involving
+                                a TCP port. TCP hooks not yet supported TODO: implement
+                                a realistic TCP lifecycle hook'
+                              properties:
+                                host:
+                                  description: 'Optional: Host name to connect to,
+                                    defaults to the pod IP.'
+                                  type: string
+                                port:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  description: Number or name of the port to access
+                                    on the container. Number must be in the range
+                                    1 to 65535. Name must be an IANA_SVC_NAME.
+                                  x-kubernetes-int-or-string: true
+                              required:
+                              - port
+                              type: object
+                            timeoutSeconds:
+                              description: 'Number of seconds after which the probe
+                                times out. Defaults to 1 second. Minimum value is
+                                1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
+                              format: int32
+                              type: integer
+                          type: object
+                        name:
+                          description: Name of the container
+                          type: string
+                        readiness_probe:
+                          description: Periodic probe of container readiness; traffic
+                            will not be sent into the pod unless all containers' readiness
+                            probes are succeeding
+                          properties:
+                            exec:
+                              description: One and only one of the following should
+                                be specified. Exec specifies the action to take.
+                              properties:
+                                command:
+                                  description: Command is the command line to execute
+                                    inside the container, the working directory for
+                                    the command  is root ('/') in the container's
+                                    filesystem. The command is simply exec'd, it is
+                                    not run inside a shell, so traditional shell instructions
+                                    ('|', etc) won't work. To use a shell, you need
+                                    to explicitly call out to that shell. Exit status
+                                    of 0 is treated as live/healthy and non-zero is
+                                    unhealthy.
+                                  items:
+                                    type: string
+                                  type: array
+                              type: object
+                            failureThreshold:
+                              description: Minimum consecutive failures for the probe
+                                to be considered failed after having succeeded. Defaults
+                                to 3. Minimum value is 1.
+                              format: int32
+                              type: integer
+                            httpGet:
+                              description: HTTPGet specifies the http request to perform.
+                              properties:
+                                host:
+                                  description: Host name to connect to, defaults to
+                                    the pod IP. You probably want to set "Host" in
+                                    httpHeaders instead.
+                                  type: string
+                                httpHeaders:
+                                  description: Custom headers to set in the request.
+                                    HTTP allows repeated headers.
+                                  items:
+                                    description: HTTPHeader describes a custom header
+                                      to be used in HTTP probes
+                                    properties:
+                                      name:
+                                        description: The header field name
+                                        type: string
+                                      value:
+                                        description: The header field value
+                                        type: string
+                                    required:
+                                    - name
+                                    - value
+                                    type: object
+                                  type: array
+                                path:
+                                  description: Path to access on the HTTP server.
+                                  type: string
+                                port:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  description: Name or number of the port to access
+                                    on the container. Number must be in the range
+                                    1 to 65535. Name must be an IANA_SVC_NAME.
+                                  x-kubernetes-int-or-string: true
+                                scheme:
+                                  description: Scheme to use for connecting to the
+                                    host. Defaults to HTTP.
+                                  type: string
+                              required:
+                              - port
+                              type: object
+                            initialDelaySeconds:
+                              description: 'Number of seconds after the container
+                                has started before liveness probes are initiated.
+                                More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
+                              format: int32
+                              type: integer
+                            periodSeconds:
+                              description: How often (in seconds) to perform the probe.
+                                Default to 10 seconds. Minimum value is 1.
+                              format: int32
+                              type: integer
+                            successThreshold:
+                              description: Minimum consecutive successes for the probe
+                                to be considered successful after having failed. Defaults
+                                to 1. Must be 1 for liveness and startup. Minimum
+                                value is 1.
+                              format: int32
+                              type: integer
+                            tcpSocket:
+                              description: 'TCPSocket specifies an action involving
+                                a TCP port. TCP hooks not yet supported TODO: implement
+                                a realistic TCP lifecycle hook'
+                              properties:
+                                host:
+                                  description: 'Optional: Host name to connect to,
+                                    defaults to the pod IP.'
+                                  type: string
+                                port:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  description: Number or name of the port to access
+                                    on the container. Number must be in the range
+                                    1 to 65535. Name must be an IANA_SVC_NAME.
+                                  x-kubernetes-int-or-string: true
+                              required:
+                              - port
+                              type: object
+                            timeoutSeconds:
+                              description: 'Number of seconds after which the probe
+                                times out. Defaults to 1 second. Minimum value is
+                                1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
+                              format: int32
+                              type: integer
+                          type: object
+                      required:
+                      - image
+                      - name
+                      type: object
+                    type: array
+                  max_concurrency:
+                    default: 1
+                    description: Maximum number of requests that will be concurrently
+                      sent into the container
+                    type: integer
+                  max_queue_length:
+                    default: 100
+                    description: Maximum number of requests per replica which will
+                      be queued (beyond max_concurrency) before requests are rejected
+                      with error code 503
+                    type: integer
+                  port:
+                    default: 8080
+                    description: Port to which requests will be sent to
+                    type: integer
+                required:
+                - containers
+                type: object
+              update_strategy:
+                description: Deployment strategy to use when replacing existing replicas
+                  with new ones
+                properties:
+                  max_surge:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 25%
+                    description: 'Maximum number of replicas that can be scheduled
+                      above the desired number of replicas during an update; can be
+                      an absolute number, e.g. 5, or a percentage of desired replicas,
+                      e.g. 10% (default: 25%) (set to 0 to disable rolling updates)'
+                    x-kubernetes-int-or-string: true
+                  max_unavailable:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    default: 25%
+                    description: Maximum number of replicas that can be unavailable
+                      during an update; can be an absolute number, e.g. 5, or a percentage
+                      of desired replicas, e.g. 10%
+                    x-kubernetes-int-or-string: true
+                type: object
+            required:
+            - pod
+            type: object
+          status:
+            description: RealtimeAPIStatus defines the observed state of RealtimeAPI
+            properties:
+              current_replicas:
+                type: integer
+              desired_replicas:
+                type: integer
+              endpoint:
+                type: string
+              ready_replicas:
+                type: integer
+              status:
+                type: integer
+            required:
+            - current_replicas
+            - desired_replicas
+            - ready_replicas
+            - status
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+status:
+  acceptedNames:
+    kind: ""
+    plural: ""
+  conditions: []
+  storedVersions: []
diff --git a/pkg/crds/config/crd/patches/cainjection_in_realtimeapis.yaml b/pkg/crds/config/crd/patches/cainjection_in_realtimeapis.yaml
new file mode 100644
index 0000000000..a1311cf904
--- /dev/null
+++ b/pkg/crds/config/crd/patches/cainjection_in_realtimeapis.yaml
@@ -0,0 +1,7 @@
+# The following patch adds a directive for certmanager to inject CA into the CRD
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
+  name: realtimeapis.api.cortex.dev
diff --git a/pkg/crds/config/crd/patches/webhook_in_realtimeapis.yaml b/pkg/crds/config/crd/patches/webhook_in_realtimeapis.yaml
new file mode 100644
index 0000000000..4ee0f5880c
--- /dev/null
+++ b/pkg/crds/config/crd/patches/webhook_in_realtimeapis.yaml
@@ -0,0 +1,14 @@
+# The following patch enables a conversion webhook for the CRD
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: realtimeapis.api.cortex.dev
+spec:
+  conversion:
+    strategy: Webhook
+    webhook:
+      clientConfig:
+        service:
+          namespace: system
+          name: webhook-service
+          path: /convert
diff --git a/pkg/crds/config/rbac/realtimeapi_editor_role.yaml b/pkg/crds/config/rbac/realtimeapi_editor_role.yaml
new file mode 100644
index 0000000000..34e836e2e9
--- /dev/null
+++ b/pkg/crds/config/rbac/realtimeapi_editor_role.yaml
@@ -0,0 +1,24 @@
+# permissions for end users to edit realtimeapis.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: realtimeapi-editor-role
+rules:
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis/status
+  verbs:
+  - get
diff --git a/pkg/crds/config/rbac/realtimeapi_viewer_role.yaml b/pkg/crds/config/rbac/realtimeapi_viewer_role.yaml
new file mode 100644
index 0000000000..004387bf35
--- /dev/null
+++ b/pkg/crds/config/rbac/realtimeapi_viewer_role.yaml
@@ -0,0 +1,20 @@
+# permissions for end users to view realtimeapis.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: realtimeapi-viewer-role
+rules:
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis/status
+  verbs:
+  - get
diff --git a/pkg/crds/config/rbac/role.yaml b/pkg/crds/config/rbac/role.yaml
index 4b64fb36ab..f8b89211e5 100644
--- a/pkg/crds/config/rbac/role.yaml
+++ b/pkg/crds/config/rbac/role.yaml
@@ -23,6 +23,32 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - api.cortex.dev
+  resources:
+  - realtimeapis/status
+  verbs:
+  - get
+  - patch
+  - update
 - apiGroups:
   - batch
   resources:
diff --git a/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml b/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
new file mode 100644
index 0000000000..638b3039f3
--- /dev/null
+++ b/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
@@ -0,0 +1,7 @@
+apiVersion: api.cortex.dev/v1alpha1
+kind: RealtimeAPI
+metadata:
+  name: realtimeapi-sample
+spec:
+  # Add fields here
+  foo: bar

From 0613ff3e6cd983a10f800ef6f33b60ea3ad4cb19 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 15 Jul 2021 11:42:48 +0200
Subject: [PATCH 03/42] Initial implementation of the realtime api controller

---
 .../apis/api/v1alpha1/realtimeapi_types.go    |  28 +--
 .../controllers/api/realtimeapi_controller.go | 195 ++++++++++++++++--
 pkg/types/status/code.go                      |   3 +
 3 files changed, 200 insertions(+), 26 deletions(-)

diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
index e61255039f..4ff5bbe074 100644
--- a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -50,22 +50,27 @@ type RealtimeAPISpec struct {
 }
 
 type PodSpec struct {
-	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Required
 	// +kubebuilder:default=8080
 	// Port to which requests will be sent to
 	Port int `json:"port"`
 
-	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Required
 	// +kubebuilder:default=1
 	// Maximum number of requests that will be concurrently sent into the container
 	MaxConcurrency int `json:"max_concurrency"`
 
-	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Required
 	// +kubebuilder:default=100
 	// Maximum number of requests per replica which will be queued
 	// (beyond max_concurrency) before requests are rejected with error code 503
 	MaxQueueLength int `json:"max_queue_length"`
 
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=1
+	// Number of desired replicas
+	Replicas int32 `json:"replicas"`
+
 	// +kubebuilder:validation:Required
 	// Configurations for the containers to run
 	Containers []ContainerSpec `json:"containers"`
@@ -131,22 +136,17 @@ type AutoscalingSpec struct {
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=1
 	// Minimum number of replicas
-	MinReplicas int `json:"min_replicas,omitempty"`
+	MinReplicas int32 `json:"min_replicas,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=100
 	// Maximum number of replicas
-	MaxReplicas int `json:"max_replicas,omitempty"`
-
-	// +kubebuilder:validation:Optional
-	// +kubebuilder:default=1
-	// Initial number of replicas
-	InitReplicas int `json:"init_replicas,omitempty"`
+	MaxReplicas int32 `json:"max_replicas,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// Desired number of in-flight requests per replica (including requests actively being processed as well as queued),
 	// which the autoscaler tries to maintain
-	TargetInFlight int `json:"target_in_flight,omitempty"`
+	TargetInFlight int32 `json:"target_in_flight,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="60s"
@@ -209,9 +209,9 @@ type NetworkingSpec struct {
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
 	Status          status.Code `json:"status"`
-	DesiredReplicas int         `json:"desired_replicas"`
-	CurrentReplicas int         `json:"current_replicas"`
-	ReadyReplicas   int         `json:"ready_replicas"`
+	DesiredReplicas int32       `json:"desired_replicas"`
+	CurrentReplicas int32       `json:"current_replicas"`
+	ReadyReplicas   int32       `json:"ready_replicas"`
 	Endpoint        string      `json:"endpoint,omitempty"`
 }
 
diff --git a/pkg/crds/controllers/api/realtimeapi_controller.go b/pkg/crds/controllers/api/realtimeapi_controller.go
index f891b922c2..3b3055fa4c 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller.go
@@ -18,11 +18,23 @@ package api
 
 import (
 	"context"
+	"fmt"
 
+	"github.com/cortexlabs/cortex/pkg/consts"
+	"github.com/cortexlabs/cortex/pkg/crds/controllers"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/cortex/pkg/workloads"
 	"github.com/go-logr/logr"
+	istionetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
+	kapps "k8s.io/api/apps/v1"
+	kcore "k8s.io/api/core/v1"
+	kerrors "k8s.io/apimachinery/pkg/api/errors"
+	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 
 	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
 )
@@ -34,23 +46,65 @@ type RealtimeAPIReconciler struct {
 	Scheme *runtime.Scheme
 }
 
-//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
-//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/status,verbs=get;update;patch
-//+kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/finalizers,verbs=update
+// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/finalizers,verbs=update
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
-// TODO(user): Modify the Reconcile function to compare the state specified by
-// the RealtimeAPI object against the actual cluster state, and then
-// perform operations to make the cluster state reflect the state specified by
-// the user.
-//
-// For more details, check Reconcile and its Result here:
-// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile
 func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	_ = r.Log.WithValues("realtimeapi", req.NamespacedName)
+	log := r.Log.WithValues("realtimeapi", req.NamespacedName)
 
-	// your logic here
+	// Step 1: get resource from request
+	api := apiv1alpha1.RealtimeAPI{}
+	log.V(1).Info("retrieving resource")
+	if err := r.Get(ctx, req.NamespacedName, &api); err != nil {
+		if !kerrors.IsNotFound(err) {
+			log.Error(err, "failed to retrieve resource")
+		}
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Step 2: Update status
+	log.V(1).Info("getting deployment")
+	deployment, err := r.getDeployment(ctx, api)
+	if err != nil {
+		log.Error(err, "failed to get deployment")
+		return ctrl.Result{}, err
+	}
+
+	log.V(1).Info("updating status")
+	if err = r.updateStatus(ctx, &api, deployment); err != nil {
+		if controllers.IsOptimisticLockError(err) {
+			log.Info("conflict during status update, retrying")
+			return ctrl.Result{Requeue: true}, nil
+		}
+		log.Error(err, "failed to update status")
+		return ctrl.Result{}, err
+	}
+
+	// Step 3: Create or Update Resources
+	deployOp, err := r.createOrUpdateDeployment(ctx, api)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	log.V(1).Info(fmt.Sprintf("deployment %s", deployOp))
+
+	svcOp, err := r.createOrUpdateService(ctx, api)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	log.V(1).Info(fmt.Sprintf("service %s", svcOp))
+
+	vsOp, err := r.createOrUpdateVirtualService(ctx, api)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	log.V(1).Info(fmt.Sprintf("virtual service %s", vsOp))
 
 	return ctrl.Result{}, nil
 }
@@ -59,5 +113,122 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 func (r *RealtimeAPIReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
 		For(&apiv1alpha1.RealtimeAPI{}).
+		Owns(&kapps.Deployment{}).
+		Owns(&kcore.Service{}).
+		Owns(&istionetworking.VirtualService{}).
 		Complete(r)
 }
+
+func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (*kapps.Deployment, error) {
+	req := client.ObjectKey{Namespace: api.Namespace, Name: workloads.K8sName(api.Name)}
+	deployment := kapps.Deployment{}
+	if err := r.Get(ctx, req, &deployment); err != nil {
+		if kerrors.IsNotFound(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &deployment, nil
+}
+
+func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *apiv1alpha1.RealtimeAPI, deployment *kapps.Deployment) error {
+	apiStatus := status.Pending
+	api.Status.Status = apiStatus // FIXME: handle other status
+
+	endpoint, err := r.getEndpoint(ctx, api)
+	if err != nil {
+		return errors.Wrap(err, "failed to get api endpoint")
+	}
+
+	api.Status.Endpoint = endpoint
+	if deployment != nil {
+		api.Status.DesiredReplicas = *deployment.Spec.Replicas
+		api.Status.CurrentReplicas = deployment.Status.Replicas
+		api.Status.ReadyReplicas = deployment.Status.ReadyReplicas
+	}
+
+	if err = r.Status().Update(ctx, api); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	deployment := kapps.Deployment{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	op, err := controllerutil.CreateOrUpdate(ctx, r, &deployment, func() error {
+		deployment.Spec = r.desiredDeployment(api).Spec
+		return nil
+	})
+	if err != nil {
+		return op, err
+	}
+	return op, nil
+}
+
+func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	service := kcore.Service{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	op, err := controllerutil.CreateOrUpdate(ctx, r, &service, func() error {
+		service.Spec = r.desiredService(api).Spec
+		return nil
+	})
+	if err != nil {
+		return op, err
+	}
+	return op, nil
+}
+
+func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	vs := istionetworking.VirtualService{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	op, err := controllerutil.CreateOrUpdate(ctx, r, &vs, func() error {
+		vs.Spec = r.desiredVirtualService(api).Spec
+		return nil
+	})
+	if err != nil {
+		return op, err
+	}
+	return op, nil
+}
+
+func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *apiv1alpha1.RealtimeAPI) (string, error) {
+	req := client.ObjectKey{Namespace: consts.IstioNamespace, Name: "ingressgateway-apis"}
+	svc := kcore.Service{}
+	if err := r.Get(ctx, req, &svc); err != nil {
+		return "", err
+	}
+
+	ingress := svc.Status.LoadBalancer.Ingress
+	if ingress == nil || len(ingress) == 0 {
+		return "", nil
+	}
+
+	endpoint := fmt.Sprintf("http://%s/%s",
+		svc.Status.LoadBalancer.Ingress[0].Hostname, api.Spec.Networking.Endpoint,
+	)
+
+	return endpoint, nil
+}
+
+func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) kapps.Deployment {
+	panic("implement me!")
+}
+
+func (r *RealtimeAPIReconciler) desiredService(api apiv1alpha1.RealtimeAPI) kcore.Service {
+	panic("implement me!")
+}
+
+func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAPI) istionetworking.VirtualService {
+	panic("implement me!")
+}
diff --git a/pkg/types/status/code.go b/pkg/types/status/code.go
index 41a8a13d91..3845a913c3 100644
--- a/pkg/types/status/code.go
+++ b/pkg/types/status/code.go
@@ -20,6 +20,7 @@ type Code int
 
 const (
 	Unknown Code = iota
+	Pending
 	Stalled
 	Error
 	ErrorImagePull
@@ -30,6 +31,7 @@ const (
 
 var _codes = []string{
 	"status_unknown",
+	"status_pending",
 	"status_stalled",
 	"status_error",
 	"status_error_image_pull",
@@ -42,6 +44,7 @@ var _ = [1]int{}[int(Updating)-(len(_codes)-1)] // Ensure list length matches
 
 var _codeMessages = []string{
 	"unknown",               // Unknown
+	"pending",               // Pending
 	"compute unavailable",   // Stalled
 	"error",                 // Error
 	"error (image pull)",    // Live

From c5ecd0835d68dc84aec6503354994c9013ba0061 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 20 Jul 2021 12:41:13 +0200
Subject: [PATCH 04/42] Add desired resources methods for realtime api
 controller

---
 .../apis/api/v1alpha1/realtimeapi_types.go    |  12 +-
 .../controllers/api/realtimeapi_controller.go | 136 +-----
 .../api/realtimeapi_controller_helpers.go     | 419 ++++++++++++++++++
 pkg/workloads/helpers.go                      |   9 +-
 pkg/workloads/k8s.go                          |  13 +-
 5 files changed, 449 insertions(+), 140 deletions(-)
 create mode 100644 pkg/crds/controllers/api/realtimeapi_controller_helpers.go

diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
index 4ff5bbe074..20d1e2efda 100644
--- a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -53,18 +53,18 @@ type PodSpec struct {
 	// +kubebuilder:validation:Required
 	// +kubebuilder:default=8080
 	// Port to which requests will be sent to
-	Port int `json:"port"`
+	Port int32 `json:"port"`
 
 	// +kubebuilder:validation:Required
 	// +kubebuilder:default=1
 	// Maximum number of requests that will be concurrently sent into the container
-	MaxConcurrency int `json:"max_concurrency"`
+	MaxConcurrency int32 `json:"max_concurrency"`
 
 	// +kubebuilder:validation:Required
 	// +kubebuilder:default=100
 	// Maximum number of requests per replica which will be queued
 	// (beyond max_concurrency) before requests are rejected with error code 503
-	MaxQueueLength int `json:"max_queue_length"`
+	MaxQueueLength int32 `json:"max_queue_length"`
 
 	// +kubebuilder:validation:Required
 	// +kubebuilder:default=1
@@ -119,7 +119,11 @@ type ComputeSpec struct {
 
 	// +kubebuilder:validation:Optional
 	// GPU request for the container; one unit of GPU corresponds to one virtual GPU
-	GPU int `json:"gpu,omitempty"`
+	GPU int64 `json:"gpu,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Inferentia request for the container; one unit of Inf corresponds to one virtual Inf chip
+	Inf int64 `json:"inf,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// Memory request for the container;
diff --git a/pkg/crds/controllers/api/realtimeapi_controller.go b/pkg/crds/controllers/api/realtimeapi_controller.go
index 3b3055fa4c..0e55e9a99d 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller.go
@@ -20,30 +20,28 @@ import (
 	"context"
 	"fmt"
 
-	"github.com/cortexlabs/cortex/pkg/consts"
+	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/crds/controllers"
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
-	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/workloads"
+	"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
 	"github.com/go-logr/logr"
-	istionetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
+	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
+
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
-	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
-
-	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
 )
 
+const _terminationGracePeriodSeconds int64 = 60 // seconds
+
 // RealtimeAPIReconciler reconciles a RealtimeAPI object
 type RealtimeAPIReconciler struct {
 	client.Client
-	Log    logr.Logger
-	Scheme *runtime.Scheme
+	ClusterConfig *clusterconfig.Config
+	Log           logr.Logger
+	Scheme        *runtime.Scheme
 }
 
 // +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
@@ -115,120 +113,6 @@ func (r *RealtimeAPIReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		For(&apiv1alpha1.RealtimeAPI{}).
 		Owns(&kapps.Deployment{}).
 		Owns(&kcore.Service{}).
-		Owns(&istionetworking.VirtualService{}).
+		Owns(&istioclientnetworking.VirtualService{}).
 		Complete(r)
 }
-
-func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (*kapps.Deployment, error) {
-	req := client.ObjectKey{Namespace: api.Namespace, Name: workloads.K8sName(api.Name)}
-	deployment := kapps.Deployment{}
-	if err := r.Get(ctx, req, &deployment); err != nil {
-		if kerrors.IsNotFound(err) {
-			return nil, nil
-		}
-		return nil, err
-	}
-	return &deployment, nil
-}
-
-func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *apiv1alpha1.RealtimeAPI, deployment *kapps.Deployment) error {
-	apiStatus := status.Pending
-	api.Status.Status = apiStatus // FIXME: handle other status
-
-	endpoint, err := r.getEndpoint(ctx, api)
-	if err != nil {
-		return errors.Wrap(err, "failed to get api endpoint")
-	}
-
-	api.Status.Endpoint = endpoint
-	if deployment != nil {
-		api.Status.DesiredReplicas = *deployment.Spec.Replicas
-		api.Status.CurrentReplicas = deployment.Status.Replicas
-		api.Status.ReadyReplicas = deployment.Status.ReadyReplicas
-	}
-
-	if err = r.Status().Update(ctx, api); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
-	deployment := kapps.Deployment{
-		ObjectMeta: kmeta.ObjectMeta{
-			Name:      workloads.K8sName(api.Name),
-			Namespace: api.Namespace},
-	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &deployment, func() error {
-		deployment.Spec = r.desiredDeployment(api).Spec
-		return nil
-	})
-	if err != nil {
-		return op, err
-	}
-	return op, nil
-}
-
-func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
-	service := kcore.Service{
-		ObjectMeta: kmeta.ObjectMeta{
-			Name:      workloads.K8sName(api.Name),
-			Namespace: api.Namespace},
-	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &service, func() error {
-		service.Spec = r.desiredService(api).Spec
-		return nil
-	})
-	if err != nil {
-		return op, err
-	}
-	return op, nil
-}
-
-func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
-	vs := istionetworking.VirtualService{
-		ObjectMeta: kmeta.ObjectMeta{
-			Name:      workloads.K8sName(api.Name),
-			Namespace: api.Namespace},
-	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &vs, func() error {
-		vs.Spec = r.desiredVirtualService(api).Spec
-		return nil
-	})
-	if err != nil {
-		return op, err
-	}
-	return op, nil
-}
-
-func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *apiv1alpha1.RealtimeAPI) (string, error) {
-	req := client.ObjectKey{Namespace: consts.IstioNamespace, Name: "ingressgateway-apis"}
-	svc := kcore.Service{}
-	if err := r.Get(ctx, req, &svc); err != nil {
-		return "", err
-	}
-
-	ingress := svc.Status.LoadBalancer.Ingress
-	if ingress == nil || len(ingress) == 0 {
-		return "", nil
-	}
-
-	endpoint := fmt.Sprintf("http://%s/%s",
-		svc.Status.LoadBalancer.Ingress[0].Hostname, api.Spec.Networking.Endpoint,
-	)
-
-	return endpoint, nil
-}
-
-func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) kapps.Deployment {
-	panic("implement me!")
-}
-
-func (r *RealtimeAPIReconciler) desiredService(api apiv1alpha1.RealtimeAPI) kcore.Service {
-	panic("implement me!")
-}
-
-func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAPI) istionetworking.VirtualService {
-	panic("implement me!")
-}
diff --git a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
new file mode 100644
index 0000000000..3f3c658ad1
--- /dev/null
+++ b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
@@ -0,0 +1,419 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package api
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/cortexlabs/cortex/pkg/consts"
+	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/k8s"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
+	"github.com/cortexlabs/cortex/pkg/lib/strings"
+	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/cortexlabs/cortex/pkg/workloads"
+	istionetworking "istio.io/api/networking/v1beta1"
+	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
+	kapps "k8s.io/api/apps/v1"
+	kcore "k8s.io/api/core/v1"
+	kerrors "k8s.io/apimachinery/pkg/api/errors"
+	kresource "k8s.io/apimachinery/pkg/api/resource"
+	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+)
+
+// getDeployment fetches the Deployment named after the API, yielding (nil, nil) when none exists.
+func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (*kapps.Deployment, error) {
+	found := &kapps.Deployment{}
+	key := client.ObjectKey{Namespace: api.Namespace, Name: workloads.K8sName(api.Name)}
+	if err := r.Get(ctx, key, found); kerrors.IsNotFound(err) {
+		return nil, nil // absence is reported as a nil deployment, not as a failure
+	} else if err != nil {
+		return nil, err
+	}
+	return found, nil
+}
+
+// updateStatus refreshes the API's status (state, endpoint and replica counts) and persists
+// it through the status subresource. deployment may be nil (no Deployment found), in which
+// case the replica counts are left untouched.
+func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *apiv1alpha1.RealtimeAPI, deployment *kapps.Deployment) error {
+	api.Status.Status = status.Pending // FIXME: handle other status
+	endpoint, err := r.getEndpoint(ctx, api)
+	if err != nil {
+		return errors.Wrap(err, "failed to get api endpoint")
+	}
+	api.Status.Endpoint = endpoint
+
+	if deployment != nil {
+		// Spec.Replicas is an optional pointer; guard it so an undefaulted object cannot panic
+		if deployment.Spec.Replicas != nil {
+			api.Status.DesiredReplicas = *deployment.Spec.Replicas
+		}
+		api.Status.CurrentReplicas = deployment.Status.Replicas
+		api.Status.ReadyReplicas = deployment.Status.ReadyReplicas
+	}
+
+	return r.Status().Update(ctx, api)
+}
+
+// createOrUpdateDeployment creates the API's Deployment, or updates its spec to the desired one,
+// and reports which operation CreateOrUpdate performed.
+func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	deployment := kapps.Deployment{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	return controllerutil.CreateOrUpdate(ctx, r, &deployment, func() error {
+		deployment.Spec = r.desiredDeployment(api).Spec
+		// Mark the API as controller-owner: the Owns() watch registered in SetupWithManager
+		// and garbage collection on API deletion both rely on this owner reference.
+		return controllerutil.SetControllerReference(&api, &deployment, r.Scheme)
+	})
+}
+
+// createOrUpdateService creates the API's Service, or updates its spec to the desired one,
+// and reports which operation CreateOrUpdate performed.
+func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	service := kcore.Service{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	return controllerutil.CreateOrUpdate(ctx, r, &service, func() error {
+		service.Spec = r.desiredService(api).Spec
+		// Mark the API as controller-owner: the Owns() watch registered in SetupWithManager
+		// and garbage collection on API deletion both rely on this owner reference.
+		return controllerutil.SetControllerReference(&api, &service, r.Scheme)
+	})
+}
+
+// createOrUpdateVirtualService creates the API's VirtualService, or updates its spec to the
+// desired one, and reports which operation CreateOrUpdate performed.
+func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+	vs := istioclientnetworking.VirtualService{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      workloads.K8sName(api.Name),
+			Namespace: api.Namespace},
+	}
+	return controllerutil.CreateOrUpdate(ctx, r, &vs, func() error {
+		vs.Spec = r.desiredVirtualService(api).Spec
+		// Mark the API as controller-owner: the Owns() watch registered in SetupWithManager
+		// and garbage collection on API deletion both rely on this owner reference.
+		return controllerutil.SetControllerReference(&api, &vs, r.Scheme)
+	})
+}
+
+// getEndpoint builds the API's URL from the first load balancer ingress hostname of the
+// "ingressgateway-apis" Service in the Istio namespace, followed by the API's networking endpoint.
+// It returns an empty string while the Service reports no load balancer ingress.
+func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *apiv1alpha1.RealtimeAPI) (string, error) {
+	var gateway kcore.Service
+	key := client.ObjectKey{Namespace: consts.IstioNamespace, Name: "ingressgateway-apis"}
+	if err := r.Get(ctx, key, &gateway); err != nil {
+		return "", err
+	}
+
+	// len() of a nil slice is 0, so a single length check covers both nil and empty
+	lbIngress := gateway.Status.LoadBalancer.Ingress
+	if len(lbIngress) == 0 {
+		return "", nil
+	}
+
+	return fmt.Sprintf("http://%s/%s", lbIngress[0].Hostname, api.Spec.Networking.Endpoint), nil
+}
+
+// desiredDeployment assembles the Deployment wanted for the API from its spec and annotations;
+// the pod spec carries the containers and volumes returned by desiredContainers.
+func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) kapps.Deployment {
+	apiKind := userconfig.RealtimeAPIKind.String()
+	deploymentID := api.Annotations["cortex.dev/deployment-id"] // FIXME: needs to be created beforehand
+	podContainers, podVolumes := r.desiredContainers(api)
+	podSpec := k8s.PodSpec{
+		Labels: map[string]string{
+			"apiName":        api.Name,
+			"apiKind":        apiKind,
+			"deploymentID":   deploymentID,
+			"cortex.dev/api": "true",
+		},
+		Annotations: map[string]string{"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0"},
+		K8sPodSpec: kcore.PodSpec{
+			RestartPolicy:                 kcore.RestartPolicyAlways,
+			TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
+			Containers:                    podContainers,
+			NodeSelector:                  workloads.NodeSelectors(),
+			Tolerations:                   workloads.GenerateResourceTolerations(),
+			Affinity:                      workloads.GenerateNodeAffinities(api.Spec.NodeGroups),
+			Volumes:                       podVolumes,
+			ServiceAccountName:            workloads.ServiceAccountName,
+		},
+	}
+
+	return *k8s.Deployment(&k8s.DeploymentSpec{
+		Name:           workloads.K8sName(api.Name),
+		Replicas:       api.Spec.Pod.Replicas,
+		MaxSurge:       pointer.String(api.Spec.UpdateStrategy.MaxSurge.String()),
+		MaxUnavailable: pointer.String(api.Spec.UpdateStrategy.MaxUnavailable.String()),
+		Labels: map[string]string{
+			"apiName":        api.Name,
+			"apiKind":        apiKind,
+			"apiID":          api.Annotations["cortex.dev/api-id"], // TODO: check if can be replaced with resource version
+			"deploymentID":   deploymentID,
+			"cortex.dev/api": "true",
+		},
+		Annotations: getAPIAnnotations(api),
+		Selector:    map[string]string{"apiName": api.Name, "apiKind": apiKind},
+		PodSpec:     podSpec,
+	})
+}
+
+// desiredContainers returns the containers and volumes for the API's pod: those produced by
+// userContainers, followed by the container and volume produced by proxyContainer.
+func (r *RealtimeAPIReconciler) desiredContainers(api apiv1alpha1.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
+	userCtrs, userVols := r.userContainers(api)
+	proxyCtr, proxyVol := r.proxyContainer(api)
+
+	// append keeps the user-defined entries first and the proxy entries last
+	return append(userCtrs, proxyCtr), append(userVols, proxyVol)
+}
+
+// desiredService describes the API's Service: port "http" on consts.ProxyPortInt32, with apiName/apiKind as selector.
+func (r *RealtimeAPIReconciler) desiredService(api apiv1alpha1.RealtimeAPI) kcore.Service {
+	apiKind := userconfig.RealtimeAPIKind.String()
+	svcSpec := k8s.ServiceSpec{
+		Name:        workloads.K8sName(api.Name),
+		PortName:    "http",
+		Port:        consts.ProxyPortInt32,
+		TargetPort:  consts.ProxyPortInt32,
+		Annotations: getAPIAnnotations(api),
+		Labels: map[string]string{
+			"apiName":        api.Name,
+			"apiKind":        apiKind,
+			"cortex.dev/api": "true",
+		},
+		Selector: map[string]string{"apiName": api.Name, "apiKind": apiKind},
+	}
+	return *k8s.Service(&svcSpec)
+}
+
+// desiredVirtualService describes the VirtualService for the API. It lists two weighted
+// destinations, the API's own Service and the activator: when the spec asks for zero replicas
+// the activator gets weight 100 and the API's Service 0, otherwise the weights are reversed.
+// The spec's networking endpoint is passed as the prefix path, with "/" as the rewrite.
+func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAPI) istioclientnetworking.VirtualService {
+	svcName := workloads.K8sName(api.Name)
+
+	apiWeight, activatorWeight := int32(100), int32(0)
+	if api.Spec.Pod.Replicas == 0 {
+		apiWeight, activatorWeight = 0, 100
+	}
+
+	apiDestination := k8s.Destination{
+		ServiceName: svcName,
+		Weight:      apiWeight,
+		Port:        uint32(consts.ProxyPortInt32),
+		Headers: &istionetworking.Headers{
+			Response: &istionetworking.Headers_HeaderOperations{
+				Set: map[string]string{consts.CortexOriginHeader: "api"},
+			},
+		},
+	}
+
+	// value set for consts.CortexTargetServiceHeader on requests sent to the activator destination
+	targetService := fmt.Sprintf("http://%s.%s:%d", svcName, consts.DefaultNamespace, consts.ProxyPortInt32)
+	activatorDestination := k8s.Destination{
+		ServiceName: consts.ActivatorName,
+		Weight:      activatorWeight,
+		Port:        uint32(consts.ActivatorPortInt32),
+		Headers: &istionetworking.Headers{
+			Request: &istionetworking.Headers_HeaderOperations{
+				Set: map[string]string{
+					consts.CortexAPINameHeader:       api.Name,
+					consts.CortexTargetServiceHeader: targetService,
+				},
+			},
+			Response: &istionetworking.Headers_HeaderOperations{
+				Set: map[string]string{consts.CortexOriginHeader: consts.ActivatorName},
+			},
+		},
+	}
+
+	return *k8s.VirtualService(&k8s.VirtualServiceSpec{
+		Name:         svcName,
+		Gateways:     []string{"apis-gateway"},
+		Destinations: []k8s.Destination{apiDestination, activatorDestination},
+		PrefixPath:   pointer.String(api.Spec.Networking.Endpoint),
+		Rewrite:      pointer.String("/"),
+		Annotations:  getAPIAnnotations(api),
+		Labels: map[string]string{
+			"apiName":        api.Name,
+			"apiKind":        userconfig.RealtimeAPIKind.String(),
+			"apiID":          api.Annotations["cortex.dev/api-id"],
+			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
+			"cortex.dev/api": "true",
+		},
+	})
+}
+
+// userContainers builds one kubernetes container per container in the API's pod
+// spec and returns them together with the volumes that those containers mount.
+func (r *RealtimeAPIReconciler) userContainers(api apiv1alpha1.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
+	volumes := []kcore.Volume{workloads.MntVolume(), workloads.CortexVolume(), workloads.ClientConfigVolume()}
+
+	var containers []kcore.Container
+	for _, container := range api.Spec.Pod.Containers {
+		// Mounts are built per container: a single slice shared by all iterations
+		// leaks one container's shm mount into every container that follows it.
+		containerMounts := []kcore.VolumeMount{
+			workloads.MntMount(),
+			workloads.CortexMount(),
+			workloads.ClientConfigMount(),
+		}
+		containerResourceList := kcore.ResourceList{}
+		containerResourceLimitsList := kcore.ResourceList{}
+		securityContext := kcore.SecurityContext{
+			Privileged: pointer.Bool(true),
+		}
+
+		if container.Compute.CPU != nil {
+			containerResourceList[kcore.ResourceCPU] = *k8s.QuantityPtr(container.Compute.CPU.DeepCopy())
+		}
+
+		if container.Compute.Mem != nil {
+			containerResourceList[kcore.ResourceMemory] = *k8s.QuantityPtr(container.Compute.Mem.DeepCopy())
+		}
+
+		if container.Compute.GPU > 0 {
+			containerResourceList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
+			containerResourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
+		}
+
+		if container.Compute.Inf > 0 {
+			totalHugePages := container.Compute.Inf * workloads.HugePagesMemPerInf
+			containerResourceList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
+			containerResourceList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
+			containerResourceLimitsList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
+			containerResourceLimitsList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
+
+			securityContext.Capabilities = &kcore.Capabilities{
+				Add: []kcore.Capability{
+					"SYS_ADMIN",
+					"IPC_LOCK",
+				},
+			}
+		}
+
+		if container.Compute.Shm != nil {
+			volumes = append(volumes, workloads.ShmVolume(*container.Compute.Shm, "dshm-"+container.Name))
+			containerMounts = append(containerMounts, workloads.ShmMount("dshm-"+container.Name))
+		}
+
+		containerEnvVars := workloads.BaseEnvVars
+		// cap the slice at its length so the appends below always reallocate and
+		// can never write into the backing array of the shared workloads.BaseEnvVars
+		containerEnvVars = containerEnvVars[:len(containerEnvVars):len(containerEnvVars)]
+		containerEnvVars = append(containerEnvVars, workloads.ClientConfigEnvVar())
+		containerEnvVars = append(containerEnvVars, container.Env...)
+
+		containers = append(containers, kcore.Container{
+			Name:            container.Name,
+			Image:           container.Image,
+			Command:         container.Command,
+			Args:            container.Args,
+			Env:             containerEnvVars,
+			VolumeMounts:    containerMounts,
+			LivenessProbe:   container.LivenessProbe,
+			ReadinessProbe:  container.ReadinessProbe,
+			Resources:       kcore.ResourceRequirements{Requests: containerResourceList, Limits: containerResourceLimitsList},
+			ImagePullPolicy: kcore.PullAlways,
+			SecurityContext: &securityContext,
+		})
+	}
+
+	return containers, volumes
+}
+
+// proxyContainer builds the proxy container, whose flags are taken from the pod
+// spec, and returns it with the cluster config volume.
+func (r *RealtimeAPIReconciler) proxyContainer(api apiv1alpha1.RealtimeAPI) (kcore.Container, kcore.Volume) {
+	podSpec := api.Spec.Pod
+	args := []string{
+		"--cluster-config", consts.DefaultInClusterConfigPath,
+		"--port", consts.ProxyPortStr,
+		"--admin-port", consts.AdminPortStr,
+		"--user-port", strings.Int32(podSpec.Port),
+		"--max-concurrency", strings.Int32(podSpec.MaxConcurrency),
+		"--max-queue-length", strings.Int32(podSpec.MaxQueueLength),
+	}
+
+	// the readiness probe targets /healthz on the admin port
+	healthz := &kcore.HTTPGetAction{Path: "/healthz", Port: intstr.FromInt(int(consts.AdminPortInt32))}
+	readinessProbe := &kcore.Probe{
+		Handler:             kcore.Handler{HTTPGet: healthz},
+		InitialDelaySeconds: 1,
+		TimeoutSeconds:      1,
+		PeriodSeconds:       10,
+		SuccessThreshold:    1,
+		FailureThreshold:    1,
+	}
+
+	container := kcore.Container{
+		Name:            workloads.ProxyContainerName,
+		Image:           r.ClusterConfig.ImageProxy,
+		ImagePullPolicy: kcore.PullAlways,
+		Args:            args,
+		Ports: []kcore.ContainerPort{
+			{Name: consts.AdminPortName, ContainerPort: consts.AdminPortInt32},
+			{ContainerPort: consts.ProxyPortInt32},
+		},
+		Env:          workloads.BaseEnvVars,
+		EnvFrom:      workloads.BaseClusterEnvVars(),
+		VolumeMounts: []kcore.VolumeMount{workloads.ClusterConfigMount()},
+		Resources: kcore.ResourceRequirements{
+			Requests: kcore.ResourceList{
+				kcore.ResourceCPU:    consts.CortexProxyCPU,
+				kcore.ResourceMemory: consts.CortexProxyMem,
+			},
+		},
+		ReadinessProbe: readinessProbe,
+	}
+
+	return container, workloads.ClusterConfigVolume()
+}
+
+// getAPIAnnotations renders the API's autoscaling spec as string annotations.
+func getAPIAnnotations(api apiv1alpha1.RealtimeAPI) map[string]string {
+	autoscaling := api.Spec.Autoscaling
+	return map[string]string{
+		userconfig.MinReplicasAnnotationKey:                  strings.Int32(autoscaling.MinReplicas),
+		userconfig.MaxReplicasAnnotationKey:                  strings.Int32(autoscaling.MaxReplicas),
+		userconfig.TargetInFlightAnnotationKey:               strings.Int32(autoscaling.TargetInFlight),
+		userconfig.WindowAnnotationKey:                       autoscaling.Window.String(),
+		userconfig.DownscaleStabilizationPeriodAnnotationKey: autoscaling.DownscaleStabilizationPeriod.String(),
+		userconfig.UpscaleStabilizationPeriodAnnotationKey:   autoscaling.UpscaleStabilizationPeriod.String(),
+		userconfig.MaxDownscaleFactorAnnotationKey:           strings.Float64(autoscaling.MaxDownscaleFactor.AsApproximateFloat64()),
+		userconfig.MaxUpscaleFactorAnnotationKey:             strings.Float64(autoscaling.MaxUpscaleFactor.AsApproximateFloat64()),
+		userconfig.DownscaleToleranceAnnotationKey:           strings.Float64(autoscaling.DownscaleTolerance.AsApproximateFloat64()),
+		userconfig.UpscaleToleranceAnnotationKey:             strings.Float64(autoscaling.UpscaleTolerance.AsApproximateFloat64()),
+	}
+}
diff --git a/pkg/workloads/helpers.go b/pkg/workloads/helpers.go
index 1d0bf847ba..c14cb77d7a 100644
--- a/pkg/workloads/helpers.go
+++ b/pkg/workloads/helpers.go
@@ -228,7 +228,7 @@ func APIConfigMount(name string) kcore.VolumeMount {
 func ClientConfigMount() kcore.VolumeMount {
 	return kcore.VolumeMount{
 		Name:      _clientConfigDirVolume,
-		MountPath: path.Join(_clientConfigDir, "cli.yaml"),
+		MountPath: path.Join(clientConfigDir, "cli.yaml"),
 		SubPath:   "cli.yaml",
 	}
 }
@@ -248,3 +248,10 @@ func ShmMount(volumeName string) kcore.VolumeMount {
 func KubexitMount() kcore.VolumeMount {
 	return k8s.EmptyDirVolumeMount(_kubexitGraveyardName, _kubexitGraveyardMountPath)
 }
+
+// ClientConfigEnvVar returns the CORTEX_CLI_CONFIG_DIR env var, whose value is
+// the client config directory.
+func ClientConfigEnvVar() kcore.EnvVar {
+	envVar := kcore.EnvVar{Name: "CORTEX_CLI_CONFIG_DIR", Value: clientConfigDir}
+	return envVar
+}
diff --git a/pkg/workloads/k8s.go b/pkg/workloads/k8s.go
index 0523c8cbfe..d4f3aec746 100644
--- a/pkg/workloads/k8s.go
+++ b/pkg/workloads/k8s.go
@@ -41,7 +41,7 @@ const (
 const (
 	_cortexDirVolumeName = "cortex"
 	_cortexDirMountPath  = "/cortex"
-	_clientConfigDir     = "/cortex/client"
+	clientConfigDir      = "/cortex/client"
 
 	_emptyDirVolumeName = "mnt"
 	_emptyDirMountPath  = "/mnt"
@@ -70,7 +70,7 @@ var (
 	_statsdAddress = fmt.Sprintf("prometheus-statsd-exporter.%s:9125", consts.PrometheusNamespace)
 
 	// each Inferentia chip requires 128 HugePages with each HugePage having a size of 2Mi
-	_hugePagesMemPerInf = int64(128 * 2 * 1024 * 1024) // bytes
+	HugePagesMemPerInf = int64(128 * 2 * 1024 * 1024) // bytes
 )
 
 func AsyncGatewayContainer(api spec.API, queueURL string, volumeMounts []kcore.VolumeMount) kcore.Container {
@@ -393,7 +393,7 @@ func userPodContainers(api spec.API) ([]kcore.Container, []kcore.Volume) {
 		}
 
 		if container.Compute.Inf > 0 {
-			totalHugePages := container.Compute.Inf * _hugePagesMemPerInf
+			totalHugePages := container.Compute.Inf * HugePagesMemPerInf
 			containerResourceList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
 			containerResourceList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
 			containerResourceLimitsList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
@@ -413,12 +413,7 @@ func userPodContainers(api spec.API) ([]kcore.Container, []kcore.Volume) {
 		}
 
 		containerEnvVars := BaseEnvVars
-
-		containerEnvVars = append(containerEnvVars, kcore.EnvVar{
-			Name:  "CORTEX_CLI_CONFIG_DIR",
-			Value: _clientConfigDir,
-		})
-
+		containerEnvVars = append(containerEnvVars, ClientConfigEnvVar())
 		if api.Kind != userconfig.TaskAPIKind {
 			containerEnvVars = append(containerEnvVars, kcore.EnvVar{
 				Name:  "CORTEX_PORT",

From 3334a92dd603279e864a2e5f2e8c3e13125ef251 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 20 Jul 2021 15:01:55 +0200
Subject: [PATCH 05/42] Fix CRD types

---
 .../apis/api/v1alpha1/realtimeapi_types.go    |   9 +-
 .../api/v1alpha1/zz_generated.deepcopy.go     | 165 +++++++++++++++++-
 .../bases/api.cortex.dev_realtimeapis.yaml    |  37 ++--
 pkg/crds/config/rbac/role.yaml                |  33 ++++
 4 files changed, 228 insertions(+), 16 deletions(-)

diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
index 20d1e2efda..c569eb0e0e 100644
--- a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -17,11 +17,10 @@ limitations under the License.
 package v1alpha1
 
 import (
-	"time"
-
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	kcore "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
 )
@@ -155,17 +154,17 @@ type AutoscalingSpec struct {
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="60s"
 	// Duration over which to average the API's in-flight requests per replica
-	Window time.Duration `json:"window,omitempty"`
+	Window kmeta.Duration `json:"window,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="5m"
 	// The API will not scale below the highest recommendation made during this period
-	DownscaleStabilizationPeriod time.Duration `json:"downscale_stabilization_period,omitempty"`
+	DownscaleStabilizationPeriod kmeta.Duration `json:"downscale_stabilization_period,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="1m"
 	// The API will not scale above the lowest recommendation made during this period
-	UpscaleStabilizationPeriod time.Duration `json:"upscale_stabilization_period,omitempty"`
+	UpscaleStabilizationPeriod kmeta.Duration `json:"upscale_stabilization_period,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="750m"
diff --git a/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
index 852f080173..a52d87d385 100644
--- a/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
@@ -21,15 +21,152 @@ limitations under the License.
 package v1alpha1
 
 import (
+	"k8s.io/api/core/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *AutoscalingSpec) DeepCopyInto(out *AutoscalingSpec) {
+	*out = *in
+	out.Window = in.Window
+	out.DownscaleStabilizationPeriod = in.DownscaleStabilizationPeriod
+	out.UpscaleStabilizationPeriod = in.UpscaleStabilizationPeriod
+	out.MaxDownscaleFactor = in.MaxDownscaleFactor.DeepCopy()
+	out.MaxUpscaleFactor = in.MaxUpscaleFactor.DeepCopy()
+	out.DownscaleTolerance = in.DownscaleTolerance.DeepCopy()
+	out.UpscaleTolerance = in.UpscaleTolerance.DeepCopy()
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingSpec.
+func (in *AutoscalingSpec) DeepCopy() *AutoscalingSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(AutoscalingSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ComputeSpec) DeepCopyInto(out *ComputeSpec) {
+	*out = *in
+	if in.CPU != nil {
+		in, out := &in.CPU, &out.CPU
+		x := (*in).DeepCopy()
+		*out = &x
+	}
+	if in.Mem != nil {
+		in, out := &in.Mem, &out.Mem
+		x := (*in).DeepCopy()
+		*out = &x
+	}
+	if in.Shm != nil {
+		in, out := &in.Shm, &out.Shm
+		x := (*in).DeepCopy()
+		*out = &x
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeSpec.
+func (in *ComputeSpec) DeepCopy() *ComputeSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ComputeSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ContainerSpec) DeepCopyInto(out *ContainerSpec) {
+	*out = *in
+	if in.Command != nil {
+		in, out := &in.Command, &out.Command
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Args != nil {
+		in, out := &in.Args, &out.Args
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Env != nil {
+		in, out := &in.Env, &out.Env
+		*out = make([]v1.EnvVar, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.Compute != nil {
+		in, out := &in.Compute, &out.Compute
+		*out = new(ComputeSpec)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.ReadinessProbe != nil {
+		in, out := &in.ReadinessProbe, &out.ReadinessProbe
+		*out = new(v1.Probe)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.LivenessProbe != nil {
+		in, out := &in.LivenessProbe, &out.LivenessProbe
+		*out = new(v1.Probe)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerSpec.
+func (in *ContainerSpec) DeepCopy() *ContainerSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ContainerSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NetworkingSpec) DeepCopyInto(out *NetworkingSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkingSpec.
+func (in *NetworkingSpec) DeepCopy() *NetworkingSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(NetworkingSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PodSpec) DeepCopyInto(out *PodSpec) {
+	*out = *in
+	if in.Containers != nil {
+		in, out := &in.Containers, &out.Containers
+		*out = make([]ContainerSpec, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSpec.
+func (in *PodSpec) DeepCopy() *PodSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(PodSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RealtimeAPI) DeepCopyInto(out *RealtimeAPI) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	out.Spec = in.Spec
+	in.Spec.DeepCopyInto(&out.Spec)
 	out.Status = in.Status
 }
 
@@ -86,6 +223,15 @@ func (in *RealtimeAPIList) DeepCopyObject() runtime.Object {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RealtimeAPISpec) DeepCopyInto(out *RealtimeAPISpec) {
 	*out = *in
+	in.Pod.DeepCopyInto(&out.Pod)
+	in.Autoscaling.DeepCopyInto(&out.Autoscaling)
+	if in.NodeGroups != nil {
+		in, out := &in.NodeGroups, &out.NodeGroups
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	out.UpdateStrategy = in.UpdateStrategy
+	out.Networking = in.Networking
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPISpec.
@@ -112,3 +258,20 @@ func (in *RealtimeAPIStatus) DeepCopy() *RealtimeAPIStatus {
 	in.DeepCopyInto(out)
 	return out
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *UpdateStratagySpec) DeepCopyInto(out *UpdateStratagySpec) {
+	*out = *in
+	out.MaxSurge = in.MaxSurge
+	out.MaxUnavailable = in.MaxUnavailable
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateStratagySpec.
+func (in *UpdateStratagySpec) DeepCopy() *UpdateStratagySpec {
+	if in == nil {
+		return nil
+	}
+	out := new(UpdateStratagySpec)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
index 5fc4fbd7c8..1b4a092ae3 100644
--- a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
@@ -43,8 +43,7 @@ spec:
                     default: 5m
                     description: The API will not scale below the highest recommendation
                       made during this period
-                    format: int64
-                    type: integer
+                    type: string
                   downscale_tolerance:
                     anyOf:
                     - type: integer
@@ -55,10 +54,6 @@ spec:
                       event
                     pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                     x-kubernetes-int-or-string: true
-                  init_replicas:
-                    default: 1
-                    description: Initial number of replicas
-                    type: integer
                   max_downscale_factor:
                     anyOf:
                     - type: integer
@@ -71,6 +66,7 @@ spec:
                   max_replicas:
                     default: 100
                     description: Maximum number of replicas
+                    format: int32
                     type: integer
                   max_upscale_factor:
                     anyOf:
@@ -84,18 +80,19 @@ spec:
                   min_replicas:
                     default: 1
                     description: Minimum number of replicas
+                    format: int32
                     type: integer
                   target_in_flight:
                     description: Desired number of in-flight requests per replica
                       (including requests actively being processed as well as queued),
                       which the autoscaler tries to maintain
+                    format: int32
                     type: integer
                   upscale_stabilization_period:
                     default: 1m
                     description: The API will not scale above the lowest recommendation
                       made during this period
-                    format: int64
-                    type: integer
+                    type: string
                   upscale_tolerance:
                     anyOf:
                     - type: integer
@@ -109,8 +106,7 @@ spec:
                     default: 60s
                     description: Duration over which to average the API's in-flight
                       requests per replica
-                    format: int64
-                    type: integer
+                    type: string
                 type: object
               networking:
                 description: Networking configuration
@@ -158,6 +154,12 @@ spec:
                             gpu:
                               description: GPU request for the container; one unit
                                 of GPU corresponds to one virtual GPU
+                              format: int64
+                              type: integer
+                            inf:
+                              description: Inferentia request for the container; one
+                                unit of Inf corresponds to one virtual Inf chip
+                              format: int64
                               type: integer
                             mem:
                               anyOf:
@@ -542,19 +544,31 @@ spec:
                     default: 1
                     description: Maximum number of requests that will be concurrently
                       sent into the container
+                    format: int32
                     type: integer
                   max_queue_length:
                     default: 100
                     description: Maximum number of requests per replica which will
                       be queued (beyond max_concurrency) before requests are rejected
                       with error code 503
+                    format: int32
                     type: integer
                   port:
                     default: 8080
                     description: Port to which requests will be sent to
+                    format: int32
+                    type: integer
+                  replicas:
+                    default: 1
+                    description: Number of desired replicas
+                    format: int32
                     type: integer
                 required:
                 - containers
+                - max_concurrency
+                - max_queue_length
+                - port
+                - replicas
                 type: object
               update_strategy:
                 description: Deployment strategy to use when replacing existing replicas
@@ -587,12 +601,15 @@ spec:
             description: RealtimeAPIStatus defines the observed state of RealtimeAPI
             properties:
               current_replicas:
+                format: int32
                 type: integer
               desired_replicas:
+                format: int32
                 type: integer
               endpoint:
                 type: string
               ready_replicas:
+                format: int32
                 type: integer
               status:
                 type: integer
diff --git a/pkg/crds/config/rbac/role.yaml b/pkg/crds/config/rbac/role.yaml
index f8b89211e5..c6c2b052a2 100644
--- a/pkg/crds/config/rbac/role.yaml
+++ b/pkg/crds/config/rbac/role.yaml
@@ -23,6 +23,17 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - services
+  verbs:
+  - create
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - api.cortex.dev
   resources:
@@ -49,6 +60,17 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - apps
+  resources:
+  - deployments
+  verbs:
+  - create
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - batch
   resources:
@@ -86,3 +108,14 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - networking.istio.io
+  resources:
+  - virtualservices
+  verbs:
+  - create
+  - get
+  - list
+  - patch
+  - update
+  - watch

From a56ece57700d9177be7c3a9bf3c01444de39fe78 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 20 Jul 2021 15:17:08 +0200
Subject: [PATCH 06/42] Add istio to scheme

---
 pkg/crds/controllers/api/realtimeapi_controller.go         | 1 -
 pkg/crds/controllers/api/realtimeapi_controller_helpers.go | 6 +++---
 pkg/crds/main.go                                           | 2 ++
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pkg/crds/controllers/api/realtimeapi_controller.go b/pkg/crds/controllers/api/realtimeapi_controller.go
index 0e55e9a99d..53e368d856 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller.go
@@ -25,7 +25,6 @@ import (
 	"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
 	"github.com/go-logr/logr"
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
-
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
diff --git a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
index 3f3c658ad1..1523665472 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
@@ -82,7 +82,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, ap
 			Name:      workloads.K8sName(api.Name),
 			Namespace: api.Namespace},
 	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &deployment, func() error {
+	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &deployment, func() error {
 		deployment.Spec = r.desiredDeployment(api).Spec
 		return nil
 	})
@@ -98,7 +98,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api a
 			Name:      workloads.K8sName(api.Name),
 			Namespace: api.Namespace},
 	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &service, func() error {
+	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &service, func() error {
 		service.Spec = r.desiredService(api).Spec
 		return nil
 	})
@@ -114,7 +114,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context
 			Name:      workloads.K8sName(api.Name),
 			Namespace: api.Namespace},
 	}
-	op, err := controllerutil.CreateOrUpdate(ctx, r, &vs, func() error {
+	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &vs, func() error {
 		vs.Spec = r.desiredVirtualService(api).Spec
 		return nil
 	})
diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index 90502d08bc..aba1346cac 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -34,6 +34,7 @@ import (
 	// to ensure that exec-entrypoint and run can make use of them.
 	_ "k8s.io/client-go/plugin/pkg/client/auth"
 
+	istioscheme "istio.io/client-go/pkg/clientset/versioned/scheme"
 	"k8s.io/apimachinery/pkg/runtime"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
@@ -55,6 +56,7 @@ var (
 
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+	utilruntime.Must(istioscheme.AddToScheme(scheme))
 
 	utilruntime.Must(batch.AddToScheme(scheme))
 	utilruntime.Must(apiv1alpha1.AddToScheme(scheme))

From 32b6be2c62c89966b1f124fab520c333bdffe5f9 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 20 Jul 2021 19:43:21 +0200
Subject: [PATCH 07/42] Fix RealtimeAPI CRD defaulting behaviour

---
 .../apis/api/v1alpha1/realtimeapi_types.go    | 10 ++--
 .../bases/api.cortex.dev_realtimeapis.yaml    |  8 ++-
 .../samples/api_v1alpha1_realtimeapi.yaml     | 14 +++--
 .../api/realtimeapi_controller_helpers.go     | 54 ++++++++++---------
 pkg/crds/main.go                              |  9 ++--
 pkg/types/status/code.go                      |  1 +
 6 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
index c569eb0e0e..997f9b87f0 100644
--- a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
@@ -32,18 +32,20 @@ type RealtimeAPISpec struct {
 	Pod PodSpec `json:"pod"`
 
 	// +kubebuilder:validation:Optional
+	// +kubebuilder:default={"min_replicas": 1}
 	// Autoscaling configuration
 	Autoscaling AutoscalingSpec `json:"autoscaling"`
 
 	// +kubebuilder:validation:Optional
 	// List of node groups on which this API can run (default: all node groups are eligible)
-	NodeGroups []string `json:"node_groups,omitempty"`
+	NodeGroups []string `json:"node_groups"`
 
 	// +kubebuilder:validation:Optional
+	// +kubebuilder:default={"max_surge": "25%", "max_unavailable": "25%"}
 	// Deployment strategy to use when replacing existing replicas with new ones
 	UpdateStrategy UpdateStratagySpec `json:"update_strategy"`
 
-	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Required
 	// Networking configuration
 	Networking NetworkingSpec `json:"networking"`
 }
@@ -96,7 +98,6 @@ type ContainerSpec struct {
 	// Environment variables to set in the container
 	Env []kcore.EnvVar `json:"env,omitempty"`
 
-	// +kubebuilder:validation:Optional
 	// Compute resource requests
 	Compute *ComputeSpec `json:"compute,omitempty"`
 
@@ -136,12 +137,10 @@ type ComputeSpec struct {
 }
 
 type AutoscalingSpec struct {
-	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=1
 	// Minimum number of replicas
 	MinReplicas int32 `json:"min_replicas,omitempty"`
 
-	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=100
 	// Maximum number of replicas
 	MaxReplicas int32 `json:"max_replicas,omitempty"`
@@ -211,6 +210,7 @@ type NetworkingSpec struct {
 
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
+	// +kubebuilder:validation:Type=string
 	Status          status.Code `json:"status"`
 	DesiredReplicas int32       `json:"desired_replicas"`
 	CurrentReplicas int32       `json:"current_replicas"`
diff --git a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
index 1b4a092ae3..f68726183b 100644
--- a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
@@ -37,6 +37,8 @@ spec:
             description: RealtimeAPISpec defines the desired state of RealtimeAPI
             properties:
               autoscaling:
+                default:
+                  min_replicas: 1
                 description: Autoscaling configuration
                 properties:
                   downscale_stabilization_period:
@@ -571,6 +573,9 @@ spec:
                 - replicas
                 type: object
               update_strategy:
+                default:
+                  max_surge: 25%
+                  max_unavailable: 25%
                 description: Deployment strategy to use when replacing existing replicas
                   with new ones
                 properties:
@@ -595,6 +600,7 @@ spec:
                     x-kubernetes-int-or-string: true
                 type: object
             required:
+            - networking
             - pod
             type: object
           status:
@@ -612,7 +618,7 @@ spec:
                 format: int32
                 type: integer
               status:
-                type: integer
+                type: string
             required:
             - current_replicas
             - desired_replicas
diff --git a/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml b/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
index 638b3039f3..c3295fc4f4 100644
--- a/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
+++ b/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
@@ -1,7 +1,15 @@
 apiVersion: api.cortex.dev/v1alpha1
 kind: RealtimeAPI
 metadata:
-  name: realtimeapi-sample
+  name: hello-world
 spec:
-  # Add fields here
-  foo: bar
+  pod:
+    containers:
+      - name: api
+        image: quay.io/cortexlabs-test/realtime-hello-world-cpu:latest
+    max_concurrency: 1
+    max_queue_length: 100
+    port: 8080
+    replicas: 1
+  networking:
+    endpoint: "/hello-world"
diff --git a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
index 1523665472..b6d9b9e610 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
@@ -295,37 +295,39 @@ func (r *RealtimeAPIReconciler) userContainers(api apiv1alpha1.RealtimeAPI) ([]k
 			Privileged: pointer.Bool(true),
 		}
 
-		if container.Compute.CPU != nil {
-			containerResourceList[kcore.ResourceCPU] = *k8s.QuantityPtr(container.Compute.CPU.DeepCopy())
-		}
+		if container.Compute != nil {
+			if container.Compute.CPU != nil {
+				containerResourceList[kcore.ResourceCPU] = *k8s.QuantityPtr(container.Compute.CPU.DeepCopy())
+			}
 
-		if container.Compute.Mem != nil {
-			containerResourceList[kcore.ResourceMemory] = *k8s.QuantityPtr(container.Compute.Mem.DeepCopy())
-		}
+			if container.Compute.Mem != nil {
+				containerResourceList[kcore.ResourceMemory] = *k8s.QuantityPtr(container.Compute.Mem.DeepCopy())
+			}
 
-		if container.Compute.GPU > 0 {
-			containerResourceList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
-			containerResourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
-		}
+			if container.Compute.GPU > 0 {
+				containerResourceList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
+				containerResourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(container.Compute.GPU, kresource.DecimalSI)
+			}
 
-		if container.Compute.Inf > 0 {
-			totalHugePages := container.Compute.Inf * workloads.HugePagesMemPerInf
-			containerResourceList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
-			containerResourceList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
-			containerResourceLimitsList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
-			containerResourceLimitsList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
-
-			securityContext.Capabilities = &kcore.Capabilities{
-				Add: []kcore.Capability{
-					"SYS_ADMIN",
-					"IPC_LOCK",
-				},
+			if container.Compute.Inf > 0 {
+				totalHugePages := container.Compute.Inf * workloads.HugePagesMemPerInf
+				containerResourceList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
+				containerResourceList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
+				containerResourceLimitsList["aws.amazon.com/neuron"] = *kresource.NewQuantity(container.Compute.Inf, kresource.DecimalSI)
+				containerResourceLimitsList["hugepages-2Mi"] = *kresource.NewQuantity(totalHugePages, kresource.BinarySI)
+
+				securityContext.Capabilities = &kcore.Capabilities{
+					Add: []kcore.Capability{
+						"SYS_ADMIN",
+						"IPC_LOCK",
+					},
+				}
 			}
-		}
 
-		if container.Compute.Shm != nil {
-			volumes = append(volumes, workloads.ShmVolume(*container.Compute.Shm, "dshm-"+container.Name))
-			containerMounts = append(containerMounts, workloads.ShmMount("dshm-"+container.Name))
+			if container.Compute.Shm != nil {
+				volumes = append(volumes, workloads.ShmVolume(*container.Compute.Shm, "dshm-"+container.Name))
+				containerMounts = append(containerMounts, workloads.ShmMount("dshm-"+container.Name))
+			}
 		}
 
 		containerEnvVars := workloads.BaseEnvVars
diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index aba1346cac..d4400a5e88 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -156,7 +156,7 @@ func main() {
 	if err = (&batchcontrollers.BatchJobReconciler{
 		Client:        mgr.GetClient(),
 		Config:        batchcontrollers.BatchJobReconcilerConfig{}.ApplyDefaults(),
-		Log:           ctrl.Log.WithName("controllers").WithName("BatchJob"),
+		Log:           ctrl.Log.WithName("controllers").WithName("batch").WithName("BatchJob"),
 		ClusterConfig: clusterConfig,
 		AWS:           awsClient,
 		Prometheus:    promv1.NewAPI(promClient),
@@ -166,9 +166,10 @@ func main() {
 		os.Exit(1)
 	}
 	if err = (&apicontrollers.RealtimeAPIReconciler{
-		Client: mgr.GetClient(),
-		Log:    ctrl.Log.WithName("controllers").WithName("api").WithName("RealtimeAPI"),
-		Scheme: mgr.GetScheme(),
+		Client:        mgr.GetClient(),
+		ClusterConfig: clusterConfig,
+		Log:           ctrl.Log.WithName("controllers").WithName("api").WithName("RealtimeAPI"),
+		Scheme:        mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "RealtimeAPI")
 		os.Exit(1)
diff --git a/pkg/types/status/code.go b/pkg/types/status/code.go
index 3845a913c3..17fc8ca12f 100644
--- a/pkg/types/status/code.go
+++ b/pkg/types/status/code.go
@@ -16,6 +16,7 @@ limitations under the License.
 
 package status
 
+// +kubebuilder:validation:Type=string
 type Code int
 
 const (

From ad5cfdb857392b9a1f40da9d2157bc13789d8f7a Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 13:04:22 +0200
Subject: [PATCH 08/42] Fix createOrUpdateService method

---
 .../controllers/api/realtimeapi_controller_helpers.go     | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
index b6d9b9e610..4f4bab31f3 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
@@ -99,7 +99,13 @@ func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api a
 			Namespace: api.Namespace},
 	}
 	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &service, func() error {
-		service.Spec = r.desiredService(api).Spec
+		desiredSvc := r.desiredService(api)
+		// Set fields individually rather than replacing Spec wholesale: some Service fields (e.g. spec.clusterIP) are immutable once assigned
+		service.Labels = desiredSvc.Labels
+		service.Annotations = desiredSvc.Annotations
+		service.Spec.Type = desiredSvc.Spec.Type
+		service.Spec.Ports = desiredSvc.Spec.Ports
+		service.Spec.Selector = desiredSvc.Spec.Selector
 		return nil
 	})
 	if err != nil {

From 164d492f0e4357bb9d9e9d9b0fe4f2fc312cf0fb Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 13:22:17 +0200
Subject: [PATCH 09/42] Rename CRD api group to serverless

---
 pkg/crds/PROJECT                              |  4 +-
 .../v1alpha1/groupversion_info.go             |  4 +-
 .../v1alpha1/realtimeapi_types.go             |  0
 .../v1alpha1/zz_generated.deepcopy.go         |  0
 ...> serverless.cortex.dev_realtimeapis.yaml} |  4 +-
 pkg/crds/config/crd/kustomization.yaml        |  4 +-
 pkg/crds/config/rbac/role.yaml                | 52 +++++++++----------
 ...l => serverless_v1alpha1_realtimeapi.yaml} |  2 +-
 .../realtimeapi_controller.go                 | 14 ++---
 .../realtimeapi_controller_helpers.go         | 36 ++++++-------
 .../{api => serverless}/suite_test.go         |  6 +--
 pkg/crds/main.go                              |  8 +--
 12 files changed, 67 insertions(+), 67 deletions(-)
 rename pkg/crds/apis/{api => serverless}/v1alpha1/groupversion_info.go (90%)
 rename pkg/crds/apis/{api => serverless}/v1alpha1/realtimeapi_types.go (100%)
 rename pkg/crds/apis/{api => serverless}/v1alpha1/zz_generated.deepcopy.go (100%)
 rename pkg/crds/config/crd/bases/{api.cortex.dev_realtimeapis.yaml => serverless.cortex.dev_realtimeapis.yaml} (99%)
 rename pkg/crds/config/samples/{api_v1alpha1_realtimeapi.yaml => serverless_v1alpha1_realtimeapi.yaml} (87%)
 rename pkg/crds/controllers/{api => serverless}/realtimeapi_controller.go (87%)
 rename pkg/crds/controllers/{api => serverless}/realtimeapi_controller_helpers.go (90%)
 rename pkg/crds/controllers/{api => serverless}/suite_test.go (93%)

diff --git a/pkg/crds/PROJECT b/pkg/crds/PROJECT
index 01bbb7d1e9..a80d48987a 100644
--- a/pkg/crds/PROJECT
+++ b/pkg/crds/PROJECT
@@ -19,8 +19,8 @@ resources:
     namespaced: true
   controller: true
   domain: cortex.dev
-  group: api
+  group: serverless
   kind: RealtimeAPI
-  path: github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1
+  path: github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1
   version: v1alpha1
 version: "3"
diff --git a/pkg/crds/apis/api/v1alpha1/groupversion_info.go b/pkg/crds/apis/serverless/v1alpha1/groupversion_info.go
similarity index 90%
rename from pkg/crds/apis/api/v1alpha1/groupversion_info.go
rename to pkg/crds/apis/serverless/v1alpha1/groupversion_info.go
index 3625dc1527..f9193a464a 100644
--- a/pkg/crds/apis/api/v1alpha1/groupversion_info.go
+++ b/pkg/crds/apis/serverless/v1alpha1/groupversion_info.go
@@ -16,7 +16,7 @@ limitations under the License.
 
 // Package v1alpha1 contains API Schema definitions for the api v1alpha1 API group
 //+kubebuilder:object:generate=true
-//+groupName=api.cortex.dev
+//+groupName=serverless.cortex.dev
 package v1alpha1
 
 import (
@@ -26,7 +26,7 @@ import (
 
 var (
 	// GroupVersion is group version used to register these objects
-	GroupVersion = schema.GroupVersion{Group: "api.cortex.dev", Version: "v1alpha1"}
+	GroupVersion = schema.GroupVersion{Group: "serverless.cortex.dev", Version: "v1alpha1"}
 
 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
diff --git a/pkg/crds/apis/api/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
similarity index 100%
rename from pkg/crds/apis/api/v1alpha1/realtimeapi_types.go
rename to pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
diff --git a/pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
similarity index 100%
rename from pkg/crds/apis/api/v1alpha1/zz_generated.deepcopy.go
rename to pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
diff --git a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
similarity index 99%
rename from pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
rename to pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index f68726183b..377b3cc658 100644
--- a/pkg/crds/config/crd/bases/api.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -6,9 +6,9 @@ metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.4.1
   creationTimestamp: null
-  name: realtimeapis.api.cortex.dev
+  name: realtimeapis.serverless.cortex.dev
 spec:
-  group: api.cortex.dev
+  group: serverless.cortex.dev
   names:
     kind: RealtimeAPI
     listKind: RealtimeAPIList
diff --git a/pkg/crds/config/crd/kustomization.yaml b/pkg/crds/config/crd/kustomization.yaml
index 59e4b92a53..77bbf7b21d 100644
--- a/pkg/crds/config/crd/kustomization.yaml
+++ b/pkg/crds/config/crd/kustomization.yaml
@@ -3,10 +3,10 @@
 # It should be run by config/default
 resources:
 - bases/batch.cortex.dev_batchjobs.yaml
-- bases/api.cortex.dev_realtimeapis.yaml
+- bases/serverless.cortex.dev_realtimeapis.yaml
 #+kubebuilder:scaffold:crdkustomizeresource
 
-patchesStrategicMerge:
+#patchesStrategicMerge:
 # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
 # patches here are for enabling the conversion webhook for each CRD
 #- patches/webhook_in_batchjobs.yaml
diff --git a/pkg/crds/config/rbac/role.yaml b/pkg/crds/config/rbac/role.yaml
index c6c2b052a2..2c9787c377 100644
--- a/pkg/crds/config/rbac/role.yaml
+++ b/pkg/crds/config/rbac/role.yaml
@@ -34,32 +34,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - api.cortex.dev
-  resources:
-  - realtimeapis
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - api.cortex.dev
-  resources:
-  - realtimeapis/finalizers
-  verbs:
-  - update
-- apiGroups:
-  - api.cortex.dev
-  resources:
-  - realtimeapis/status
-  verbs:
-  - get
-  - patch
-  - update
 - apiGroups:
   - apps
   resources:
@@ -119,3 +93,29 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - serverless.cortex.dev
+  resources:
+  - realtimeapis
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - serverless.cortex.dev
+  resources:
+  - realtimeapis/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - serverless.cortex.dev
+  resources:
+  - realtimeapis/status
+  verbs:
+  - get
+  - patch
+  - update
diff --git a/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml b/pkg/crds/config/samples/serverless_v1alpha1_realtimeapi.yaml
similarity index 87%
rename from pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
rename to pkg/crds/config/samples/serverless_v1alpha1_realtimeapi.yaml
index c3295fc4f4..a3e9ab2f4f 100644
--- a/pkg/crds/config/samples/api_v1alpha1_realtimeapi.yaml
+++ b/pkg/crds/config/samples/serverless_v1alpha1_realtimeapi.yaml
@@ -1,4 +1,4 @@
-apiVersion: api.cortex.dev/v1alpha1
+apiVersion: serverless.cortex.dev/v1alpha1
 kind: RealtimeAPI
 metadata:
   name: hello-world
diff --git a/pkg/crds/controllers/api/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
similarity index 87%
rename from pkg/crds/controllers/api/realtimeapi_controller.go
rename to pkg/crds/controllers/serverless/realtimeapi_controller.go
index 53e368d856..1c1bd1b93d 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -14,13 +14,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package api
+package serverlesscontroller
 
 import (
 	"context"
 	"fmt"
 
-	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/crds/controllers"
 	"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
 	"github.com/go-logr/logr"
@@ -43,9 +43,9 @@ type RealtimeAPIReconciler struct {
 	Scheme        *runtime.Scheme
 }
 
-// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
-// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/status,verbs=get;update;patch
-// +kubebuilder:rbac:groups=api.cortex.dev,resources=realtimeapis/finalizers,verbs=update
+// +kubebuilder:rbac:groups=serverless.cortex.dev,resources=realtimeapis,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=serverless.cortex.dev,resources=realtimeapis/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=serverless.cortex.dev,resources=realtimeapis/finalizers,verbs=update
 // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch
 // +kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices,verbs=get;list;watch;create;update;patch
 // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch
@@ -57,7 +57,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	log := r.Log.WithValues("realtimeapi", req.NamespacedName)
 
 	// Step 1: get resource from request
-	api := apiv1alpha1.RealtimeAPI{}
+	api := serverless.RealtimeAPI{}
 	log.V(1).Info("retrieving resource")
 	if err := r.Get(ctx, req.NamespacedName, &api); err != nil {
 		if !kerrors.IsNotFound(err) {
@@ -109,7 +109,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 // SetupWithManager sets up the controller with the Manager.
 func (r *RealtimeAPIReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&apiv1alpha1.RealtimeAPI{}).
+		For(&serverless.RealtimeAPI{}).
 		Owns(&kapps.Deployment{}).
 		Owns(&kcore.Service{}).
 		Owns(&istioclientnetworking.VirtualService{}).
diff --git a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
similarity index 90%
rename from pkg/crds/controllers/api/realtimeapi_controller_helpers.go
rename to pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 4f4bab31f3..1beb66a88b 100644
--- a/pkg/crds/controllers/api/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -14,14 +14,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package api
+package serverlesscontroller
 
 import (
 	"context"
 	"fmt"
 
 	"github.com/cortexlabs/cortex/pkg/consts"
-	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
@@ -41,7 +41,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 )
 
-func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (*kapps.Deployment, error) {
+func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api serverless.RealtimeAPI) (*kapps.Deployment, error) {
 	req := client.ObjectKey{Namespace: api.Namespace, Name: workloads.K8sName(api.Name)}
 	deployment := kapps.Deployment{}
 	if err := r.Get(ctx, req, &deployment); err != nil {
@@ -53,7 +53,7 @@ func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api apiv1alph
 	return &deployment, nil
 }
 
-func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *apiv1alpha1.RealtimeAPI, deployment *kapps.Deployment) error {
+func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *serverless.RealtimeAPI, deployment *kapps.Deployment) error {
 	apiStatus := status.Pending
 	api.Status.Status = apiStatus // FIXME: handle other status
 
@@ -76,7 +76,7 @@ func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *apiv1alph
 	return nil
 }
 
-func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	deployment := kapps.Deployment{
 		ObjectMeta: kmeta.ObjectMeta{
 			Name:      workloads.K8sName(api.Name),
@@ -92,7 +92,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, ap
 	return op, nil
 }
 
-func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	service := kcore.Service{
 		ObjectMeta: kmeta.ObjectMeta{
 			Name:      workloads.K8sName(api.Name),
@@ -114,7 +114,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api a
 	return op, nil
 }
 
-func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context, api apiv1alpha1.RealtimeAPI) (controllerutil.OperationResult, error) {
+func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	vs := istioclientnetworking.VirtualService{
 		ObjectMeta: kmeta.ObjectMeta{
 			Name:      workloads.K8sName(api.Name),
@@ -130,7 +130,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context
 	return op, nil
 }
 
-func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *apiv1alpha1.RealtimeAPI) (string, error) {
+func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless.RealtimeAPI) (string, error) {
 	req := client.ObjectKey{Namespace: consts.IstioNamespace, Name: "ingressgateway-apis"}
 	svc := kcore.Service{}
 	if err := r.Get(ctx, req, &svc); err != nil {
@@ -149,7 +149,7 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *apiv1alpha
 	return endpoint, nil
 }
 
-func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) kapps.Deployment {
+func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) kapps.Deployment {
 	containers, volumes := r.desiredContainers(api)
 
 	return *k8s.Deployment(&k8s.DeploymentSpec{
@@ -164,7 +164,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) k
 			"deploymentID":   api.Annotations["cortex.dev/deployment-id"], // FIXME: needs to be created beforehand
 			"cortex.dev/api": "true",
 		},
-		Annotations: getAPIAnnotations(api),
+		Annotations: r.getAPIAnnotations(api),
 		Selector: map[string]string{
 			"apiName": api.Name,
 			"apiKind": userconfig.RealtimeAPIKind.String(),
@@ -193,7 +193,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api apiv1alpha1.RealtimeAPI) k
 	})
 }
 
-func (r *RealtimeAPIReconciler) desiredContainers(api apiv1alpha1.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
+func (r *RealtimeAPIReconciler) desiredContainers(api serverless.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
 	containers, volumes := r.userContainers(api)
 	proxyContainer, proxyVolume := r.proxyContainer(api)
 
@@ -203,13 +203,13 @@ func (r *RealtimeAPIReconciler) desiredContainers(api apiv1alpha1.RealtimeAPI) (
 	return containers, volumes
 }
 
-func (r *RealtimeAPIReconciler) desiredService(api apiv1alpha1.RealtimeAPI) kcore.Service {
+func (r *RealtimeAPIReconciler) desiredService(api serverless.RealtimeAPI) kcore.Service {
 	return *k8s.Service(&k8s.ServiceSpec{
 		Name:        workloads.K8sName(api.Name),
 		PortName:    "http",
 		Port:        consts.ProxyPortInt32,
 		TargetPort:  consts.ProxyPortInt32,
-		Annotations: getAPIAnnotations(api),
+		Annotations: r.getAPIAnnotations(api),
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
@@ -222,7 +222,7 @@ func (r *RealtimeAPIReconciler) desiredService(api apiv1alpha1.RealtimeAPI) kcor
 	})
 }
 
-func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAPI) istioclientnetworking.VirtualService {
+func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI) istioclientnetworking.VirtualService {
 	var activatorWeight int32
 	if api.Spec.Pod.Replicas == 0 {
 		activatorWeight = 100
@@ -270,7 +270,7 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAP
 		},
 		PrefixPath:  pointer.String(api.Spec.Networking.Endpoint),
 		Rewrite:     pointer.String("/"),
-		Annotations: getAPIAnnotations(api),
+		Annotations: r.getAPIAnnotations(api),
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
@@ -281,7 +281,7 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api apiv1alpha1.RealtimeAP
 	})
 }
 
-func (r *RealtimeAPIReconciler) userContainers(api apiv1alpha1.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
+func (r *RealtimeAPIReconciler) userContainers(api serverless.RealtimeAPI) ([]kcore.Container, []kcore.Volume) {
 	volumes := []kcore.Volume{
 		workloads.MntVolume(),
 		workloads.CortexVolume(),
@@ -361,7 +361,7 @@ func (r *RealtimeAPIReconciler) userContainers(api apiv1alpha1.RealtimeAPI) ([]k
 	return containers, volumes
 }
 
-func (r *RealtimeAPIReconciler) proxyContainer(api apiv1alpha1.RealtimeAPI) (kcore.Container, kcore.Volume) {
+func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcore.Container, kcore.Volume) {
 	return kcore.Container{
 		Name:            workloads.ProxyContainerName,
 		Image:           r.ClusterConfig.ImageProxy,
@@ -411,7 +411,7 @@ func (r *RealtimeAPIReconciler) proxyContainer(api apiv1alpha1.RealtimeAPI) (kco
 	}, workloads.ClusterConfigVolume()
 }
 
-func getAPIAnnotations(api apiv1alpha1.RealtimeAPI) map[string]string {
+func (r *RealtimeAPIReconciler) getAPIAnnotations(api serverless.RealtimeAPI) map[string]string {
 	return map[string]string{
 		userconfig.MinReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MinReplicas),
 		userconfig.MaxReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MaxReplicas),
diff --git a/pkg/crds/controllers/api/suite_test.go b/pkg/crds/controllers/serverless/suite_test.go
similarity index 93%
rename from pkg/crds/controllers/api/suite_test.go
rename to pkg/crds/controllers/serverless/suite_test.go
index 134a7234a8..5698e2887b 100644
--- a/pkg/crds/controllers/api/suite_test.go
+++ b/pkg/crds/controllers/serverless/suite_test.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package api
+package serverlesscontroller
 
 import (
 	"path/filepath"
@@ -30,7 +30,7 @@ import (
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
-	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -62,7 +62,7 @@ var _ = BeforeSuite(func() {
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	err = apiv1alpha1.AddToScheme(scheme.Scheme)
+	err = serverless.AddToScheme(scheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
 
 	//+kubebuilder:scaffold:scheme
diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index d4400a5e88..01e08b1d82 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -42,10 +42,10 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
-	apiv1alpha1 "github.com/cortexlabs/cortex/pkg/crds/apis/api/v1alpha1"
 	batch "github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1"
-	apicontrollers "github.com/cortexlabs/cortex/pkg/crds/controllers/api"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	batchcontrollers "github.com/cortexlabs/cortex/pkg/crds/controllers/batch"
+	serverlesscontrollers "github.com/cortexlabs/cortex/pkg/crds/controllers/serverless"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -59,7 +59,7 @@ func init() {
 	utilruntime.Must(istioscheme.AddToScheme(scheme))
 
 	utilruntime.Must(batch.AddToScheme(scheme))
-	utilruntime.Must(apiv1alpha1.AddToScheme(scheme))
+	utilruntime.Must(serverless.AddToScheme(scheme))
 	//+kubebuilder:scaffold:scheme
 }
 
@@ -165,7 +165,7 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "BatchJob")
 		os.Exit(1)
 	}
-	if err = (&apicontrollers.RealtimeAPIReconciler{
+	if err = (&serverlesscontrollers.RealtimeAPIReconciler{
 		Client:        mgr.GetClient(),
 		ClusterConfig: clusterConfig,
 		Log:           ctrl.Log.WithName("controllers").WithName("api").WithName("RealtimeAPI"),

From 5f22fd83b29a6e2609106de7a1404b2d94b3d8d1 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 13:32:29 +0200
Subject: [PATCH 10/42] Update logger name for serverless CRD controllers

---
 pkg/crds/main.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index 01e08b1d82..9948837832 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -168,7 +168,7 @@ func main() {
 	if err = (&serverlesscontrollers.RealtimeAPIReconciler{
 		Client:        mgr.GetClient(),
 		ClusterConfig: clusterConfig,
-		Log:           ctrl.Log.WithName("controllers").WithName("api").WithName("RealtimeAPI"),
+		Log:           ctrl.Log.WithName("controllers").WithName("serverless").WithName("RealtimeAPI"),
 		Scheme:        mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "RealtimeAPI")

From 6a97d37f6ba9dd110784a65db16982d7dc0586bc Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 13:40:01 +0200
Subject: [PATCH 11/42] Add additional print columns to realtime crd

---
 .../apis/serverless/v1alpha1/realtimeapi_types.go |  4 ++++
 .../bases/serverless.cortex.dev_realtimeapis.yaml | 15 ++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 997f9b87f0..18ff08c4ee 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -220,6 +220,10 @@ type RealtimeAPIStatus struct {
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:JSONPath=".status.current_replicas",name="Replicas",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.ready_replicas",name="Ready",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.status",name="Status",type="string"
+//+kubebuilder:printcolumn:JSONPath=".status.endpoint",name="Endpoint",type="string"
 
 // RealtimeAPI is the Schema for the realtimeapis API
 type RealtimeAPI struct {
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 377b3cc658..507793db05 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -16,7 +16,20 @@ spec:
     singular: realtimeapi
   scope: Namespaced
   versions:
-  - name: v1alpha1
+  - additionalPrinterColumns:
+    - jsonPath: .status.current_replicas
+      name: Replicas
+      type: integer
+    - jsonPath: .status.ready_replicas
+      name: Ready
+      type: integer
+    - jsonPath: .status.status
+      name: Status
+      type: string
+    - jsonPath: .status.endpoint
+      name: Endpoint
+      type: string
+    name: v1alpha1
     schema:
       openAPIV3Schema:
         description: RealtimeAPI is the Schema for the realtimeapis API

From da929a6e7da02009a3ec2c40e95b51b550e3b4c9 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 16:35:57 +0200
Subject: [PATCH 12/42] Fix annotations in realtime crd

---
 .../serverless/realtimeapi_controller_helpers.go            | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 1beb66a88b..5c2c23d3fd 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -416,9 +416,9 @@ func (r *RealtimeAPIReconciler) getAPIAnnotations(api serverless.RealtimeAPI) ma
 		userconfig.MinReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MinReplicas),
 		userconfig.MaxReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MaxReplicas),
 		userconfig.TargetInFlightAnnotationKey:               strings.Int32(api.Spec.Autoscaling.TargetInFlight),
-		userconfig.WindowAnnotationKey:                       api.Spec.Autoscaling.Window.String(),
-		userconfig.DownscaleStabilizationPeriodAnnotationKey: api.Spec.Autoscaling.DownscaleStabilizationPeriod.String(),
-		userconfig.UpscaleStabilizationPeriodAnnotationKey:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.String(),
+		userconfig.WindowAnnotationKey:                       api.Spec.Autoscaling.Window.Duration.String(),
+		userconfig.DownscaleStabilizationPeriodAnnotationKey: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration.String(),
+		userconfig.UpscaleStabilizationPeriodAnnotationKey:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.Duration.String(),
 		userconfig.MaxDownscaleFactorAnnotationKey:           strings.Float64(api.Spec.Autoscaling.MaxDownscaleFactor.AsApproximateFloat64()),
 		userconfig.MaxUpscaleFactorAnnotationKey:             strings.Float64(api.Spec.Autoscaling.MaxUpscaleFactor.AsApproximateFloat64()),
 		userconfig.DownscaleToleranceAnnotationKey:           strings.Float64(api.Spec.Autoscaling.DownscaleTolerance.AsApproximateFloat64()),

From f164ecbd565818573b2ecb1c94479c4d4d0b1249 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 16:36:40 +0200
Subject: [PATCH 13/42] Fix endpoint string on realtime crd status

---
 .../controllers/serverless/realtimeapi_controller_helpers.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 5c2c23d3fd..5dd1f862bc 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -142,8 +142,9 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless
 		return "", nil
 	}
 
-	endpoint := fmt.Sprintf("http://%s/%s",
-		svc.Status.LoadBalancer.Ingress[0].Hostname, api.Spec.Networking.Endpoint,
+	endpoint := urls.Join(
+		fmt.Sprintf("http://%s", svc.Status.LoadBalancer.Ingress[0].Hostname),
+		api.Spec.Networking.Endpoint,
 	)
 
 	return endpoint, nil

From 27c83071b419fc91f6fa433f03fdc54faa542be9 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 16:39:49 +0200
Subject: [PATCH 14/42] Update createOrUpdate* methods

---
 .../serverless/realtimeapi_controller_helpers.go   | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 5dd1f862bc..23c5260a7e 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -24,8 +24,10 @@ import (
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
+	"github.com/cortexlabs/cortex/pkg/lib/maps"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/lib/strings"
+	"github.com/cortexlabs/cortex/pkg/lib/urls"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
@@ -83,7 +85,10 @@ func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, ap
 			Namespace: api.Namespace},
 	}
 	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &deployment, func() error {
-		deployment.Spec = r.desiredDeployment(api).Spec
+		desiredDeployment := r.desiredDeployment(api)
+		deployment.Labels = desiredDeployment.Labels
+		deployment.Annotations = maps.MergeStrMapsString(deployment.Annotations, desiredDeployment.Annotations)
+		deployment.Spec = desiredDeployment.Spec
 		return nil
 	})
 	if err != nil {
@@ -102,7 +107,7 @@ func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api s
 		desiredSvc := r.desiredService(api)
 		// We need to set fields individually because some are immutable
 		service.Labels = desiredSvc.Labels
-		service.Annotations = desiredSvc.Annotations
+		service.Annotations = maps.MergeStrMapsString(service.Annotations, desiredSvc.Annotations)
 		service.Spec.Type = desiredSvc.Spec.Type
 		service.Spec.Ports = desiredSvc.Spec.Ports
 		service.Spec.Selector = desiredSvc.Spec.Selector
@@ -121,7 +126,10 @@ func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context
 			Namespace: api.Namespace},
 	}
 	op, err := controllerutil.CreateOrUpdate(ctx, r.Client, &vs, func() error {
-		vs.Spec = r.desiredVirtualService(api).Spec
+		desiredVirtualService := r.desiredVirtualService(api)
+		vs.Labels = desiredVirtualService.Labels
+		vs.Annotations = maps.MergeStrMapsString(vs.Annotations, desiredVirtualService.Annotations)
+		vs.Spec = desiredVirtualService.Spec
 		return nil
 	})
 	if err != nil {

From 2f0f481a18810e71675909b65d055c4ac9f02302 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 18:52:08 +0200
Subject: [PATCH 15/42] Create apiID and deploymentID annotations on resource
 creation

---
 .../serverless/v1alpha1/realtimeapi_types.go  |  2 +-
 .../serverless.cortex.dev_realtimeapis.yaml   |  2 +-
 .../serverless/realtimeapi_controller.go      | 15 +++-
 .../realtimeapi_controller_helpers.go         | 72 +++++++++++++------
 4 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 18ff08c4ee..eb82ed3944 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -220,7 +220,7 @@ type RealtimeAPIStatus struct {
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
-//+kubebuilder:printcolumn:JSONPath=".status.current_replicas",name="Replicas",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".spec.replicas",name="Replicas",type="integer"
 //+kubebuilder:printcolumn:JSONPath=".status.ready_replicas",name="Ready",type="integer"
 //+kubebuilder:printcolumn:JSONPath=".status.status",name="Status",type="string"
 //+kubebuilder:printcolumn:JSONPath=".status.endpoint",name="Endpoint",type="string"
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 507793db05..2e7a8479a5 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -17,7 +17,7 @@ spec:
   scope: Namespaced
   versions:
   - additionalPrinterColumns:
-    - jsonPath: .status.current_replicas
+    - jsonPath: .spec.replicas
       name: Replicas
       type: integer
     - jsonPath: .status.ready_replicas
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index 1c1bd1b93d..f6cbf8ef6f 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -84,7 +84,20 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return ctrl.Result{}, err
 	}
 
-	// Step 3: Create or Update Resources
+	// Step 3: Get or create deployment and API ids
+	deploymentID, apiID := r.getOrCreateAPIIDs(api)
+	if api.Annotations["cortex.dev/deployment-id"] == "" ||
+		api.Annotations["cortex.dev/api-id"] == "" {
+
+		log.V(1).Info("creating api and deployment id annotations")
+		api.Annotations["cortex.dev/deployment-id"] = deploymentID
+		api.Annotations["cortex.dev/api-id"] = apiID
+		if err = r.Update(ctx, &api); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	// Step 4: Create or Update Resources
 	deployOp, err := r.createOrUpdateDeployment(ctx, api)
 	if err != nil {
 		return ctrl.Result{}, err
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 23c5260a7e..cc86eb8135 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -17,17 +17,20 @@ limitations under the License.
 package serverlesscontroller
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/hash"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/maps"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
-	"github.com/cortexlabs/cortex/pkg/lib/strings"
+	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/lib/urls"
+	"github.com/cortexlabs/cortex/pkg/types/spec"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
@@ -39,6 +42,7 @@ import (
 	kresource "k8s.io/apimachinery/pkg/api/resource"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 )
@@ -160,6 +164,7 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless
 
 func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) kapps.Deployment {
 	containers, volumes := r.desiredContainers(api)
+	deploymentID, apiID := r.getOrCreateAPIIDs(api)
 
 	return *k8s.Deployment(&k8s.DeploymentSpec{
 		Name:           workloads.K8sName(api.Name),
@@ -169,11 +174,11 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
-			"apiID":          api.Annotations["cortex.dev/api-id"],        // TODO: check if can be replaced with resource version
-			"deploymentID":   api.Annotations["cortex.dev/deployment-id"], // FIXME: needs to be created beforehand
+			"apiID":          apiID,
+			"deploymentID":   deploymentID,
 			"cortex.dev/api": "true",
 		},
-		Annotations: r.getAPIAnnotations(api),
+		Annotations: r.generateAPIAnnotations(api),
 		Selector: map[string]string{
 			"apiName": api.Name,
 			"apiKind": userconfig.RealtimeAPIKind.String(),
@@ -182,7 +187,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 			Labels: map[string]string{
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
-				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
+				"deploymentID":   deploymentID,
 				"cortex.dev/api": "true",
 			},
 			Annotations: map[string]string{
@@ -218,7 +223,7 @@ func (r *RealtimeAPIReconciler) desiredService(api serverless.RealtimeAPI) kcore
 		PortName:    "http",
 		Port:        consts.ProxyPortInt32,
 		TargetPort:  consts.ProxyPortInt32,
-		Annotations: r.getAPIAnnotations(api),
+		Annotations: r.generateAPIAnnotations(api),
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
@@ -237,6 +242,8 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 		activatorWeight = 100
 	}
 
+	deploymentID, apiID := r.getOrCreateAPIIDs(api)
+
 	return *k8s.VirtualService(&k8s.VirtualServiceSpec{
 		Name:     workloads.K8sName(api.Name),
 		Gateways: []string{"apis-gateway"},
@@ -279,12 +286,12 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 		},
 		PrefixPath:  pointer.String(api.Spec.Networking.Endpoint),
 		Rewrite:     pointer.String("/"),
-		Annotations: r.getAPIAnnotations(api),
+		Annotations: r.generateAPIAnnotations(api),
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
-			"apiID":          api.Annotations["cortex.dev/api-id"],
-			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
+			"apiID":          apiID,
+			"deploymentID":   deploymentID,
 			"cortex.dev/api": "true",
 		},
 	})
@@ -383,11 +390,11 @@ func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcor
 			"--admin-port",
 			consts.AdminPortStr,
 			"--user-port",
-			strings.Int32(api.Spec.Pod.Port),
+			s.Int32(api.Spec.Pod.Port),
 			"--max-concurrency",
-			strings.Int32(api.Spec.Pod.MaxConcurrency),
+			s.Int32(api.Spec.Pod.MaxConcurrency),
 			"--max-queue-length",
-			strings.Int32(api.Spec.Pod.MaxQueueLength),
+			s.Int32(api.Spec.Pod.MaxQueueLength),
 		},
 		Ports: []kcore.ContainerPort{
 			{Name: consts.AdminPortName, ContainerPort: consts.AdminPortInt32},
@@ -420,17 +427,42 @@ func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcor
 	}, workloads.ClusterConfigVolume()
 }
 
-func (r *RealtimeAPIReconciler) getAPIAnnotations(api serverless.RealtimeAPI) map[string]string {
+func (r *RealtimeAPIReconciler) getOrCreateAPIIDs(api serverless.RealtimeAPI) (deploymentID string, apiID string) {
+	deploymentID = api.Annotations["cortex.dev/deployment-id"]
+	if deploymentID == "" {
+		deploymentID = k8s.RandomName()[:10]
+	}
+
+	apiID = api.Annotations["cortex.dev/api-id"]
+	if apiID == "" {
+		var buf bytes.Buffer
+
+		buf.WriteString(api.Name)
+		buf.WriteString(s.Obj(api.TypeMeta))
+		buf.WriteString(s.Obj(api.Spec.Pod))
+		buf.WriteString(s.Obj(api.Spec.Networking))
+		buf.WriteString(s.Obj(api.Spec.Autoscaling))
+		buf.WriteString(s.Obj(api.Spec.NodeGroups))
+		buf.WriteString(s.Obj(api.Spec.UpdateStrategy))
+		specID := hash.Bytes(buf.Bytes())[:32]
+
+		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
+	}
+
+	return deploymentID, apiID
+}
+
+func (r *RealtimeAPIReconciler) generateAPIAnnotations(api serverless.RealtimeAPI) map[string]string {
 	return map[string]string{
-		userconfig.MinReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MinReplicas),
-		userconfig.MaxReplicasAnnotationKey:                  strings.Int32(api.Spec.Autoscaling.MaxReplicas),
-		userconfig.TargetInFlightAnnotationKey:               strings.Int32(api.Spec.Autoscaling.TargetInFlight),
+		userconfig.MinReplicasAnnotationKey:                  s.Int32(api.Spec.Autoscaling.MinReplicas),
+		userconfig.MaxReplicasAnnotationKey:                  s.Int32(api.Spec.Autoscaling.MaxReplicas),
+		userconfig.TargetInFlightAnnotationKey:               s.Int32(api.Spec.Autoscaling.TargetInFlight),
 		userconfig.WindowAnnotationKey:                       api.Spec.Autoscaling.Window.Duration.String(),
 		userconfig.DownscaleStabilizationPeriodAnnotationKey: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration.String(),
 		userconfig.UpscaleStabilizationPeriodAnnotationKey:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.Duration.String(),
-		userconfig.MaxDownscaleFactorAnnotationKey:           strings.Float64(api.Spec.Autoscaling.MaxDownscaleFactor.AsApproximateFloat64()),
-		userconfig.MaxUpscaleFactorAnnotationKey:             strings.Float64(api.Spec.Autoscaling.MaxUpscaleFactor.AsApproximateFloat64()),
-		userconfig.DownscaleToleranceAnnotationKey:           strings.Float64(api.Spec.Autoscaling.DownscaleTolerance.AsApproximateFloat64()),
-		userconfig.UpscaleToleranceAnnotationKey:             strings.Float64(api.Spec.Autoscaling.UpscaleTolerance.AsApproximateFloat64()),
+		userconfig.MaxDownscaleFactorAnnotationKey:           s.Float64(api.Spec.Autoscaling.MaxDownscaleFactor.AsApproximateFloat64()),
+		userconfig.MaxUpscaleFactorAnnotationKey:             s.Float64(api.Spec.Autoscaling.MaxUpscaleFactor.AsApproximateFloat64()),
+		userconfig.DownscaleToleranceAnnotationKey:           s.Float64(api.Spec.Autoscaling.DownscaleTolerance.AsApproximateFloat64()),
+		userconfig.UpscaleToleranceAnnotationKey:             s.Float64(api.Spec.Autoscaling.UpscaleTolerance.AsApproximateFloat64()),
 	}
 }

From a51872e84999402547c4cf55f94255409ee2a5c6 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Wed, 21 Jul 2021 18:52:24 +0200
Subject: [PATCH 16/42] Set controller reference on child resources

---
 .../serverless/realtimeapi_controller_helpers.go  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index cc86eb8135..fbc934a9f0 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -93,6 +93,11 @@ func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, ap
 		deployment.Labels = desiredDeployment.Labels
 		deployment.Annotations = maps.MergeStrMapsString(deployment.Annotations, desiredDeployment.Annotations)
 		deployment.Spec = desiredDeployment.Spec
+
+		if err := ctrl.SetControllerReference(&api, &deployment, r.Scheme); err != nil {
+			return err
+		}
+
 		return nil
 	})
 	if err != nil {
@@ -115,6 +120,11 @@ func (r *RealtimeAPIReconciler) createOrUpdateService(ctx context.Context, api s
 		service.Spec.Type = desiredSvc.Spec.Type
 		service.Spec.Ports = desiredSvc.Spec.Ports
 		service.Spec.Selector = desiredSvc.Spec.Selector
+
+		if err := ctrl.SetControllerReference(&api, &service, r.Scheme); err != nil {
+			return err
+		}
+
 		return nil
 	})
 	if err != nil {
@@ -134,6 +144,11 @@ func (r *RealtimeAPIReconciler) createOrUpdateVirtualService(ctx context.Context
 		vs.Labels = desiredVirtualService.Labels
 		vs.Annotations = maps.MergeStrMapsString(vs.Annotations, desiredVirtualService.Annotations)
 		vs.Spec = desiredVirtualService.Spec
+
+		if err := ctrl.SetControllerReference(&api, &vs, r.Scheme); err != nil {
+			return err
+		}
+
 		return nil
 	})
 	if err != nil {

From 5d830c7cfc89aa0e92f95fcb00d1653e8028e401 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 22 Jul 2021 12:54:15 +0200
Subject: [PATCH 17/42] Add replica counts to RealtimeAPI CRD status

---
 .../serverless/v1alpha1/realtimeapi_types.go  | 12 +--
 .../v1alpha1/zz_generated.deepcopy.go         |  1 +
 .../serverless.cortex.dev_realtimeapis.yaml   | 95 +++++++++++++++---
 .../realtimeapi_controller_helpers.go         | 98 +++++++++++++++++--
 pkg/types/status/code.go                      | 16 +--
 pkg/types/status/status.go                    | 25 ++---
 6 files changed, 198 insertions(+), 49 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index eb82ed3944..023b1967aa 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -211,17 +211,15 @@ type NetworkingSpec struct {
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
 	// +kubebuilder:validation:Type=string
-	Status          status.Code `json:"status"`
-	DesiredReplicas int32       `json:"desired_replicas"`
-	CurrentReplicas int32       `json:"current_replicas"`
-	ReadyReplicas   int32       `json:"ready_replicas"`
-	Endpoint        string      `json:"endpoint,omitempty"`
+	Status        status.Code          `json:"status"`
+	ReplicaCounts status.ReplicaCounts `json:"replica_counts"`
+	Endpoint      string               `json:"endpoint,omitempty"`
 }
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
-//+kubebuilder:printcolumn:JSONPath=".spec.replicas",name="Replicas",type="integer"
-//+kubebuilder:printcolumn:JSONPath=".status.ready_replicas",name="Ready",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".spec.pod.replicas",name="Replicas",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.replica_counts.updated.ready",name="Ready",type="integer"
 //+kubebuilder:printcolumn:JSONPath=".status.status",name="Status",type="string"
 //+kubebuilder:printcolumn:JSONPath=".status.endpoint",name="Endpoint",type="string"
 
diff --git a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
index a52d87d385..9376932a2d 100644
--- a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
@@ -247,6 +247,7 @@ func (in *RealtimeAPISpec) DeepCopy() *RealtimeAPISpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RealtimeAPIStatus) DeepCopyInto(out *RealtimeAPIStatus) {
 	*out = *in
+	out.ReplicaCounts = in.ReplicaCounts
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPIStatus.
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 2e7a8479a5..61e2d626f2 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -17,10 +17,10 @@ spec:
   scope: Namespaced
   versions:
   - additionalPrinterColumns:
-    - jsonPath: .spec.replicas
+    - jsonPath: .spec.pod.replicas
       name: Replicas
       type: integer
-    - jsonPath: .status.ready_replicas
+    - jsonPath: .status.replica_counts.updated.ready
       name: Ready
       type: integer
     - jsonPath: .status.status
@@ -619,23 +619,90 @@ spec:
           status:
             description: RealtimeAPIStatus defines the observed state of RealtimeAPI
             properties:
-              current_replicas:
-                format: int32
-                type: integer
-              desired_replicas:
-                format: int32
-                type: integer
               endpoint:
                 type: string
-              ready_replicas:
-                format: int32
-                type: integer
+              replica_counts:
+                properties:
+                  requested:
+                    format: int32
+                    type: integer
+                  stale:
+                    properties:
+                      err_image_pull:
+                        format: int32
+                        type: integer
+                      failed:
+                        format: int32
+                        type: integer
+                      initializing:
+                        format: int32
+                        type: integer
+                      killed:
+                        format: int32
+                        type: integer
+                      killed_oom:
+                        format: int32
+                        type: integer
+                      not_ready:
+                        format: int32
+                        type: integer
+                      pending:
+                        format: int32
+                        type: integer
+                      ready:
+                        format: int32
+                        type: integer
+                      stalled:
+                        format: int32
+                        type: integer
+                      terminating:
+                        format: int32
+                        type: integer
+                      unknown:
+                        format: int32
+                        type: integer
+                    type: object
+                  updated:
+                    properties:
+                      err_image_pull:
+                        format: int32
+                        type: integer
+                      failed:
+                        format: int32
+                        type: integer
+                      initializing:
+                        format: int32
+                        type: integer
+                      killed:
+                        format: int32
+                        type: integer
+                      killed_oom:
+                        format: int32
+                        type: integer
+                      not_ready:
+                        format: int32
+                        type: integer
+                      pending:
+                        format: int32
+                        type: integer
+                      ready:
+                        format: int32
+                        type: integer
+                      stalled:
+                        format: int32
+                        type: integer
+                      terminating:
+                        format: int32
+                        type: integer
+                      unknown:
+                        format: int32
+                        type: integer
+                    type: object
+                type: object
               status:
                 type: string
             required:
-            - current_replicas
-            - desired_replicas
-            - ready_replicas
+            - replica_counts
             - status
             type: object
         type: object
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index fbc934a9f0..632f11f9fd 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"time"
 
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
@@ -60,21 +61,28 @@ func (r *RealtimeAPIReconciler) getDeployment(ctx context.Context, api serverles
 }
 
 func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *serverless.RealtimeAPI, deployment *kapps.Deployment) error {
-	apiStatus := status.Pending
-	api.Status.Status = apiStatus // FIXME: handle other status
-
-	endpoint, err := r.getEndpoint(ctx, api)
+	var err error
+	api.Status.Endpoint, err = r.getEndpoint(ctx, api)
 	if err != nil {
 		return errors.Wrap(err, "failed to get api endpoint")
 	}
 
-	api.Status.Endpoint = endpoint
+	apiStatus := status.Pending
+	api.Status.ReplicaCounts = status.ReplicaCounts{}
 	if deployment != nil {
-		api.Status.DesiredReplicas = *deployment.Spec.Replicas
-		api.Status.CurrentReplicas = deployment.Status.Replicas
-		api.Status.ReadyReplicas = deployment.Status.ReadyReplicas
+		if deployment.Status.ReadyReplicas == api.Spec.Pod.Replicas {
+			apiStatus = status.Live
+			api.Status.ReplicaCounts.Updated.Ready = deployment.Status.ReadyReplicas
+			// TODO: handle out of date (?)
+		} else {
+			if err = r.getReplicaCounts(ctx, api); err != nil {
+				return err
+			}
+			apiStatus = r.getStatusCode(api)
+		}
 	}
 
+	api.Status.Status = apiStatus
 	if err = r.Status().Update(ctx, api); err != nil {
 		return err
 	}
@@ -82,6 +90,89 @@ func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *serverles
 	return nil
 }
 
+// getReplicaCounts lists the pods labelled with this API's name, kind and
+// deployment id and tallies them into api.Status.ReplicaCounts.Updated.
+// Ready pods are counted first; every other pod is bucketed by the status
+// reported by k8s.GetPodStatus. The listing is restricted to the API's own
+// namespace so same-labelled pods elsewhere in the cluster are not counted.
+// The counters are only incremented, never reset, here.
+// It returns the error from the list call, if any.
+func (r *RealtimeAPIReconciler) getReplicaCounts(ctx context.Context, api *serverless.RealtimeAPI) error {
+	var podList kcore.PodList
+	if err := r.List(ctx, &podList, client.InNamespace(api.Namespace), client.MatchingLabels{
+		"apiName":      api.Name,
+		"apiKind":      userconfig.RealtimeAPIKind.String(),
+		"deploymentID": api.Annotations["cortex.dev/deployment-id"],
+	}); err != nil {
+		return err
+	}
+	for i := range podList.Items {
+		pod := &podList.Items[i]
+		if k8s.IsPodReady(pod) {
+			api.Status.ReplicaCounts.Updated.Ready++
+			continue
+		}
+
+		switch k8s.GetPodStatus(pod) {
+		case k8s.PodStatusPending:
+			// pending longer than the initializing timeout is reported as stalled
+			if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
+				api.Status.ReplicaCounts.Updated.Stalled++
+			} else {
+				api.Status.ReplicaCounts.Updated.Pending++
+			}
+		case k8s.PodStatusInitializing:
+			api.Status.ReplicaCounts.Updated.Initializing++
+		case k8s.PodStatusRunning:
+			// running but not ready yet (ready pods were counted above)
+			api.Status.ReplicaCounts.Updated.Initializing++
+		case k8s.PodStatusErrImagePull:
+			api.Status.ReplicaCounts.Updated.ErrImagePull++
+		case k8s.PodStatusTerminating:
+			api.Status.ReplicaCounts.Updated.Terminating++
+		case k8s.PodStatusFailed:
+			api.Status.ReplicaCounts.Updated.Failed++
+		case k8s.PodStatusKilled:
+			api.Status.ReplicaCounts.Updated.Killed++
+		case k8s.PodStatusKilledOOM:
+			api.Status.ReplicaCounts.Updated.KilledOOM++
+		default:
+			api.Status.ReplicaCounts.Updated.Unknown++
+		}
+	}
+
+	return nil
+}
+
+// getStatusCode derives the API status from api.Status.ReplicaCounts.Updated.
+// The checks run in order and the first match wins:
+//   - ready replicas >= api.Spec.Pod.Replicas        -> status.Live
+//   - any replica with an image pull error           -> status.ErrorImagePull
+//   - any failed or killed replica                   -> status.Error
+//   - any OOM-killed replica, then any stalled one   -> status.OOM / status.Stalled
+//   - ready replicas >= Spec.Autoscaling.MinReplicas -> status.Live
+// Anything else is reported as status.Updating.
+func (r *RealtimeAPIReconciler) getStatusCode(api *serverless.RealtimeAPI) status.Code {
+	updated := api.Status.ReplicaCounts.Updated
+
+	switch {
+	case updated.Ready >= api.Spec.Pod.Replicas:
+		return status.Live
+	case updated.ErrImagePull > 0:
+		return status.ErrorImagePull
+	case updated.Failed > 0 || updated.Killed > 0:
+		return status.Error
+	case updated.KilledOOM > 0:
+		return status.OOM
+	case updated.Stalled > 0:
+		return status.Stalled
+	case updated.Ready >= api.Spec.Autoscaling.MinReplicas:
+		return status.Live
+	default:
+		return status.Updating
+	}
+}
+
 func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	deployment := kapps.Deployment{
 		ObjectMeta: kmeta.ObjectMeta{
diff --git a/pkg/types/status/code.go b/pkg/types/status/code.go
index 17fc8ca12f..11d9c002ea 100644
--- a/pkg/types/status/code.go
+++ b/pkg/types/status/code.go
@@ -31,14 +31,14 @@ const (
 )
 
 var _codes = []string{
-	"status_unknown",
-	"status_pending",
-	"status_stalled",
-	"status_error",
-	"status_error_image_pull",
-	"status_oom",
-	"status_live",
-	"status_updating",
+	"unknown",
+	"pending",
+	"stalled",
+	"error",
+	"error_image_pull",
+	"oom",
+	"live",
+	"updating",
 }
 
 var _ = [1]int{}[int(Updating)-(len(_codes)-1)] // Ensure list length matches
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 6dad4e1992..b1ef426504 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -24,22 +24,23 @@ type Status struct {
 }
 
 type ReplicaCounts struct {
-	Updated   SubReplicaCounts `json:"updated"`
-	Stale     SubReplicaCounts `json:"stale"`
-	Requested int32            `json:"requested"`
+	Updated   SubReplicaCounts `json:"updated,omitempty"`
+	Stale     SubReplicaCounts `json:"stale,omitempty"`
+	Requested int32            `json:"requested,omitempty"`
 }
 
 type SubReplicaCounts struct {
-	Pending      int32 `json:"pending"`
-	Initializing int32 `json:"initializing"`
+	Pending      int32 `json:"pending,omitempty"`
+	Initializing int32 `json:"initializing,omitempty"`
 	Ready        int32 `json:"ready"`
-	ErrImagePull int32 `json:"err_image_pull"`
-	Terminating  int32 `json:"terminating"`
-	Failed       int32 `json:"failed"`
-	Killed       int32 `json:"killed"`
-	KilledOOM    int32 `json:"killed_oom"`
-	Stalled      int32 `json:"stalled"` // pending for a long time
-	Unknown      int32 `json:"unknown"`
+	NotReady     int32 `json:"not_ready,omitempty"`
+	ErrImagePull int32 `json:"err_image_pull,omitempty"`
+	Terminating  int32 `json:"terminating,omitempty"`
+	Failed       int32 `json:"failed,omitempty"`
+	Killed       int32 `json:"killed,omitempty"`
+	KilledOOM    int32 `json:"killed_oom,omitempty"`
+	Stalled      int32 `json:"stalled,omitempty"` // pending for a long time
+	Unknown      int32 `json:"unknown,omitempty"`
 }
 
 // Worker counts don't have as many failure variations because Jobs clean up dead pods, so counting different failure scenarios isn't interesting

From 766f154e4421220a078c067a8ca3aa5cb7f08e3f Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 22 Jul 2021 14:58:55 +0200
Subject: [PATCH 18/42] Handle api ids annotations

---
 .../serverless/realtimeapi_controller.go      | 22 +++++++++---
 .../realtimeapi_controller_helpers.go         | 36 +++++++++++--------
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index f6cbf8ef6f..1697be70bf 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -85,13 +85,27 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// Step 3: Get or create deployment and API ids
-	deploymentID, apiID := r.getOrCreateAPIIDs(api)
-	if api.Annotations["cortex.dev/deployment-id"] == "" ||
-		api.Annotations["cortex.dev/api-id"] == "" {
+	deploymentID, specID, apiID := r.getOrCreateAPIIDs(api)
+	idsOutdated := api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
+		api.Annotations["cortex.dev/spec-id"] != specID ||
+		api.Annotations["cortex.dev/api-id"] != apiID
 
-		log.V(1).Info("creating api and deployment id annotations")
+	if api.Annotations["cortex.dev/deployment-id"] != deploymentID {
+		log.V(1).Info("updating deployment id annotation")
 		api.Annotations["cortex.dev/deployment-id"] = deploymentID
+	}
+
+	if api.Annotations["cortex.dev/spec-id"] != specID {
+		log.V(1).Info("updating spec id annotation")
+		api.Annotations["cortex.dev/spec-id"] = specID
+	}
+
+	if api.Annotations["cortex.dev/api-id"] != apiID {
+		log.V(1).Info("updating api id annotation")
 		api.Annotations["cortex.dev/api-id"] = apiID
+	}
+
+	if idsOutdated {
 		if err = r.Update(ctx, &api); err != nil {
 			return ctrl.Result{}, err
 		}
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 632f11f9fd..c561539e50 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -261,7 +261,7 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless
 
 func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) kapps.Deployment {
 	containers, volumes := r.desiredContainers(api)
-	deploymentID, apiID := r.getOrCreateAPIIDs(api)
+	deploymentID, _, apiID := r.getOrCreateAPIIDs(api)
 
 	return *k8s.Deployment(&k8s.DeploymentSpec{
 		Name:           workloads.K8sName(api.Name),
@@ -339,7 +339,7 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 		activatorWeight = 100
 	}
 
-	deploymentID, apiID := r.getOrCreateAPIIDs(api)
+	deploymentID, _, apiID := r.getOrCreateAPIIDs(api)
 
 	return *k8s.VirtualService(&k8s.VirtualServiceSpec{
 		Name:     workloads.K8sName(api.Name),
@@ -524,29 +524,35 @@ func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcor
 	}, workloads.ClusterConfigVolume()
 }
 
-func (r *RealtimeAPIReconciler) getOrCreateAPIIDs(api serverless.RealtimeAPI) (deploymentID string, apiID string) {
+func (r *RealtimeAPIReconciler) getOrCreateAPIIDs(api serverless.RealtimeAPI) (deploymentID string, specID string, apiID string) {
 	deploymentID = api.Annotations["cortex.dev/deployment-id"]
 	if deploymentID == "" {
 		deploymentID = k8s.RandomName()[:10]
 	}
 
+	specID = r.getSpecHash(api)
+
 	apiID = api.Annotations["cortex.dev/api-id"]
-	if apiID == "" {
-		var buf bytes.Buffer
-
-		buf.WriteString(api.Name)
-		buf.WriteString(s.Obj(api.TypeMeta))
-		buf.WriteString(s.Obj(api.Spec.Pod))
-		buf.WriteString(s.Obj(api.Spec.Networking))
-		buf.WriteString(s.Obj(api.Spec.Autoscaling))
-		buf.WriteString(s.Obj(api.Spec.NodeGroups))
-		buf.WriteString(s.Obj(api.Spec.UpdateStrategy))
-		specID := hash.Bytes(buf.Bytes())[:32]
+	if apiID == "" ||
+		api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
+		api.Annotations["cortex.dev/spec-id"] != specID {
 
 		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
 	}
 
-	return deploymentID, apiID
+	return deploymentID, specID, apiID
+}
+
+func (r *RealtimeAPIReconciler) getSpecHash(api serverless.RealtimeAPI) string {
+	var buf bytes.Buffer
+	buf.WriteString(api.Name)
+	buf.WriteString(s.Obj(api.TypeMeta))
+	buf.WriteString(s.Obj(api.Spec.Pod))
+	buf.WriteString(s.Obj(api.Spec.Networking))
+	buf.WriteString(s.Obj(api.Spec.Autoscaling))
+	buf.WriteString(s.Obj(api.Spec.NodeGroups))
+	buf.WriteString(s.Obj(api.Spec.UpdateStrategy))
+	return hash.Bytes(buf.Bytes())[:32]
 }
 
 func (r *RealtimeAPIReconciler) generateAPIAnnotations(api serverless.RealtimeAPI) map[string]string {

From 07af2a828b57eafa811dc9e7f61c7bf479b5e43b Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Sat, 24 Jul 2021 17:39:49 +0200
Subject: [PATCH 19/42] WIP: refactor UpdateAPI function for RealtimeAPI to
 work with the CRD

---
 pkg/config/config.go                          |   2 +
 .../serverless/v1alpha1/realtimeapi_types.go  |  18 +-
 .../v1alpha1/zz_generated.deepcopy.go         |   6 +-
 .../serverless.cortex.dev_realtimeapis.yaml   |  39 ++--
 .../serverless/realtimeapi_controller.go      |   4 +
 .../realtimeapi_controller_helpers.go         |  10 +-
 pkg/operator/resources/realtimeapi/api.go     | 174 ++++++++++++------
 7 files changed, 152 insertions(+), 101 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index eb7bd5e269..0e8b9bc566 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -24,6 +24,7 @@ import (
 	"github.com/DataDog/datadog-go/statsd"
 	"github.com/cortexlabs/cortex/pkg/consts"
 	batch "github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/aws"
 	cr "github.com/cortexlabs/cortex/pkg/lib/configreader"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
@@ -55,6 +56,7 @@ var (
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 	utilruntime.Must(batch.AddToScheme(scheme))
+	utilruntime.Must(serverless.AddToScheme(scheme))
 }
 
 func InitConfigs(clusterConfig *clusterconfig.Config, operatorMetadata *clusterconfig.OperatorMetadata) {
diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 023b1967aa..66749debe3 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -148,7 +148,7 @@ type AutoscalingSpec struct {
 	// +kubebuilder:validation:Optional
 	// Desired number of in-flight requests per replica (including requests actively being processed as well as queued),
 	// which the autoscaler tries to maintain
-	TargetInFlight int32 `json:"target_in_flight,omitempty"`
+	TargetInFlight string `json:"target_in_flight,omitempty"`
 
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="60s"
@@ -166,25 +166,25 @@ type AutoscalingSpec struct {
 	UpscaleStabilizationPeriod kmeta.Duration `json:"upscale_stabilization_period,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="750m"
+	// +kubebuilder:default="0.75"
 	// Maximum factor by which to scale down the API on a single scaling event
-	MaxDownscaleFactor resource.Quantity `json:"max_downscale_factor,omitempty"`
+	MaxDownscaleFactor string `json:"max_downscale_factor,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="1500m"
+	// +kubebuilder:default="1.5"
 	// Maximum factor by which to scale up the API on a single scaling event
-	MaxUpscaleFactor resource.Quantity `json:"max_upscale_factor,omitempty"`
+	MaxUpscaleFactor string `json:"max_upscale_factor,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="50m"
+	// +kubebuilder:default="0.05"
 	// Any recommendation falling within this factor below the current number of replicas will not trigger a
 	// scale down event
-	DownscaleTolerance resource.Quantity `json:"downscale_tolerance,omitempty"`
+	DownscaleTolerance string `json:"downscale_tolerance,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="50m"
+	// +kubebuilder:default="0.05"
 	// Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event
-	UpscaleTolerance resource.Quantity `json:"upscale_tolerance,omitempty"`
+	UpscaleTolerance string `json:"upscale_tolerance,omitempty"`
 }
 
 type UpdateStratagySpec struct {
diff --git a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
index 9376932a2d..030c91042f 100644
--- a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
@@ -31,10 +31,6 @@ func (in *AutoscalingSpec) DeepCopyInto(out *AutoscalingSpec) {
 	out.Window = in.Window
 	out.DownscaleStabilizationPeriod = in.DownscaleStabilizationPeriod
 	out.UpscaleStabilizationPeriod = in.UpscaleStabilizationPeriod
-	out.MaxDownscaleFactor = in.MaxDownscaleFactor.DeepCopy()
-	out.MaxUpscaleFactor = in.MaxUpscaleFactor.DeepCopy()
-	out.DownscaleTolerance = in.DownscaleTolerance.DeepCopy()
-	out.UpscaleTolerance = in.UpscaleTolerance.DeepCopy()
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingSpec.
@@ -224,7 +220,7 @@ func (in *RealtimeAPIList) DeepCopyObject() runtime.Object {
 func (in *RealtimeAPISpec) DeepCopyInto(out *RealtimeAPISpec) {
 	*out = *in
 	in.Pod.DeepCopyInto(&out.Pod)
-	in.Autoscaling.DeepCopyInto(&out.Autoscaling)
+	out.Autoscaling = in.Autoscaling
 	if in.NodeGroups != nil {
 		in, out := &in.NodeGroups, &out.NodeGroups
 		*out = make([]string, len(*in))
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 61e2d626f2..3e8b6d267b 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -60,38 +60,26 @@ spec:
                       made during this period
                     type: string
                   downscale_tolerance:
-                    anyOf:
-                    - type: integer
-                    - type: string
-                    default: 50m
+                    default: "0.05"
                     description: Any recommendation falling within this factor below
                       the current number of replicas will not trigger a scale down
                       event
-                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                    x-kubernetes-int-or-string: true
+                    type: string
                   max_downscale_factor:
-                    anyOf:
-                    - type: integer
-                    - type: string
-                    default: 750m
+                    default: "0.75"
                     description: Maximum factor by which to scale down the API on
                       a single scaling event
-                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                    x-kubernetes-int-or-string: true
+                    type: string
                   max_replicas:
                     default: 100
                     description: Maximum number of replicas
                     format: int32
                     type: integer
                   max_upscale_factor:
-                    anyOf:
-                    - type: integer
-                    - type: string
-                    default: 1500m
+                    default: "1.5"
                     description: Maximum factor by which to scale up the API on a
                       single scaling event
-                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                    x-kubernetes-int-or-string: true
+                    type: string
                   min_replicas:
                     default: 1
                     description: Minimum number of replicas
@@ -101,22 +89,17 @@ spec:
                     description: Desired number of in-flight requests per replica
                       (including requests actively being processed as well as queued),
                       which the autoscaler tries to maintain
-                    format: int32
-                    type: integer
+                    type: string
                   upscale_stabilization_period:
                     default: 1m
                     description: The API will not scale above the lowest recommendation
                       made during this period
                     type: string
                   upscale_tolerance:
-                    anyOf:
-                    - type: integer
-                    - type: string
-                    default: 50m
+                    default: "0.05"
                     description: Any recommendation falling within this factor above
                       the current number of replicas will not trigger a scale up event
-                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                    x-kubernetes-int-or-string: true
+                    type: string
                   window:
                     default: 60s
                     description: Duration over which to average the API's in-flight
@@ -661,6 +644,8 @@ spec:
                       unknown:
                         format: int32
                         type: integer
+                    required:
+                    - ready
                     type: object
                   updated:
                     properties:
@@ -697,6 +682,8 @@ spec:
                       unknown:
                         format: int32
                         type: integer
+                    required:
+                    - ready
                     type: object
                 type: object
               status:
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index 1697be70bf..fa970708c1 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -90,6 +90,10 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		api.Annotations["cortex.dev/spec-id"] != specID ||
 		api.Annotations["cortex.dev/api-id"] != apiID
 
+	if api.Annotations == nil {
+		api.Annotations = map[string]string{}
+	}
+
 	if api.Annotations["cortex.dev/deployment-id"] != deploymentID {
 		log.V(1).Info("updating deployment id annotation")
 		api.Annotations["cortex.dev/deployment-id"] = deploymentID
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index c561539e50..41316512bc 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -559,13 +559,13 @@ func (r *RealtimeAPIReconciler) generateAPIAnnotations(api serverless.RealtimeAP
 	return map[string]string{
 		userconfig.MinReplicasAnnotationKey:                  s.Int32(api.Spec.Autoscaling.MinReplicas),
 		userconfig.MaxReplicasAnnotationKey:                  s.Int32(api.Spec.Autoscaling.MaxReplicas),
-		userconfig.TargetInFlightAnnotationKey:               s.Int32(api.Spec.Autoscaling.TargetInFlight),
+		userconfig.TargetInFlightAnnotationKey:               api.Spec.Autoscaling.TargetInFlight,
 		userconfig.WindowAnnotationKey:                       api.Spec.Autoscaling.Window.Duration.String(),
 		userconfig.DownscaleStabilizationPeriodAnnotationKey: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration.String(),
 		userconfig.UpscaleStabilizationPeriodAnnotationKey:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.Duration.String(),
-		userconfig.MaxDownscaleFactorAnnotationKey:           s.Float64(api.Spec.Autoscaling.MaxDownscaleFactor.AsApproximateFloat64()),
-		userconfig.MaxUpscaleFactorAnnotationKey:             s.Float64(api.Spec.Autoscaling.MaxUpscaleFactor.AsApproximateFloat64()),
-		userconfig.DownscaleToleranceAnnotationKey:           s.Float64(api.Spec.Autoscaling.DownscaleTolerance.AsApproximateFloat64()),
-		userconfig.UpscaleToleranceAnnotationKey:             s.Float64(api.Spec.Autoscaling.UpscaleTolerance.AsApproximateFloat64()),
+		userconfig.MaxDownscaleFactorAnnotationKey:           api.Spec.Autoscaling.MaxDownscaleFactor,
+		userconfig.MaxUpscaleFactorAnnotationKey:             api.Spec.Autoscaling.MaxUpscaleFactor,
+		userconfig.DownscaleToleranceAnnotationKey:           api.Spec.Autoscaling.DownscaleTolerance,
+		userconfig.UpscaleToleranceAnnotationKey:             api.Spec.Autoscaling.UpscaleTolerance,
 	}
 }
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 256b253f8e..84e24bf4b8 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -17,16 +17,18 @@ limitations under the License.
 package realtimeapi
 
 import (
+	"context"
 	"fmt"
 	"path/filepath"
-	"time"
+	"reflect"
 
 	"github.com/cortexlabs/cortex/pkg/config"
+	"github.com/cortexlabs/cortex/pkg/consts"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
-	"github.com/cortexlabs/cortex/pkg/operator/lib/routines"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
@@ -36,6 +38,11 @@ import (
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
+	kerrors "k8s.io/apimachinery/pkg/api/errors"
+	kresource "k8s.io/apimachinery/pkg/api/resource"
+	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const _realtimeDashboardUID = "realtimeapi"
@@ -45,67 +52,35 @@ func generateDeploymentID() string {
 }
 
 func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error) {
-	prevDeployment, prevService, prevVirtualService, err := getK8sResources(apiConfig.Name)
-	if err != nil {
-		return nil, "", err
-	}
-
-	initialDeploymentTime := time.Now().UnixNano()
-	deploymentID := generateDeploymentID()
-	if prevVirtualService != nil && prevVirtualService.Labels["initialDeploymentTime"] != "" {
-		var err error
-		initialDeploymentTime, err = k8s.ParseInt64Label(prevVirtualService, "initialDeploymentTime")
-		if err != nil {
-			return nil, "", err
-		}
-		deploymentID = prevVirtualService.Labels["deploymentID"]
-	}
+	ctx := context.Background()
+	var api serverless.RealtimeAPI
+	key := client.ObjectKey{Namespace: consts.DefaultNamespace, Name: apiConfig.Name}
 
-	api := spec.GetAPISpec(apiConfig, initialDeploymentTime, deploymentID, config.ClusterConfig.ClusterUID)
-
-	if prevDeployment == nil {
-		if err := config.AWS.UploadJSONToS3(api, config.ClusterConfig.Bucket, api.Key); err != nil {
-			return nil, "", errors.Wrap(err, "upload api spec")
-		}
-
-		if err := applyK8sResources(api, prevDeployment, prevService, prevVirtualService); err != nil {
-			routines.RunWithPanicHandler(func() {
-				_ = deleteK8sResources(api.Name)
-			})
-			return nil, "", err
+	apiSpec := &spec.API{API: apiConfig}
+	err := config.K8s.Get(ctx, key, &api)
+	if err != nil {
+		if kerrors.IsNotFound(err) {
+			if kerrors.IsNotFound(err) {
+				api := APIConfigToK8sResource(*apiConfig)
+				if err = config.K8s.Create(ctx, &api); err != nil {
+					return nil, "", errors.Wrap(err, "failed to create realtime api resource")
+				}
+				return apiSpec, fmt.Sprintf("creating %s", apiConfig.Resource.UserString()), nil
+			}
 		}
-
-		return api, fmt.Sprintf("creating %s", api.Resource.UserString()), nil
+		return nil, "", errors.Wrap(err, "failed to get realtime api resource")
 	}
 
-	if prevVirtualService.Labels["specID"] != api.SpecID || prevVirtualService.Labels["deploymentID"] != api.DeploymentID {
-		isUpdating, err := isAPIUpdating(prevDeployment)
-		if err != nil {
-			return nil, "", err
-		}
-		if isUpdating && !force {
-			return nil, "", ErrorAPIUpdating(api.Name)
+	desiredAPI := APIConfigToK8sResource(*apiConfig)
+	if !reflect.DeepEqual(api.Spec, desiredAPI.Spec) || force {
+		api.Spec = desiredAPI.Spec
+		if err = config.K8s.Update(ctx, &api); err != nil {
+			return nil, "", errors.Wrap(err, "failed to update realtime api resource")
 		}
-
-		if err := config.AWS.UploadJSONToS3(api, config.ClusterConfig.Bucket, api.Key); err != nil {
-			return nil, "", errors.Wrap(err, "upload api spec")
-		}
-
-		if err := applyK8sResources(api, prevDeployment, prevService, prevVirtualService); err != nil {
-			return nil, "", err
-		}
-		return api, fmt.Sprintf("updating %s", api.Resource.UserString()), nil
+		return apiSpec, fmt.Sprintf("updating %s", apiConfig.Resource.UserString()), nil
 	}
 
-	// deployment didn't change
-	isUpdating, err := isAPIUpdating(prevDeployment)
-	if err != nil {
-		return nil, "", err
-	}
-	if isUpdating {
-		return api, fmt.Sprintf("%s is already updating", api.Resource.UserString()), nil
-	}
-	return api, fmt.Sprintf("%s is up to date", api.Resource.UserString()), nil
+	return apiSpec, fmt.Sprintf("%s is up to date", apiConfig.Resource.UserString()), nil
 }
 
 func RefreshAPI(apiName string, force bool) (string, error) {
@@ -396,3 +371,90 @@ func getDashboardURL(apiName string) string {
 
 	return dashboardURL
 }
+
+func APIConfigToK8sResource(apiConfig userconfig.API) serverless.RealtimeAPI {
+	var containers []serverless.ContainerSpec
+	for _, containerConfig := range apiConfig.Pod.Containers {
+		var env []kcore.EnvVar
+		for k, v := range containerConfig.Env { // NOTE(review): map iteration order is random, so env order varies between calls and can defeat DeepEqual on the spec
+			env = append(env, kcore.EnvVar{
+				Name:  k,
+				Value: v,
+			})
+		}
+
+		var compute *serverless.ComputeSpec
+		if containerConfig.Compute != nil {
+			var cpu *kresource.Quantity
+			if containerConfig.Compute.CPU != nil {
+				cpu = &containerConfig.Compute.CPU.Quantity
+			}
+			var mem *kresource.Quantity
+			if containerConfig.Compute.Mem != nil {
+				mem = &containerConfig.Compute.Mem.Quantity
+			}
+			var shm *kresource.Quantity
+			if containerConfig.Compute.Shm != nil {
+				shm = &containerConfig.Compute.Shm.Quantity
+			}
+
+			compute = &serverless.ComputeSpec{
+				CPU: cpu,
+				GPU: containerConfig.Compute.GPU,
+				Inf: containerConfig.Compute.Inf,
+				Mem: mem,
+				Shm: shm,
+			}
+		}
+
+		container := serverless.ContainerSpec{
+			Name:           containerConfig.Name,
+			Image:          containerConfig.Image,
+			Command:        containerConfig.Command,
+			Args:           containerConfig.Args,
+			Env:            env,
+			Compute:        compute,
+			ReadinessProbe: workloads.GetProbeSpec(containerConfig.ReadinessProbe),
+			LivenessProbe:  workloads.GetProbeSpec(containerConfig.LivenessProbe),
+		}
+
+		containers = append(containers, container)
+	}
+
+	api := serverless.RealtimeAPI{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      apiConfig.Name,
+			Namespace: consts.DefaultNamespace,
+		},
+		Spec: serverless.RealtimeAPISpec{
+			Pod: serverless.PodSpec{
+				Port:           *apiConfig.Pod.Port,
+				MaxConcurrency: int32(apiConfig.Pod.MaxConcurrency),
+				MaxQueueLength: int32(apiConfig.Pod.MaxQueueLength),
+				Replicas:       apiConfig.Autoscaling.InitReplicas,
+				Containers:     containers,
+			},
+			Autoscaling: serverless.AutoscalingSpec{
+				MinReplicas:                  apiConfig.Autoscaling.MinReplicas,
+				MaxReplicas:                  apiConfig.Autoscaling.MaxReplicas,
+				TargetInFlight:               fmt.Sprintf("%f", *apiConfig.Autoscaling.TargetInFlight),
+				Window:                       kmeta.Duration{Duration: apiConfig.Autoscaling.Window},
+				DownscaleStabilizationPeriod: kmeta.Duration{Duration: apiConfig.Autoscaling.DownscaleStabilizationPeriod},
+				UpscaleStabilizationPeriod:   kmeta.Duration{Duration: apiConfig.Autoscaling.UpscaleStabilizationPeriod},
+				MaxDownscaleFactor:           fmt.Sprintf("%f", apiConfig.Autoscaling.MaxDownscaleFactor),
+				MaxUpscaleFactor:             fmt.Sprintf("%f", apiConfig.Autoscaling.MaxUpscaleFactor),
+				DownscaleTolerance:           fmt.Sprintf("%f", apiConfig.Autoscaling.DownscaleTolerance),
+				UpscaleTolerance:             fmt.Sprintf("%f", apiConfig.Autoscaling.UpscaleTolerance),
+			},
+			NodeGroups: apiConfig.NodeGroups,
+			UpdateStrategy: serverless.UpdateStratagySpec{
+				MaxSurge:       intstr.Parse(apiConfig.UpdateStrategy.MaxSurge),       // numeric input becomes an int value; "25%" stays a string
+				MaxUnavailable: intstr.Parse(apiConfig.UpdateStrategy.MaxUnavailable), // same int-or-percent handling as MaxSurge
+			},
+			Networking: serverless.NetworkingSpec{
+				Endpoint: *apiConfig.Networking.Endpoint,
+			},
+		},
+	}
+	return api
+}

From 9feb4e765f5359b3c6dc96c58c97668348047573 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Sun, 25 Jul 2021 12:37:43 +0200
Subject: [PATCH 20/42] Implementation of get, refresh and delete operations
 for realtime apis in the cortex operator

---
 pkg/operator/endpoints/logs.go                |   2 +-
 pkg/operator/resources/realtimeapi/api.go     | 344 ++++++------------
 .../resources/realtimeapi/k8s_specs.go        | 182 ---------
 pkg/operator/resources/realtimeapi/status.go  | 174 ---------
 pkg/operator/resources/resources.go           |  12 +-
 5 files changed, 110 insertions(+), 604 deletions(-)
 delete mode 100644 pkg/operator/resources/realtimeapi/k8s_specs.go
 delete mode 100644 pkg/operator/resources/realtimeapi/status.go

diff --git a/pkg/operator/endpoints/logs.go b/pkg/operator/endpoints/logs.go
index 2d335e27da..dbe10828b1 100644
--- a/pkg/operator/endpoints/logs.go
+++ b/pkg/operator/endpoints/logs.go
@@ -107,7 +107,7 @@ func GetLogURL(w http.ResponseWriter, r *http.Request) {
 			LogURL: logURL,
 		})
 	case userconfig.RealtimeAPIKind:
-		apiResponse, err := realtimeapi.GetAPIByName(deployedResource)
+		apiResponse, err := realtimeapi.GetAPIByName(apiName)
 		if err != nil {
 			respondError(w, r, err)
 			return
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 84e24bf4b8..9405235faf 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -19,28 +19,27 @@ package realtimeapi
 import (
 	"context"
 	"fmt"
-	"path/filepath"
 	"reflect"
+	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
+	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
-	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
-	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	kresource "k8s.io/apimachinery/pkg/api/resource"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	ktypes "k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
@@ -74,6 +73,8 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	desiredAPI := APIConfigToK8sResource(*apiConfig)
 	if !reflect.DeepEqual(api.Spec, desiredAPI.Spec) || force {
 		api.Spec = desiredAPI.Spec
+		api.Annotations["cortex.dev/last-updated"] = s.Int64(time.Now().Unix())
+
 		if err = config.K8s.Update(ctx, &api); err != nil {
 			return nil, "", errors.Wrap(err, "failed to update realtime api resource")
 		}
@@ -83,281 +84,148 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	return apiSpec, fmt.Sprintf("%s is up to date", apiConfig.Resource.UserString()), nil
 }
 
-func RefreshAPI(apiName string, force bool) (string, error) {
-	prevDeployment, prevService, prevVirtualService, err := getK8sResources(apiName)
-	if err != nil {
-		return "", err
-	} else if prevDeployment == nil || prevVirtualService == nil {
-		return "", errors.ErrorUnexpected("unable to find deployment", apiName)
-	}
-
-	isUpdating, err := isAPIUpdating(prevDeployment)
-	if err != nil {
-		return "", err
-	}
-
-	if isUpdating && !force {
-		return "", ErrorAPIUpdating(apiName)
-	}
-
-	apiID, err := k8s.GetLabel(prevDeployment, "apiID")
-	if err != nil {
-		return "", err
-	}
-
-	api, err := operator.DownloadAPISpec(apiName, apiID)
-	if err != nil {
-		return "", err
-	}
-
-	initialDeploymentTime, err := k8s.ParseInt64Label(prevVirtualService, "initialDeploymentTime")
-	if err != nil {
-		return "", err
+func RefreshAPI(apiName string) (string, error) {
+	ctx := context.Background()
+	api := serverless.RealtimeAPI{
+		ObjectMeta: kmeta.ObjectMeta{
+			Namespace: consts.DefaultNamespace,
+			Name:      apiName,
+		},
 	}
 
-	api = spec.GetAPISpec(api.API, initialDeploymentTime, generateDeploymentID(), config.ClusterConfig.ClusterUID)
-
-	if err := config.AWS.UploadJSONToS3(api, config.ClusterConfig.Bucket, api.Key); err != nil {
-		return "", errors.Wrap(err, "upload api spec")
+	// set a fresh deployment-id annotation via JSON patch; the "/" in the annotation key is escaped as "~1"
+	patch := []byte(fmt.Sprintf(
+		"[{\"op\": \"replace\", \"path\": \"/metadata/annotations/cortex.dev~1deployment-id\", \"value\": \"%s\" }]",
+		generateDeploymentID()))
+	if err := config.K8s.Patch(ctx, &api, client.RawPatch(ktypes.JSONPatchType, patch)); err != nil {
+		return "", errors.Wrap(err, "failed to patch realtime api resource")
 	}
 
-	if err := applyK8sResources(api, prevDeployment, prevService, prevVirtualService); err != nil {
-		return "", err
+	apiResource := userconfig.Resource{
+		Name: apiName,
+		Kind: userconfig.RealtimeAPIKind,
 	}
 
-	return fmt.Sprintf("updating %s", api.Resource.UserString()), nil
+	return fmt.Sprintf("updating %s", apiResource.UserString()), nil
 }
 
 func DeleteAPI(apiName string, keepCache bool) error {
-	err := parallel.RunFirstErr(
-		func() error {
-			return deleteK8sResources(apiName)
-		},
-		func() error {
-			if keepCache {
-				return nil
-			}
-			// best effort deletion, swallow errors because there could be weird error messages
-			_ = deleteBucketResources(apiName)
-			return nil
+	ctx := context.Background()
+	api := serverless.RealtimeAPI{
+		ObjectMeta: kmeta.ObjectMeta{
+			Name:      apiName,
+			Namespace: consts.DefaultNamespace,
 		},
-	)
-
-	if err != nil {
-		return err
+	}
+	if err := config.K8s.Delete(ctx, &api); err != nil {
+		return errors.Wrap(err, "failed to delete realtime api resource")
 	}
 
+	// TODO: delete bucket resources (?)
+
 	return nil
 }
 
-func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments, pods)
-	if err != nil {
-		return nil, err
-	}
-
-	apiNames, apiIDs := namesAndIDsFromStatuses(statuses)
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
+func GetAllAPIs() ([]schema.APIResponse, error) {
+	ctx := context.Background()
+	apis := serverless.RealtimeAPIList{}
+	if err := config.K8s.List(ctx, &apis); err != nil {
+		return nil, errors.Wrap(err, "failed to list realtime api resources")
 	}
 
-	realtimeAPIs := make([]schema.APIResponse, len(apis))
-
-	for i := range apis {
-		api := apis[i]
-		endpoint, err := operator.APIEndpoint(&api)
-		if err != nil {
-			return nil, err
+	realtimeAPIs := make([]schema.APIResponse, len(apis.Items))
+	for i := range apis.Items {
+		api := apis.Items[i]
+		api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
+
+		lastUpdated := api.CreationTimestamp.Unix()
+		if api.Annotations["cortex.dev/last-updated"] != "" {
+			var ok bool
+			lastUpdated, ok = s.ParseInt64(api.Annotations["cortex.dev/last-updated"])
+			if !ok {
+				return nil, errors.ErrorUnexpected("failed to parse 'cortex.dev/last-updated' annotation")
+			}
 		}
 
 		realtimeAPIs[i] = schema.APIResponse{
-			Spec:     api,
-			Status:   &statuses[i],
-			Endpoint: endpoint,
+			Spec: spec.API{
+				API: &userconfig.API{
+					Resource: userconfig.Resource{
+						Name: api.Name,
+						Kind: userconfig.RealtimeAPIKind,
+					},
+				},
+				LastUpdated:           lastUpdated,
+				InitialDeploymentTime: api.CreationTimestamp.Unix(),
+			},
+			Status: &status.Status{
+				APIName:       api.Name,
+				APIID:         api.Annotations["cortex.dev/api-id"],
+				Code:          api.Status.Status,
+				ReplicaCounts: api.Status.ReplicaCounts,
+			},
+			Endpoint: api.Status.Endpoint,
 		}
 	}
 
 	return realtimeAPIs, nil
 }
 
-func namesAndIDsFromStatuses(statuses []status.Status) ([]string, []string) {
-	apiNames := make([]string, len(statuses))
-	apiIDs := make([]string, len(statuses))
+func GetAPIByName(apiName string) ([]schema.APIResponse, error) {
+	ctx := context.Background()
 
-	for i, st := range statuses {
-		apiNames[i] = st.APIName
-		apiIDs[i] = st.APIID
+	api := serverless.RealtimeAPI{}
+	key := client.ObjectKey{Namespace: consts.DefaultNamespace, Name: apiName}
+	if err := config.K8s.Get(ctx, key, &api); err != nil {
+		return nil, errors.Wrap(err, "failed to get realtime api resource")
 	}
 
-	return apiNames, apiIDs
-}
+	// TODO: needs api id history
+	//api, err := operator.DownloadAPISpec(st.APIName, st.APIID)
+	//if err != nil {
+	//	return nil, err
+	//}
 
-func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	st, err := GetStatus(deployedResource.Name)
-	if err != nil {
-		return nil, err
-	}
-
-	api, err := operator.DownloadAPISpec(st.APIName, st.APIID)
-	if err != nil {
-		return nil, err
-	}
+	dashboardURL := pointer.String(getDashboardURL(api.Name))
 
-	apiEndpoint, err := operator.APIEndpoint(api)
-	if err != nil {
-		return nil, err
+	lastUpdated := api.CreationTimestamp.Unix()
+	if api.Annotations["cortex.dev/last-updated"] != "" {
+		var ok bool
+		lastUpdated, ok = s.ParseInt64(api.Annotations["cortex.dev/last-updated"])
+		if !ok {
+			return nil, errors.ErrorUnexpected("failed to parse 'cortex.dev/last-updated' annotation")
+		}
 	}
 
-	dashboardURL := pointer.String(getDashboardURL(api.Name))
+	api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
 
 	return []schema.APIResponse{
 		{
-			Spec:         *api,
-			Status:       st,
-			Endpoint:     apiEndpoint,
+			Spec: spec.API{
+				API: &userconfig.API{
+					Resource: userconfig.Resource{
+						Name: api.Name,
+						Kind: userconfig.RealtimeAPIKind,
+					},
+				},
+				ID:                    api.Annotations["cortex.dev/api-id"],
+				SpecID:                api.Annotations["cortex.dev/spec-id"],
+				DeploymentID:          api.Annotations["cortex.dev/deployment-id"],
+				InitialDeploymentTime: api.CreationTimestamp.Unix(),
+				LastUpdated:           lastUpdated,
+			},
+			Status: &status.Status{
+				APIName:       api.Name,
+				APIID:         api.Annotations["cortex.dev/api-id"],
+				Code:          api.Status.Status,
+				ReplicaCounts: api.Status.ReplicaCounts,
+			},
+			Endpoint:     api.Status.Endpoint,
 			DashboardURL: dashboardURL,
 		},
 	}, nil
 }
 
-func getK8sResources(apiName string) (*kapps.Deployment, *kcore.Service, *istioclientnetworking.VirtualService, error) {
-	var deployment *kapps.Deployment
-	var service *kcore.Service
-	var virtualService *istioclientnetworking.VirtualService
-
-	err := parallel.RunFirstErr(
-		func() error {
-			var err error
-			deployment, err = config.K8s.GetDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			service, err = config.K8s.GetService(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			virtualService, err = config.K8s.GetVirtualService(workloads.K8sName(apiName))
-			return err
-		},
-	)
-
-	return deployment, service, virtualService, err
-}
-
-func applyK8sResources(api *spec.API, prevDeployment *kapps.Deployment, prevService *kcore.Service, prevVirtualService *istioclientnetworking.VirtualService) error {
-	return parallel.RunFirstErr(
-		func() error {
-			return applyK8sDeployment(api, prevDeployment)
-		},
-		func() error {
-			return applyK8sService(api, prevService)
-		},
-		func() error {
-			return applyK8sVirtualService(api, prevVirtualService)
-		},
-	)
-}
-
-func applyK8sDeployment(api *spec.API, prevDeployment *kapps.Deployment) error {
-	newDeployment := deploymentSpec(api, prevDeployment)
-
-	if prevDeployment == nil {
-		_, err := config.K8s.CreateDeployment(newDeployment)
-		if err != nil {
-			return err
-		}
-	} else if prevDeployment.Status.ReadyReplicas == 0 {
-		// Delete deployment if it never became ready
-		_, _ = config.K8s.DeleteDeployment(workloads.K8sName(api.Name))
-		_, err := config.K8s.CreateDeployment(newDeployment)
-		if err != nil {
-			return err
-		}
-	} else {
-		_, err := config.K8s.UpdateDeployment(newDeployment)
-		if err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func applyK8sService(api *spec.API, prevService *kcore.Service) error {
-	newService := serviceSpec(api)
-
-	if prevService == nil {
-		_, err := config.K8s.CreateService(newService)
-		return err
-	}
-
-	_, err := config.K8s.UpdateService(prevService, newService)
-	return err
-}
-
-func applyK8sVirtualService(api *spec.API, prevVirtualService *istioclientnetworking.VirtualService) error {
-	newVirtualService := virtualServiceSpec(api)
-
-	if prevVirtualService == nil {
-		_, err := config.K8s.CreateVirtualService(newVirtualService)
-		return err
-	}
-
-	_, err := config.K8s.UpdateVirtualService(prevVirtualService, newVirtualService)
-	return err
-}
-
-func deleteK8sResources(apiName string) error {
-	return parallel.RunFirstErr(
-		func() error {
-			_, err := config.K8s.DeleteDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			_, err := config.K8s.DeleteService(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			_, err := config.K8s.DeleteVirtualService(workloads.K8sName(apiName))
-			return err
-		},
-	)
-}
-
-func deleteBucketResources(apiName string) error {
-	prefix := filepath.Join(config.ClusterConfig.ClusterUID, "apis", apiName)
-	return config.AWS.DeleteS3Dir(config.ClusterConfig.Bucket, prefix, true)
-}
-
-// returns true if min_replicas are not ready and no updated replicas have errored
-func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
-	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
-	if err != nil {
-		return false, err
-	}
-
-	replicaCounts := getReplicaCounts(deployment, pods)
-
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return false, err
-	}
-
-	if replicaCounts.Updated.Ready < autoscalingSpec.MinReplicas && replicaCounts.Updated.TotalFailed() == 0 {
-		return true, nil
-	}
-
-	return false, nil
-}
-
-func isPodSpecLatest(deployment *kapps.Deployment, pod *kcore.Pod) bool {
-	return deployment.Spec.Template.Labels["podID"] == pod.Labels["podID"] &&
-		deployment.Spec.Template.Labels["deploymentID"] == pod.Labels["deploymentID"]
-}
-
 func getDashboardURL(apiName string) string {
 	loadBalancerURL, err := operator.LoadBalancerURL()
 	if err != nil {
diff --git a/pkg/operator/resources/realtimeapi/k8s_specs.go b/pkg/operator/resources/realtimeapi/k8s_specs.go
deleted file mode 100644
index 7a6824d14d..0000000000
--- a/pkg/operator/resources/realtimeapi/k8s_specs.go
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
-Copyright 2021 Cortex Labs, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package realtimeapi
-
-import (
-	"fmt"
-
-	"github.com/cortexlabs/cortex/pkg/consts"
-	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/pointer"
-	s "github.com/cortexlabs/cortex/pkg/lib/strings"
-	"github.com/cortexlabs/cortex/pkg/types/spec"
-	"github.com/cortexlabs/cortex/pkg/workloads"
-	istionetworking "istio.io/api/networking/v1beta1"
-	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
-	kapps "k8s.io/api/apps/v1"
-	kcore "k8s.io/api/core/v1"
-)
-
-var _terminationGracePeriodSeconds int64 = 60 // seconds
-
-func deploymentSpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
-	containers, volumes := workloads.RealtimeContainers(*api)
-
-	return k8s.Deployment(&k8s.DeploymentSpec{
-		Name:           workloads.K8sName(api.Name),
-		Replicas:       getRequestedReplicasFromDeployment(*api, prevDeployment),
-		MaxSurge:       pointer.String(api.UpdateStrategy.MaxSurge),
-		MaxUnavailable: pointer.String(api.UpdateStrategy.MaxUnavailable),
-		Labels: map[string]string{
-			"apiName":               api.Name,
-			"apiKind":               api.Kind.String(),
-			"apiID":                 api.ID,
-			"specID":                api.SpecID,
-			"initialDeploymentTime": s.Int64(api.InitialDeploymentTime),
-			"deploymentID":          api.DeploymentID,
-			"podID":                 api.PodID,
-			"cortex.dev/api":        "true",
-		},
-		Annotations: api.ToK8sAnnotations(),
-		Selector: map[string]string{
-			"apiName": api.Name,
-			"apiKind": api.Kind.String(),
-		},
-		PodSpec: k8s.PodSpec{
-			Labels: map[string]string{
-				"apiName":               api.Name,
-				"apiKind":               api.Kind.String(),
-				"initialDeploymentTime": s.Int64(api.InitialDeploymentTime),
-				"deploymentID":          api.DeploymentID,
-				"podID":                 api.PodID,
-				"cortex.dev/api":        "true",
-			},
-			Annotations: map[string]string{
-				"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
-			},
-			K8sPodSpec: kcore.PodSpec{
-				RestartPolicy:                 "Always",
-				TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
-				Containers:                    containers,
-				NodeSelector:                  workloads.NodeSelectors(),
-				Tolerations:                   workloads.GenerateResourceTolerations(),
-				Affinity:                      workloads.GenerateNodeAffinities(api.NodeGroups),
-				Volumes:                       volumes,
-				ServiceAccountName:            workloads.ServiceAccountName,
-			},
-		},
-	})
-}
-
-func serviceSpec(api *spec.API) *kcore.Service {
-	return k8s.Service(&k8s.ServiceSpec{
-		Name:        workloads.K8sName(api.Name),
-		PortName:    "http",
-		Port:        consts.ProxyPortInt32,
-		TargetPort:  consts.ProxyPortInt32,
-		Annotations: api.ToK8sAnnotations(),
-		Labels: map[string]string{
-			"apiName":        api.Name,
-			"apiKind":        api.Kind.String(),
-			"cortex.dev/api": "true",
-		},
-		Selector: map[string]string{
-			"apiName": api.Name,
-			"apiKind": api.Kind.String(),
-		},
-	})
-}
-
-func virtualServiceSpec(api *spec.API) *istioclientnetworking.VirtualService {
-	var activatorWeight int32
-	if api.Autoscaling.InitReplicas == 0 {
-		activatorWeight = 100
-	}
-
-	return k8s.VirtualService(&k8s.VirtualServiceSpec{
-		Name:     workloads.K8sName(api.Name),
-		Gateways: []string{"apis-gateway"},
-		Destinations: []k8s.Destination{
-			{
-				ServiceName: workloads.K8sName(api.Name),
-				Weight:      100 - activatorWeight,
-				Port:        uint32(consts.ProxyPortInt32),
-				Headers: &istionetworking.Headers{
-					Response: &istionetworking.Headers_HeaderOperations{
-						Set: map[string]string{
-							consts.CortexOriginHeader: "api",
-						},
-					},
-				},
-			},
-			{
-				ServiceName: consts.ActivatorName,
-				Weight:      activatorWeight,
-				Port:        uint32(consts.ActivatorPortInt32),
-				Headers: &istionetworking.Headers{
-					Request: &istionetworking.Headers_HeaderOperations{
-						Set: map[string]string{
-							consts.CortexAPINameHeader: api.Name,
-							consts.CortexTargetServiceHeader: fmt.Sprintf(
-								"http://%s.%s:%d",
-								workloads.K8sName(api.Name),
-								consts.DefaultNamespace,
-								consts.ProxyPortInt32,
-							),
-						},
-					},
-					Response: &istionetworking.Headers_HeaderOperations{
-						Set: map[string]string{
-							consts.CortexOriginHeader: consts.ActivatorName,
-						},
-					},
-				},
-			},
-		},
-		PrefixPath:  api.Networking.Endpoint,
-		Rewrite:     pointer.String("/"),
-		Annotations: api.ToK8sAnnotations(),
-		Labels: map[string]string{
-			"apiName":               api.Name,
-			"apiKind":               api.Kind.String(),
-			"apiID":                 api.ID,
-			"specID":                api.SpecID,
-			"initialDeploymentTime": s.Int64(api.InitialDeploymentTime),
-			"deploymentID":          api.DeploymentID,
-			"podID":                 api.PodID,
-			"cortex.dev/api":        "true",
-		},
-	})
-}
-
-func getRequestedReplicasFromDeployment(api spec.API, deployment *kapps.Deployment) int32 {
-	requestedReplicas := api.Autoscaling.InitReplicas
-
-	if deployment != nil && deployment.Spec.Replicas != nil && *deployment.Spec.Replicas > 0 {
-		requestedReplicas = *deployment.Spec.Replicas
-	}
-
-	if requestedReplicas < api.Autoscaling.MinReplicas {
-		requestedReplicas = api.Autoscaling.MinReplicas
-	}
-
-	if requestedReplicas > api.Autoscaling.MaxReplicas {
-		requestedReplicas = api.Autoscaling.MaxReplicas
-	}
-
-	return requestedReplicas
-}
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
deleted file mode 100644
index a65716f35c..0000000000
--- a/pkg/operator/resources/realtimeapi/status.go
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
-Copyright 2021 Cortex Labs, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package realtimeapi
-
-import (
-	"sort"
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/config"
-	"github.com/cortexlabs/cortex/pkg/consts"
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
-	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/parallel"
-	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/types/userconfig"
-	"github.com/cortexlabs/cortex/pkg/workloads"
-	kapps "k8s.io/api/apps/v1"
-	kcore "k8s.io/api/core/v1"
-)
-
-func GetStatus(apiName string) (*status.Status, error) {
-	var deployment *kapps.Deployment
-	var pods []kcore.Pod
-
-	err := parallel.RunFirstErr(
-		func() error {
-			var err error
-			deployment, err = config.K8s.GetDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			pods, err = config.K8s.ListPodsByLabel("apiName", apiName)
-			return err
-		},
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	if deployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find deployment", apiName)
-	}
-
-	return apiStatus(deployment, pods)
-}
-
-func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.Status, error) {
-	statuses := make([]status.Status, len(deployments))
-	for i := range deployments {
-		st, err := apiStatus(&deployments[i], pods)
-		if err != nil {
-			return nil, err
-		}
-		statuses[i] = *st
-	}
-
-	sort.Slice(statuses, func(i, j int) bool {
-		return statuses[i].APIName < statuses[j].APIName
-	})
-
-	return statuses, nil
-}
-
-func apiStatus(deployment *kapps.Deployment, allPods []kcore.Pod) (*status.Status, error) {
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return nil, err
-	}
-
-	status := &status.Status{}
-	status.APIName = deployment.Labels["apiName"]
-	status.APIID = deployment.Labels["apiID"]
-	status.ReplicaCounts = getReplicaCounts(deployment, allPods)
-	status.Code = getStatusCode(&status.ReplicaCounts, autoscalingSpec.MinReplicas)
-
-	return status, nil
-}
-
-func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
-	counts := status.ReplicaCounts{}
-	counts.Requested = *deployment.Spec.Replicas
-
-	for i := range pods {
-		pod := pods[i]
-		if pod.Labels["apiName"] != deployment.Labels["apiName"] {
-			continue
-		}
-		addPodToReplicaCounts(&pods[i], deployment, &counts)
-	}
-
-	return counts
-}
-
-func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts *status.ReplicaCounts) {
-	var subCounts *status.SubReplicaCounts
-	if isPodSpecLatest(deployment, pod) {
-		subCounts = &counts.Updated
-	} else {
-		subCounts = &counts.Stale
-	}
-
-	if k8s.IsPodReady(pod) {
-		subCounts.Ready++
-		return
-	}
-
-	switch k8s.GetPodStatus(pod) {
-	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			subCounts.Stalled++
-		} else {
-			subCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		subCounts.Initializing++
-	case k8s.PodStatusRunning:
-		subCounts.Initializing++
-	case k8s.PodStatusErrImagePull:
-		subCounts.ErrImagePull++
-	case k8s.PodStatusTerminating:
-		subCounts.Terminating++
-	case k8s.PodStatusFailed:
-		subCounts.Failed++
-	case k8s.PodStatusKilled:
-		subCounts.Killed++
-	case k8s.PodStatusKilledOOM:
-		subCounts.KilledOOM++
-	default:
-		subCounts.Unknown++
-	}
-}
-
-func getStatusCode(counts *status.ReplicaCounts, minReplicas int32) status.Code {
-	if counts.Updated.Ready >= counts.Requested {
-		return status.Live
-	}
-
-	if counts.Updated.ErrImagePull > 0 {
-		return status.ErrorImagePull
-	}
-
-	if counts.Updated.Failed > 0 || counts.Updated.Killed > 0 {
-		return status.Error
-	}
-
-	if counts.Updated.KilledOOM > 0 {
-		return status.OOM
-	}
-
-	if counts.Updated.Stalled > 0 {
-		return status.Stalled
-	}
-
-	if counts.Updated.Ready >= minReplicas {
-		return status.Live
-	}
-
-	return status.Updating
-}
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 87069c2136..6eeb95b3b0 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -174,7 +174,7 @@ func RefreshAPI(apiName string, force bool) (string, error) {
 
 	switch deployedResource.Kind {
 	case userconfig.RealtimeAPIKind:
-		return realtimeapi.RefreshAPI(apiName, force)
+		return realtimeapi.RefreshAPI(apiName)
 	case userconfig.AsyncAPIKind:
 		return asyncapi.RefreshAPI(apiName, force)
 	default:
@@ -297,25 +297,19 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	var realtimeAPIDeployments []kapps.Deployment
 	var asyncAPIDeployments []kapps.Deployment
 	for _, deployment := range deployments {
 		switch deployment.Labels["apiKind"] {
-		case userconfig.RealtimeAPIKind.String():
-			realtimeAPIDeployments = append(realtimeAPIDeployments, deployment)
 		case userconfig.AsyncAPIKind.String():
 			asyncAPIDeployments = append(asyncAPIDeployments, deployment)
 		}
 	}
 
-	var realtimeAPIPods []kcore.Pod
 	var batchAPIPods []kcore.Pod
 	var taskAPIPods []kcore.Pod
 	var asyncAPIPods []kcore.Pod
 	for _, pod := range pods {
 		switch pod.Labels["apiKind"] {
-		case userconfig.RealtimeAPIKind.String():
-			realtimeAPIPods = append(realtimeAPIPods, pod)
 		case userconfig.BatchAPIKind.String():
 			batchAPIPods = append(batchAPIPods, pod)
 		case userconfig.TaskAPIKind.String():
@@ -340,7 +334,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIPods, realtimeAPIDeployments)
+	realtimeAPIList, err := realtimeapi.GetAllAPIs()
 	if err != nil {
 		return nil, err
 	}
@@ -387,7 +381,7 @@ func GetAPI(apiName string) ([]schema.APIResponse, error) {
 
 	switch deployedResource.Kind {
 	case userconfig.RealtimeAPIKind:
-		apiResponse, err = realtimeapi.GetAPIByName(deployedResource)
+		apiResponse, err = realtimeapi.GetAPIByName(apiName)
 		if err != nil {
 			return nil, err
 		}

From 532c8f25b318b92dbbc8f4b22ce5e715346243e6 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Sun, 25 Jul 2021 12:47:57 +0200
Subject: [PATCH 21/42] Fix linting errors

---
 pkg/operator/resources/realtimeapi/api.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 9405235faf..76dcafb517 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -242,7 +242,8 @@ func getDashboardURL(apiName string) string {
 
 func APIConfigToK8sResource(apiConfig userconfig.API) serverless.RealtimeAPI {
 	var containers []serverless.ContainerSpec
-	for _, containerConfig := range apiConfig.Pod.Containers {
+	for i := range apiConfig.Pod.Containers {
+		containerConfig := apiConfig.Pod.Containers[i]
 		var env []kcore.EnvVar
 		for k, v := range containerConfig.Env {
 			env = append(env, kcore.EnvVar{

From 7c7afa06ef74b1fe0c1ad5aafe3fb3ce11be6467 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Mon, 26 Jul 2021 13:48:12 +0200
Subject: [PATCH 22/42] Fix typo in UpdateStrategySpec struct

---
 pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go    | 4 ++--
 .../apis/serverless/v1alpha1/zz_generated.deepcopy.go     | 8 ++++----
 pkg/operator/resources/realtimeapi/api.go                 | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 66749debe3..59403ca18e 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -43,7 +43,7 @@ type RealtimeAPISpec struct {
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default={"max_surge": "25%", "max_unavailable": "25%"}
 	// Deployment strategy to use when replacing existing replicas with new ones
-	UpdateStrategy UpdateStratagySpec `json:"update_strategy"`
+	UpdateStrategy UpdateStrategySpec `json:"update_strategy"`
 
 	// +kubebuilder:validation:Required
 	// Networking configuration
@@ -187,7 +187,7 @@ type AutoscalingSpec struct {
 	UpscaleTolerance string `json:"upscale_tolerance,omitempty"`
 }
 
-type UpdateStratagySpec struct {
+type UpdateStrategySpec struct {
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default="25%"
 	// Maximum number of replicas that can be scheduled above the desired number of replicas during an update;
diff --git a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
index 030c91042f..df2ba5ab3d 100644
--- a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
@@ -257,18 +257,18 @@ func (in *RealtimeAPIStatus) DeepCopy() *RealtimeAPIStatus {
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *UpdateStratagySpec) DeepCopyInto(out *UpdateStratagySpec) {
+func (in *UpdateStrategySpec) DeepCopyInto(out *UpdateStrategySpec) {
 	*out = *in
 	out.MaxSurge = in.MaxSurge
 	out.MaxUnavailable = in.MaxUnavailable
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateStratagySpec.
-func (in *UpdateStratagySpec) DeepCopy() *UpdateStratagySpec {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateStrategySpec.
+func (in *UpdateStrategySpec) DeepCopy() *UpdateStrategySpec {
 	if in == nil {
 		return nil
 	}
-	out := new(UpdateStratagySpec)
+	out := new(UpdateStrategySpec)
 	in.DeepCopyInto(out)
 	return out
 }
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 76dcafb517..7815fa4f31 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -316,7 +316,7 @@ func APIConfigToK8sResource(apiConfig userconfig.API) serverless.RealtimeAPI {
 				UpscaleTolerance:             fmt.Sprintf("%f", apiConfig.Autoscaling.UpscaleTolerance),
 			},
 			NodeGroups: apiConfig.NodeGroups,
-			UpdateStrategy: serverless.UpdateStratagySpec{
+			UpdateStrategy: serverless.UpdateStrategySpec{
 				MaxSurge:       intstr.FromString(apiConfig.UpdateStrategy.MaxSurge),
 				MaxUnavailable: intstr.FromString(apiConfig.UpdateStrategy.MaxUnavailable),
 			},

From 424cba79b428235e5f2b1c6d8c95057fb9d3e910 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Mon, 26 Jul 2021 13:49:04 +0200
Subject: [PATCH 23/42] Declare slice size when known beforehand

---
 pkg/operator/resources/realtimeapi/api.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 7815fa4f31..1e0b1ca327 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -109,7 +109,7 @@ func RefreshAPI(apiName string) (string, error) {
 	return fmt.Sprintf("updating %s", apiResource.UserString()), nil
 }
 
-func DeleteAPI(apiName string, keepCache bool) error {
+func DeleteAPI(apiName string, _ bool) error {
 	ctx := context.Background()
 	api := serverless.RealtimeAPI{
 		ObjectMeta: kmeta.ObjectMeta{
@@ -241,7 +241,7 @@ func getDashboardURL(apiName string) string {
 }
 
 func APIConfigToK8sResource(apiConfig userconfig.API) serverless.RealtimeAPI {
-	var containers []serverless.ContainerSpec
+	containers := make([]serverless.ContainerSpec, len(apiConfig.Pod.Containers))
 	for i := range apiConfig.Pod.Containers {
 		containerConfig := apiConfig.Pod.Containers[i]
 		var env []kcore.EnvVar

From b8d8b6a474a66b1576e493620a12e55606f25108 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 27 Jul 2021 12:15:21 +0200
Subject: [PATCH 24/42] Rename helper function

---
 pkg/operator/resources/realtimeapi/api.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 1e0b1ca327..6fa7cb452d 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -60,7 +60,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	if err != nil {
 		if kerrors.IsNotFound(err) {
 			if kerrors.IsNotFound(err) {
-				api := APIConfigToK8sResource(*apiConfig)
+				api = K8sResourceFromAPIConfig(*apiConfig)
 				if err = config.K8s.Create(ctx, &api); err != nil {
 					return nil, "", errors.Wrap(err, "failed to create realtime api resource")
 				}
@@ -70,7 +70,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 		return nil, "", errors.Wrap(err, "failed to get realtime api resource")
 	}
 
-	desiredAPI := APIConfigToK8sResource(*apiConfig)
+	desiredAPI := K8sResourceFromAPIConfig(*apiConfig)
 	if !reflect.DeepEqual(api.Spec, desiredAPI.Spec) || force {
 		api.Spec = desiredAPI.Spec
 		api.Annotations["cortex.dev/last-updated"] = s.Int64(time.Now().Unix())
@@ -240,7 +240,8 @@ func getDashboardURL(apiName string) string {
 	return dashboardURL
 }
 
-func APIConfigToK8sResource(apiConfig userconfig.API) serverless.RealtimeAPI {
+// K8sResourceFromAPIConfig converts a cortex API config into a realtime API CRD resource
+func K8sResourceFromAPIConfig(apiConfig userconfig.API) serverless.RealtimeAPI {
 	containers := make([]serverless.ContainerSpec, len(apiConfig.Pod.Containers))
 	for i := range apiConfig.Pod.Containers {
 		containerConfig := apiConfig.Pod.Containers[i]

From 39ddc6b9f8fce632c59b9a77c60b082b1c6ecf5c Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Tue, 27 Jul 2021 19:15:04 +0200
Subject: [PATCH 25/42] Fix `cortex get` behaviour by uploading specs to S3

---
 .../serverless/v1alpha1/realtimeapi_types.go  |  53 ++++-
 .../serverless/realtimeapi_controller.go      |   5 +-
 .../realtimeapi_controller_helpers.go         |  57 +----
 pkg/crds/controllers/serverless/suite_test.go |   3 +-
 pkg/operator/resources/realtimeapi/api.go     | 198 ++++++++++--------
 pkg/operator/resources/realtimeapi/errors.go  |  34 ---
 6 files changed, 176 insertions(+), 174 deletions(-)
 delete mode 100644 pkg/operator/resources/realtimeapi/errors.go

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 59403ca18e..d3247de9b8 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -17,11 +17,18 @@ limitations under the License.
 package v1alpha1
 
 import (
+	"bytes"
+	"fmt"
+
+	"github.com/cortexlabs/cortex/pkg/lib/hash"
+	"github.com/cortexlabs/cortex/pkg/lib/k8s"
+	s "github.com/cortexlabs/cortex/pkg/lib/strings"
+	"github.com/cortexlabs/cortex/pkg/types/spec"
 	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	kcore "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
 )
 
@@ -225,20 +232,54 @@ type RealtimeAPIStatus struct {
 
 // RealtimeAPI is the Schema for the realtimeapis API
 type RealtimeAPI struct {
-	metav1.TypeMeta   `json:",inline"`
-	metav1.ObjectMeta `json:"metadata,omitempty"`
+	kmeta.TypeMeta   `json:",inline"`
+	kmeta.ObjectMeta `json:"metadata,omitempty"`
 
 	Spec   RealtimeAPISpec   `json:"spec,omitempty"`
 	Status RealtimeAPIStatus `json:"status,omitempty"`
 }
 
+// GetOrCreateAPIIDs reuses the deployment and api ids stored in the annotations when still valid (generating new ones otherwise) and always recomputes the pod and spec ids by hashing the api's name, kind and spec
+func (api RealtimeAPI) GetOrCreateAPIIDs() (deploymentID, podID, specID, apiID string) {
+	deploymentID = api.Annotations["cortex.dev/deployment-id"]
+	if deploymentID == "" {
+		deploymentID = k8s.RandomName()[:10]
+	}
+
+	var buf bytes.Buffer
+	// pod id: hash of the api name, the api kind and the pod spec; NOTE(review): api.Name is written twice, confirm this is intended (dropping one write changes every pod/spec id)
+	buf.WriteString(api.Name)
+	buf.WriteString(api.Name)
+	buf.WriteString(userconfig.RealtimeAPIKind.String())
+	buf.WriteString(s.Obj(api.Spec.Pod))
+	podID = hash.Bytes(buf.Bytes())
+
+	buf.Reset()
+	buf.WriteString(podID)
+	buf.WriteString(s.Obj(api.Spec.Networking))
+	buf.WriteString(s.Obj(api.Spec.Autoscaling))
+	buf.WriteString(s.Obj(api.Spec.NodeGroups))
+	buf.WriteString(s.Obj(api.Spec.UpdateStrategy))
+	specID = hash.Bytes(buf.Bytes())[:32]
+
+	apiID = api.Annotations["cortex.dev/api-id"]
+	if apiID == "" ||
+		api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
+		api.Annotations["cortex.dev/spec-id"] != specID {
+		// no api id stored yet, or the stored deployment/spec id annotations differ from the current ones: build a new api id
+		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
+	}
+
+	return deploymentID, podID, specID, apiID
+}
+
 //+kubebuilder:object:root=true
 
 // RealtimeAPIList contains a list of RealtimeAPI
 type RealtimeAPIList struct {
-	metav1.TypeMeta `json:",inline"`
-	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []RealtimeAPI `json:"items"`
+	kmeta.TypeMeta `json:",inline"`
+	kmeta.ListMeta `json:"metadata,omitempty"`
+	Items          []RealtimeAPI `json:"items"`
 }
 
 func init() {
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index fa970708c1..dab7d0e7a4 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -85,7 +85,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// Step 3: Get or create deployment and API ids
-	deploymentID, specID, apiID := r.getOrCreateAPIIDs(api)
+	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
 	idsOutdated := api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
 		api.Annotations["cortex.dev/spec-id"] != specID ||
 		api.Annotations["cortex.dev/api-id"] != apiID
@@ -100,7 +100,8 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	if api.Annotations["cortex.dev/spec-id"] != specID {
-		log.V(1).Info("updating spec id annotation")
+		log.V(1).Info("updating pod and spec id annotations")
+		api.Annotations["cortex.dev/pod-id"] = podID
 		api.Annotations["cortex.dev/spec-id"] = specID
 	}
 
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 41316512bc..047ef42a43 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -17,7 +17,6 @@ limitations under the License.
 package serverlesscontroller
 
 import (
-	"bytes"
 	"context"
 	"fmt"
 	"time"
@@ -25,13 +24,11 @@ import (
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
-	"github.com/cortexlabs/cortex/pkg/lib/hash"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/maps"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/lib/urls"
-	"github.com/cortexlabs/cortex/pkg/types/spec"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
@@ -247,7 +244,7 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless
 	}
 
 	ingress := svc.Status.LoadBalancer.Ingress
-	if ingress == nil || len(ingress) == 0 {
+	if len(ingress) == 0 {
 		return "", nil
 	}
 
@@ -261,7 +258,6 @@ func (r *RealtimeAPIReconciler) getEndpoint(ctx context.Context, api *serverless
 
 func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) kapps.Deployment {
 	containers, volumes := r.desiredContainers(api)
-	deploymentID, _, apiID := r.getOrCreateAPIIDs(api)
 
 	return *k8s.Deployment(&k8s.DeploymentSpec{
 		Name:           workloads.K8sName(api.Name),
@@ -271,8 +267,8 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
-			"apiID":          apiID,
-			"deploymentID":   deploymentID,
+			"apiID":          api.Annotations["cortex.dev/api-id"],
+			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 			"cortex.dev/api": "true",
 		},
 		Annotations: r.generateAPIAnnotations(api),
@@ -284,7 +280,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 			Labels: map[string]string{
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
-				"deploymentID":   deploymentID,
+				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 				"cortex.dev/api": "true",
 			},
 			Annotations: map[string]string{
@@ -339,8 +335,6 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 		activatorWeight = 100
 	}
 
-	deploymentID, _, apiID := r.getOrCreateAPIIDs(api)
-
 	return *k8s.VirtualService(&k8s.VirtualServiceSpec{
 		Name:     workloads.K8sName(api.Name),
 		Gateways: []string{"apis-gateway"},
@@ -387,8 +381,8 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 		Labels: map[string]string{
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
-			"apiID":          apiID,
-			"deploymentID":   deploymentID,
+			"apiID":          api.Annotations["cortex.dev/api-id"],
+			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 			"cortex.dev/api": "true",
 		},
 	})
@@ -406,8 +400,8 @@ func (r *RealtimeAPIReconciler) userContainers(api serverless.RealtimeAPI) ([]kc
 		workloads.ClientConfigMount(),
 	}
 
-	var containers []kcore.Container
-	for _, container := range api.Spec.Pod.Containers {
+	containers := make([]kcore.Container, len(api.Spec.Pod.Containers))
+	for i, container := range api.Spec.Pod.Containers {
 		containerResourceList := kcore.ResourceList{}
 		containerResourceLimitsList := kcore.ResourceList{}
 		securityContext := kcore.SecurityContext{
@@ -453,7 +447,7 @@ func (r *RealtimeAPIReconciler) userContainers(api serverless.RealtimeAPI) ([]kc
 		containerEnvVars = append(containerEnvVars, workloads.ClientConfigEnvVar())
 		containerEnvVars = append(containerEnvVars, container.Env...)
 
-		containers = append(containers, kcore.Container{
+		containers[i] = kcore.Container{
 			Name:           container.Name,
 			Image:          container.Image,
 			Command:        container.Command,
@@ -468,7 +462,7 @@ func (r *RealtimeAPIReconciler) userContainers(api serverless.RealtimeAPI) ([]kc
 			},
 			ImagePullPolicy: kcore.PullAlways,
 			SecurityContext: &securityContext,
-		})
+		}
 	}
 
 	return containers, volumes
@@ -524,37 +518,6 @@ func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcor
 	}, workloads.ClusterConfigVolume()
 }
 
-func (r *RealtimeAPIReconciler) getOrCreateAPIIDs(api serverless.RealtimeAPI) (deploymentID string, specID string, apiID string) {
-	deploymentID = api.Annotations["cortex.dev/deployment-id"]
-	if deploymentID == "" {
-		deploymentID = k8s.RandomName()[:10]
-	}
-
-	specID = r.getSpecHash(api)
-
-	apiID = api.Annotations["cortex.dev/api-id"]
-	if apiID == "" ||
-		api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
-		api.Annotations["cortex.dev/spec-id"] != specID {
-
-		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
-	}
-
-	return deploymentID, specID, apiID
-}
-
-func (r *RealtimeAPIReconciler) getSpecHash(api serverless.RealtimeAPI) string {
-	var buf bytes.Buffer
-	buf.WriteString(api.Name)
-	buf.WriteString(s.Obj(api.TypeMeta))
-	buf.WriteString(s.Obj(api.Spec.Pod))
-	buf.WriteString(s.Obj(api.Spec.Networking))
-	buf.WriteString(s.Obj(api.Spec.Autoscaling))
-	buf.WriteString(s.Obj(api.Spec.NodeGroups))
-	buf.WriteString(s.Obj(api.Spec.UpdateStrategy))
-	return hash.Bytes(buf.Bytes())[:32]
-}
-
 func (r *RealtimeAPIReconciler) generateAPIAnnotations(api serverless.RealtimeAPI) map[string]string {
 	return map[string]string{
 		userconfig.MinReplicasAnnotationKey:                  s.Int32(api.Spec.Autoscaling.MinReplicas),
diff --git a/pkg/crds/controllers/serverless/suite_test.go b/pkg/crds/controllers/serverless/suite_test.go
index 5698e2887b..96c9626946 100644
--- a/pkg/crds/controllers/serverless/suite_test.go
+++ b/pkg/crds/controllers/serverless/suite_test.go
@@ -23,7 +23,6 @@ import (
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 	"k8s.io/client-go/kubernetes/scheme"
-	"k8s.io/client-go/rest"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/envtest"
 	"sigs.k8s.io/controller-runtime/pkg/envtest/printer"
@@ -37,7 +36,7 @@ import (
 // These tests use Ginkgo (BDD-style Go testing framework). Refer to
 // http://onsi.github.io/ginkgo/ to learn more about Ginkgo.
 
-var cfg *rest.Config
+//var cfg *rest.Config
 var k8sClient client.Client
 var testEnv *envtest.Environment
 
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 6fa7cb452d..cd7284f276 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -19,6 +19,7 @@ package realtimeapi
 import (
 	"context"
 	"fmt"
+	"path/filepath"
 	"reflect"
 	"time"
 
@@ -27,6 +28,8 @@ import (
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
+	"github.com/cortexlabs/cortex/pkg/lib/maps"
+	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
@@ -55,7 +58,6 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	var api serverless.RealtimeAPI
 	key := client.ObjectKey{Namespace: consts.DefaultNamespace, Name: apiConfig.Name}
 
-	apiSpec := &spec.API{API: apiConfig}
 	err := config.K8s.Get(ctx, key, &api)
 	if err != nil {
 		if kerrors.IsNotFound(err) {
@@ -64,6 +66,23 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 				if err = config.K8s.Create(ctx, &api); err != nil {
 					return nil, "", errors.Wrap(err, "failed to create realtime api resource")
 				}
+
+				apiSpec := &spec.API{
+					API:                   apiConfig,
+					ID:                    api.Annotations["cortex.dev/api-id"],
+					SpecID:                api.Annotations["cortex.dev/spec-id"],
+					PodID:                 api.Annotations["cortex.dev/pod-id"],
+					DeploymentID:          api.Annotations["cortex.dev/deployment-id"],
+					Key:                   spec.Key(apiConfig.Name, api.Annotations["cortex.dev/api-id"], config.ClusterConfig.ClusterUID),
+					InitialDeploymentTime: api.CreationTimestamp.Unix(),
+					LastUpdated:           api.CreationTimestamp.Unix(),
+					MetadataRoot:          spec.MetadataRoot(apiConfig.Name, config.ClusterConfig.ClusterUID),
+				}
+
+				if err := config.AWS.UploadJSONToS3(apiSpec, config.ClusterConfig.Bucket, apiSpec.Key); err != nil {
+					return nil, "", errors.Wrap(err, "failed to upload api spec")
+				}
+
 				return apiSpec, fmt.Sprintf("creating %s", apiConfig.Resource.UserString()), nil
 			}
 		}
@@ -71,13 +90,34 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	}
 
 	desiredAPI := K8sResourceFromAPIConfig(*apiConfig)
+
+	apiSpec := &spec.API{
+		API:                   apiConfig,
+		ID:                    desiredAPI.Annotations["cortex.dev/api-id"],
+		SpecID:                desiredAPI.Annotations["cortex.dev/spec-id"],
+		PodID:                 desiredAPI.Annotations["cortex.dev/pod-id"],
+		DeploymentID:          desiredAPI.Annotations["cortex.dev/deployment-id"],
+		Key:                   spec.Key(apiConfig.Name, desiredAPI.Annotations["cortex.dev/api-id"], config.ClusterConfig.ClusterUID),
+		InitialDeploymentTime: api.CreationTimestamp.Unix(),
+		MetadataRoot:          spec.MetadataRoot(apiConfig.Name, config.ClusterConfig.ClusterUID),
+	}
+
 	if !reflect.DeepEqual(api.Spec, desiredAPI.Spec) || force {
 		api.Spec = desiredAPI.Spec
-		api.Annotations["cortex.dev/last-updated"] = s.Int64(time.Now().Unix())
+		api.Annotations = maps.MergeStrMapsString(api.Annotations, desiredAPI.Annotations)
+
+		lastUpdated := time.Now().Unix()
+		api.Annotations["cortex.dev/last-updated"] = s.Int64(lastUpdated)
+		apiSpec.LastUpdated = lastUpdated
 
 		if err = config.K8s.Update(ctx, &api); err != nil {
 			return nil, "", errors.Wrap(err, "failed to update realtime api resource")
 		}
+
+		if err := config.AWS.UploadJSONToS3(apiSpec, config.ClusterConfig.Bucket, apiSpec.Key); err != nil {
+			return nil, "", errors.Wrap(err, "failed to upload api spec")
+		}
+
 		return apiSpec, fmt.Sprintf("updating %s", apiConfig.Resource.UserString()), nil
 	}
 
@@ -109,21 +149,30 @@ func RefreshAPI(apiName string) (string, error) {
 	return fmt.Sprintf("updating %s", apiResource.UserString()), nil
 }
 
-func DeleteAPI(apiName string, _ bool) error {
-	ctx := context.Background()
-	api := serverless.RealtimeAPI{
-		ObjectMeta: kmeta.ObjectMeta{
-			Name:      apiName,
-			Namespace: consts.DefaultNamespace,
+func DeleteAPI(apiName string, keepCache bool) error {
+	return parallel.RunFirstErr(
+		func() error {
+			ctx := context.Background()
+			api := serverless.RealtimeAPI{
+				ObjectMeta: kmeta.ObjectMeta{
+					Name:      apiName,
+					Namespace: consts.DefaultNamespace,
+				},
+			}
+			if err := config.K8s.Delete(ctx, &api); err != nil {
+				return errors.Wrap(err, "failed to delete realtime api resource")
+			}
+			return nil
 		},
-	}
-	if err := config.K8s.Delete(ctx, &api); err != nil {
-		return errors.Wrap(err, "failed to delete realtime api resource")
-	}
-
-	// TODO: delete bucket resources (?)
-
-	return nil
+		func() error {
+			if keepCache {
+				return nil
+			}
+			// best effort deletion, swallow errors because there could be weird error messages
+			_ = deleteBucketResources(apiName)
+			return nil
+		},
+	)
 }
 
 func GetAllAPIs() ([]schema.APIResponse, error) {
@@ -133,31 +182,25 @@ func GetAllAPIs() ([]schema.APIResponse, error) {
 		return nil, errors.Wrap(err, "failed to list realtime api resources")
 	}
 
+	apiNames := make([]string, len(apis.Items))
+	apiIDs := make([]string, len(apis.Items))
+	for i, api := range apis.Items {
+		apiNames[i] = api.Name
+		apiIDs[i] = api.Annotations["cortex.dev/api-id"]
+	}
+
+	apiSpecs, err := operator.DownloadAPISpecs(apiNames, apiIDs)
+	if err != nil {
+		return nil, err
+	}
+
 	realtimeAPIs := make([]schema.APIResponse, len(apis.Items))
 	for i := range apis.Items {
 		api := apis.Items[i]
 		api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
 
-		lastUpdated := api.CreationTimestamp.Unix()
-		if api.Annotations["cortex.dev/last-updated"] != "" {
-			var ok bool
-			lastUpdated, ok = s.ParseInt64(api.Annotations["cortex.dev/last-updated"])
-			if !ok {
-				return nil, errors.ErrorUnexpected("failed to parse 'cortex.dev/last-updated' annotation")
-			}
-		}
-
 		realtimeAPIs[i] = schema.APIResponse{
-			Spec: spec.API{
-				API: &userconfig.API{
-					Resource: userconfig.Resource{
-						Name: api.Name,
-						Kind: userconfig.RealtimeAPIKind,
-					},
-				},
-				LastUpdated:           lastUpdated,
-				InitialDeploymentTime: api.CreationTimestamp.Unix(),
-			},
+			Spec: apiSpecs[i],
 			Status: &status.Status{
 				APIName:       api.Name,
 				APIID:         api.Annotations["cortex.dev/api-id"],
@@ -180,40 +223,17 @@ func GetAPIByName(apiName string) ([]schema.APIResponse, error) {
 		return nil, errors.Wrap(err, "failed to get realtime api resource")
 	}
 
-	// TODO: needs api id history
-	//api, err := operator.DownloadAPISpec(st.APIName, st.APIID)
-	//if err != nil {
-	//	return nil, err
-	//}
-
-	dashboardURL := pointer.String(getDashboardURL(api.Name))
-
-	lastUpdated := api.CreationTimestamp.Unix()
-	if api.Annotations["cortex.dev/last-updated"] != "" {
-		var ok bool
-		lastUpdated, ok = s.ParseInt64(api.Annotations["cortex.dev/last-updated"])
-		if !ok {
-			return nil, errors.ErrorUnexpected("failed to parse 'cortex.dev/last-updated' annotation")
-		}
+	apiSpec, err := operator.DownloadAPISpec(api.Name, api.Annotations["cortex.dev/api-id"])
+	if err != nil {
+		return nil, err
 	}
 
+	dashboardURL := pointer.String(getDashboardURL(api.Name))
 	api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
 
 	return []schema.APIResponse{
 		{
-			Spec: spec.API{
-				API: &userconfig.API{
-					Resource: userconfig.Resource{
-						Name: api.Name,
-						Kind: userconfig.RealtimeAPIKind,
-					},
-				},
-				ID:                    api.Annotations["cortex.dev/api-id"],
-				SpecID:                api.Annotations["cortex.dev/spec-id"],
-				DeploymentID:          api.Annotations["cortex.dev/deployment-id"],
-				InitialDeploymentTime: api.CreationTimestamp.Unix(),
-				LastUpdated:           lastUpdated,
-			},
+			Spec: *apiSpec,
 			Status: &status.Status{
 				APIName:       api.Name,
 				APIID:         api.Annotations["cortex.dev/api-id"],
@@ -244,9 +264,9 @@ func getDashboardURL(apiName string) string {
 func K8sResourceFromAPIConfig(apiConfig userconfig.API) serverless.RealtimeAPI {
 	containers := make([]serverless.ContainerSpec, len(apiConfig.Pod.Containers))
 	for i := range apiConfig.Pod.Containers {
-		containerConfig := apiConfig.Pod.Containers[i]
+		container := apiConfig.Pod.Containers[i]
 		var env []kcore.EnvVar
-		for k, v := range containerConfig.Env {
+		for k, v := range container.Env {
 			env = append(env, kcore.EnvVar{
 				Name:  k,
 				Value: v,
@@ -254,41 +274,39 @@ func K8sResourceFromAPIConfig(apiConfig userconfig.API) serverless.RealtimeAPI {
 		}
 
 		var compute *serverless.ComputeSpec
-		if containerConfig.Compute != nil {
+		if container.Compute != nil {
 			var cpu *kresource.Quantity
-			if containerConfig.Compute.CPU != nil {
-				cpu = &containerConfig.Compute.CPU.Quantity
+			if container.Compute.CPU != nil {
+				cpu = &container.Compute.CPU.Quantity
 			}
 			var mem *kresource.Quantity
-			if containerConfig.Compute.Mem != nil {
-				mem = &containerConfig.Compute.Mem.Quantity
+			if container.Compute.Mem != nil {
+				mem = &container.Compute.Mem.Quantity
 			}
 			var shm *kresource.Quantity
-			if containerConfig.Compute.Shm != nil {
-				shm = &containerConfig.Compute.Shm.Quantity
+			if container.Compute.Shm != nil {
+				shm = &container.Compute.Shm.Quantity
 			}
 
 			compute = &serverless.ComputeSpec{
 				CPU: cpu,
-				GPU: containerConfig.Compute.GPU,
-				Inf: containerConfig.Compute.Inf,
+				GPU: container.Compute.GPU,
+				Inf: container.Compute.Inf,
 				Mem: mem,
 				Shm: shm,
 			}
 		}
 
-		container := serverless.ContainerSpec{
-			Name:           containerConfig.Name,
-			Image:          containerConfig.Image,
-			Command:        containerConfig.Command,
-			Args:           containerConfig.Args,
+		containers[i] = serverless.ContainerSpec{
+			Name:           container.Name,
+			Image:          container.Image,
+			Command:        container.Command,
+			Args:           container.Args,
 			Env:            env,
 			Compute:        compute,
-			ReadinessProbe: workloads.GetProbeSpec(containerConfig.ReadinessProbe),
-			LivenessProbe:  workloads.GetProbeSpec(containerConfig.LivenessProbe),
+			ReadinessProbe: workloads.GetProbeSpec(container.ReadinessProbe),
+			LivenessProbe:  workloads.GetProbeSpec(container.LivenessProbe),
 		}
-
-		containers = append(containers, container)
 	}
 
 	api := serverless.RealtimeAPI{
@@ -326,5 +344,19 @@ func K8sResourceFromAPIConfig(apiConfig userconfig.API) serverless.RealtimeAPI {
 			},
 		},
 	}
+
+	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
+	api.Annotations = map[string]string{
+		"cortex.dev/deployment-id": deploymentID,
+		"cortex.dev/spec-id":       specID,
+		"cortex.dev/pod-id":        podID,
+		"cortex.dev/api-id":        apiID,
+	}
+
 	return api
 }
+
+// deleteBucketResources calls DeleteS3Dir on <cluster uid>/apis/<api name> in the cluster bucket.
+func deleteBucketResources(apiName string) error {
+	return config.AWS.DeleteS3Dir(config.ClusterConfig.Bucket, filepath.Join(config.ClusterConfig.ClusterUID, "apis", apiName), true)
+}
diff --git a/pkg/operator/resources/realtimeapi/errors.go b/pkg/operator/resources/realtimeapi/errors.go
deleted file mode 100644
index 58b60a7743..0000000000
--- a/pkg/operator/resources/realtimeapi/errors.go
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-Copyright 2021 Cortex Labs, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package realtimeapi
-
-import (
-	"fmt"
-
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
-)
-
-const (
-	ErrAPIUpdating = "realtimeapi.api_updating"
-)
-
-func ErrorAPIUpdating(apiName string) error {
-	return errors.WithStack(&errors.Error{
-		Kind:    ErrAPIUpdating,
-		Message: fmt.Sprintf("%s is updating (override with --force)", apiName),
-	})
-}

From 17427b3a73a1a8c7e0e6052697d37970913c7d1c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 29 Jul 2021 10:29:20 +0300
Subject: [PATCH 26/42] Update API status following the Realtime CRD addition
 (#2375)

---
 cli/cluster/delete.go                         |   4 +-
 cli/cluster/get.go                            |  14 +
 cli/cmd/describe.go                           | 113 +++++++
 cli/cmd/get.go                                |  67 ++--
 cli/cmd/lib_apis.go                           |  64 ++++
 cli/cmd/lib_async_apis.go                     |  77 +++--
 cli/cmd/lib_batch_apis.go                     |  59 ++--
 cli/cmd/lib_realtime_apis.go                  |  74 +++--
 cli/cmd/lib_task_apis.go                      |  59 ++--
 cli/cmd/lib_traffic_splitters.go              |  40 +--
 cli/cmd/lib_watch.go                          |   4 +-
 cli/cmd/root.go                               |   2 +
 cmd/operator/main.go                          |   1 +
 dev/generate_cli_md.sh                        |   1 +
 docs/clients/cli.md                           |  14 +
 docs/workloads/async/statuses.md              |  21 +-
 docs/workloads/batch/statuses.md              |   2 +-
 docs/workloads/realtime/statuses.md           |  26 +-
 docs/workloads/realtime/troubleshooting.md    |   6 +-
 docs/workloads/task/statuses.md               |   2 +-
 go.mod                                        |   4 +-
 go.sum                                        |   6 +-
 pkg/activator/activator.go                    |   2 +-
 pkg/activator/helpers.go                      |  21 +-
 pkg/consts/consts.go                          |   3 +-
 .../serverless/v1alpha1/realtimeapi_types.go  |  22 +-
 .../v1alpha1/zz_generated.deepcopy.go         |   1 -
 .../crd/bases/batch.cortex.dev_batchjobs.yaml |  19 +-
 .../serverless.cortex.dev_realtimeapis.yaml   | 110 +------
 .../batch/batchjob_controller_helpers.go      |  92 ++++--
 .../realtimeapi_controller_helpers.go         |  97 +-----
 pkg/lib/k8s/pod.go                            | 142 ++++++---
 pkg/operator/endpoints/describe.go            |  36 +++
 pkg/operator/endpoints/logs.go                |  11 +-
 pkg/operator/operator/k8s.go                  |  18 ++
 pkg/operator/resources/asyncapi/api.go        | 164 ++++++++--
 pkg/operator/resources/asyncapi/status.go     | 293 +++---------------
 pkg/operator/resources/job/batchapi/api.go    |  37 +--
 pkg/operator/resources/job/taskapi/api.go     |  38 +--
 pkg/operator/resources/job/worker_stats.go    |  31 +-
 pkg/operator/resources/realtimeapi/api.go     | 172 ++++++++--
 pkg/operator/resources/resources.go           |  56 ++--
 pkg/operator/resources/trafficsplitter/api.go |  46 +--
 pkg/operator/schema/schema.go                 |  41 +--
 pkg/types/spec/api.go                         |  58 +++-
 pkg/types/spec/job.go                         |  40 +--
 pkg/types/status/code.go                      | 101 ------
 pkg/types/status/job_status.go                |  14 +-
 pkg/types/status/status.go                    | 130 ++++++--
 pkg/types/userconfig/api.go                   |  34 ++
 pkg/types/userconfig/config_key.go            |   1 +
 51 files changed, 1457 insertions(+), 1033 deletions(-)
 create mode 100644 cli/cmd/describe.go
 create mode 100644 cli/cmd/lib_apis.go
 create mode 100644 pkg/operator/endpoints/describe.go
 delete mode 100644 pkg/types/status/code.go

diff --git a/cli/cluster/delete.go b/cli/cluster/delete.go
index e81624f98d..47618b304b 100644
--- a/cli/cluster/delete.go
+++ b/cli/cluster/delete.go
@@ -22,6 +22,7 @@ import (
 
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/json"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/lib/prompt"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -70,8 +71,7 @@ func getReadyRealtimeAPIReplicasOrNil(operatorConfig OperatorConfig, apiName str
 		return nil
 	}
 
-	totalReady := apiRes.Status.Updated.Ready + apiRes.Status.Stale.Ready
-	return &totalReady
+	return pointer.Int32(apiRes.Status.Ready)
 }
 
 func StopJob(operatorConfig OperatorConfig, kind userconfig.Kind, apiName string, jobID string) (schema.DeleteResponse, error) {
diff --git a/cli/cluster/get.go b/cli/cluster/get.go
index 47a24aa0a3..6d88e707b8 100644
--- a/cli/cluster/get.go
+++ b/cli/cluster/get.go
@@ -51,6 +51,20 @@ func GetAPI(operatorConfig OperatorConfig, apiName string) ([]schema.APIResponse
 	return apiRes, nil
 }
 
+// DescribeAPI issues HTTPGet for "/describe/<apiName>" and unmarshals the response body into API responses.
+func DescribeAPI(operatorConfig OperatorConfig, apiName string) ([]schema.APIResponse, error) {
+	endpoint := "/describe/" + apiName
+	body, err := HTTPGet(operatorConfig, endpoint)
+	if err != nil {
+		return nil, err
+	}
+	var apiRes []schema.APIResponse
+	if err = json.Unmarshal(body, &apiRes); err != nil {
+		return nil, errors.Wrap(err, endpoint, string(body))
+	}
+	return apiRes, nil
+}
+
 func GetAPIByID(operatorConfig OperatorConfig, apiName string, apiID string) ([]schema.APIResponse, error) {
 	httpRes, err := HTTPGet(operatorConfig, "/get/"+apiName+"/"+apiID)
 	if err != nil {
diff --git a/cli/cmd/describe.go b/cli/cmd/describe.go
new file mode 100644
index 0000000000..767045c5a2
--- /dev/null
+++ b/cli/cmd/describe.go
@@ -0,0 +1,113 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cmd
+
+import (
+	"fmt"
+
+	"github.com/cortexlabs/cortex/cli/cluster"
+	"github.com/cortexlabs/cortex/cli/types/cliconfig"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/exit"
+	"github.com/cortexlabs/cortex/pkg/lib/telemetry"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/spf13/cobra"
+)
+
+const (
+	_titleReplicaStatus = "replica status"
+	_titleReplicaCount  = "replica count"
+)
+
+var (
+	_flagDescribeEnv   string
+	_flagDescribeWatch bool
+)
+
+func describeInit() {
+	_describeCmd.Flags().SortFlags = false
+	_describeCmd.Flags().StringVarP(&_flagDescribeEnv, "env", "e", "", "environment to use")
+	_describeCmd.Flags().BoolVarP(&_flagDescribeWatch, "watch", "w", false, "re-run the command every 2 seconds")
+}
+
+// _describeCmd is the "describe [API_NAME]" command: it resolves the environment and prints the describe table, re-running it when --watch is set.
+var _describeCmd = &cobra.Command{
+	Use:   "describe [API_NAME]",
+	Short: "describe an api",
+	Args:  cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		apiName := args[0]
+
+		var envName string
+		if wasFlagProvided(cmd, "env") {
+			envName = _flagDescribeEnv
+		} else {
+			var err error
+			envName, err = getEnvFromFlag("")
+			if err != nil {
+				telemetry.Event("cli.describe")
+				exit.Error(err)
+			}
+		}
+
+		env, err := ReadOrConfigureEnv(envName)
+		if err != nil {
+			telemetry.Event("cli.describe")
+			exit.Error(err)
+		}
+		telemetry.Event("cli.describe", map[string]interface{}{"env_name": env.Name})
+		// the env read above is only used for the telemetry event; the callback below reads it again on every run
+		rerun(_flagDescribeWatch, func() (string, error) {
+			env, err := ReadOrConfigureEnv(envName)
+			if err != nil {
+				exit.Error(err)
+			}
+			out, err := envStringIfNotSpecified(envName, cmd)
+			if err != nil {
+				return "", err
+			}
+			apiTable, err := describeAPI(env, apiName)
+			if err != nil {
+				return "", err
+			}
+
+			return out + apiTable, nil
+		})
+	},
+}
+
+// describeAPI fetches apiName via cluster.DescribeAPI and renders the describe table matching the kind in the response metadata.
+func describeAPI(env cliconfig.Environment, apiName string) (string, error) {
+	apisRes, err := cluster.DescribeAPI(MustGetOperatorConfig(env.Name), apiName)
+	if err != nil {
+		return "", err
+	}
+
+	if len(apisRes) == 0 {
+		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find api %s", apiName)))
+	}
+	apiRes := apisRes[0]
+
+	switch apiRes.Metadata.Kind {
+	case userconfig.RealtimeAPIKind:
+		return realtimeDescribeAPITable(apiRes, env)
+	case userconfig.AsyncAPIKind:
+		return asyncDescribeAPITable(apiRes, env)
+	default:
+		return "", errors.ErrorUnexpected(fmt.Sprintf("encountered unexpected kind %s for api %s", apiRes.Metadata.Kind, apiName))
+	}
+}
diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 1b11b984a0..c260d0c5e9 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -35,29 +35,28 @@ import (
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/cortexlabs/yaml"
 	"github.com/spf13/cobra"
 )
 
 const (
 	_titleEnvironment = "env"
 	_titleRealtimeAPI = "realtime api"
-	_titleStatus      = "status"
+	_titleAsyncAPI    = "async api"
+	_titleLive        = "live"
 	_titleUpToDate    = "up-to-date"
-	_titleStale       = "stale"
-	_titleRequested   = "requested"
-	_titleFailed      = "failed"
-	_titleLastupdated = "last update"
+	_titleLastUpdated = "last update"
 )
 
 var (
-	_flagGetEnv string
-	_flagWatch  bool
+	_flagGetEnv   string
+	_flagGetWatch bool
 )
 
 func getInit() {
 	_getCmd.Flags().SortFlags = false
 	_getCmd.Flags().StringVarP(&_flagGetEnv, "env", "e", "", "environment to use")
-	_getCmd.Flags().BoolVarP(&_flagWatch, "watch", "w", false, "re-run the command every 2 seconds")
+	_getCmd.Flags().BoolVarP(&_flagGetWatch, "watch", "w", false, "re-run the command every 2 seconds")
 	_getCmd.Flags().VarP(&_flagOutput, "output", "o", fmt.Sprintf("output format: one of %s", strings.Join(flags.OutputTypeStringsExcluding(flags.YAMLOutputType), "|")))
 	addVerboseFlag(_getCmd)
 }
@@ -90,7 +89,7 @@ var _getCmd = &cobra.Command{
 			telemetry.Event("cli.get")
 		}
 
-		rerun(func() (string, error) {
+		rerun(_flagGetWatch, func() (string, error) {
 			if len(args) == 1 {
 				env, err := ReadOrConfigureEnv(envName)
 				if err != nil {
@@ -106,7 +105,7 @@ var _getCmd = &cobra.Command{
 					return "", err
 				}
 
-				if _flagOutput == flags.JSONOutputType {
+				if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 					return apiTable, nil
 				}
 
@@ -136,7 +135,7 @@ var _getCmd = &cobra.Command{
 				if err != nil {
 					return "", err
 				}
-				if _flagOutput == flags.JSONOutputType {
+				if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 					return jobTable, nil
 				}
 
@@ -166,7 +165,7 @@ var _getCmd = &cobra.Command{
 						return "", err
 					}
 
-					if _flagOutput == flags.JSONOutputType {
+					if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 						return apiTable, nil
 					}
 
@@ -221,7 +220,7 @@ func getAPIsInAllEnvironments() (string, error) {
 
 		if err == nil {
 			for _, api := range apisRes {
-				switch api.Spec.Kind {
+				switch api.Metadata.Kind {
 				case userconfig.BatchAPIKind:
 					allBatchAPIEnvs = append(allBatchAPIEnvs, env.Name)
 					allBatchAPIs = append(allBatchAPIs, api)
@@ -247,12 +246,16 @@ func getAPIsInAllEnvironments() (string, error) {
 		allAPIsOutput = append(allAPIsOutput, apisOutput)
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(allAPIsOutput)
-		if err != nil {
-			return "", err
-		}
-
+		bytes, err = libjson.Marshal(allAPIsOutput)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(allAPIsOutput)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -337,11 +340,16 @@ func getAPIsByEnv(env cliconfig.Environment) (string, error) {
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(apisRes)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(apisRes)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(apisRes)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -457,16 +465,21 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) {
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(apisRes)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(apisRes)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(apisRes)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
 	if len(apisRes) == 0 {
-		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find API %s", apiName)))
+		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find api %s", apiName)))
 	}
 
 	apiRes := apisRes[0]
diff --git a/cli/cmd/lib_apis.go b/cli/cmd/lib_apis.go
new file mode 100644
index 0000000000..bce36d0ca2
--- /dev/null
+++ b/cli/cmd/lib_apis.go
@@ -0,0 +1,64 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cmd
+
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/table"
+	"github.com/cortexlabs/cortex/pkg/types/status"
+)
+
+// replicaCountTable builds a two-column table (headers _titleReplicaStatus and
+// _titleReplicaCount) with one row per entry of status.ReplicaCountTypes, in
+// that slice's order. The up-to-date count never gets a row, and the states
+// listed in the switch below are left out while their count is zero.
+func replicaCountTable(counts *status.ReplicaCounts) table.Table {
+	var rows [][]interface{}
+	for _, countType := range status.ReplicaCountTypes {
+		// the up-to-date count is not listed in this table
+		if countType == status.ReplicaCountUpToDate {
+			continue
+		}
+
+		value := counts.GetCountBy(countType)
+
+		// these states only get a row when their count is non-zero
+		hideWhenZero := false
+		switch countType {
+		case status.ReplicaCountFailed,
+			status.ReplicaCountKilled,
+			status.ReplicaCountKilledOOM,
+			status.ReplicaCountErrImagePull,
+			status.ReplicaCountUnknown,
+			status.ReplicaCountStalled:
+			hideWhenZero = true
+		}
+		if hideWhenZero && value == 0 {
+			continue
+		}
+
+		rows = append(rows, []interface{}{countType, value})
+	}
+
+	headers := []table.Header{
+		{Title: _titleReplicaStatus, MinWidth: 32, MaxWidth: 32},
+		{Title: _titleReplicaCount},
+	}
+	return table.Table{
+		Headers: headers,
+		Rows:    rows,
+	}
+}
diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index 114c88bca8..e534a9923d 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -17,26 +17,22 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
 	"github.com/cortexlabs/cortex/cli/types/cliconfig"
 	"github.com/cortexlabs/cortex/pkg/lib/console"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 )
 
-const (
-	_titleAsyncAPI = "async api"
-)
-
 func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
 	var out string
 
 	t := asyncAPIsTable([]schema.APIResponse{asyncAPI}, []string{env.Name})
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
-	t.FindHeaderByTitle(_titleAsyncAPI).Hidden = true
 
 	out += t.MustFormat()
 
@@ -44,7 +40,9 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 		out += "\n" + console.Bold("metrics dashboard: ") + *asyncAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + asyncAPI.Endpoint + "\n"
+	if asyncAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *asyncAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(asyncAPI.APIVersions)
 
@@ -57,39 +55,68 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 	return out, nil
 }
 
+// asyncDescribeAPITable renders the describe output for an async api: the api
+// table, the metrics dashboard and endpoint lines when set, and the replica
+// count table. It returns an error if metadata or replica counts are missing.
+func asyncDescribeAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
+	switch {
+	case asyncAPI.Metadata == nil:
+		return "", errors.ErrorUnexpected("missing metadata from operator response")
+	case asyncAPI.ReplicaCounts == nil:
+		return "", errors.ErrorUnexpected(fmt.Sprintf("missing replica counts for %s api", asyncAPI.Metadata.Name))
+	}
+
+	summary := asyncAPIsTable([]schema.APIResponse{asyncAPI}, []string{env.Name})
+	sections := []string{summary.MustFormat()}
+
+	if dashboardURL := asyncAPI.DashboardURL; dashboardURL != nil && *dashboardURL != "" {
+		sections = append(sections, console.Bold("metrics dashboard: ")+*dashboardURL+"\n")
+	}
+	if endpoint := asyncAPI.Endpoint; endpoint != nil {
+		sections = append(sections, console.Bold("endpoint: ")+*endpoint+"\n")
+	}
+
+	replicas := replicaCountTable(asyncAPI.ReplicaCounts)
+	sections = append(sections, replicas.MustFormat())
+	return strings.Join(sections, "\n"), nil
+}
+
 func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(asyncAPIs))
 
-	var totalFailed int32
-	var totalStale int32
-
 	for i, asyncAPI := range asyncAPIs {
-		lastUpdated := time.Unix(asyncAPI.Spec.LastUpdated, 0)
+		if asyncAPI.Metadata == nil || (asyncAPI.Status == nil && asyncAPI.ReplicaCounts == nil) {
+			continue
+		}
+
+		var ready, requested, upToDate int32
+		if asyncAPI.Status != nil {
+			ready = asyncAPI.Status.Ready
+			requested = asyncAPI.Status.Requested
+			upToDate = asyncAPI.Status.UpToDate
+		} else {
+			ready = asyncAPI.ReplicaCounts.Ready
+			requested = asyncAPI.ReplicaCounts.Requested
+			upToDate = asyncAPI.ReplicaCounts.UpToDate
+		}
+
+		lastUpdated := time.Unix(asyncAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			asyncAPI.Spec.Name,
-			asyncAPI.Status.Message(),
-			asyncAPI.Status.Updated.Ready,
-			asyncAPI.Status.Stale.Ready,
-			asyncAPI.Status.Requested,
-			asyncAPI.Status.Updated.TotalFailed(),
+			asyncAPI.Metadata.Name,
+			fmt.Sprintf("%d/%d", ready, requested),
+			upToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
-
-		totalFailed += asyncAPI.Status.Updated.TotalFailed()
-		totalStale += asyncAPI.Status.Stale.Ready
 	}
 
 	return table.Table{
 		Headers: []table.Header{
 			{Title: _titleEnvironment},
 			{Title: _titleAsyncAPI},
-			{Title: _titleStatus},
+			{Title: _titleLive},
 			{Title: _titleUpToDate},
-			{Title: _titleStale, Hidden: totalStale == 0},
-			{Title: _titleRequested},
-			{Title: _titleFailed, Hidden: totalFailed == 0},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 272dbfa0fa..ebabc29243 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -31,6 +31,7 @@ import (
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/yaml"
 )
 
 const (
@@ -43,7 +44,10 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 	rows := make([][]interface{}, 0, len(batchAPIs))
 
 	for i, batchAPI := range batchAPIs {
-		lastAPIUpdated := time.Unix(batchAPI.Spec.LastUpdated, 0)
+		if batchAPI.Metadata == nil {
+			continue
+		}
+		lastAPIUpdated := time.Unix(batchAPI.Metadata.LastUpdated, 0)
 		latestStartTime := time.Time{}
 		latestJobID := "-"
 		runningJobs := 0
@@ -61,7 +65,7 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 
 		rows = append(rows, []interface{}{
 			envNames[i],
-			batchAPI.Spec.Name,
+			batchAPI.Metadata.Name,
 			runningJobs,
 			latestJobID,
 			libtime.SinceStr(&lastAPIUpdated),
@@ -74,7 +78,7 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 			{Title: _titleBatchAPI},
 			{Title: _titleJobCount},
 			{Title: _titleLatestJobID},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
@@ -123,7 +127,9 @@ func batchAPITable(batchAPI schema.APIResponse) string {
 		out += "\n" + console.Bold("metrics dashboard: ") + *batchAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + batchAPI.Endpoint + "\n"
+	if batchAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *batchAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(batchAPI.APIVersions)
 
@@ -142,11 +148,16 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(resp)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(resp)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(resp)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -216,22 +227,34 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
 		if job.WorkerCounts != nil {
 			t := table.Table{
 				Headers: []table.Header{
-					{Title: "requested"},
-					{Title: "pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "initializing", Hidden: job.WorkerCounts.Initializing == 0},
-					{Title: "stalled", Hidden: job.WorkerCounts.Stalled == 0},
-					{Title: "running"},
-					{Title: "failed", Hidden: job.WorkerCounts.Failed == 0},
-					{Title: "succeeded"},
+					{Title: "Requested"},
+					{Title: "Pending"},
+					{Title: "Creating"},
+					{Title: "Ready"},
+					{Title: "NotReady"},
+					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
+					{Title: "Terminating", Hidden: job.WorkerCounts.Terminating == 0},
+					{Title: "Failed", Hidden: job.WorkerCounts.Failed == 0},
+					{Title: "Killed", Hidden: job.WorkerCounts.Killed == 0},
+					{Title: "KilledOOM", Hidden: job.WorkerCounts.KilledOOM == 0},
+					{Title: "Stalled", Hidden: job.WorkerCounts.Stalled == 0},
+					{Title: "Unknown", Hidden: job.WorkerCounts.Unknown == 0},
+					{Title: "Succeeded"},
 				},
 				Rows: [][]interface{}{
 					{
 						job.Workers,
 						job.WorkerCounts.Pending,
-						job.WorkerCounts.Initializing,
-						job.WorkerCounts.Stalled,
-						job.WorkerCounts.Running,
+						job.WorkerCounts.Creating,
+						job.WorkerCounts.Ready,
+						job.WorkerCounts.NotReady,
+						job.WorkerCounts.ErrImagePull,
+						job.WorkerCounts.Terminating,
 						job.WorkerCounts.Failed,
+						job.WorkerCounts.Killed,
+						job.WorkerCounts.KilledOOM,
+						job.WorkerCounts.Stalled,
+						job.WorkerCounts.Unknown,
 						job.WorkerCounts.Succeeded,
 					},
 				},
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index be4316e0a8..92234a83f9 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -17,11 +17,13 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
 	"github.com/cortexlabs/cortex/cli/types/cliconfig"
 	"github.com/cortexlabs/cortex/pkg/lib/console"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -31,16 +33,15 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 	var out string
 
 	t := realtimeAPIsTable([]schema.APIResponse{realtimeAPI}, []string{env.Name})
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
-	t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true
-
 	out += t.MustFormat()
 
 	if realtimeAPI.DashboardURL != nil && *realtimeAPI.DashboardURL != "" {
 		out += "\n" + console.Bold("metrics dashboard: ") + *realtimeAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + realtimeAPI.Endpoint + "\n"
+	if realtimeAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *realtimeAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(realtimeAPI.APIVersions)
 
@@ -53,39 +54,68 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 	return out, nil
 }
 
+// realtimeDescribeAPITable renders the describe output for a realtime api: the
+// api table, the metrics dashboard and endpoint lines when set, and the replica
+// count table. It returns an error if metadata or replica counts are missing.
+func realtimeDescribeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
+	switch {
+	case realtimeAPI.Metadata == nil:
+		return "", errors.ErrorUnexpected("missing metadata from operator response")
+	case realtimeAPI.ReplicaCounts == nil:
+		return "", errors.ErrorUnexpected(fmt.Sprintf("missing replica counts for %s api", realtimeAPI.Metadata.Name))
+	}
+
+	summary := realtimeAPIsTable([]schema.APIResponse{realtimeAPI}, []string{env.Name})
+	sections := []string{summary.MustFormat()}
+
+	if dashboardURL := realtimeAPI.DashboardURL; dashboardURL != nil && *dashboardURL != "" {
+		sections = append(sections, console.Bold("metrics dashboard: ")+*dashboardURL+"\n")
+	}
+	if endpoint := realtimeAPI.Endpoint; endpoint != nil {
+		sections = append(sections, console.Bold("endpoint: ")+*endpoint+"\n")
+	}
+
+	replicas := replicaCountTable(realtimeAPI.ReplicaCounts)
+	sections = append(sections, replicas.MustFormat())
+	return strings.Join(sections, "\n"), nil
+}
+
 func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(realtimeAPIs))
 
-	var totalFailed int32
-	var totalStale int32
-
 	for i, realtimeAPI := range realtimeAPIs {
-		lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0)
+		if realtimeAPI.Metadata == nil || (realtimeAPI.Status == nil && realtimeAPI.ReplicaCounts == nil) {
+			continue
+		}
+
+		var ready, requested, upToDate int32
+		if realtimeAPI.Status != nil {
+			ready = realtimeAPI.Status.Ready
+			requested = realtimeAPI.Status.Requested
+			upToDate = realtimeAPI.Status.UpToDate
+		} else {
+			ready = realtimeAPI.ReplicaCounts.Ready
+			requested = realtimeAPI.ReplicaCounts.Requested
+			upToDate = realtimeAPI.ReplicaCounts.UpToDate
+		}
+
+		lastUpdated := time.Unix(realtimeAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			realtimeAPI.Spec.Name,
-			realtimeAPI.Status.Message(),
-			realtimeAPI.Status.Updated.Ready,
-			realtimeAPI.Status.Stale.Ready,
-			realtimeAPI.Status.Requested,
-			realtimeAPI.Status.Updated.TotalFailed(),
+			realtimeAPI.Metadata.Name,
+			fmt.Sprintf("%d/%d", ready, requested),
+			upToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
-
-		totalFailed += realtimeAPI.Status.Updated.TotalFailed()
-		totalStale += realtimeAPI.Status.Stale.Ready
 	}
 
 	return table.Table{
 		Headers: []table.Header{
 			{Title: _titleEnvironment},
 			{Title: _titleRealtimeAPI},
-			{Title: _titleStatus},
+			{Title: _titleLive},
 			{Title: _titleUpToDate},
-			{Title: _titleStale, Hidden: totalStale == 0},
-			{Title: _titleRequested},
-			{Title: _titleFailed, Hidden: totalFailed == 0},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index 50575b8516..295e1af875 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -29,6 +29,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
+	"github.com/cortexlabs/yaml"
 )
 
 const (
@@ -41,7 +42,10 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 	rows := make([][]interface{}, 0, len(taskAPIs))
 
 	for i, taskAPI := range taskAPIs {
-		lastAPIUpdated := time.Unix(taskAPI.Spec.LastUpdated, 0)
+		if taskAPI.Metadata == nil {
+			continue
+		}
+		lastAPIUpdated := time.Unix(taskAPI.Metadata.LastUpdated, 0)
 		latestStartTime := time.Time{}
 		latestJobID := "-"
 		runningJobs := 0
@@ -59,7 +63,7 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 
 		rows = append(rows, []interface{}{
 			envNames[i],
-			taskAPI.Spec.Name,
+			taskAPI.Metadata.Name,
 			runningJobs,
 			latestJobID,
 			libtime.SinceStr(&lastAPIUpdated),
@@ -72,7 +76,7 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 			{Title: _titleTaskAPI},
 			{Title: _titleTaskJobCount},
 			{Title: _titleLatestTaskJobID},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
@@ -118,7 +122,9 @@ func taskAPITable(taskAPI schema.APIResponse) string {
 		out += "\n" + console.Bold("metrics dashboard: ") + *taskAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + taskAPI.Endpoint + "\n"
+	if taskAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *taskAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(taskAPI.APIVersions)
 
@@ -137,11 +143,16 @@ func getTaskJob(env cliconfig.Environment, apiName string, jobID string) (string
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(resp)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(resp)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(resp)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -176,22 +187,34 @@ func getTaskJob(env cliconfig.Environment, apiName string, jobID string) (string
 		if job.WorkerCounts != nil {
 			t := table.Table{
 				Headers: []table.Header{
-					{Title: "requested"},
-					{Title: "pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "initializing", Hidden: job.WorkerCounts.Initializing == 0},
-					{Title: "stalled", Hidden: job.WorkerCounts.Stalled == 0},
-					{Title: "running"},
-					{Title: "failed", Hidden: job.WorkerCounts.Failed == 0},
-					{Title: "succeeded"},
+					{Title: "Requested"},
+					{Title: "Pending"},
+					{Title: "Creating"},
+					{Title: "Ready"},
+					{Title: "NotReady"},
+					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
+					{Title: "Terminating", Hidden: job.WorkerCounts.Terminating == 0},
+					{Title: "Failed", Hidden: job.WorkerCounts.Failed == 0},
+					{Title: "Killed", Hidden: job.WorkerCounts.Killed == 0},
+					{Title: "KilledOOM", Hidden: job.WorkerCounts.KilledOOM == 0},
+					{Title: "Stalled", Hidden: job.WorkerCounts.Stalled == 0},
+					{Title: "Unknown", Hidden: job.WorkerCounts.Unknown == 0},
+					{Title: "Succeeded"},
 				},
 				Rows: [][]interface{}{
 					{
 						job.Workers,
 						job.WorkerCounts.Pending,
-						job.WorkerCounts.Initializing,
-						job.WorkerCounts.Stalled,
-						job.WorkerCounts.Running,
+						job.WorkerCounts.Creating,
+						job.WorkerCounts.Ready,
+						job.WorkerCounts.NotReady,
+						job.WorkerCounts.ErrImagePull,
+						job.WorkerCounts.Terminating,
 						job.WorkerCounts.Failed,
+						job.WorkerCounts.Killed,
+						job.WorkerCounts.KilledOOM,
+						job.WorkerCounts.Stalled,
+						job.WorkerCounts.Unknown,
 						job.WorkerCounts.Succeeded,
 					},
 				},
diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index 39c344038a..af2b4e4aad 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
@@ -44,12 +45,14 @@ func trafficSplitterTable(trafficSplitter schema.APIResponse, env cliconfig.Envi
 	if err != nil {
 		return "", err
 	}
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
 
 	out += t.MustFormat()
 
 	out += "\n" + console.Bold("last updated: ") + libtime.SinceStr(&lastUpdated)
-	out += "\n" + console.Bold("endpoint: ") + trafficSplitter.Endpoint + "\n"
+
+	if trafficSplitter.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *trafficSplitter.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(trafficSplitter.APIVersions)
 
@@ -72,7 +75,10 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 		}
 
 		apiRes := apisRes[0]
-		lastUpdated := time.Unix(apiRes.Spec.LastUpdated, 0)
+		if apiRes.Metadata == nil || apiRes.Status == nil {
+			continue
+		}
+		lastUpdated := time.Unix(apiRes.Metadata.LastUpdated, 0)
 
 		apiName := apiRes.Spec.Name
 		if api.Shadow {
@@ -82,8 +88,8 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 			env.Name,
 			apiName,
 			api.Weight,
-			apiRes.Status.Message(),
-			apiRes.Status.Requested,
+			fmt.Sprintf("%d/%d", apiRes.Status.Ready, apiRes.Status.Requested),
+			apiRes.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
 	}
@@ -93,9 +99,9 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 			{Title: _titleEnvironment},
 			{Title: _titleAPIs},
 			{Title: _trafficSplitterWeights},
-			{Title: _titleStatus},
-			{Title: _titleRequested},
-			{Title: _titleLastupdated},
+			{Title: _titleLive},
+			{Title: _titleUpToDate},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}, nil
@@ -104,20 +110,14 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(trafficSplitter))
 	for i, splitAPI := range trafficSplitter {
-		lastUpdated := time.Unix(splitAPI.Spec.LastUpdated, 0)
-		var apis []string
-		for _, api := range splitAPI.Spec.APIs {
-			apiName := api.Name
-			if api.Shadow {
-				apiName += " (shadow)"
-			}
-			apis = append(apis, apiName+":"+s.Int32(api.Weight))
+		if splitAPI.Metadata == nil || splitAPI.NumTrafficSplitterTargets == nil {
+			continue
 		}
-		apisStr := s.TruncateEllipses(strings.Join(apis, " "), 50)
+		lastUpdated := time.Unix(splitAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			splitAPI.Spec.Name,
-			apisStr,
+			splitAPI.Metadata.Name,
+			s.Int32(*splitAPI.NumTrafficSplitterTargets),
 			libtime.SinceStr(&lastUpdated),
 		})
 	}
@@ -127,7 +127,7 @@ func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []s
 			{Title: _titleEnvironment},
 			{Title: _titleTrafficSplitter},
 			{Title: _titleAPIs},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_watch.go b/cli/cmd/lib_watch.go
index 06aebb26c2..a0f9043492 100644
--- a/cli/cmd/lib_watch.go
+++ b/cli/cmd/lib_watch.go
@@ -56,8 +56,8 @@ func watchHeader() string {
 	return fmt.Sprintf("$ %s  %s%s", _cmdStr, padding, libtime.LocalHourNow())
 }
 
-func rerun(f func() (string, error)) {
-	if _flagWatch {
+func rerun(watchFlag bool, f func() (string, error)) {
+	if watchFlag {
 		print("\033[H\033[2J") // clear the screen
 
 		var prevStrSlice []string
diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index 68649c0cc1..8aa7d1e0e0 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -112,6 +112,7 @@ func init() {
 	clusterInit()
 	completionInit()
 	deleteInit()
+	describeInit()
 	deployInit()
 	envInit()
 	getInit()
@@ -154,6 +155,7 @@ func Execute() {
 
 	_rootCmd.AddCommand(_deployCmd)
 	_rootCmd.AddCommand(_getCmd)
+	_rootCmd.AddCommand(_describeCmd)
 	_rootCmd.AddCommand(_logsCmd)
 	_rootCmd.AddCommand(_refreshCmd)
 	_rootCmd.AddCommand(_deleteCmd)
diff --git a/cmd/operator/main.go b/cmd/operator/main.go
index bf5a50d33b..ac38ee7130 100644
--- a/cmd/operator/main.go
+++ b/cmd/operator/main.go
@@ -105,6 +105,7 @@ func main() {
 	routerWithAuth.HandleFunc("/get", endpoints.GetAPIs).Methods("GET")
 	routerWithAuth.HandleFunc("/get/{apiName}", endpoints.GetAPI).Methods("GET")
 	routerWithAuth.HandleFunc("/get/{apiName}/{apiID}", endpoints.GetAPIByID).Methods("GET")
+	routerWithAuth.HandleFunc("/describe/{apiName}", endpoints.DescribeAPI).Methods("GET")
 	routerWithAuth.HandleFunc("/streamlogs/{apiName}", endpoints.ReadLogs)
 	routerWithAuth.HandleFunc("/logs/{apiName}", endpoints.GetLogURL).Methods("GET")
 
diff --git a/dev/generate_cli_md.sh b/dev/generate_cli_md.sh
index 5715f6fdb8..fdf2566624 100755
--- a/dev/generate_cli_md.sh
+++ b/dev/generate_cli_md.sh
@@ -33,6 +33,7 @@ echo "# CLI commands" >> $out_file
 commands=(
   "deploy"
   "get"
+  "describe"
   "logs"
   "refresh"
   "delete"
diff --git a/docs/clients/cli.md b/docs/clients/cli.md
index be43886dba..b10957bfe4 100644
--- a/docs/clients/cli.md
+++ b/docs/clients/cli.md
@@ -32,6 +32,20 @@ Flags:
   -h, --help            help for get
 ```
 
+## describe
+
+```text
+describe an api
+
+Usage:
+  cortex describe [API_NAME] [flags]
+
+Flags:
+  -e, --env string   environment to use
+  -w, --watch        re-run the command every 2 seconds
+  -h, --help         help for describe
+```
+
 ## logs
 
 ```text
diff --git a/docs/workloads/async/statuses.md b/docs/workloads/async/statuses.md
index 3ecaeba865..9c4787f293 100644
--- a/docs/workloads/async/statuses.md
+++ b/docs/workloads/async/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Request statuses
 
 | Status            | Meaning                                                               |
 | :---              | :---                                                                  |
@@ -6,3 +6,22 @@
 | in_progress       | Workload has been pulled by the API and is currently being processed  |
 | completed         | Workload has completed with success                                   |
 | failed            | Workload encountered an error during processing                       |
+
+# Replica states
+
+The replica states of an API can be inspected by running `cortex describe <api-name>`. Here are the possible states for each replica in an API:
+
+| State | Meaning |
+|:---|:---|
+| Ready | Replica is running and it has passed the readiness checks |
+| ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
+| NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designated port of the API |
+| Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
+| Creating | Replica is in the process of having its containers created |
+| ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
+| Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
+| Killed | Replica has had one of its containers killed |
+| KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploying |
+| Stalled | Replica has been in a pending state for more than 15 minutes; see [troubleshooting](../realtime/troubleshooting.md) |
+| Terminating | Replica is currently in the process of being terminated |
+| Unknown | Replica is in an unknown state |
diff --git a/docs/workloads/batch/statuses.md b/docs/workloads/batch/statuses.md
index 1bcddcd6bd..019ca55789 100644
--- a/docs/workloads/batch/statuses.md
+++ b/docs/workloads/batch/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Job statuses
 
 | Status                   | Meaning |
 | :--- | :--- |
diff --git a/docs/workloads/realtime/statuses.md b/docs/workloads/realtime/statuses.md
index 2ee32aca40..d4e201bfba 100644
--- a/docs/workloads/realtime/statuses.md
+++ b/docs/workloads/realtime/statuses.md
@@ -1,10 +1,18 @@
-# Statuses
+# Replica states
 
-| Status                | Meaning |
-| :--- | :--- |
-| live                  | API is deployed and ready to serve requests (at least one replica is running) |
-| updating              | API is updating |
-| error                 | API was not created due to an error; run `cortex logs <name>` to view the logs |
-| error (image pull)    | API was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
-| error (out of memory) | API was terminated due to excessive memory usage; try allocating more memory to the API and re-deploying |
-| compute unavailable   | API could not start due to insufficient memory, CPU, GPU, or Inf in the cluster; some replicas may be ready |
+The replica states of an API can be inspected by running `cortex describe <api-name>`. Here are the possible states for each replica in an API:
+
+| State | Meaning |
+|:---|:---|
+| Ready | Replica is running and it has passed the readiness checks |
+| ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
+| NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designated port of the API |
+| Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
+| Creating | Replica is in the process of having its containers created |
+| ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
+| Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
+| Killed | Replica has had one of its containers killed |
+| KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploying |
+| Stalled | Replica has been in a pending state for more than 15 minutes; see [troubleshooting](../realtime/troubleshooting.md) |
+| Terminating | Replica is currently in the process of being terminated |
+| Unknown | Replica is in an unknown state |
diff --git a/docs/workloads/realtime/troubleshooting.md b/docs/workloads/realtime/troubleshooting.md
index 61de9dfe74..5254d25aaa 100644
--- a/docs/workloads/realtime/troubleshooting.md
+++ b/docs/workloads/realtime/troubleshooting.md
@@ -4,14 +4,14 @@
 
 When making requests to your API, it's possible to get a `no healthy upstream` error message (with HTTP status code `503`). This means that there are currently no live replicas running for your API. This could happen for a few reasons:
 
-1. It's possible that your API is simply not ready yet. You can check the status of your API with `cortex get API_NAME`, and inspect the logs in CloudWatch with the help of `cortex logs API_NAME`.
-1. Your API may have errored during initialization or while responding to a previous request. `cortex get API_NAME` will show the status of your API, and you can view the logs for all replicas by visiting the CloudWatch Insights URL from `cortex logs API_NAME`.
+1. It's possible that your API is simply not ready yet. You can check the number of ready replicas on your API with `cortex get API_NAME`, and inspect the logs in CloudWatch with the help of `cortex logs API_NAME`.
+1. Your API may have errored during initialization or while responding to a previous request. `cortex describe API_NAME` will show the number of replicas that have failed to start on your API, and you can view the logs for all replicas by visiting the CloudWatch Insights URL from `cortex logs API_NAME`.
 
 If you are using API Gateway in front of your API endpoints, it is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) after 29 seconds if your request exceeds API Gateway's 29 second timeout. If this is the case, you can either modify your code to take less time, run on faster hardware (e.g. GPUs), or don't use API Gateway (there is no timeout when using the API's endpoint directly).
 
 ## API is stuck updating
 
-If your API is stuck in the "updating" or "compute unavailable" state (which is displayed when running `cortex get`), there are a few possible causes. Here are some things to check:
+If your API has pods stuck in the "pending" or "stalled" states (which are displayed when running `cortex describe API_NAME`), there are a few possible causes. Here are some things to check:
 
 ### Inspect API logs in CloudWatch
 
diff --git a/docs/workloads/task/statuses.md b/docs/workloads/task/statuses.md
index b51eaf010f..0631ab68f2 100644
--- a/docs/workloads/task/statuses.md
+++ b/docs/workloads/task/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Job statuses
 
 | Status                   | Meaning |
 | :--- | :--- |
diff --git a/go.mod b/go.mod
index 6acb918587..4381c8a46b 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,7 @@ require (
 	github.com/aws/amazon-vpc-cni-k8s v1.8.0
 	github.com/aws/aws-sdk-go v1.38.70
 	github.com/cenkalti/backoff/v4 v4.1.1 // indirect
-	github.com/containerd/containerd v1.5.2 // indirect
+	github.com/containerd/containerd v1.5.4 // indirect
 	github.com/cortexlabs/go-input v0.0.0-20200503032952-8b67a7a7b28d
 	github.com/cortexlabs/yaml v0.0.0-20210628201654-31e52ba8433b
 	github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
@@ -67,7 +67,7 @@ require (
 	golang.org/x/time v0.0.0-20210611083556-38a9dc6acbc6 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
 	google.golang.org/genproto v0.0.0-20210701133433-6b8dcf568a95 // indirect
-	google.golang.org/grpc v1.39.0 // indirect
+	google.golang.org/grpc v1.39.0
 	gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
 	gopkg.in/karalabe/cookiejar.v2 v2.0.0-20150724131613-8dcd6a7f4951
 	gopkg.in/segmentio/analytics-go.v3 v3.1.0
diff --git a/go.sum b/go.sum
index 3517621d1e..e2c0784e16 100644
--- a/go.sum
+++ b/go.sum
@@ -94,6 +94,7 @@ github.com/Microsoft/hcsshim v0.8.9/go.mod h1:5692vkUqntj1idxauYlpoINNKeqCiG6Sg3
 github.com/Microsoft/hcsshim v0.8.14/go.mod h1:NtVKoYxQuTLx6gEq0L96c9Ju4JbRJ4nY2ow3VK6a9Lg=
 github.com/Microsoft/hcsshim v0.8.15/go.mod h1:x38A4YbHbdxJtc0sF6oIz+RG0npwSCAvn69iY6URG00=
 github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600=
+github.com/Microsoft/hcsshim v0.8.18/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
 github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU=
 github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY=
 github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
@@ -211,8 +212,9 @@ github.com/containerd/containerd v1.5.0-beta.1/go.mod h1:5HfvG1V2FsKesEGQ17k5/T7
 github.com/containerd/containerd v1.5.0-beta.3/go.mod h1:/wr9AVtEM7x9c+n0+stptlo/uBBoBORwEx6ardVcmKU=
 github.com/containerd/containerd v1.5.0-beta.4/go.mod h1:GmdgZd2zA2GYIBZ0w09ZvgqEq8EfBp/m3lcVZIvPHhI=
 github.com/containerd/containerd v1.5.0-rc.0/go.mod h1:V/IXoMqNGgBlabz3tHD2TWDoTJseu1FGOKuoA4nNb2s=
-github.com/containerd/containerd v1.5.2 h1:MG/Bg1pbmMb61j3wHCFWPxESXHieiKr2xG64px/k8zQ=
-github.com/containerd/containerd v1.5.2/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g=
+github.com/containerd/containerd v1.5.1/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g=
+github.com/containerd/containerd v1.5.4 h1:uPF0og3ByFzDnaStfiQj3fVGTEtaSNyU+bW7GR/nqGA=
+github.com/containerd/containerd v1.5.4/go.mod h1:sx18RgvW6ABJ4iYUw7Q5x7bgFOAB9B6G7+yO0XBc4zw=
 github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
 github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
 github.com/containerd/continuity v0.0.0-20190827140505-75bee3e2ccb6/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
diff --git a/pkg/activator/activator.go b/pkg/activator/activator.go
index b7c54adc3d..7b68736951 100644
--- a/pkg/activator/activator.go
+++ b/pkg/activator/activator.go
@@ -131,7 +131,7 @@ func (a *activator) getOrCreateAPIActivator(ctx context.Context, apiName string)
 		return nil, errors.WithStack(err)
 	}
 
-	maxQueueLength, maxConcurrency, err := concurrencyFromAnnotations(vs.Annotations)
+	maxQueueLength, maxConcurrency, err := userconfig.ConcurrencyFromAnnotations(vs)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/activator/helpers.go b/pkg/activator/helpers.go
index f32c7e54f2..5bce2cb7bf 100644
--- a/pkg/activator/helpers.go
+++ b/pkg/activator/helpers.go
@@ -17,8 +17,6 @@ limitations under the License.
 package activator
 
 import (
-	"strconv"
-
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"k8s.io/apimachinery/pkg/api/meta"
@@ -50,8 +48,7 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 		return apiMeta{}, errors.ErrorUnexpected("got a virtual service without apiName label")
 	}
 
-	annotations := resource.GetAnnotations()
-	maxQueueLength, maxConcurrency, err := concurrencyFromAnnotations(annotations)
+	maxQueueLength, maxConcurrency, err := userconfig.ConcurrencyFromAnnotations(resource)
 	if err != nil {
 		return apiMeta{}, err
 	}
@@ -60,22 +57,8 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 		apiName:        apiName,
 		apiKind:        userconfig.KindFromString(apiKind),
 		labels:         labels,
-		annotations:    annotations,
+		annotations:    resource.GetAnnotations(),
 		maxConcurrency: maxConcurrency,
 		maxQueueLength: maxQueueLength,
 	}, nil
 }
-
-func concurrencyFromAnnotations(annotations map[string]string) (int, int, error) {
-	maxQueueLength, err := strconv.Atoi(annotations[userconfig.MaxQueueLengthAnnotationKey])
-	if err != nil {
-		return 0, 0, errors.ErrorUnexpected("failed to parse annotation", userconfig.MaxQueueLengthAnnotationKey)
-	}
-
-	maxConcurrency, err := strconv.Atoi(annotations[userconfig.MaxConcurrencyAnnotationKey])
-	if err != nil {
-		return 0, 0, errors.ErrorUnexpected("failed to parse annotation", userconfig.MaxConcurrencyAnnotationKey)
-	}
-
-	return maxQueueLength, maxConcurrency, err
-}
diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go
index 7ea590fc45..3fe860d776 100644
--- a/pkg/consts/consts.go
+++ b/pkg/consts/consts.go
@@ -76,8 +76,7 @@ var (
 	CortexProbeHeader         = "X-Cortex-Probe"
 	CortexOriginHeader        = "X-Cortex-Origin"
 
-	WaitForInitializingReplicasTimeout = 15 * time.Minute
-	WaitForReadyReplicasTimeout        = 20 * time.Minute
+	WaitForReadyReplicasTimeout = 20 * time.Minute
 )
 
 func DefaultRegistry() string {
diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index d3247de9b8..693557818e 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -24,7 +24,6 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
-	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	kcore "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -217,17 +216,25 @@ type NetworkingSpec struct {
 
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Type=integer
+	Ready int32 `json:"ready"`
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Type=integer
+	Requested int32 `json:"requested"`
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Type=integer
+	UpToDate int32 `json:"up_to_date"`
+	// +kubebuilder:validation:Optional
 	// +kubebuilder:validation:Type=string
-	Status        status.Code          `json:"status"`
-	ReplicaCounts status.ReplicaCounts `json:"replica_counts"`
-	Endpoint      string               `json:"endpoint,omitempty"`
+	Endpoint string `json:"endpoint,omitempty"`
 }
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
-//+kubebuilder:printcolumn:JSONPath=".spec.pod.replicas",name="Replicas",type="integer"
-//+kubebuilder:printcolumn:JSONPath=".status.replica_counts.updated.ready",name="Ready",type="integer"
-//+kubebuilder:printcolumn:JSONPath=".status.status",name="Status",type="string"
+//+kubebuilder:printcolumn:JSONPath=".status.ready",name="Ready",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.requested",name="Requested",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.up_to_date",name="Up-To-Date",type="integer"
 //+kubebuilder:printcolumn:JSONPath=".status.endpoint",name="Endpoint",type="string"
 
 // RealtimeAPI is the Schema for the realtimeapis API
@@ -266,7 +273,6 @@ func (api RealtimeAPI) GetOrCreateAPIIDs() (deploymentID, podID, specID, apiID s
 	if apiID == "" ||
 		api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
 		api.Annotations["cortex.dev/spec-id"] != specID {
-
 		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
 	}
 
diff --git a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
index df2ba5ab3d..c81d4ac6f6 100644
--- a/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/crds/apis/serverless/v1alpha1/zz_generated.deepcopy.go
@@ -243,7 +243,6 @@ func (in *RealtimeAPISpec) DeepCopy() *RealtimeAPISpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RealtimeAPIStatus) DeepCopyInto(out *RealtimeAPIStatus) {
 	*out = *in
-	out.ReplicaCounts = in.ReplicaCounts
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RealtimeAPIStatus.
diff --git a/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml b/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
index 63b1987bd9..a60ccbba4a 100644
--- a/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
+++ b/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
@@ -251,16 +251,28 @@ spec:
               worker_counts:
                 description: Detailed worker counts with respective status
                 properties:
+                  creating:
+                    format: int32
+                    type: integer
+                  err_image_pull:
+                    format: int32
+                    type: integer
                   failed:
                     format: int32
                     type: integer
-                  initializing:
+                  killed:
+                    format: int32
+                    type: integer
+                  killed_oom:
+                    format: int32
+                    type: integer
+                  not_ready:
                     format: int32
                     type: integer
                   pending:
                     format: int32
                     type: integer
-                  running:
+                  ready:
                     format: int32
                     type: integer
                   stalled:
@@ -269,6 +281,9 @@ spec:
                   succeeded:
                     format: int32
                     type: integer
+                  terminating:
+                    format: int32
+                    type: integer
                   unknown:
                     format: int32
                     type: integer
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 3e8b6d267b..58d12f66df 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -17,15 +17,15 @@ spec:
   scope: Namespaced
   versions:
   - additionalPrinterColumns:
-    - jsonPath: .spec.pod.replicas
-      name: Replicas
-      type: integer
-    - jsonPath: .status.replica_counts.updated.ready
+    - jsonPath: .status.ready
       name: Ready
       type: integer
-    - jsonPath: .status.status
-      name: Status
-      type: string
+    - jsonPath: .status.requested
+      name: Requested
+      type: integer
+    - jsonPath: .status.up_to_date
+      name: Up-To-Date
+      type: integer
     - jsonPath: .status.endpoint
       name: Endpoint
       type: string
@@ -604,93 +604,15 @@ spec:
             properties:
               endpoint:
                 type: string
-              replica_counts:
-                properties:
-                  requested:
-                    format: int32
-                    type: integer
-                  stale:
-                    properties:
-                      err_image_pull:
-                        format: int32
-                        type: integer
-                      failed:
-                        format: int32
-                        type: integer
-                      initializing:
-                        format: int32
-                        type: integer
-                      killed:
-                        format: int32
-                        type: integer
-                      killed_oom:
-                        format: int32
-                        type: integer
-                      not_ready:
-                        format: int32
-                        type: integer
-                      pending:
-                        format: int32
-                        type: integer
-                      ready:
-                        format: int32
-                        type: integer
-                      stalled:
-                        format: int32
-                        type: integer
-                      terminating:
-                        format: int32
-                        type: integer
-                      unknown:
-                        format: int32
-                        type: integer
-                    required:
-                    - ready
-                    type: object
-                  updated:
-                    properties:
-                      err_image_pull:
-                        format: int32
-                        type: integer
-                      failed:
-                        format: int32
-                        type: integer
-                      initializing:
-                        format: int32
-                        type: integer
-                      killed:
-                        format: int32
-                        type: integer
-                      killed_oom:
-                        format: int32
-                        type: integer
-                      not_ready:
-                        format: int32
-                        type: integer
-                      pending:
-                        format: int32
-                        type: integer
-                      ready:
-                        format: int32
-                        type: integer
-                      stalled:
-                        format: int32
-                        type: integer
-                      terminating:
-                        format: int32
-                        type: integer
-                      unknown:
-                        format: int32
-                        type: integer
-                    required:
-                    - ready
-                    type: object
-                type: object
-              status:
-                type: string
-            required:
-            - replica_counts
-            - status
+              ready:
+                format: int32
+                type: integer
+              requested:
+                format: int32
+                type: integer
+              up_to_date:
+                format: int32
+                type: integer
             type: object
         type: object
     served: true
diff --git a/pkg/crds/controllers/batch/batchjob_controller_helpers.go b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
index dd052dfc58..0f11ba67ba 100644
--- a/pkg/crds/controllers/batch/batchjob_controller_helpers.go
+++ b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
@@ -442,6 +442,22 @@ func (r *BatchJobReconciler) getWorkerJob(ctx context.Context, batchJob batch.Ba
 	return &job, nil
 }
 
+func (r *BatchJobReconciler) getWorkerJobPods(ctx context.Context, batchJob batch.BatchJob) ([]kcore.Pod, error) {
+	workerJobPods := kcore.PodList{}
+	if err := r.List(ctx, &workerJobPods,
+		client.InNamespace(consts.DefaultNamespace),
+		client.MatchingLabels{
+			"jobID":            batchJob.Name,
+			"apiName":          batchJob.Spec.APIName,
+			"apiID":            batchJob.Spec.APIID,
+			"cortex.dev/batch": "worker",
+		},
+	); err != nil {
+		return nil, err
+	}
+	return workerJobPods.Items, nil
+}
+
 func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.BatchJob, statusInfo batchJobStatusInfo) error {
 	batchJob.Status.ID = batchJob.Name
 
@@ -461,6 +477,11 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 		batchJob.Status.TotalBatchCount = statusInfo.TotalBatchCount
 	}
 
+	workerJobPods, err := r.getWorkerJobPods(ctx, *batchJob)
+	if err != nil {
+		return errors.Wrap(err, "failed to retrieve worker pods")
+	}
+
 	worker := statusInfo.WorkerJob
 	if worker != nil {
 		batchJob.Status.EndTime = worker.Status.CompletionTime // assign right away, because it's a pointer
@@ -486,13 +507,11 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 				}
 			}
 
-			isWorkerOOM, err := r.checkWorkersOOM(ctx, batchJob)
-			if err != nil {
-				return err
-			}
-
-			if isWorkerOOM {
-				batchJobStatus = status.JobWorkerOOM
+			for i := range workerJobPods {
+				if k8s.WasPodOOMKilled(&workerJobPods[i]) {
+					batchJobStatus = status.JobWorkerOOM
+					break
+				}
 			}
 
 			batchJob.Status.Status = batchJobStatus
@@ -512,11 +531,8 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 			batchJob.Status.Status = status.JobRunning
 		}
 
-		batchJob.Status.WorkerCounts = &status.WorkerCounts{
-			Running:   worker.Status.Active,
-			Succeeded: worker.Status.Succeeded,
-			Failed:    worker.Status.Failed,
-		}
+		workerCounts := getReplicaCounts(workerJobPods)
+		batchJob.Status.WorkerCounts = &workerCounts
 	}
 
 	if err := r.Status().Update(ctx, batchJob); err != nil {
@@ -526,27 +542,6 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 	return nil
 }
 
-func (r *BatchJobReconciler) checkWorkersOOM(ctx context.Context, batchJob *batch.BatchJob) (bool, error) {
-	workerJobPods := kcore.PodList{}
-	if err := r.List(ctx, &workerJobPods,
-		client.InNamespace(consts.DefaultNamespace),
-		client.MatchingLabels{
-			"jobID":   batchJob.Name,
-			"apiName": batchJob.Spec.APIName,
-			"apiID":   batchJob.Spec.APIID,
-		},
-	); err != nil {
-		return false, err
-	}
-
-	for i := range workerJobPods.Items {
-		if k8s.WasPodOOMKilled(&workerJobPods.Items[i]) {
-			return true, nil
-		}
-	}
-	return false, nil
-}
-
 func (r *BatchJobReconciler) deleteSQSQueue(batchJob batch.BatchJob) error {
 	queueURL := r.getQueueURL(batchJob)
 	input := sqs.DeleteQueueInput{QueueUrl: aws.String(queueURL)}
@@ -736,3 +731,34 @@ func saveJobStatus(r *BatchJobReconciler, batchJob batch.BatchJob) error {
 		},
 	)
 }
+
+func getReplicaCounts(workerJobPods []kcore.Pod) status.WorkerCounts {
+	workerCounts := status.WorkerCounts{}
+	for i := range workerJobPods {
+		switch k8s.GetPodStatus(&workerJobPods[i]) {
+		case k8s.PodStatusPending:
+			workerCounts.Pending++
+		case k8s.PodStatusStalled:
+			workerCounts.Stalled++
+		case k8s.PodStatusCreating:
+			workerCounts.Creating++
+		case k8s.PodStatusNotReady:
+			workerCounts.NotReady++
+		case k8s.PodStatusErrImagePull:
+			workerCounts.ErrImagePull++
+		case k8s.PodStatusTerminating:
+			workerCounts.Terminating++
+		case k8s.PodStatusFailed:
+			workerCounts.Failed++
+		case k8s.PodStatusKilled:
+			workerCounts.Killed++
+		case k8s.PodStatusKilledOOM:
+			workerCounts.KilledOOM++
+		case k8s.PodStatusSucceeded:
+			workerCounts.Succeeded++
+		case k8s.PodStatusUnknown:
+			workerCounts.Unknown++
+		}
+	}
+	return workerCounts
+}
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 047ef42a43..64ec0cd000 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -19,7 +19,6 @@ package serverlesscontroller
 import (
 	"context"
 	"fmt"
-	"time"
 
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
@@ -29,7 +28,6 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/lib/urls"
-	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	istionetworking "istio.io/api/networking/v1beta1"
@@ -64,103 +62,21 @@ func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *serverles
 		return errors.Wrap(err, "failed to get api endpoint")
 	}
 
-	apiStatus := status.Pending
-	api.Status.ReplicaCounts = status.ReplicaCounts{}
 	if deployment != nil {
-		if deployment.Status.ReadyReplicas == api.Spec.Pod.Replicas {
-			apiStatus = status.Live
-			api.Status.ReplicaCounts.Updated.Ready = deployment.Status.ReadyReplicas
-			// TODO: handle out of date (?)
-		} else {
-			if err = r.getReplicaCounts(ctx, api); err != nil {
-				return err
-			}
-			apiStatus = r.getStatusCode(api)
+		api.Status.Ready = deployment.Status.ReadyReplicas
+		api.Status.UpToDate = deployment.Status.UpdatedReplicas
+		if deployment.Spec.Replicas != nil {
+			api.Status.Requested = *deployment.Spec.Replicas
 		}
-	}
-
-	api.Status.Status = apiStatus
-	if err = r.Status().Update(ctx, api); err != nil {
-		return err
-	}
-
-	return nil
-}
 
-func (r *RealtimeAPIReconciler) getReplicaCounts(ctx context.Context, api *serverless.RealtimeAPI) error {
-	var podList kcore.PodList
-	if err := r.List(ctx, &podList, client.MatchingLabels{
-		"apiName":      api.Name,
-		"apiKind":      userconfig.RealtimeAPIKind.String(),
-		"deploymentID": api.Annotations["cortex.dev/deployment-id"],
-	}); err != nil {
-		return err
-	}
-	for i := range podList.Items {
-		pod := &podList.Items[i]
-		if k8s.IsPodReady(pod) {
-			api.Status.ReplicaCounts.Updated.Ready++
-			continue
-		}
-
-		switch k8s.GetPodStatus(pod) {
-		case k8s.PodStatusPending:
-			if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-				api.Status.ReplicaCounts.Updated.Stalled++
-			} else {
-				api.Status.ReplicaCounts.Updated.Pending++
-			}
-		case k8s.PodStatusInitializing:
-			api.Status.ReplicaCounts.Updated.Initializing++
-		case k8s.PodStatusRunning:
-			api.Status.ReplicaCounts.Updated.Initializing++
-		case k8s.PodStatusErrImagePull:
-			api.Status.ReplicaCounts.Updated.ErrImagePull++
-		case k8s.PodStatusTerminating:
-			api.Status.ReplicaCounts.Updated.Terminating++
-		case k8s.PodStatusFailed:
-			api.Status.ReplicaCounts.Updated.Failed++
-		case k8s.PodStatusKilled:
-			api.Status.ReplicaCounts.Updated.Killed++
-		case k8s.PodStatusKilledOOM:
-			api.Status.ReplicaCounts.Updated.KilledOOM++
-		default:
-			api.Status.ReplicaCounts.Updated.Unknown++
+		if err = r.Status().Update(ctx, api); err != nil {
+			return err
 		}
 	}
 
 	return nil
 }
 
-func (r *RealtimeAPIReconciler) getStatusCode(api *serverless.RealtimeAPI) status.Code {
-	counts := api.Status.ReplicaCounts
-	if counts.Updated.Ready >= api.Spec.Pod.Replicas {
-		return status.Live
-	}
-
-	if counts.Updated.ErrImagePull > 0 {
-		return status.ErrorImagePull
-	}
-
-	if counts.Updated.Failed > 0 || counts.Updated.Killed > 0 {
-		return status.Error
-	}
-
-	if counts.Updated.KilledOOM > 0 {
-		return status.OOM
-	}
-
-	if counts.Updated.Stalled > 0 {
-		return status.Stalled
-	}
-
-	if counts.Updated.Ready >= api.Spec.Autoscaling.MinReplicas {
-		return status.Live
-	}
-
-	return status.Updating
-}
-
 func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	deployment := kapps.Deployment{
 		ObjectMeta: kmeta.ObjectMeta{
@@ -281,6 +197,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
 				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
+				"apiID":          api.Annotations["cortex.dev/api-id"],
 				"cortex.dev/api": "true",
 			},
 			Annotations: map[string]string{
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
index e841a7b8a8..293e88a476 100644
--- a/pkg/lib/k8s/pod.go
+++ b/pkg/lib/k8s/pod.go
@@ -23,6 +23,7 @@ import (
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -45,37 +46,48 @@ const (
 	ReasonCompleted = "Completed"
 )
 
+type PodSpec struct {
+	Name        string
+	K8sPodSpec  kcore.PodSpec
+	Labels      map[string]string
+	Annotations map[string]string
+}
+
 type PodStatus string
 
 const (
-	PodStatusUnknown      PodStatus = "Unknown"
 	PodStatusPending      PodStatus = "Pending"
-	PodStatusInitializing PodStatus = "Initializing"
-	PodStatusRunning      PodStatus = "Running"
-	PodStatusErrImagePull PodStatus = "Image pull error"
+	PodStatusCreating     PodStatus = "Creating"
+	PodStatusNotReady     PodStatus = "NotReady"
+	PodStatusReady        PodStatus = "Ready"
+	PodStatusErrImagePull PodStatus = "ErrImagePull"
 	PodStatusTerminating  PodStatus = "Terminating"
-	PodStatusSucceeded    PodStatus = "Succeeded"
 	PodStatusFailed       PodStatus = "Failed"
 	PodStatusKilled       PodStatus = "Killed"
-	PodStatusKilledOOM    PodStatus = "Out of Memory"
+	PodStatusKilledOOM    PodStatus = "KilledOOM"
+	PodStatusStalled      PodStatus = "Stalled"
+	PodStatusSucceeded    PodStatus = "Succeeded"
+	PodStatusUnknown      PodStatus = "Unknown"
 )
 
-var _killStatuses = map[int32]bool{
-	137: true, // SIGKILL
-	143: true, // SIGTERM
-	130: true, // SIGINT
-	129: true, // SIGHUP
-}
+var (
+	_killStatuses = map[int32]bool{
+		137: true, // SIGKILL
+		143: true, // SIGTERM
+		130: true, // SIGINT
+		129: true, // SIGHUP
+	}
 
-// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/images/types.go#L27
-var _imagePullErrorStrings = strset.New("ErrImagePull", "ImagePullBackOff", "RegistryUnavailable")
+	_evictedMemoryMessageRegex = regexp.MustCompile(`(?i)low\W+on\W+resource\W+memory`)
 
-type PodSpec struct {
-	Name        string
-	K8sPodSpec  kcore.PodSpec
-	Labels      map[string]string
-	Annotations map[string]string
-}
+	// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/images/types.go#L27
+	_imagePullErrorStrings = strset.New("ErrImagePull", "ImagePullBackOff", "RegistryUnavailable")
+
+	// https://github.com/kubernetes/kubernetes/blob/9f47110aa29094ed2878cf1d85874cb59214664a/staging/src/k8s.io/api/core/v1/types.go#L76-L77
+	_creatingReasons = strset.New("ContainerCreating", "PodInitializing")
+
+	_waitForCreatingPodTimeout = time.Minute * 15
+)
 
 func Pod(spec *PodSpec) *kcore.Pod {
 	pod := &kcore.Pod{
@@ -90,6 +102,28 @@ func Pod(spec *PodSpec) *kcore.Pod {
 	return pod
 }
 
+// GetPodConditionOf finds the pod condition of type podType and returns its state (nil unless the status
+// is True or False) together with the condition itself; both are nil when pod is nil or nothing matches.
+func GetPodConditionOf(pod *kcore.Pod, podType kcore.PodConditionType) (*bool, *kcore.PodCondition) {
+	if pod == nil {
+		return nil, nil
+	}
+	for i := range pod.Status.Conditions {
+		cond := &pod.Status.Conditions[i]
+		if cond.Type != podType {
+			continue
+		}
+		switch cond.Status {
+		case kcore.ConditionTrue:
+			return pointer.Bool(true), cond
+		case kcore.ConditionFalse:
+			return pointer.Bool(false), cond
+		}
+		return nil, cond
+	}
+	return nil, nil
+}
+
 func (c *Client) CreatePod(pod *kcore.Pod) (*kcore.Pod, error) {
 	pod.TypeMeta = _podTypeMeta
 	pod, err := c.podClient.Create(context.Background(), pod, kmeta.CreateOptions{})
@@ -120,14 +154,26 @@ func (c *Client) ApplyPod(pod *kcore.Pod) (*kcore.Pod, error) {
 }
 
 func IsPodReady(pod *kcore.Pod) bool {
-	if GetPodStatus(pod) != PodStatusRunning {
+	if GetPodStatus(pod) != PodStatusReady {
 		return false
 	}
 
-	for _, condition := range pod.Status.Conditions {
-		if condition.Type == "Ready" && condition.Status == kcore.ConditionTrue {
-			return true
-		}
+	podConditionState, _ := GetPodConditionOf(pod, kcore.PodReady)
+	if podConditionState != nil && *podConditionState {
+		return true
+	}
+
+	return false
+}
+
+// IsPodStalled reports whether a pod in the Pending phase has had PodScheduled=False for at least _waitForCreatingPodTimeout.
+func IsPodStalled(pod *kcore.Pod) bool {
+	if pod == nil || pod.Status.Phase != kcore.PodPending { // GetPodStatus reports such a pod as PodStatusStalled, so comparing it with PodStatusPending could never match
+		return false
+	}
+	scheduled, condition := GetPodConditionOf(pod, kcore.PodScheduled)
+	if scheduled != nil && !*scheduled && !condition.LastTransitionTime.Time.IsZero() && time.Since(condition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
+		return true
 	}
 
 	return false
@@ -137,7 +183,7 @@ func GetPodReadyTime(pod *kcore.Pod) *time.Time {
 	for i := range pod.Status.Conditions {
 		condition := pod.Status.Conditions[i]
 
-		if condition.Type == "Ready" && condition.Status == kcore.ConditionTrue {
+		if condition.Type == kcore.PodReady && condition.Status == kcore.ConditionTrue {
 			if condition.LastTransitionTime.Time.IsZero() {
 				return nil
 			}
@@ -148,8 +194,6 @@ func GetPodReadyTime(pod *kcore.Pod) *time.Time {
 	return nil
 }
 
-var _evictedMemoryMessageRegex = regexp.MustCompile(`(?i)low\W+on\W+resource\W+memory`)
-
 func WasPodOOMKilled(pod *kcore.Pod) bool {
 	if pod.Status.Reason == ReasonEvicted && _evictedMemoryMessageRegex.MatchString(pod.Status.Message) {
 		return true
@@ -176,15 +220,11 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 
 	switch pod.Status.Phase {
 	case kcore.PodPending:
-		initPodStatus := PodStatusFromContainerStatuses(pod.Status.InitContainerStatuses)
-		if initPodStatus == PodStatusRunning {
-			return PodStatusInitializing
+		podConditionState, podCondition := GetPodConditionOf(pod, kcore.PodScheduled)
+		if podConditionState != nil && !*podConditionState && !podCondition.LastTransitionTime.Time.IsZero() && time.Since(podCondition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
+			return PodStatusStalled
 		}
-		allPodStatus := PodStatusFromContainerStatuses(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...))
-		if allPodStatus == PodStatusErrImagePull {
-			return PodStatusErrImagePull
-		}
-		return PodStatusPending
+		return PodStatusFromContainerStatuses(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...))
 	case kcore.PodSucceeded:
 		return PodStatusSucceeded
 	case kcore.PodFailed:
@@ -215,7 +255,17 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 			return PodStatusTerminating
 		}
 
-		return PodStatusFromContainerStatuses(pod.Status.ContainerStatuses)
+		podConditionState, _ := GetPodConditionOf(pod, kcore.PodReady)
+		if podConditionState != nil && *podConditionState {
+			return PodStatusReady
+		}
+
+		status := PodStatusFromContainerStatuses(pod.Status.ContainerStatuses)
+		if status == PodStatusReady {
+			return PodStatusNotReady
+		}
+
+		return status
 	default:
 		return PodStatusUnknown
 	}
@@ -224,7 +274,9 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) PodStatus {
 	numContainers := len(containerStatuses)
 	numWaiting := 0
-	numRunning := 0
+	numCreating := 0
+	numNotReady := 0
+	numReady := 0
 	numSucceeded := 0
 	numFailed := 0
 	numKilled := 0
@@ -235,9 +287,9 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 	}
 	for _, containerStatus := range containerStatuses {
 		if containerStatus.State.Running != nil && containerStatus.Ready {
-			numRunning++
-		} else if containerStatus.State.Running != nil && containerStatus.RestartCount == 0 {
-			numRunning++
+			numReady++
+		} else if containerStatus.State.Running != nil && !containerStatus.Ready {
+			numNotReady++
 		} else if containerStatus.State.Terminated != nil {
 			exitCode := containerStatus.State.Terminated.ExitCode
 			reason := containerStatus.State.Terminated.Reason
@@ -264,6 +316,8 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 			}
 		} else if containerStatus.State.Waiting != nil && _imagePullErrorStrings.Has(containerStatus.State.Waiting.Reason) {
 			return PodStatusErrImagePull
+		} else if containerStatus.State.Waiting != nil && _creatingReasons.Has(containerStatus.State.Waiting.Reason) {
+			numCreating++
 		} else {
 			// either containerStatus.State.Waiting != nil or all containerStatus.States are nil (which implies waiting)
 			numWaiting++
@@ -279,8 +333,12 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 		return PodStatusPending
 	} else if numSucceeded == numContainers {
 		return PodStatusSucceeded
+	} else if numCreating > 0 {
+		return PodStatusCreating
+	} else if numNotReady > 0 {
+		return PodStatusNotReady
 	} else {
-		return PodStatusRunning
+		return PodStatusReady
 	}
 }
 
diff --git a/pkg/operator/endpoints/describe.go b/pkg/operator/endpoints/describe.go
new file mode 100644
index 0000000000..b574d5eefc
--- /dev/null
+++ b/pkg/operator/endpoints/describe.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package endpoints
+
+import (
+	"net/http"
+
+	"github.com/cortexlabs/cortex/pkg/operator/resources"
+	"github.com/gorilla/mux"
+)
+
+// DescribeAPI passes the "apiName" route variable to resources.DescribeAPI and writes the result with respondJSON (or respondError on failure).
+func DescribeAPI(w http.ResponseWriter, r *http.Request) {
+	vars := mux.Vars(r)
+	response, err := resources.DescribeAPI(vars["apiName"])
+	if err != nil {
+		respondError(w, r, err)
+		return
+	}
+
+	respondJSON(w, r, response)
+}
diff --git a/pkg/operator/endpoints/logs.go b/pkg/operator/endpoints/logs.go
index dbe10828b1..4daa1904c4 100644
--- a/pkg/operator/endpoints/logs.go
+++ b/pkg/operator/endpoints/logs.go
@@ -19,6 +19,7 @@ package endpoints
 import (
 	"net/http"
 
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/resources"
 	"github.com/cortexlabs/cortex/pkg/operator/resources/asyncapi"
@@ -98,7 +99,12 @@ func GetLogURL(w http.ResponseWriter, r *http.Request) {
 			respondError(w, r, err)
 			return
 		}
-		logURL, err := operator.APILogURL(apiResponse[0].Spec)
+		// the spec is a pointer and may be absent; respond and stop before it is dereferenced below
+		if apiResponse[0].Spec == nil {
+			respondError(w, r, errors.ErrorUnexpected("unable to get api spec", apiName))
+			return
+		}
+		logURL, err := operator.APILogURL(*apiResponse[0].Spec)
 		if err != nil {
 			respondError(w, r, err)
 			return
@@ -112,7 +118,12 @@ func GetLogURL(w http.ResponseWriter, r *http.Request) {
 			respondError(w, r, err)
 			return
 		}
-		logURL, err := operator.APILogURL(apiResponse[0].Spec)
+		// the spec is a pointer and may be absent; respond and stop before it is dereferenced below
+		if apiResponse[0].Spec == nil {
+			respondError(w, r, errors.ErrorUnexpected("unable to get api spec", apiName))
+			return
+		}
+		logURL, err := operator.APILogURL(*apiResponse[0].Spec)
 		if err != nil {
 			respondError(w, r, err)
 			return
diff --git a/pkg/operator/operator/k8s.go b/pkg/operator/operator/k8s.go
index f9536596ce..43e36168c9 100644
--- a/pkg/operator/operator/k8s.go
+++ b/pkg/operator/operator/k8s.go
@@ -22,6 +22,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/lib/urls"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 )
 
 // APILoadBalancerURL returns the http endpoint of the ingress load balancer for deployed APIs
@@ -63,3 +64,20 @@ func APIEndpoint(api *spec.API) (string, error) {
 
 	return urls.Join(baseAPIEndpoint, *api.Networking.Endpoint), nil
 }
+
+// APIEndpointFromResource joins the load balancer URL (with a leading https:// rewritten to http://)
+// and the endpoint read from the annotation on the resource's virtual service.
+func APIEndpointFromResource(deployedResource *DeployedResource) (string, error) {
+	endpointPath, err := userconfig.EndpointFromAnnotation(deployedResource.VirtualService)
+	if err != nil {
+		return "", err
+	}
+
+	loadBalancerURL, err := APILoadBalancerURL()
+	if err != nil {
+		return "", err
+	}
+
+	httpBaseURL := strings.Replace(loadBalancerURL, "https://", "http://", 1)
+	return urls.Join(httpBaseURL, endpointPath), nil
+}
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 39cce27446..d662223f2b 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -19,6 +19,7 @@ package asyncapi
 import (
 	"fmt"
 	"path/filepath"
+	"sort"
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
@@ -31,6 +32,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
@@ -249,13 +251,71 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
+// GetAllAPIs returns status and metadata for every deployment labeled "cortex.dev/async": "api", sorted by API name.
+func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
+	responsesByName := map[string]schema.APIResponse{}
+	names := make([]string, 0, len(deployments))
+
+	for i := range deployments {
+		deployment := &deployments[i]
+		if deployment.Labels["cortex.dev/async"] != "api" {
+			continue
+		}
+
+		name := deployment.Labels["apiName"]
+		names = append(names, name)
+
+		metadata, err := spec.MetadataFromDeployment(deployment)
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", name))
+		}
+		responsesByName[name] = schema.APIResponse{Status: status.FromDeployment(deployment), Metadata: metadata}
+	}
+
+	sort.Strings(names)
+	responses := make([]schema.APIResponse, 0, len(names))
+	for _, name := range names {
+		responses = append(responses, responsesByName[name])
+	}
+
+	return responses, nil
+}
+
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	status, err := GetStatus(deployedResource.Name)
+	var apiDeployment *kapps.Deployment
+	var gatewayDeployment *kapps.Deployment
+
+	err := parallel.RunFirstErr(
+		func() error {
+			var err error
+			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+			return err
+		},
+		func() error {
+			var err error
+			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(deployedResource.Name))
+			return err
+		},
+	)
 	if err != nil {
 		return nil, err
 	}
 
-	api, err := operator.DownloadAPISpec(status.APIName, status.APIID)
+	if apiDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find api deployment", deployedResource.Name)
+	}
+
+	if gatewayDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
+	}
+
+	apiStatus := status.FromDeployment(apiDeployment)
+	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
+
+	api, err := operator.DownloadAPISpec(apiMetadata.Name, apiMetadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -269,43 +329,72 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:         *api,
-			Status:       status,
-			Endpoint:     apiEndpoint,
+			Spec:         api,
+			Metadata:     apiMetadata,
+			Status:       apiStatus,
+			Endpoint:     &apiEndpoint,
 			DashboardURL: dashboardURL,
 		},
 	}, nil
 }
 
-func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments, pods)
+func DescribeAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
+	var apiDeployment *kapps.Deployment
+	var gatewayDeployment *kapps.Deployment
+
+	err := parallel.RunFirstErr(
+		func() error {
+			var err error
+			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+			return err
+		},
+		func() error {
+			var err error
+			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(deployedResource.Name))
+			return err
+		},
+	)
 	if err != nil {
 		return nil, err
 	}
 
-	apiNames, apiIDs := namesAndIDsFromStatuses(statuses)
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
+	if apiDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find api deployment", deployedResource.Name)
 	}
 
-	asyncAPIs := make([]schema.APIResponse, len(apis))
+	if gatewayDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
+	}
 
-	for i := range apis {
-		api := apis[i]
-		endpoint, err := operator.APIEndpoint(&api)
-		if err != nil {
-			return nil, err
-		}
+	apiStatus := status.FromDeployment(apiDeployment)
+	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
 
-		asyncAPIs[i] = schema.APIResponse{
-			Spec:     api,
-			Status:   &statuses[i],
-			Endpoint: endpoint,
-		}
+	apiPods, err := config.K8s.ListPodsByLabels(map[string]string{
+		"apiName":          apiDeployment.Labels["apiName"],
+		"cortex.dev/async": "api",
+	})
+	if err != nil {
+		return nil, err
 	}
 
-	return asyncAPIs, nil
+	apiEndpoint, err := operator.APIEndpointFromResource(deployedResource)
+	if err != nil {
+		return nil, err
+	}
+
+	dashboardURL := pointer.String(getDashboardURL(deployedResource.Name))
+
+	return []schema.APIResponse{
+		{
+			Metadata:      apiMetadata,
+			ReplicaCounts: GetReplicaCounts(apiStatus, apiDeployment, apiPods),
+			Endpoint:      &apiEndpoint,
+			DashboardURL:  dashboardURL,
+		},
+	}, nil
 }
 
 func UpdateAPIMetricsCron(apiDeployment *kapps.Deployment) error {
@@ -545,6 +634,33 @@ func deleteK8sResources(apiName string) error {
 	return err
 }
 
+// isAPIUpdating returns true if min_replicas are not ready and no updated replicas have errored.
+// Replica counts are derived from every pod labeled with the deployment's "apiName".
+func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
+	apiPods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
+	if err != nil {
+		return false, err
+	}
+
+	// no status is supplied here; only the Ready and failure counters are read below
+	counts := GetReplicaCounts(nil, deployment, apiPods)
+
+	autoscaling, err := userconfig.AutoscalingFromAnnotations(deployment)
+	if err != nil {
+		return false, err
+	}
+
+	// updating: fewer ready replicas than the configured minimum, and nothing has failed
+	belowMinReplicas := counts.Ready < autoscaling.MinReplicas
+	return belowMinReplicas && counts.TotalFailed() == 0, nil
+}
+
+// isPodSpecLatest reports whether pod carries the deployment template's "podID" and "deploymentID" labels (the gateway deployment/pods have neither, which is ok since it is always up-to-date).
+func isPodSpecLatest(deployment *kapps.Deployment, pod *kcore.Pod) bool {
+	want, have := deployment.Spec.Template.Labels, pod.Labels
+	return want["podID"] == have["podID"] && want["deploymentID"] == have["deploymentID"]
+}
+
 func getDashboardURL(apiName string) string {
 	loadBalancerURL, err := operator.LoadBalancerURL()
 	if err != nil {
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 38e02329d0..41b0d11fab 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -17,234 +17,18 @@ limitations under the License.
 package asyncapi
 
 import (
-	"sort"
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/config"
-	"github.com/cortexlabs/cortex/pkg/consts"
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/types/userconfig"
-	"github.com/cortexlabs/cortex/pkg/workloads"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
 
-type asyncResourceGroup struct {
-	APIDeployment     *kapps.Deployment
-	APIPods           []kcore.Pod
-	GatewayDeployment *kapps.Deployment
-	GatewayPods       []kcore.Pod
-}
-
-func GetStatus(apiName string) (*status.Status, error) {
-	var apiDeployment *kapps.Deployment
-	var gatewayDeployment *kapps.Deployment
-	var gatewayPods []kcore.Pod
-	var apiPods []kcore.Pod
-
-	err := parallel.RunFirstErr(
-		func() error {
-			var err error
-			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			gatewayPods, err = config.K8s.ListPodsByLabels(
-				map[string]string{
-					"apiName":          apiName,
-					"cortex.dev/async": "gateway",
-				},
-			)
-			return err
-		},
-		func() error {
-			var err error
-			apiPods, err = config.K8s.ListPodsByLabels(
-				map[string]string{
-					"apiName":          apiName,
-					"cortex.dev/async": "api",
-				},
-			)
-			return err
-		},
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	if apiDeployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find api deployment", apiName)
-	}
-
-	if gatewayDeployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
-	}
-
-	return apiStatus(apiDeployment, apiPods, gatewayDeployment, gatewayPods)
-}
-
-func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.Status, error) {
-	resourcesByAPI := groupResourcesByAPI(deployments, pods)
-	statuses := make([]status.Status, len(resourcesByAPI))
-
-	var i int
-	for apiName, k8sResources := range resourcesByAPI {
-		if k8sResources.APIDeployment == nil {
-			return nil, errors.ErrorUnexpected("unable to find api deployment", apiName)
-		}
-
-		if k8sResources.GatewayDeployment == nil {
-			return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
-		}
-
-		st, err := apiStatus(k8sResources.APIDeployment, k8sResources.APIPods, k8sResources.GatewayDeployment, k8sResources.GatewayPods)
-		if err != nil {
-			return nil, err
-		}
-		statuses[i] = *st
-		i++
-	}
-
-	sort.Slice(statuses, func(i, j int) bool {
-		return statuses[i].APIName < statuses[j].APIName
-	})
-
-	return statuses, nil
-}
-
-func namesAndIDsFromStatuses(statuses []status.Status) ([]string, []string) {
-	apiNames := make([]string, len(statuses))
-	apiIDs := make([]string, len(statuses))
-
-	for i, st := range statuses {
-		apiNames[i] = st.APIName
-		apiIDs[i] = st.APIID
-	}
-
-	return apiNames, apiIDs
-}
-
-// let's do CRDs instead, to avoid this
-func groupResourcesByAPI(deployments []kapps.Deployment, pods []kcore.Pod) map[string]*asyncResourceGroup {
-	resourcesByAPI := map[string]*asyncResourceGroup{}
-	for i := range deployments {
-		deployment := deployments[i]
-		apiName := deployment.Labels["apiName"]
-		asyncType := deployment.Labels["cortex.dev/async"]
-		apiResources, exists := resourcesByAPI[apiName]
-		if exists {
-			if asyncType == "api" {
-				apiResources.APIDeployment = &deployment
-			} else {
-				apiResources.GatewayDeployment = &deployment
-			}
-		} else {
-			if asyncType == "api" {
-				resourcesByAPI[apiName] = &asyncResourceGroup{APIDeployment: &deployment}
-			} else {
-				resourcesByAPI[apiName] = &asyncResourceGroup{GatewayDeployment: &deployment}
-			}
-		}
-	}
-
-	for _, pod := range pods {
-		apiName := pod.Labels["apiName"]
-		asyncType := pod.Labels["cortex.dev/async"]
-		apiResources, exists := resourcesByAPI[apiName]
-		if !exists {
-			// ignore pods that might still be waiting to be deleted while the deployment has already been deleted
-			continue
-		}
-
-		if asyncType == "api" {
-			apiResources.APIPods = append(resourcesByAPI[apiName].APIPods, pod)
-		} else {
-			apiResources.GatewayPods = append(resourcesByAPI[apiName].GatewayPods, pod)
-		}
-	}
-	return resourcesByAPI
-}
-
-func apiStatus(apiDeployment *kapps.Deployment, apiPods []kcore.Pod, gatewayDeployment *kapps.Deployment, gatewayPods []kcore.Pod) (*status.Status, error) {
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(apiDeployment)
-	if err != nil {
-		return nil, err
-	}
-
-	apiReplicaCounts := getReplicaCounts(apiDeployment, apiPods)
-	gatewayReplicaCounts := getReplicaCounts(gatewayDeployment, gatewayPods)
-
-	st := &status.Status{}
-	st.APIName = apiDeployment.Labels["apiName"]
-	st.APIID = apiDeployment.Labels["apiID"]
-	st.ReplicaCounts = apiReplicaCounts
-	st.Code = getStatusCode(apiReplicaCounts, gatewayReplicaCounts, autoscalingSpec.MinReplicas)
-
-	return st, nil
-}
-
-func getStatusCode(apiCounts status.ReplicaCounts, gatewayCounts status.ReplicaCounts, apiMinReplicas int32) status.Code {
-	if apiCounts.Updated.Ready >= apiCounts.Requested && gatewayCounts.Updated.Ready >= 1 {
-		return status.Live
-	}
-
-	if apiCounts.Updated.ErrImagePull > 0 || gatewayCounts.Updated.ErrImagePull > 0 {
-		return status.ErrorImagePull
-	}
-
-	if apiCounts.Updated.Failed > 0 || apiCounts.Updated.Killed > 0 ||
-		gatewayCounts.Updated.Failed > 0 || gatewayCounts.Updated.Killed > 0 {
-		return status.Error
-	}
-
-	if apiCounts.Updated.KilledOOM > 0 || gatewayCounts.Updated.KilledOOM > 0 {
-		return status.OOM
-	}
-
-	if apiCounts.Updated.Stalled > 0 || gatewayCounts.Updated.Stalled > 0 {
-		return status.Stalled
-	}
-
-	if apiCounts.Updated.Ready >= apiMinReplicas && gatewayCounts.Updated.Ready >= 1 {
-		return status.Live
-	}
-
-	return status.Updating
-}
-
-// returns true if min_replicas are not ready and no updated replicas have errored
-func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
-	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
-	if err != nil {
-		return false, err
-	}
-
-	replicaCounts := getReplicaCounts(deployment, pods)
-
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return false, err
-	}
-
-	if replicaCounts.Updated.Ready < autoscalingSpec.MinReplicas && replicaCounts.Updated.TotalFailed() == 0 {
-		return true, nil
-	}
-
-	return false, nil
-}
-
-func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
+func GetReplicaCounts(apiStatus *status.Status, deployment *kapps.Deployment, pods []kcore.Pod) *status.ReplicaCounts {
 	counts := status.ReplicaCounts{}
-	counts.Requested = *deployment.Spec.Replicas
+	if apiStatus != nil {
+		counts.Requested = apiStatus.Requested
+		counts.UpToDate = apiStatus.UpToDate
+	}
 
 	for i := range pods {
 		pod := pods[i]
@@ -255,50 +39,55 @@ func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.Rep
 		addPodToReplicaCounts(&pod, deployment, &counts)
 	}
 
-	return counts
+	return &counts
 }
 
 func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts *status.ReplicaCounts) {
-	var subCounts *status.SubReplicaCounts
+	latest := false
 	if isPodSpecLatest(deployment, pod) {
-		subCounts = &counts.Updated
-	} else {
-		subCounts = &counts.Stale
+		latest = true
 	}
 
-	if k8s.IsPodReady(pod) {
-		subCounts.Ready++
+	isPodReady := k8s.IsPodReady(pod)
+	if latest && isPodReady {
+		counts.Ready++
+		return
+	} else if !latest && isPodReady {
+		counts.ReadyOutOfDate++
 		return
 	}
 
-	switch k8s.GetPodStatus(pod) {
+	podStatus := k8s.GetPodStatus(pod)
+
+	if podStatus == k8s.PodStatusTerminating {
+		counts.Terminating++
+		return
+	}
+
+	if !latest {
+		return
+	}
+
+	switch podStatus {
 	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			subCounts.Stalled++
-		} else {
-			subCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		subCounts.Initializing++
-	case k8s.PodStatusRunning:
-		subCounts.Initializing++
+		counts.Pending++
+	case k8s.PodStatusStalled:
+		counts.Stalled++
+	case k8s.PodStatusCreating:
+		counts.Creating++
+	case k8s.PodStatusReady:
+		counts.Ready++
+	case k8s.PodStatusNotReady:
+		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
-		subCounts.ErrImagePull++
-	case k8s.PodStatusTerminating:
-		subCounts.Terminating++
+		counts.ErrImagePull++
 	case k8s.PodStatusFailed:
-		subCounts.Failed++
+		counts.Failed++
 	case k8s.PodStatusKilled:
-		subCounts.Killed++
+		counts.Killed++
 	case k8s.PodStatusKilledOOM:
-		subCounts.KilledOOM++
-	default:
-		subCounts.Unknown++
+		counts.KilledOOM++
+	case k8s.PodStatusUnknown:
+		counts.Unknown++
 	}
 }
-
-func isPodSpecLatest(deployment *kapps.Deployment, pod *kcore.Pod) bool {
-	// Note: the gateway deployment/pods don't have "podID" or "deploymentID" labels, which is ok since it is always up-to-date
-	return deployment.Spec.Template.Labels["podID"] == pod.Labels["podID"] &&
-		deployment.Spec.Template.Labels["deploymentID"] == pod.Labels["deploymentID"]
-}
diff --git a/pkg/operator/resources/job/batchapi/api.go b/pkg/operator/resources/job/batchapi/api.go
index b85726a531..6ac1c87219 100644
--- a/pkg/operator/resources/job/batchapi/api.go
+++ b/pkg/operator/resources/job/batchapi/api.go
@@ -140,25 +140,18 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 		apiNameToBatchJobsMap[batchJob.Spec.APIName] = append(apiNameToBatchJobsMap[batchJob.Spec.APIName], &batchJobList[i])
 	}
 
-	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
-		apiID := virtualService.Labels["apiID"]
-
-		api, err := operator.DownloadAPISpec(apiName, apiID)
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
 		if err != nil {
-			return nil, err
-		}
-
-		endpoint, err := operator.APIEndpoint(api)
-		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
 		var jobStatuses []status.BatchJobStatus
-		batchJobs := apiNameToBatchJobsMap[apiName]
+		batchJobs := apiNameToBatchJobsMap[metadata.Name]
 
 		if len(batchJobs) == 0 {
-			jobStates, err := job.GetMostRecentlySubmittedJobStates(apiName, 1, userconfig.BatchAPIKind)
+			jobStates, err := job.GetMostRecentlySubmittedJobStates(metadata.Name, 1, userconfig.BatchAPIKind)
 			if err != nil {
 				return nil, err
 			}
@@ -183,9 +176,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 			}
 		}
 
-		batchAPIsMap[apiName] = &schema.APIResponse{
-			Spec:             *api,
-			Endpoint:         endpoint,
+		batchAPIsMap[metadata.Name] = &schema.APIResponse{
+			Metadata:         metadata,
 			BatchJobStatuses: jobStatuses,
 		}
 	}
@@ -200,10 +192,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 }
 
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	virtualService := deployedResource.VirtualService
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
 
-	apiID := virtualService.Labels["apiID"]
-	api, err := operator.DownloadAPISpec(deployedResource.Name, apiID)
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -263,9 +257,10 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:             *api,
+			Spec:             api,
+			Metadata:         metadata,
 			BatchJobStatuses: jobStatuses,
-			Endpoint:         endpoint,
+			Endpoint:         &endpoint,
 			DashboardURL:     dashboardURL,
 		},
 	}, nil
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 9261cc16a9..c5ca6e17fa 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -146,21 +146,15 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 		}
 	}
 
-	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
-		apiID := virtualService.Labels["apiID"]
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
 
-		api, err := operator.DownloadAPISpec(apiName, apiID)
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
 		if err != nil {
-			return nil, err
-		}
-
-		endpoint, err := operator.APIEndpoint(api)
-		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
-		jobStates, err := job.GetMostRecentlySubmittedJobStates(apiName, 1, userconfig.TaskAPIKind)
+		jobStates, err := job.GetMostRecentlySubmittedJobStates(metadata.Name, 1, userconfig.TaskAPIKind)
 
 		jobStatuses := []status.TaskJobStatus{}
 		if len(jobStates) > 0 {
@@ -172,9 +166,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 			jobStatuses = append(jobStatuses, *jobStatus)
 		}
 
-		taskAPIsMap[apiName] = &schema.APIResponse{
-			Spec:            *api,
-			Endpoint:        endpoint,
+		taskAPIsMap[metadata.Name] = &schema.APIResponse{
+			Metadata:        metadata,
 			TaskJobStatuses: jobStatuses,
 		}
 	}
@@ -209,8 +202,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 
 	taskAPIList := make([]schema.APIResponse, 0, len(taskAPIsMap))
 
-	for _, batchAPI := range taskAPIsMap {
-		taskAPIList = append(taskAPIList, *batchAPI)
+	for _, taskAPI := range taskAPIsMap {
+		taskAPIList = append(taskAPIList, *taskAPI)
 	}
 
 	return taskAPIList, nil
@@ -218,10 +211,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 
 // GetAPIByName returns a single task API and its most recently submitted job along with all running task jobs
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	virtualService := deployedResource.VirtualService
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
 
-	apiID := virtualService.Labels["apiID"]
-	api, err := operator.DownloadAPISpec(deployedResource.Name, apiID)
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -295,9 +290,10 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:            *api,
+			Spec:            api,
+			Metadata:        metadata,
 			TaskJobStatuses: jobStatuses,
-			Endpoint:        endpoint,
+			Endpoint:        &endpoint,
 			DashboardURL:    dashboardURL,
 		},
 	}, nil
diff --git a/pkg/operator/resources/job/worker_stats.go b/pkg/operator/resources/job/worker_stats.go
index 07628995e4..797d65980e 100644
--- a/pkg/operator/resources/job/worker_stats.go
+++ b/pkg/operator/resources/job/worker_stats.go
@@ -17,9 +17,6 @@ limitations under the License.
 package job
 
 import (
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	kbatch "k8s.io/api/batch/v1"
@@ -43,34 +40,32 @@ func GetWorkerCountsForJob(k8sJob kbatch.Job, pods []kcore.Pod) status.WorkerCou
 
 func addPodToWorkerCounts(pod *kcore.Pod, workerCounts *status.WorkerCounts) {
 	if k8s.IsPodReady(pod) {
-		workerCounts.Running++
+		workerCounts.Ready++
 		return
 	}
 
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			workerCounts.Stalled++
-		} else {
-			workerCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		workerCounts.Initializing++
-	case k8s.PodStatusRunning:
-		workerCounts.Initializing++
+		workerCounts.Pending++
+	case k8s.PodStatusStalled:
+		workerCounts.Stalled++
+	case k8s.PodStatusCreating:
+		workerCounts.Creating++
+	case k8s.PodStatusNotReady:
+		workerCounts.NotReady++
 	case k8s.PodStatusErrImagePull:
-		workerCounts.Failed++
+		workerCounts.ErrImagePull++
 	case k8s.PodStatusTerminating:
-		workerCounts.Failed++
+		workerCounts.Terminating++
 	case k8s.PodStatusFailed:
 		workerCounts.Failed++
 	case k8s.PodStatusKilled:
-		workerCounts.Failed++
+		workerCounts.Killed++
 	case k8s.PodStatusKilledOOM:
-		workerCounts.Failed++
+		workerCounts.KilledOOM++
 	case k8s.PodStatusSucceeded:
 		workerCounts.Succeeded++
-	default:
+	case k8s.PodStatusUnknown:
 		workerCounts.Unknown++
 	}
 }
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index cd7284f276..096f83348c 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"path/filepath"
 	"reflect"
+	"sort"
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
@@ -183,34 +184,35 @@ func GetAllAPIs() ([]schema.APIResponse, error) {
 	}
 
 	apiNames := make([]string, len(apis.Items))
-	apiIDs := make([]string, len(apis.Items))
 	for i, api := range apis.Items {
 		apiNames[i] = api.Name
-		apiIDs[i] = api.Annotations["cortex.dev/api-id"]
-	}
-
-	apiSpecs, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
 	}
 
 	realtimeAPIs := make([]schema.APIResponse, len(apis.Items))
+	mappedRealtimeAPIs := make(map[string]schema.APIResponse, 0)
 	for i := range apis.Items {
 		api := apis.Items[i]
-		api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
 
-		realtimeAPIs[i] = schema.APIResponse{
-			Spec: apiSpecs[i],
+		metadata, err := metadataFromRealtimeAPI(&api)
+		if err != nil {
+			return nil, err
+		}
+
+		mappedRealtimeAPIs[api.Name] = schema.APIResponse{
+			Metadata: metadata,
 			Status: &status.Status{
-				APIName:       api.Name,
-				APIID:         api.Annotations["cortex.dev/api-id"],
-				Code:          api.Status.Status,
-				ReplicaCounts: api.Status.ReplicaCounts,
+				Ready:     api.Status.Ready,
+				Requested: api.Status.Requested,
+				UpToDate:  api.Status.UpToDate,
 			},
-			Endpoint: api.Status.Endpoint,
 		}
 	}
 
+	sort.Strings(apiNames)
+	for i, apiName := range apiNames {
+		realtimeAPIs[i] = mappedRealtimeAPIs[apiName]
+	}
+
 	return realtimeAPIs, nil
 }
 
@@ -223,29 +225,71 @@ func GetAPIByName(apiName string) ([]schema.APIResponse, error) {
 		return nil, errors.Wrap(err, "failed to get realtime api resource")
 	}
 
+	metadata, err := metadataFromRealtimeAPI(&api)
+	if err != nil {
+		return nil, err
+	}
+
 	apiSpec, err := operator.DownloadAPISpec(api.Name, api.Annotations["cortex.dev/api-id"])
 	if err != nil {
 		return nil, err
 	}
 
 	dashboardURL := pointer.String(getDashboardURL(api.Name))
-	api.Status.ReplicaCounts.Requested = api.Spec.Pod.Replicas
 
 	return []schema.APIResponse{
 		{
-			Spec: *apiSpec,
+			Spec:     apiSpec,
+			Metadata: metadata,
 			Status: &status.Status{
-				APIName:       api.Name,
-				APIID:         api.Annotations["cortex.dev/api-id"],
-				Code:          api.Status.Status,
-				ReplicaCounts: api.Status.ReplicaCounts,
+				Ready:     api.Status.Ready,
+				Requested: api.Status.Requested,
+				UpToDate:  api.Status.UpToDate,
 			},
-			Endpoint:     api.Status.Endpoint,
+			Endpoint:     &api.Status.Endpoint,
 			DashboardURL: dashboardURL,
 		},
 	}, nil
 }
 
+// DescribeAPIByName returns a one-element response describing the realtime API named apiName:
+// its metadata, replica counts, endpoint and dashboard URL.
+func DescribeAPIByName(apiName string) ([]schema.APIResponse, error) {
+	ctx := context.Background()
+	var api serverless.RealtimeAPI
+	key := client.ObjectKey{Namespace: consts.DefaultNamespace, Name: apiName}
+	if err := config.K8s.Get(ctx, key, &api); err != nil {
+		return nil, errors.Wrap(err, "failed to get realtime api resource")
+	}
+
+	metadata, err := metadataFromRealtimeAPI(&api)
+	if err != nil {
+		return nil, err
+	}
+
+	podSelector := client.MatchingLabels{
+		"apiName": metadata.Name,
+		"apiKind": userconfig.RealtimeAPIKind.String(),
+	}
+	var podList kcore.PodList
+	if err := config.K8s.List(ctx, &podList, podSelector); err != nil {
+		return nil, err
+	}
+
+	// Requested and UpToDate are not tallied from pods; they are copied from the resource status.
+	replicaCounts := getReplicaCounts(podList.Items, metadata)
+	replicaCounts.Requested = api.Status.Requested
+	replicaCounts.UpToDate = api.Status.UpToDate
+
+	response := schema.APIResponse{
+		Metadata:      metadata,
+		ReplicaCounts: &replicaCounts,
+		Endpoint:      &api.Status.Endpoint,
+		DashboardURL:  pointer.String(getDashboardURL(api.Name)),
+	}
+	return []schema.APIResponse{response}, nil
+}
+
 func getDashboardURL(apiName string) string {
 	loadBalancerURL, err := operator.LoadBalancerURL()
 	if err != nil {
@@ -360,3 +404,87 @@ func deleteBucketResources(apiName string) error {
 	prefix := filepath.Join(config.ClusterConfig.ClusterUID, "apis", apiName)
 	return config.AWS.DeleteS3Dir(config.ClusterConfig.Bucket, prefix, true)
 }
+
+// metadataFromRealtimeAPI builds spec.Metadata from a RealtimeAPI's name and cortex.dev annotations.
+func metadataFromRealtimeAPI(sv *serverless.RealtimeAPI) (*spec.Metadata, error) {
+	apiID := sv.Annotations["cortex.dev/api-id"]
+	lastUpdated, err := spec.TimeFromAPIID(apiID)
+	if err != nil {
+		return nil, err
+	}
+	resource := userconfig.Resource{Name: sv.Name, Kind: userconfig.RealtimeAPIKind}
+	return &spec.Metadata{
+		Resource:     &resource,
+		APIID:        apiID,
+		DeploymentID: sv.Annotations["cortex.dev/deployment-id"],
+		LastUpdated:  lastUpdated.Unix(),
+	}, nil
+}
+
+// getReplicaCounts tallies the pods whose apiName label equals metadata.Name into a status.ReplicaCounts.
+func getReplicaCounts(pods []kcore.Pod, metadata *spec.Metadata) status.ReplicaCounts {
+	var counts status.ReplicaCounts
+	for i := range pods {
+		pod := &pods[i] // take the address instead of copying the whole kcore.Pod on every iteration
+		if pod.Labels["apiName"] != metadata.Name {
+			continue // pod belongs to a different API
+		}
+		addPodToReplicaCounts(pod, metadata, &counts)
+	}
+
+	return counts
+}
+
+// addPodToReplicaCounts increments at most one field of counts, chosen from the pod's state.
+// Ready pods count as Ready or ReadyOutOfDate depending on isPodSpecLatest; terminating pods
+// are counted whatever their version; any other state is counted only for latest-version pods.
+func addPodToReplicaCounts(pod *kcore.Pod, metadata *spec.Metadata, counts *status.ReplicaCounts) {
+	latest := isPodSpecLatest(pod, metadata)
+
+	if k8s.IsPodReady(pod) {
+		if latest {
+			counts.Ready++
+		} else {
+			counts.ReadyOutOfDate++
+		}
+		return
+	}
+
+	podStatus := k8s.GetPodStatus(pod)
+
+	if podStatus == k8s.PodStatusTerminating {
+		counts.Terminating++
+		return
+	}
+
+	if !latest {
+		return // out-of-date pods that are neither ready nor terminating are not counted
+	}
+
+	switch podStatus {
+	case k8s.PodStatusPending:
+		counts.Pending++
+	case k8s.PodStatusStalled:
+		counts.Stalled++
+	case k8s.PodStatusCreating:
+		counts.Creating++
+	case k8s.PodStatusReady: // NOTE(review): pods passing IsPodReady returned above; presumably a fallback, confirm
+		counts.Ready++
+	case k8s.PodStatusNotReady:
+		counts.NotReady++
+	case k8s.PodStatusErrImagePull:
+		counts.ErrImagePull++
+	case k8s.PodStatusFailed:
+		counts.Failed++
+	case k8s.PodStatusKilled:
+		counts.Killed++
+	case k8s.PodStatusKilledOOM:
+		counts.KilledOOM++
+	case k8s.PodStatusUnknown:
+		counts.Unknown++
+	}
+}
+
+func isPodSpecLatest(pod *kcore.Pod, metadata *spec.Metadata) bool {
+	return pod.Labels["apiID"] == metadata.APIID // latest means the pod's apiID label equals the API's current ID
+}
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 6eeb95b3b0..11cd50a5c8 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -158,8 +158,8 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*schema.APIResponse, stri
 		apiEndpoint, _ := operator.APIEndpoint(api)
 
 		return &schema.APIResponse{
-			Spec:     *api,
-			Endpoint: apiEndpoint,
+			Spec:     api,
+			Endpoint: &apiEndpoint,
 		}, msg, nil
 	}
 
@@ -256,7 +256,7 @@ func DeleteAPI(apiName string, keepCache bool) (*schema.DeleteResponse, error) {
 func GetAPIs() ([]schema.APIResponse, error) {
 	var deployments []kapps.Deployment
 	var k8sTaskJobs []kbatch.Job
-	var pods []kcore.Pod
+	var taskAPIPods []kcore.Pod
 	var virtualServices []istioclientnetworking.VirtualService
 	var batchJobList batch.BatchJobList
 
@@ -268,7 +268,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		},
 		func() error {
 			var err error
-			pods, err = config.K8s.ListPodsWithLabelKeys("apiName")
+			taskAPIPods, err = config.K8s.ListPodsByLabel("apiKind", userconfig.TaskAPIKind.String())
 			return err
 		},
 		func() error {
@@ -305,20 +305,6 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	var batchAPIPods []kcore.Pod
-	var taskAPIPods []kcore.Pod
-	var asyncAPIPods []kcore.Pod
-	for _, pod := range pods {
-		switch pod.Labels["apiKind"] {
-		case userconfig.BatchAPIKind.String():
-			batchAPIPods = append(batchAPIPods, pod)
-		case userconfig.TaskAPIKind.String():
-			taskAPIPods = append(taskAPIPods, pod)
-		case userconfig.AsyncAPIKind.String():
-			asyncAPIPods = append(asyncAPIPods, pod)
-		}
-	}
-
 	var batchAPIVirtualServices []istioclientnetworking.VirtualService
 	var taskAPIVirtualServices []istioclientnetworking.VirtualService
 	var trafficSplitterVirtualServices []istioclientnetworking.VirtualService
@@ -350,7 +336,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIPods, asyncAPIDeployments)
+	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments)
 	if err != nil {
 		return nil, err
 	}
@@ -443,7 +429,7 @@ func GetAPIByID(apiName string, apiID string) ([]schema.APIResponse, error) {
 
 	return []schema.APIResponse{
 		{
-			Spec: *apiSpec,
+			Spec: apiSpec,
 		},
 	}, nil
 }
@@ -494,3 +480,33 @@ func checkIfUsedByTrafficSplitter(apiName string) error {
 	}
 	return nil
 }
+
+// DescribeAPI resolves apiName to its deployed resource and delegates to the describe
+// routine for that resource's kind. Only realtime and async APIs can be described; any
+// other kind yields ErrorOperationIsOnlySupportedForKind.
+func DescribeAPI(apiName string) ([]schema.APIResponse, error) {
+	deployedResource, err := GetDeployedResourceByName(apiName)
+	if err != nil {
+		return nil, err
+	}
+
+	var apiResponse []schema.APIResponse
+	// Each supported kind has its own describe implementation; its error is checked once below.
+	switch deployedResource.Kind {
+	case userconfig.RealtimeAPIKind:
+		apiResponse, err = realtimeapi.DescribeAPIByName(apiName)
+	case userconfig.AsyncAPIKind:
+		apiResponse, err = asyncapi.DescribeAPIByName(deployedResource)
+	default:
+		return nil, ErrorOperationIsOnlySupportedForKind(
+			*deployedResource,
+			userconfig.RealtimeAPIKind,
+			userconfig.AsyncAPIKind,
+		) // unexpected
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	return apiResponse, nil
+}
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index 9d81a17faa..4881f724e3 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -26,6 +26,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/parallel"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/operator/lib/routines"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -132,34 +133,27 @@ func getTrafficSplitterDestinations(trafficSplitter *spec.API) []k8s.Destination
 
 // GetAllAPIs returns a list of metadata, in the form of schema.APIResponse, about all the created traffic splitter APIs
 func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
-	var (
-		apiNames         []string
-		apiIDs           []string
-		trafficSplitters []schema.APIResponse
-	)
+	var trafficSplitters []schema.APIResponse
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
 
-	for _, virtualService := range virtualServices {
-		if virtualService.Labels["apiKind"] == userconfig.TrafficSplitterKind.String() {
-			apiNames = append(apiNames, virtualService.Labels["apiName"])
-			apiIDs = append(apiIDs, virtualService.Labels["apiID"])
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
-	}
 
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
-	}
+		if metadata.Kind != userconfig.TrafficSplitterKind {
+			continue
+		}
 
-	for i := range apis {
-		trafficSplitter := apis[i]
-		endpoint, err := operator.APIEndpoint(&trafficSplitter)
+		targets, err := userconfig.TrafficSplitterTargetsFromAnnotations(&virtualServices[i])
 		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
 		trafficSplitters = append(trafficSplitters, schema.APIResponse{
-			Spec:     trafficSplitter,
-			Endpoint: endpoint,
+			Metadata:                  metadata,
+			NumTrafficSplitterTargets: pointer.Int32(targets),
 		})
 	}
 
@@ -168,7 +162,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 
 // GetAPIByName retrieves the metadata, in the form of schema.APIResponse, of a single traffic splitter API
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	api, err := operator.DownloadAPISpec(deployedResource.Name, deployedResource.VirtualService.Labels["apiID"])
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
+
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -180,8 +179,9 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:     *api,
-			Endpoint: endpoint,
+			Spec:     api,
+			Metadata: metadata,
+			Endpoint: &endpoint,
 		},
 	}, nil
 }
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index eff68701ee..703b794483 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -50,19 +50,22 @@ type NodeInfo struct {
 }
 
 type DeployResult struct {
-	API     *APIResponse `json:"api"`
-	Message string       `json:"message"`
-	Error   string       `json:"error"`
+	API     *APIResponse `json:"api" yaml:"api"`
+	Message string       `json:"message" yaml:"message"`
+	Error   string       `json:"error" yaml:"error"`
 }
 
 type APIResponse struct {
-	Spec             spec.API                `json:"spec"`
-	Status           *status.Status          `json:"status,omitempty"`
-	Endpoint         string                  `json:"endpoint"`
-	DashboardURL     *string                 `json:"dashboard_url,omitempty"`
-	BatchJobStatuses []status.BatchJobStatus `json:"batch_job_statuses,omitempty"`
-	TaskJobStatuses  []status.TaskJobStatus  `json:"task_job_statuses,omitempty"`
-	APIVersions      []APIVersion            `json:"api_versions,omitempty"`
+	Spec                      *spec.API               `json:"spec,omitempty" yaml:"spec,omitempty"`
+	Metadata                  *spec.Metadata          `json:"metadata,omitempty"  yaml:"metadata,omitempty"`
+	Status                    *status.Status          `json:"status,omitempty"  yaml:"status,omitempty"`
+	ReplicaCounts             *status.ReplicaCounts   `json:"replica_counts,omitempty"  yaml:"replica_counts,omitempty"`
+	NumTrafficSplitterTargets *int32                  `json:"num_traffic_splitter_targets,omitempty" yaml:"num_traffic_splitter_targets,omitempty"`
+	Endpoint                  *string                 `json:"endpoint,omitempty"  yaml:"endpoint,omitempty"`
+	DashboardURL              *string                 `json:"dashboard_url,omitempty"  yaml:"dashboard_url,omitempty"`
+	BatchJobStatuses          []status.BatchJobStatus `json:"batch_job_statuses,omitempty"  yaml:"batch_job_statuses,omitempty"`
+	TaskJobStatuses           []status.TaskJobStatus  `json:"task_job_statuses,omitempty"  yaml:"task_job_statuses,omitempty"`
+	APIVersions               []APIVersion            `json:"api_versions,omitempty"  yaml:"api_versions,omitempty"`
 }
 
 type LogResponse struct {
@@ -70,16 +73,16 @@ type LogResponse struct {
 }
 
 type BatchJobResponse struct {
-	APISpec   spec.API              `json:"api_spec"`
-	JobStatus status.BatchJobStatus `json:"job_status"`
-	Metrics   *metrics.BatchMetrics `json:"metrics,omitempty"`
-	Endpoint  string                `json:"endpoint"`
+	APISpec   spec.API              `json:"api_spec" yaml:"api_spec"`
+	JobStatus status.BatchJobStatus `json:"job_status" yaml:"job_status"`
+	Metrics   *metrics.BatchMetrics `json:"metrics,omitempty" yaml:"metrics,omitempty"`
+	Endpoint  string                `json:"endpoint" yaml:"endpoint"`
 }
 
 type TaskJobResponse struct {
-	APISpec   spec.API             `json:"api_spec"`
-	JobStatus status.TaskJobStatus `json:"job_status"`
-	Endpoint  string               `json:"endpoint"`
+	APISpec   spec.API             `json:"api_spec" yaml:"api_spec"`
+	JobStatus status.TaskJobStatus `json:"job_status" yaml:"job_status"`
+	Endpoint  string               `json:"endpoint" yaml:"endpoint"`
 }
 
 type DeleteResponse struct {
@@ -96,8 +99,8 @@ type ErrorResponse struct {
 }
 
 type APIVersion struct {
-	APIID       string `json:"api_id"`
-	LastUpdated int64  `json:"last_updated"`
+	APIID       string `json:"api_id" yaml:"api_id"`
+	LastUpdated int64  `json:"last_updated" yaml:"last_updated"`
 }
 
 type VerifyCortexResponse struct{}
diff --git a/pkg/types/spec/api.go b/pkg/types/spec/api.go
index e181a0ffab..3361114078 100644
--- a/pkg/types/spec/api.go
+++ b/pkg/types/spec/api.go
@@ -30,20 +30,62 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/hash"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
+	kapps "k8s.io/api/apps/v1"
 )
 
 type API struct {
 	*userconfig.API
-	ID           string `json:"id"`
-	SpecID       string `json:"spec_id"`
-	PodID        string `json:"pod_id"`
-	DeploymentID string `json:"deployment_id"`
+	ID           string `json:"id" yaml:"id"`
+	SpecID       string `json:"spec_id" yaml:"spec_id"`
+	PodID        string `json:"pod_id" yaml:"pod_id"`
+	DeploymentID string `json:"deployment_id" yaml:"deployment_id"`
 
-	Key string `json:"key"`
+	Key string `json:"key" yaml:"key"`
 
-	InitialDeploymentTime int64  `json:"initial_deployment_time"`
-	LastUpdated           int64  `json:"last_updated"`
-	MetadataRoot          string `json:"metadata_root"`
+	InitialDeploymentTime int64  `json:"initial_deployment_time" yaml:"initial_deployment_time"`
+	LastUpdated           int64  `json:"last_updated" yaml:"last_updated"`
+	MetadataRoot          string `json:"metadata_root" yaml:"metadata_root"`
+}
+
+type Metadata struct {
+	*userconfig.Resource
+	APIID        string `json:"id" yaml:"id"` // serialized under the key "id", not "api_id"
+	PodID        string `json:"pod_id,omitempty" yaml:"pod_id,omitempty"`
+	DeploymentID string `json:"deployment_id,omitempty" yaml:"deployment_id,omitempty"`
+	LastUpdated  int64  `json:"last_updated" yaml:"last_updated"` // set from TimeFromAPIID(...).Unix() by the constructors in this file
+}
+
+// MetadataFromDeployment builds Metadata from the apiName, apiKind, apiID and deploymentID
+// labels of a deployment.
+func MetadataFromDeployment(deployment *kapps.Deployment) (*Metadata, error) {
+	return metadataFromLabels(deployment.Labels)
+}
+
+// MetadataFromVirtualService builds Metadata from the apiName, apiKind, apiID and deploymentID
+// labels of a virtual service.
+func MetadataFromVirtualService(vs *istioclientnetworking.VirtualService) (*Metadata, error) {
+	return metadataFromLabels(vs.Labels)
+}
+
+// metadataFromLabels is the shared implementation behind the exported constructors; it fails
+// when TimeFromAPIID cannot derive a timestamp from the apiID label.
+// Both resource types are read through the same label keys, so one code path serves both.
+func metadataFromLabels(labels map[string]string) (*Metadata, error) {
+	apiID := labels["apiID"]
+	lastUpdated, err := TimeFromAPIID(apiID)
+	if err != nil {
+		return nil, err
+	}
+	return &Metadata{
+		Resource: &userconfig.Resource{
+			Name: labels["apiName"],
+			Kind: userconfig.KindFromString(labels["apiKind"]),
+		},
+		APIID:        apiID,
+		DeploymentID: labels["deploymentID"],
+		LastUpdated:  lastUpdated.Unix(),
+	}, nil
 }
 
 /*
diff --git a/pkg/types/spec/job.go b/pkg/types/spec/job.go
index 784fb4f199..d6c6cb354d 100644
--- a/pkg/types/spec/job.go
+++ b/pkg/types/spec/job.go
@@ -32,9 +32,9 @@ const (
 )
 
 type JobKey struct {
-	ID      string          `json:"job_id"`
-	APIName string          `json:"api_name"`
-	Kind    userconfig.Kind `json:"kind"`
+	ID      string          `json:"job_id" yaml:"job_id"`
+	APIName string          `json:"api_name" yaml:"api_name"`
+	Kind    userconfig.Kind `json:"kind" yaml:"kind"`
 }
 
 func (j JobKey) UserString() string {
@@ -56,39 +56,39 @@ func (j JobKey) K8sName() string {
 }
 
 type SQSDeadLetterQueue struct {
-	ARN             string `json:"arn"`
-	MaxReceiveCount int    `json:"max_receive_count"`
+	ARN             string `json:"arn" yaml:"arn"`
+	MaxReceiveCount int    `json:"max_receive_count" yaml:"max_receive_count"`
 }
 
 type RuntimeBatchJobConfig struct {
-	Workers            int                    `json:"workers"`
-	SQSDeadLetterQueue *SQSDeadLetterQueue    `json:"sqs_dead_letter_queue"`
-	Config             map[string]interface{} `json:"config"`
-	Timeout            *int                   `json:"timeout"`
+	Workers            int                    `json:"workers" yaml:"workers"`
+	SQSDeadLetterQueue *SQSDeadLetterQueue    `json:"sqs_dead_letter_queue" yaml:"sqs_dead_letter_queue"`
+	Config             map[string]interface{} `json:"config" yaml:"config"`
+	Timeout            *int                   `json:"timeout" yaml:"timeout"`
 }
 
 type RuntimeTaskJobConfig struct {
-	Workers int                    `json:"workers"`
-	Config  map[string]interface{} `json:"config"`
-	Timeout *int                   `json:"timeout"`
+	Workers int                    `json:"workers" yaml:"workers"`
+	Config  map[string]interface{} `json:"config" yaml:"config"`
+	Timeout *int                   `json:"timeout" yaml:"timeout"`
 }
 
 type BatchJob struct {
 	JobKey
 	RuntimeBatchJobConfig
-	APIID           string    `json:"api_id"`
-	SQSUrl          string    `json:"sqs_url"`
-	TotalBatchCount int       `json:"total_batch_count,omitempty"`
-	StartTime       time.Time `json:"start_time,omitempty"`
+	APIID           string    `json:"api_id" yaml:"api_id"`
+	SQSUrl          string    `json:"sqs_url" yaml:"sqs_url"`
+	TotalBatchCount int       `json:"total_batch_count,omitempty" yaml:"total_batch_count,omitempty"`
+	StartTime       time.Time `json:"start_time,omitempty" yaml:"start_time,omitempty"`
 }
 
 type TaskJob struct {
 	JobKey
 	RuntimeTaskJobConfig
-	APIID     string    `json:"api_id"`
-	SpecID    string    `json:"spec_id"`
-	PodID     string    `json:"pod_id"`
-	StartTime time.Time `json:"start_time"`
+	APIID     string    `json:"api_id" yaml:"api_id"`
+	SpecID    string    `json:"spec_id" yaml:"spec_id"`
+	PodID     string    `json:"pod_id" yaml:"pod_id"`
+	StartTime time.Time `json:"start_time" yaml:"start_time"`
 }
 
 // e.g. /<cluster UID>/jobs/<job_api_kind>/<cortex version>/<api_name>
diff --git a/pkg/types/status/code.go b/pkg/types/status/code.go
deleted file mode 100644
index 11d9c002ea..0000000000
--- a/pkg/types/status/code.go
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
-Copyright 2021 Cortex Labs, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package status
-
-// +kubebuilder:validation:Type=string
-type Code int
-
-const (
-	Unknown Code = iota
-	Pending
-	Stalled
-	Error
-	ErrorImagePull
-	OOM
-	Live
-	Updating
-)
-
-var _codes = []string{
-	"unknown",
-	"pending",
-	"stalled",
-	"error",
-	"error_image_pull",
-	"oom",
-	"live",
-	"updating",
-}
-
-var _ = [1]int{}[int(Updating)-(len(_codes)-1)] // Ensure list length matches
-
-var _codeMessages = []string{
-	"unknown",               // Unknown
-	"pending",               // Pending
-	"compute unavailable",   // Stalled
-	"error",                 // Error
-	"error (image pull)",    // Live
-	"error (out of memory)", // OOM
-	"live",                  // Live
-	"updating",              // Updating
-}
-
-var _ = [1]int{}[int(Updating)-(len(_codeMessages)-1)] // Ensure list length matches
-
-func (code Code) String() string {
-	if int(code) < 0 || int(code) >= len(_codes) {
-		return _codes[Unknown]
-	}
-	return _codes[code]
-}
-
-func (code Code) Message() string {
-	if int(code) < 0 || int(code) >= len(_codeMessages) {
-		return _codeMessages[Unknown]
-	}
-	return _codeMessages[code]
-}
-
-// MarshalText satisfies TextMarshaler
-func (code Code) MarshalText() ([]byte, error) {
-	return []byte(code.String()), nil
-}
-
-// UnmarshalText satisfies TextUnmarshaler
-func (code *Code) UnmarshalText(text []byte) error {
-	enum := string(text)
-	for i := 0; i < len(_codes); i++ {
-		if enum == _codes[i] {
-			*code = Code(i)
-			return nil
-		}
-	}
-
-	*code = Unknown
-	return nil
-}
-
-// UnmarshalBinary satisfies BinaryUnmarshaler
-// Needed for msgpack
-func (code *Code) UnmarshalBinary(data []byte) error {
-	return code.UnmarshalText(data)
-}
-
-// MarshalBinary satisfies BinaryMarshaler
-func (code Code) MarshalBinary() ([]byte, error) {
-	return []byte(code.String()), nil
-}
diff --git a/pkg/types/status/job_status.go b/pkg/types/status/job_status.go
index eb299831ba..f106d051a7 100644
--- a/pkg/types/status/job_status.go
+++ b/pkg/types/status/job_status.go
@@ -24,15 +24,15 @@ import (
 
 type BatchJobStatus struct {
 	spec.BatchJob
-	Status         JobCode       `json:"status"`
-	EndTime        *time.Time    `json:"end_time,omitempty"`
-	BatchesInQueue int           `json:"batches_in_queue"`
-	WorkerCounts   *WorkerCounts `json:"worker_counts,omitempty"`
+	Status         JobCode       `json:"status" yaml:"status"`
+	EndTime        *time.Time    `json:"end_time,omitempty" yaml:"end_time,omitempty"`
+	BatchesInQueue int           `json:"batches_in_queue" yaml:"batches_in_queue"`
+	WorkerCounts   *WorkerCounts `json:"worker_counts,omitempty" yaml:"worker_counts,omitempty"`
 }
 
 type TaskJobStatus struct {
 	spec.TaskJob
-	EndTime      *time.Time    `json:"end_time"`
-	Status       JobCode       `json:"status"`
-	WorkerCounts *WorkerCounts `json:"worker_counts"`
+	EndTime      *time.Time    `json:"end_time,omitempty" yaml:"end_time,omitempty"`
+	Status       JobCode       `json:"status" yaml:"status"`
+	WorkerCounts *WorkerCounts `json:"worker_counts,omitempty" yaml:"worker_counts,omitempty"`
 }
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index b1ef426504..6c61b3ac6b 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -16,48 +16,118 @@ limitations under the License.
 
 package status
 
+import (
+	kapps "k8s.io/api/apps/v1"
+)
+
 type Status struct {
-	APIName       string `json:"api_name"`
-	APIID         string `json:"api_id"`
-	Code          Code   `json:"status_code"`
-	ReplicaCounts `json:"replica_counts"`
+	Ready     int32 `json:"ready" yaml:"ready"`           // deployment-reported number of ready replicas (latest + out of date)
+	Requested int32 `json:"requested" yaml:"requested"`   // deployment-reported number of requested replicas
+	UpToDate  int32 `json:"up_to_date" yaml:"up_to_date"` // deployment-reported number of up-to-date replicas (in whichever phase they are found in)
 }
 
 type ReplicaCounts struct {
-	Updated   SubReplicaCounts `json:"updated,omitempty"`
-	Stale     SubReplicaCounts `json:"stale,omitempty"`
-	Requested int32            `json:"requested,omitempty"`
+	Status
+	Pending        int32 `json:"pending" yaml:"pending"`
+	Creating       int32 `json:"creating" yaml:"creating"`
+	NotReady       int32 `json:"not_ready" yaml:"not_ready"`
+	ReadyOutOfDate int32 `json:"ready_out_of_date" yaml:"ready_out_of_date"`
+	ErrImagePull   int32 `json:"err_image_pull" yaml:"err_image_pull"`
+	Terminating    int32 `json:"terminating" yaml:"terminating"` // includes up-to-date and out-of-date pods
+	Failed         int32 `json:"failed" yaml:"failed"`
+	Killed         int32 `json:"killed" yaml:"killed"`
+	KilledOOM      int32 `json:"killed_oom" yaml:"killed_oom"`
+	Stalled        int32 `json:"stalled" yaml:"stalled"` // pending for a long time
+	Unknown        int32 `json:"unknown" yaml:"unknown"`
 }
 
-type SubReplicaCounts struct {
-	Pending      int32 `json:"pending,omitempty"`
-	Initializing int32 `json:"initializing,omitempty"`
-	Ready        int32 `json:"ready"`
-	NotReady     int32 `json:"not_ready,omitempty"`
-	ErrImagePull int32 `json:"err_image_pull,omitempty"`
-	Terminating  int32 `json:"terminating,omitempty"`
-	Failed       int32 `json:"failed,omitempty"`
-	Killed       int32 `json:"killed,omitempty"`
-	KilledOOM    int32 `json:"killed_oom,omitempty"`
-	Stalled      int32 `json:"stalled,omitempty"` // pending for a long time
-	Unknown      int32 `json:"unknown,omitempty"`
+type ReplicaCountType string
+
+const (
+	ReplicaCountUpToDate       ReplicaCountType = "UpToDate"       // total up-to-date pods
+	ReplicaCountRequested      ReplicaCountType = "Requested"      // requested number of replicas (for up-to-date pods)
+	ReplicaCountPending        ReplicaCountType = "Pending"        // pods that are in the pending state (for up-to-date pods)
+	ReplicaCountCreating       ReplicaCountType = "Creating"       // pods that have their init/non-init containers in the process of being created (for up-to-date pods)
+	ReplicaCountNotReady       ReplicaCountType = "NotReady"       // pods that are not passing the readiness checks (for up-to-date pods)
+	ReplicaCountReady          ReplicaCountType = "Ready"          // pods that are passing the readiness checks (for up-to-date pods)
+	ReplicaCountReadyOutOfDate ReplicaCountType = "ReadyOutOfDate" // pods that are passing the readiness checks (for out-of-date pods)
+	ReplicaCountErrImagePull   ReplicaCountType = "ErrImagePull"   // pods that couldn't pull the containers' images (for up-to-date pods)
+	ReplicaCountTerminating    ReplicaCountType = "Terminating"    // pods that are in a terminating state (for up-to-date and out-of-date pods)
+	ReplicaCountFailed         ReplicaCountType = "Failed"         // pods that have had their containers erroring (for up-to-date pods)
+	ReplicaCountKilled         ReplicaCountType = "Killed"         // pods that have had their container processes killed (for up-to-date pods)
+	ReplicaCountKilledOOM      ReplicaCountType = "KilledOOM"      // pods that have had their containers OOM (for up-to-date pods)
+	ReplicaCountStalled        ReplicaCountType = "Stalled"        // pods that have been in a pending state for more than 15 mins (for up-to-date pods)
+	ReplicaCountUnknown        ReplicaCountType = "Unknown"        // pods that are in an unknown state (for up-to-date pods)
+)
+
+var ReplicaCountTypes []ReplicaCountType = []ReplicaCountType{
+	ReplicaCountRequested, ReplicaCountPending, ReplicaCountCreating,
+	ReplicaCountNotReady, ReplicaCountReady, ReplicaCountReadyOutOfDate,
+	ReplicaCountErrImagePull, ReplicaCountTerminating, ReplicaCountFailed,
+	ReplicaCountKilled, ReplicaCountKilledOOM, ReplicaCountStalled,
+	ReplicaCountUnknown, ReplicaCountUpToDate,
 }
 
 // Worker counts don't have as many failure variations because Jobs clean up dead pods, so counting different failure scenarios isn't interesting
 type WorkerCounts struct {
-	Pending      int32 `json:"pending,omitempty"`
-	Initializing int32 `json:"initializing,omitempty"`
-	Running      int32 `json:"running,omitempty"`
-	Succeeded    int32 `json:"succeeded,omitempty"`
-	Failed       int32 `json:"failed,omitempty"`
-	Stalled      int32 `json:"stalled,omitempty"` // pending for a long time
-	Unknown      int32 `json:"unknown,omitempty"`
+	Pending      int32 `json:"pending,omitempty" yaml:"pending,omitempty"`
+	Creating     int32 `json:"creating,omitempty" yaml:"creating,omitempty"`
+	NotReady     int32 `json:"not_ready,omitempty" yaml:"not_ready,omitempty"`
+	Ready        int32 `json:"ready,omitempty" yaml:"ready,omitempty"`
+	Succeeded    int32 `json:"succeeded,omitempty" yaml:"succeeded,omitempty"`
+	ErrImagePull int32 `json:"err_image_pull,omitempty" yaml:"err_image_pull,omitempty"`
+	Terminating  int32 `json:"terminating,omitempty" yaml:"terminating,omitempty"`
+	Failed       int32 `json:"failed,omitempty" yaml:"failed,omitempty"`
+	Killed       int32 `json:"killed,omitempty" yaml:"killed,omitempty"`
+	KilledOOM    int32 `json:"killed_oom,omitempty" yaml:"killed_oom,omitempty"`
+	Stalled      int32 `json:"stalled,omitempty" yaml:"stalled,omitempty"` // pending for a long time
+	Unknown      int32 `json:"unknown,omitempty" yaml:"unknown,omitempty"`
+}
+
+func FromDeployment(deployment *kapps.Deployment) *Status {
+	var requested int32 // stays 0 when the deployment leaves Spec.Replicas unset (nil)
+	if deployment.Spec.Replicas != nil {
+		requested = *deployment.Spec.Replicas
+	}
+	return &Status{
+		Ready:     deployment.Status.ReadyReplicas,
+		Requested: requested,
+		UpToDate:  deployment.Status.UpdatedReplicas,
+	}
 }
 
-func (status *Status) Message() string {
-	return status.Code.Message()
+func (counts *ReplicaCounts) GetCountBy(replicaType ReplicaCountType) int32 {
+	switch replicaType {
+	case ReplicaCountUpToDate:
+		return counts.UpToDate
+	case ReplicaCountRequested:
+		return counts.Requested
+	case ReplicaCountPending:
+		return counts.Pending
+	case ReplicaCountCreating:
+		return counts.Creating
+	case ReplicaCountNotReady:
+		return counts.NotReady
+	case ReplicaCountReady:
+		return counts.Ready
+	case ReplicaCountReadyOutOfDate:
+		return counts.ReadyOutOfDate
+	case ReplicaCountErrImagePull:
+		return counts.ErrImagePull
+	case ReplicaCountTerminating:
+		return counts.Terminating
+	case ReplicaCountFailed:
+		return counts.Failed
+	case ReplicaCountKilled:
+		return counts.Killed
+	case ReplicaCountKilledOOM:
+		return counts.KilledOOM
+	case ReplicaCountStalled:
+		return counts.Stalled
+	}
+	return counts.Unknown // ReplicaCountUnknown, and any unrecognized type, resolve to the Unknown count
 }
 
-func (src *SubReplicaCounts) TotalFailed() int32 {
-	return src.Failed + src.ErrImagePull + src.Killed + src.KilledOOM + src.Stalled
+func (counts *ReplicaCounts) TotalFailed() int32 {
+	return counts.ErrImagePull + counts.Failed + counts.Killed + counts.KilledOOM + counts.Unknown
 }
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index c2f8585941..c524c599e0 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -155,6 +155,10 @@ func IdentifyAPI(filePath string, name string, kind Kind, index int) string {
 func (api *API) ToK8sAnnotations() map[string]string {
 	annotations := map[string]string{}
 
+	if len(api.APIs) > 0 {
+		annotations[NumTrafficSplitterTargetsAnnotationKey] = s.Int32(int32(len(api.APIs)))
+	}
+
 	if api.Pod != nil && api.Kind == RealtimeAPIKind {
 		annotations[MaxConcurrencyAnnotationKey] = s.Int64(api.Pod.MaxConcurrency)
 		annotations[MaxQueueLengthAnnotationKey] = s.Int64(api.Pod.MaxQueueLength)
@@ -245,6 +249,36 @@ func AutoscalingFromAnnotations(k8sObj kmeta.Object) (*Autoscaling, error) {
 	return &a, nil
 }
 
+func TrafficSplitterTargetsFromAnnotations(k8sObj kmeta.Object) (int32, error) {
+	targets, err := k8s.ParseInt32Annotation(k8sObj, NumTrafficSplitterTargetsAnnotationKey)
+	if err != nil {
+		return 0, err
+	}
+	return targets, nil
+}
+
+func EndpointFromAnnotation(k8sObj kmeta.Object) (string, error) {
+	endpoint, err := k8s.GetAnnotation(k8sObj, EndpointAnnotationKey)
+	if err != nil {
+		return "", err
+	}
+	return endpoint, nil
+}
+
+func ConcurrencyFromAnnotations(k8sObj kmeta.Object) (int, int, error) {
+	maxQueueLength, err := k8s.ParseIntAnnotation(k8sObj, MaxQueueLengthAnnotationKey)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	maxConcurrency, err := k8s.ParseIntAnnotation(k8sObj, MaxConcurrencyAnnotationKey)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	return maxQueueLength, maxConcurrency, nil // note the order: max queue length first, then max concurrency
+}
+
 func (api *API) UserStr() string {
 	var sb strings.Builder
 	sb.WriteString(fmt.Sprintf("%s: %s\n", NameKey, api.Name))
diff --git a/pkg/types/userconfig/config_key.go b/pkg/types/userconfig/config_key.go
index 826e144b05..5cbe3b2dda 100644
--- a/pkg/types/userconfig/config_key.go
+++ b/pkg/types/userconfig/config_key.go
@@ -91,6 +91,7 @@ const (
 	EndpointAnnotationKey                     = "networking.cortex.dev/endpoint"
 	MaxConcurrencyAnnotationKey               = "pod.cortex.dev/max-concurrency"
 	MaxQueueLengthAnnotationKey               = "pod.cortex.dev/max-queue-length"
+	NumTrafficSplitterTargetsAnnotationKey    = "apis.cortex.dev/traffic-splitter-targets"
 	MinReplicasAnnotationKey                  = "autoscaling.cortex.dev/min-replicas"
 	MaxReplicasAnnotationKey                  = "autoscaling.cortex.dev/max-replicas"
 	TargetInFlightAnnotationKey               = "autoscaling.cortex.dev/target-in-flight"

From 69f0fe897383354c73d217d89603146985766483 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 29 Jul 2021 12:28:15 +0200
Subject: [PATCH 27/42] Remove unnecessary annotations and add descriptions to
 the api status struct

---
 .../apis/serverless/v1alpha1/realtimeapi_types.go     | 11 +++++++----
 .../crd/bases/serverless.cortex.dev_realtimeapis.yaml |  4 ++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 693557818e..704f3b6f7f 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -217,16 +217,19 @@ type NetworkingSpec struct {
 // RealtimeAPIStatus defines the observed state of RealtimeAPI
 type RealtimeAPIStatus struct {
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Type=integer
+	// Number of ready pods
 	Ready int32 `json:"ready"`
+
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Type=integer
+	// Number of requested pods
 	Requested int32 `json:"requested"`
+
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Type=integer
+	// Number of pods with the last requested spec
 	UpToDate int32 `json:"up_to_date"`
+
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Type=string
+	// URL of the deployed API
 	Endpoint string `json:"endpoint,omitempty"`
 }
 
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 58d12f66df..21dda24d33 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -603,14 +603,18 @@ spec:
             description: RealtimeAPIStatus defines the observed state of RealtimeAPI
             properties:
               endpoint:
+                description: URL of the deployed API
                 type: string
               ready:
+                description: Number of ready pods
                 format: int32
                 type: integer
               requested:
+                description: Number of requested pods
                 format: int32
                 type: integer
               up_to_date:
+                description: Number of pods with the last requested spec
                 format: int32
                 type: integer
             type: object

From 128cf5b24d47f98fa98e13853ed49e4ee558b79d Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 29 Jul 2021 14:31:28 +0200
Subject: [PATCH 28/42] Fix deep equal comparison

---
 pkg/operator/resources/realtimeapi/api.go     | 216 +---------------
 pkg/operator/resources/realtimeapi/helpers.go | 241 ++++++++++++++++++
 2 files changed, 244 insertions(+), 213 deletions(-)
 create mode 100644 pkg/operator/resources/realtimeapi/helpers.go

diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 096f83348c..2abe49403b 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -19,8 +19,6 @@ package realtimeapi
 import (
 	"context"
 	"fmt"
-	"path/filepath"
-	"reflect"
 	"sort"
 	"time"
 
@@ -28,7 +26,6 @@ import (
 	"github.com/cortexlabs/cortex/pkg/consts"
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
-	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/maps"
 	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/lib/pointer"
@@ -38,22 +35,14 @@ import (
 	"github.com/cortexlabs/cortex/pkg/types/spec"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
-	"github.com/cortexlabs/cortex/pkg/workloads"
+	"github.com/google/go-cmp/cmp"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
-	kresource "k8s.io/apimachinery/pkg/api/resource"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	ktypes "k8s.io/apimachinery/pkg/types"
-	"k8s.io/apimachinery/pkg/util/intstr"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-const _realtimeDashboardUID = "realtimeapi"
-
-func generateDeploymentID() string {
-	return k8s.RandomName()[:10]
-}
-
 func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error) {
 	ctx := context.Background()
 	var api serverless.RealtimeAPI
@@ -103,7 +92,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 		MetadataRoot:          spec.MetadataRoot(apiConfig.Name, config.ClusterConfig.ClusterUID),
 	}
 
-	if !reflect.DeepEqual(api.Spec, desiredAPI.Spec) || force {
+	if !cmp.Equal(api.Spec, desiredAPI.Spec) || force {
 		api.Spec = desiredAPI.Spec
 		api.Annotations = maps.MergeStrMapsString(api.Annotations, desiredAPI.Annotations)
 
@@ -189,7 +178,7 @@ func GetAllAPIs() ([]schema.APIResponse, error) {
 	}
 
 	realtimeAPIs := make([]schema.APIResponse, len(apis.Items))
-	mappedRealtimeAPIs := make(map[string]schema.APIResponse, 0)
+	mappedRealtimeAPIs := map[string]schema.APIResponse{}
 	for i := range apis.Items {
 		api := apis.Items[i]
 
@@ -289,202 +278,3 @@ func DescribeAPIByName(apiName string) ([]schema.APIResponse, error) {
 		},
 	}, nil
 }
-
-func getDashboardURL(apiName string) string {
-	loadBalancerURL, err := operator.LoadBalancerURL()
-	if err != nil {
-		return ""
-	}
-
-	dashboardURL := fmt.Sprintf(
-		"%s/dashboard/d/%s/realtimeapi?orgId=1&refresh=30s&var-api_name=%s",
-		loadBalancerURL, _realtimeDashboardUID, apiName,
-	)
-
-	return dashboardURL
-}
-
-// K8sResourceFromAPIConfig converts a cortex API config into a realtime API CRD resource
-func K8sResourceFromAPIConfig(apiConfig userconfig.API) serverless.RealtimeAPI {
-	containers := make([]serverless.ContainerSpec, len(apiConfig.Pod.Containers))
-	for i := range apiConfig.Pod.Containers {
-		container := apiConfig.Pod.Containers[i]
-		var env []kcore.EnvVar
-		for k, v := range container.Env {
-			env = append(env, kcore.EnvVar{
-				Name:  k,
-				Value: v,
-			})
-		}
-
-		var compute *serverless.ComputeSpec
-		if container.Compute != nil {
-			var cpu *kresource.Quantity
-			if container.Compute.CPU != nil {
-				cpu = &container.Compute.CPU.Quantity
-			}
-			var mem *kresource.Quantity
-			if container.Compute.Mem != nil {
-				mem = &container.Compute.Mem.Quantity
-			}
-			var shm *kresource.Quantity
-			if container.Compute.Shm != nil {
-				shm = &container.Compute.Shm.Quantity
-			}
-
-			compute = &serverless.ComputeSpec{
-				CPU: cpu,
-				GPU: container.Compute.GPU,
-				Inf: container.Compute.Inf,
-				Mem: mem,
-				Shm: shm,
-			}
-		}
-
-		containers[i] = serverless.ContainerSpec{
-			Name:           container.Name,
-			Image:          container.Image,
-			Command:        container.Command,
-			Args:           container.Args,
-			Env:            env,
-			Compute:        compute,
-			ReadinessProbe: workloads.GetProbeSpec(container.ReadinessProbe),
-			LivenessProbe:  workloads.GetProbeSpec(container.LivenessProbe),
-		}
-	}
-
-	api := serverless.RealtimeAPI{
-		ObjectMeta: kmeta.ObjectMeta{
-			Name:      apiConfig.Name,
-			Namespace: consts.DefaultNamespace,
-		},
-		Spec: serverless.RealtimeAPISpec{
-			Pod: serverless.PodSpec{
-				Port:           *apiConfig.Pod.Port,
-				MaxConcurrency: int32(apiConfig.Pod.MaxConcurrency),
-				MaxQueueLength: int32(apiConfig.Pod.MaxQueueLength),
-				Replicas:       apiConfig.Autoscaling.InitReplicas,
-				Containers:     containers,
-			},
-			Autoscaling: serverless.AutoscalingSpec{
-				MinReplicas:                  apiConfig.Autoscaling.MinReplicas,
-				MaxReplicas:                  apiConfig.Autoscaling.MaxReplicas,
-				TargetInFlight:               fmt.Sprintf("%f", *apiConfig.Autoscaling.TargetInFlight),
-				Window:                       kmeta.Duration{Duration: apiConfig.Autoscaling.Window},
-				DownscaleStabilizationPeriod: kmeta.Duration{Duration: apiConfig.Autoscaling.DownscaleStabilizationPeriod},
-				UpscaleStabilizationPeriod:   kmeta.Duration{Duration: apiConfig.Autoscaling.UpscaleStabilizationPeriod},
-				MaxDownscaleFactor:           fmt.Sprintf("%f", apiConfig.Autoscaling.MaxDownscaleFactor),
-				MaxUpscaleFactor:             fmt.Sprintf("%f", apiConfig.Autoscaling.MaxUpscaleFactor),
-				DownscaleTolerance:           fmt.Sprintf("%f", apiConfig.Autoscaling.DownscaleTolerance),
-				UpscaleTolerance:             fmt.Sprintf("%f", apiConfig.Autoscaling.UpscaleTolerance),
-			},
-			NodeGroups: apiConfig.NodeGroups,
-			UpdateStrategy: serverless.UpdateStrategySpec{
-				MaxSurge:       intstr.FromString(apiConfig.UpdateStrategy.MaxSurge),
-				MaxUnavailable: intstr.FromString(apiConfig.UpdateStrategy.MaxUnavailable),
-			},
-			Networking: serverless.NetworkingSpec{
-				Endpoint: *apiConfig.Networking.Endpoint,
-			},
-		},
-	}
-
-	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
-	api.Annotations = map[string]string{
-		"cortex.dev/deployment-id": deploymentID,
-		"cortex.dev/spec-id":       specID,
-		"cortex.dev/pod-id":        podID,
-		"cortex.dev/api-id":        apiID,
-	}
-
-	return api
-}
-
-func deleteBucketResources(apiName string) error {
-	prefix := filepath.Join(config.ClusterConfig.ClusterUID, "apis", apiName)
-	return config.AWS.DeleteS3Dir(config.ClusterConfig.Bucket, prefix, true)
-}
-
-func metadataFromRealtimeAPI(sv *serverless.RealtimeAPI) (*spec.Metadata, error) {
-	lastUpdated, err := spec.TimeFromAPIID(sv.Annotations["cortex.dev/api-id"])
-	if err != nil {
-		return nil, err
-	}
-	return &spec.Metadata{
-		Resource: &userconfig.Resource{
-			Name: sv.Name,
-			Kind: userconfig.RealtimeAPIKind,
-		},
-		APIID:        sv.Annotations["cortex.dev/api-id"],
-		DeploymentID: sv.Annotations["cortex.dev/deployment-id"],
-		LastUpdated:  lastUpdated.Unix(),
-	}, nil
-}
-
-func getReplicaCounts(pods []kcore.Pod, metadata *spec.Metadata) status.ReplicaCounts {
-	counts := status.ReplicaCounts{}
-
-	for i := range pods {
-		pod := pods[i]
-		if pod.Labels["apiName"] != metadata.Name {
-			continue
-		}
-		addPodToReplicaCounts(&pods[i], metadata, &counts)
-	}
-
-	return counts
-}
-
-func addPodToReplicaCounts(pod *kcore.Pod, metadata *spec.Metadata, counts *status.ReplicaCounts) {
-	latest := false
-	if isPodSpecLatest(pod, metadata) {
-		latest = true
-	}
-
-	isPodReady := k8s.IsPodReady(pod)
-	if latest && isPodReady {
-		counts.Ready++
-		return
-	} else if !latest && isPodReady {
-		counts.ReadyOutOfDate++
-		return
-	}
-
-	podStatus := k8s.GetPodStatus(pod)
-
-	if podStatus == k8s.PodStatusTerminating {
-		counts.Terminating++
-		return
-	}
-
-	if !latest {
-		return
-	}
-
-	switch podStatus {
-	case k8s.PodStatusPending:
-		counts.Pending++
-	case k8s.PodStatusStalled:
-		counts.Stalled++
-	case k8s.PodStatusCreating:
-		counts.Creating++
-	case k8s.PodStatusReady:
-		counts.Ready++
-	case k8s.PodStatusNotReady:
-		counts.NotReady++
-	case k8s.PodStatusErrImagePull:
-		counts.ErrImagePull++
-	case k8s.PodStatusFailed:
-		counts.Failed++
-	case k8s.PodStatusKilled:
-		counts.Killed++
-	case k8s.PodStatusKilledOOM:
-		counts.KilledOOM++
-	case k8s.PodStatusUnknown:
-		counts.Unknown++
-	}
-}
-
-func isPodSpecLatest(pod *kcore.Pod, metadata *spec.Metadata) bool {
-	return metadata.APIID == pod.Labels["apiID"]
-}
diff --git a/pkg/operator/resources/realtimeapi/helpers.go b/pkg/operator/resources/realtimeapi/helpers.go
new file mode 100644
index 0000000000..deb503edb3
--- /dev/null
+++ b/pkg/operator/resources/realtimeapi/helpers.go
@@ -0,0 +1,241 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package realtimeapi
+
+import (
+	"fmt"
+	"path/filepath"
+
+	"github.com/cortexlabs/cortex/pkg/config"
+	"github.com/cortexlabs/cortex/pkg/consts"
+	"github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
+	"github.com/cortexlabs/cortex/pkg/lib/k8s"
+	"github.com/cortexlabs/cortex/pkg/operator/operator"
+	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/cortexlabs/cortex/pkg/workloads"
+	"k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+)
+
+const _realtimeDashboardUID = "realtimeapi"
+
+func generateDeploymentID() string {
+	return k8s.RandomName()[:10]
+}
+
+func getDashboardURL(apiName string) string {
+	loadBalancerURL, err := operator.LoadBalancerURL()
+	if err != nil {
+		return ""
+	}
+
+	dashboardURL := fmt.Sprintf(
+		"%s/dashboard/d/%s/realtimeapi?orgId=1&refresh=30s&var-api_name=%s",
+		loadBalancerURL, _realtimeDashboardUID, apiName,
+	)
+
+	return dashboardURL
+}
+
+// K8sResourceFromAPIConfig converts a cortex API config into a realtime API CRD resource
+func K8sResourceFromAPIConfig(apiConfig userconfig.API) v1alpha1.RealtimeAPI {
+	containers := make([]v1alpha1.ContainerSpec, len(apiConfig.Pod.Containers))
+	for i := range apiConfig.Pod.Containers {
+		container := apiConfig.Pod.Containers[i]
+		var env []v1.EnvVar
+		for k, v := range container.Env {
+			env = append(env, v1.EnvVar{
+				Name:  k,
+				Value: v,
+			})
+		}
+
+		var compute *v1alpha1.ComputeSpec
+		if container.Compute != nil {
+			var cpu *resource.Quantity
+			if container.Compute.CPU != nil {
+				cpu = &container.Compute.CPU.Quantity
+			}
+			var mem *resource.Quantity
+			if container.Compute.Mem != nil {
+				mem = &container.Compute.Mem.Quantity
+			}
+			var shm *resource.Quantity
+			if container.Compute.Shm != nil {
+				shm = &container.Compute.Shm.Quantity
+			}
+
+			compute = &v1alpha1.ComputeSpec{
+				CPU: cpu,
+				GPU: container.Compute.GPU,
+				Inf: container.Compute.Inf,
+				Mem: mem,
+				Shm: shm,
+			}
+		}
+
+		containers[i] = v1alpha1.ContainerSpec{
+			Name:           container.Name,
+			Image:          container.Image,
+			Command:        container.Command,
+			Args:           container.Args,
+			Env:            env,
+			Compute:        compute,
+			ReadinessProbe: workloads.GetProbeSpec(container.ReadinessProbe),
+			LivenessProbe:  workloads.GetProbeSpec(container.LivenessProbe),
+		}
+	}
+
+	api := v1alpha1.RealtimeAPI{
+		ObjectMeta: v12.ObjectMeta{
+			Name:      apiConfig.Name,
+			Namespace: consts.DefaultNamespace,
+		},
+		Spec: v1alpha1.RealtimeAPISpec{
+			Pod: v1alpha1.PodSpec{
+				Port:           *apiConfig.Pod.Port,
+				MaxConcurrency: int32(apiConfig.Pod.MaxConcurrency),
+				MaxQueueLength: int32(apiConfig.Pod.MaxQueueLength),
+				Replicas:       apiConfig.Autoscaling.InitReplicas,
+				Containers:     containers,
+			},
+			Autoscaling: v1alpha1.AutoscalingSpec{
+				MinReplicas:                  apiConfig.Autoscaling.MinReplicas,
+				MaxReplicas:                  apiConfig.Autoscaling.MaxReplicas,
+				TargetInFlight:               fmt.Sprintf("%f", *apiConfig.Autoscaling.TargetInFlight),
+				Window:                       v12.Duration{Duration: apiConfig.Autoscaling.Window},
+				DownscaleStabilizationPeriod: v12.Duration{Duration: apiConfig.Autoscaling.DownscaleStabilizationPeriod},
+				UpscaleStabilizationPeriod:   v12.Duration{Duration: apiConfig.Autoscaling.UpscaleStabilizationPeriod},
+				MaxDownscaleFactor:           fmt.Sprintf("%f", apiConfig.Autoscaling.MaxDownscaleFactor),
+				MaxUpscaleFactor:             fmt.Sprintf("%f", apiConfig.Autoscaling.MaxUpscaleFactor),
+				DownscaleTolerance:           fmt.Sprintf("%f", apiConfig.Autoscaling.DownscaleTolerance),
+				UpscaleTolerance:             fmt.Sprintf("%f", apiConfig.Autoscaling.UpscaleTolerance),
+			},
+			NodeGroups: apiConfig.NodeGroups,
+			UpdateStrategy: v1alpha1.UpdateStrategySpec{
+				MaxSurge:       intstr.FromString(apiConfig.UpdateStrategy.MaxSurge),
+				MaxUnavailable: intstr.FromString(apiConfig.UpdateStrategy.MaxUnavailable),
+			},
+			Networking: v1alpha1.NetworkingSpec{
+				Endpoint: *apiConfig.Networking.Endpoint,
+			},
+		},
+	}
+
+	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
+	api.Annotations = map[string]string{
+		"cortex.dev/deployment-id": deploymentID,
+		"cortex.dev/spec-id":       specID,
+		"cortex.dev/pod-id":        podID,
+		"cortex.dev/api-id":        apiID,
+	}
+
+	return api
+}
+
+func deleteBucketResources(apiName string) error {
+	prefix := filepath.Join(config.ClusterConfig.ClusterUID, "apis", apiName)
+	return config.AWS.DeleteS3Dir(config.ClusterConfig.Bucket, prefix, true)
+}
+
+func metadataFromRealtimeAPI(sv *v1alpha1.RealtimeAPI) (*spec.Metadata, error) {
+	lastUpdated, err := spec.TimeFromAPIID(sv.Annotations["cortex.dev/api-id"])
+	if err != nil {
+		return nil, err
+	}
+	return &spec.Metadata{
+		Resource: &userconfig.Resource{
+			Name: sv.Name,
+			Kind: userconfig.RealtimeAPIKind,
+		},
+		APIID:        sv.Annotations["cortex.dev/api-id"],
+		DeploymentID: sv.Annotations["cortex.dev/deployment-id"],
+		LastUpdated:  lastUpdated.Unix(),
+	}, nil
+}
+
+func getReplicaCounts(pods []v1.Pod, metadata *spec.Metadata) status.ReplicaCounts {
+	counts := status.ReplicaCounts{}
+
+	for i := range pods {
+		pod := pods[i]
+		if pod.Labels["apiName"] != metadata.Name {
+			continue
+		}
+		addPodToReplicaCounts(&pods[i], metadata, &counts)
+	}
+
+	return counts
+}
+
+func addPodToReplicaCounts(pod *v1.Pod, metadata *spec.Metadata, counts *status.ReplicaCounts) {
+	latest := false
+	if isPodSpecLatest(pod, metadata) {
+		latest = true
+	}
+
+	isPodReady := k8s.IsPodReady(pod)
+	if latest && isPodReady {
+		counts.Ready++
+		return
+	} else if !latest && isPodReady {
+		counts.ReadyOutOfDate++
+		return
+	}
+
+	podStatus := k8s.GetPodStatus(pod)
+
+	if podStatus == k8s.PodStatusTerminating { // counted for up-to-date and out-of-date pods alike
+		counts.Terminating++
+		return
+	}
+
+	if !latest { // the remaining counters only track up-to-date pods
+		return
+	}
+
+	switch podStatus {
+	case k8s.PodStatusPending:
+		counts.Pending++
+	case k8s.PodStatusStalled:
+		counts.Stalled++
+	case k8s.PodStatusCreating:
+		counts.Creating++
+	case k8s.PodStatusReady:
+		counts.Ready++
+	case k8s.PodStatusNotReady:
+		counts.NotReady++
+	case k8s.PodStatusErrImagePull:
+		counts.ErrImagePull++
+	case k8s.PodStatusFailed:
+		counts.Failed++
+	case k8s.PodStatusKilled:
+		counts.Killed++
+	case k8s.PodStatusKilledOOM:
+		counts.KilledOOM++
+	case k8s.PodStatusUnknown:
+		counts.Unknown++
+	}
+}
+
+func isPodSpecLatest(pod *v1.Pod, metadata *spec.Metadata) bool {
+	return metadata.APIID == pod.Labels["apiID"]
+}

From c1df9b7ff12fd1866ec63130c87fb4439f661419 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Thu, 29 Jul 2021 16:40:48 +0200
Subject: [PATCH 29/42] Fix rolling update on autoscaling spec update

---
 .../serverless/v1alpha1/realtimeapi_types.go    | 10 +++++-----
 .../serverless.cortex.dev_realtimeapis.yaml     | 12 ++++++------
 .../realtimeapi_controller_helpers.go           | 17 +++++++++++------
 pkg/operator/resources/realtimeapi/api.go       |  6 +++---
 pkg/operator/resources/realtimeapi/helpers.go   | 15 ++++++++++++---
 5 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 704f3b6f7f..e68b2c0629 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -33,6 +33,11 @@ import (
 
 // RealtimeAPISpec defines the desired state of RealtimeAPI
 type RealtimeAPISpec struct {
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=1
+	// Number of desired replicas
+	Replicas int32 `json:"replicas"`
+
 	// Pod configuration
 	// +kubebuilder:validation:Required
 	Pod PodSpec `json:"pod"`
@@ -73,11 +78,6 @@ type PodSpec struct {
 	// (beyond max_concurrency) before requests are rejected with error code 503
 	MaxQueueLength int32 `json:"max_queue_length"`
 
-	// +kubebuilder:validation:Required
-	// +kubebuilder:default=1
-	// Number of desired replicas
-	Replicas int32 `json:"replicas"`
-
 	// +kubebuilder:validation:Required
 	// Configurations for the containers to run
 	Containers []ContainerSpec `json:"containers"`
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 21dda24d33..7346ec219d 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -556,18 +556,17 @@ spec:
                     description: Port to which requests will be sent to
                     format: int32
                     type: integer
-                  replicas:
-                    default: 1
-                    description: Number of desired replicas
-                    format: int32
-                    type: integer
                 required:
                 - containers
                 - max_concurrency
                 - max_queue_length
                 - port
-                - replicas
                 type: object
+              replicas:
+                default: 1
+                description: Number of desired replicas
+                format: int32
+                type: integer
               update_strategy:
                 default:
                   max_surge: 25%
@@ -598,6 +597,7 @@ spec:
             required:
             - networking
             - pod
+            - replicas
             type: object
           status:
             description: RealtimeAPIStatus defines the observed state of RealtimeAPI
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 64ec0cd000..67a3381ef3 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -92,7 +92,6 @@ func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, ap
 		if err := ctrl.SetControllerReference(&api, &deployment, r.Scheme); err != nil {
 			return err
 		}
-
 		return nil
 	})
 	if err != nil {
@@ -177,7 +176,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 
 	return *k8s.Deployment(&k8s.DeploymentSpec{
 		Name:           workloads.K8sName(api.Name),
-		Replicas:       api.Spec.Pod.Replicas,
+		Replicas:       api.Spec.Replicas,
 		MaxSurge:       pointer.String(api.Spec.UpdateStrategy.MaxSurge.String()),
 		MaxUnavailable: pointer.String(api.Spec.UpdateStrategy.MaxUnavailable.String()),
 		Labels: map[string]string{
@@ -197,7 +196,6 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
 				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
-				"apiID":          api.Annotations["cortex.dev/api-id"],
 				"cortex.dev/api": "true",
 			},
 			Annotations: map[string]string{
@@ -248,7 +246,7 @@ func (r *RealtimeAPIReconciler) desiredService(api serverless.RealtimeAPI) kcore
 
 func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI) istioclientnetworking.VirtualService {
 	var activatorWeight int32
-	if api.Spec.Pod.Replicas == 0 {
+	if api.Spec.Replicas == 0 {
 		activatorWeight = 100
 	}
 
@@ -405,8 +403,15 @@ func (r *RealtimeAPIReconciler) proxyContainer(api serverless.RealtimeAPI) (kcor
 			s.Int32(api.Spec.Pod.MaxQueueLength),
 		},
 		Ports: []kcore.ContainerPort{
-			{Name: consts.AdminPortName, ContainerPort: consts.AdminPortInt32},
-			{ContainerPort: consts.ProxyPortInt32},
+			{
+				Name:          consts.AdminPortName,
+				ContainerPort: consts.AdminPortInt32,
+				Protocol:      kcore.ProtocolTCP,
+			},
+			{
+				ContainerPort: consts.ProxyPortInt32,
+				Protocol:      kcore.ProtocolTCP,
+			},
 		},
 		Env:     workloads.BaseEnvVars,
 		EnvFrom: workloads.BaseClusterEnvVars(),
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 2abe49403b..6e78509274 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -52,7 +52,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 	if err != nil {
 		if kerrors.IsNotFound(err) {
 			if kerrors.IsNotFound(err) {
-				api = K8sResourceFromAPIConfig(*apiConfig)
+				api = k8sResourceFromAPIConfig(*apiConfig, nil)
 				if err = config.K8s.Create(ctx, &api); err != nil {
 					return nil, "", errors.Wrap(err, "failed to create realtime api resource")
 				}
@@ -79,7 +79,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 		return nil, "", errors.Wrap(err, "failed to get realtime api resource")
 	}
 
-	desiredAPI := K8sResourceFromAPIConfig(*apiConfig)
+	desiredAPI := k8sResourceFromAPIConfig(*apiConfig, &api)
 
 	apiSpec := &spec.API{
 		API:                   apiConfig,
@@ -256,7 +256,7 @@ func DescribeAPIByName(apiName string) ([]schema.APIResponse, error) {
 	}
 
 	var podList kcore.PodList
-	if err := config.K8s.List(ctx, &podList, client.MatchingLabels{
+	if err = config.K8s.List(ctx, &podList, client.MatchingLabels{
 		"apiName": metadata.Name,
 		"apiKind": userconfig.RealtimeAPIKind.String(),
 	}); err != nil {
diff --git a/pkg/operator/resources/realtimeapi/helpers.go b/pkg/operator/resources/realtimeapi/helpers.go
index deb503edb3..938fccc99a 100644
--- a/pkg/operator/resources/realtimeapi/helpers.go
+++ b/pkg/operator/resources/realtimeapi/helpers.go
@@ -23,6 +23,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
@@ -55,8 +56,8 @@ func getDashboardURL(apiName string) string {
 	return dashboardURL
 }
 
-// K8sResourceFromAPIConfig converts a cortex API config into a realtime API CRD resource
-func K8sResourceFromAPIConfig(apiConfig userconfig.API) v1alpha1.RealtimeAPI {
+// k8sResourceFromAPIConfig converts a cortex API config into a realtime API CRD resource
+func k8sResourceFromAPIConfig(apiConfig userconfig.API, prevAPI *serverless.RealtimeAPI) v1alpha1.RealtimeAPI {
 	containers := make([]v1alpha1.ContainerSpec, len(apiConfig.Pod.Containers))
 	for i := range apiConfig.Pod.Containers {
 		container := apiConfig.Pod.Containers[i]
@@ -110,11 +111,11 @@ func K8sResourceFromAPIConfig(apiConfig userconfig.API) v1alpha1.RealtimeAPI {
 			Namespace: consts.DefaultNamespace,
 		},
 		Spec: v1alpha1.RealtimeAPISpec{
+			Replicas: apiConfig.Autoscaling.InitReplicas,
 			Pod: v1alpha1.PodSpec{
 				Port:           *apiConfig.Pod.Port,
 				MaxConcurrency: int32(apiConfig.Pod.MaxConcurrency),
 				MaxQueueLength: int32(apiConfig.Pod.MaxQueueLength),
-				Replicas:       apiConfig.Autoscaling.InitReplicas,
 				Containers:     containers,
 			},
 			Autoscaling: v1alpha1.AutoscalingSpec{
@@ -148,6 +149,14 @@ func K8sResourceFromAPIConfig(apiConfig userconfig.API) v1alpha1.RealtimeAPI {
 		"cortex.dev/api-id":        apiID,
 	}
 
+	if prevAPI != nil {
+		// we should keep the existing number of replicas instead of init_replicas
+		api.Spec.Replicas = prevAPI.Spec.Replicas
+		if prevDeployID := prevAPI.Annotations["cortex.dev/deployment-id"]; prevDeployID != "" {
+			api.Annotations["cortex.dev/deployment-id"] = prevDeployID
+		}
+	}
+
 	return api
 }
 

From b7cac93d0b043bad95e7b07c50d8e377301de7b1 Mon Sep 17 00:00:00 2001
From: Miguel Varela Ramos <miguel@cortexlabs.com>
Date: Fri, 30 Jul 2021 15:52:10 +0200
Subject: [PATCH 30/42] WIP: update realtime scaler to work with CRD

---
 pkg/autoscaler/autoscaler_test.go |  27 +--
 pkg/autoscaler/realtime_scaler.go | 293 +++++++++++++++---------------
 2 files changed, 156 insertions(+), 164 deletions(-)

diff --git a/pkg/autoscaler/autoscaler_test.go b/pkg/autoscaler/autoscaler_test.go
index bd01d7e286..4dc461f9b7 100644
--- a/pkg/autoscaler/autoscaler_test.go
+++ b/pkg/autoscaler/autoscaler_test.go
@@ -297,12 +297,9 @@ func TestAutoscaler_Awake(t *testing.T) {
 
 	ticker := time.NewTicker(250 * time.Millisecond)
 	go func() {
-		for {
-			select {
-			case <-ticker.C:
-				err := autoscaleFn()
-				require.NoError(t, err)
-			}
+		for range ticker.C {
+			err := autoscaleFn()
+			require.NoError(t, err)
 		}
 	}()
 
@@ -372,12 +369,9 @@ func TestAutoscaler_MinReplicas(t *testing.T) {
 
 	ticker := time.NewTicker(250 * time.Millisecond)
 	go func() {
-		for {
-			select {
-			case <-ticker.C:
-				err := autoscaleFn()
-				require.NoError(t, err)
-			}
+		for range ticker.C {
+			err := autoscaleFn()
+			require.NoError(t, err)
 		}
 	}()
 
@@ -444,12 +438,9 @@ func TestAutoscaler_MaxReplicas(t *testing.T) {
 
 	ticker := time.NewTicker(250 * time.Millisecond)
 	go func() {
-		for {
-			select {
-			case <-ticker.C:
-				err := autoscaleFn()
-				require.NoError(t, err)
-			}
+		for range ticker.C {
+			err := autoscaleFn()
+			require.NoError(t, err)
 		}
 	}()
 
diff --git a/pkg/autoscaler/realtime_scaler.go b/pkg/autoscaler/realtime_scaler.go
index a0dbb60c28..4ce175d29a 100644
--- a/pkg/autoscaler/realtime_scaler.go
+++ b/pkg/autoscaler/realtime_scaler.go
@@ -21,18 +21,14 @@ import (
 	"fmt"
 	"time"
 
-	"github.com/cortexlabs/cortex/pkg/consts"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/pointer"
-	"github.com/cortexlabs/cortex/pkg/lib/telemetry"
+	libstrings "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
-	"github.com/cortexlabs/cortex/pkg/workloads"
 	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
 	"github.com/prometheus/common/model"
 	"go.uber.org/zap"
-	kapps "k8s.io/api/apps/v1"
-	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -53,49 +49,25 @@ func NewRealtimeScaler(k8sClient *k8s.Client, promClient promv1.API, logger *zap
 func (s *RealtimeScaler) Scale(apiName string, request int32) error {
 	ctx := context.Background()
 
-	// we use the controller-runtime client to make use of the cache mechanism
-	var deployment kapps.Deployment
-	err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
+	var api serverless.RealtimeAPI
+	if err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
 		Namespace: s.k8s.Namespace,
-		Name:      workloads.K8sName(apiName),
-	}, &deployment)
-	if err != nil {
-		return errors.Wrap(err, "failed to get deployment")
-	}
-
-	if deployment.Spec.Replicas == nil {
-		return errors.Wrap(err, "k8s deployment doesn't have the replicas field set")
+		Name:      apiName},
+		&api,
+	); err != nil {
+		return err
 	}
 
-	current := *deployment.Spec.Replicas
+	current := api.Spec.Replicas
 	if current == request {
 		return nil
 	}
 
-	if request == 0 {
-		if err = s.routeToActivator(&deployment); err != nil {
-			return errors.Wrap(err, "failed to re-route traffic to activator")
-		}
-	}
-
-	deployment.Spec.Replicas = pointer.Int32(request)
-
-	if err = s.k8s.Update(ctx, &deployment); err != nil {
+	api.Spec.Replicas = request
+	if err := s.k8s.Update(ctx, &api); err != nil {
 		return errors.Wrap(err, "failed to update deployment")
 	}
 
-	if current == 0 && request > 0 {
-		go func() {
-			if err := s.routeToService(&deployment); err != nil {
-				s.logger.Errorw("failed to re-route traffic to API",
-					zap.Error(err), zap.String("apiName", apiName),
-				)
-				telemetry.Error(err)
-			}
-		}()
-
-	}
-
 	return nil
 }
 
@@ -136,133 +108,162 @@ func (s *RealtimeScaler) GetInFlightRequests(apiName string, window time.Duratio
 }
 
 func (s *RealtimeScaler) GetAutoscalingSpec(apiName string) (*userconfig.Autoscaling, error) {
-	deployment, err := s.k8s.GetDeployment(workloads.K8sName(apiName))
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to get deployment")
-	}
-
-	if deployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find k8s deployment", apiName)
-	}
-
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return nil, err
-	}
-
-	return autoscalingSpec, nil
-}
-
-func (s *RealtimeScaler) CurrentRequestedReplicas(apiName string) (int32, error) {
 	ctx := context.Background()
 
-	// we use the controller-runtime client to make use of the cache mechanism
-	var deployment kapps.Deployment
-	err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
+	var api serverless.RealtimeAPI
+	if err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
 		Namespace: s.k8s.Namespace,
-		Name:      workloads.K8sName(apiName),
-	}, &deployment)
-	if err != nil {
-		return 0, errors.Wrap(err, "failed to get deployment")
-	}
-
-	if deployment.Spec.Replicas == nil {
-		return 0, errors.Wrap(err, "k8s deployment doesn't have the replicas field set")
+		Name:      apiName},
+		&api,
+	); err != nil {
+		return nil, err
 	}
 
-	return *deployment.Spec.Replicas, nil
-}
-
-func (s *RealtimeScaler) routeToService(deployment *kapps.Deployment) error {
-	ctx := context.Background()
-	vs, err := s.k8s.GetVirtualService(deployment.Name)
-	if err != nil {
-		return errors.Wrap(err, "failed to get virtual service")
+	targetInFlight, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.TargetInFlight)
+	if !ok {
+		return nil, errors.ErrorUnexpected("failed to parse target-in-flight requests from autoscaling spec")
 	}
 
-	if len(vs.Spec.Http) < 1 {
-		return errors.ErrorUnexpected("virtual service does not have any http entries")
+	maxDownscaleFactor, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.MaxDownscaleFactor)
+	if !ok {
+		return nil, errors.ErrorUnexpected("failed to parse max downscale factor from autoscaling spec")
 	}
 
-	if err = s.waitForReadyReplicas(ctx, deployment); err != nil {
-		return errors.Wrap(err, "no ready replicas available")
+	maxUpscaleFactor, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.MaxUpscaleFactor)
+	if !ok {
+		return nil, errors.ErrorUnexpected("failed to parse max upscale factor from autoscaling spec")
 	}
 
-	for i := range vs.Spec.Http {
-		if len(vs.Spec.Http[i].Route) != 2 {
-			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
-		}
-
-		vs.Spec.Http[i].Route[0].Weight = 100 // service traffic
-		vs.Spec.Http[i].Route[1].Weight = 0   // activator traffic
+	downscaleTolerance, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.DownscaleTolerance)
+	if !ok {
+		return nil, errors.ErrorUnexpected("failed to parse downscale tolerance from autoscaling spec")
 	}
 
-	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
-	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
-		return errors.Wrap(err, "failed to update virtual service")
+	upscaleTolerance, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.UpscaleTolerance)
+	if !ok {
+		return nil, errors.ErrorUnexpected("failed to parse upscale tolerance from autoscaling spec")
 	}
 
-	return nil
+	return &userconfig.Autoscaling{
+		MinReplicas:                  api.Spec.Autoscaling.MinReplicas,
+		MaxReplicas:                  api.Spec.Autoscaling.MaxReplicas,
+		InitReplicas:                 api.Spec.Autoscaling.MinReplicas, // FIXME: either add init replicas to the CRD autoscaling spec or remove init_replicas (?)
+		TargetInFlight:               &targetInFlight,
+		Window:                       api.Spec.Autoscaling.Window.Duration,
+		DownscaleStabilizationPeriod: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration,
+		UpscaleStabilizationPeriod:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.Duration,
+		MaxDownscaleFactor:           maxDownscaleFactor,
+		MaxUpscaleFactor:             maxUpscaleFactor,
+		DownscaleTolerance:           downscaleTolerance,
+		UpscaleTolerance:             upscaleTolerance,
+	}, nil
 }
 
-func (s *RealtimeScaler) routeToActivator(deployment *kapps.Deployment) error {
+func (s *RealtimeScaler) CurrentRequestedReplicas(apiName string) (int32, error) {
 	ctx := context.Background()
-	vs, err := s.k8s.GetVirtualService(deployment.Name)
-	if err != nil {
-		return errors.Wrap(err, "failed to get virtual service")
-	}
-
-	if len(vs.Spec.Http) < 1 {
-		return errors.ErrorUnexpected("virtual service does not have any http entries")
-	}
-
-	for i := range vs.Spec.Http {
-		if len(vs.Spec.Http[i].Route) != 2 {
-			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
-		}
 
-		vs.Spec.Http[i].Route[0].Weight = 0   // service traffic
-		vs.Spec.Http[i].Route[1].Weight = 100 // activator traffic
-	}
-
-	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
-	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
-		return errors.Wrap(err, "failed to update virtual service")
+	var api serverless.RealtimeAPI
+	if err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
+		Namespace: s.k8s.Namespace,
+		Name:      apiName},
+		&api,
+	); err != nil {
+		return 0, err
 	}
 
-	return nil
+	return api.Spec.Replicas, nil
 }
 
-func (s *RealtimeScaler) waitForReadyReplicas(ctx context.Context, deployment *kapps.Deployment) error {
-	watcher, err := s.k8s.ClientSet().AppsV1().Deployments(s.k8s.Namespace).Watch(
-		ctx,
-		kmeta.ListOptions{
-			FieldSelector: fmt.Sprintf("metadata.name=%s", deployment.Name),
-			Watch:         true,
-		},
-	)
-	if err != nil {
-		return errors.Wrap(err, "could not create deployment watcher")
-	}
-
-	defer watcher.Stop()
-
-	ctx, cancel := context.WithTimeout(ctx, consts.WaitForReadyReplicasTimeout)
-	defer cancel()
-
-	for {
-		select {
-		case event := <-watcher.ResultChan():
-			deploy, ok := event.Object.(*kapps.Deployment)
-			if !ok {
-				continue
-			}
-
-			if deploy.Status.ReadyReplicas > 0 {
-				return nil
-			}
-		case <-ctx.Done():
-			return ctx.Err()
-		}
-	}
-}
+//func (s *RealtimeScaler) routeToService(deployment *kapps.Deployment) error {
+//	ctx := context.Background()
+//	vs, err := s.k8s.GetVirtualService(deployment.Name)
+//	if err != nil {
+//		return errors.Wrap(err, "failed to get virtual service")
+//	}
+//
+//	if len(vs.Spec.Http) < 1 {
+//		return errors.ErrorUnexpected("virtual service does not have any http entries")
+//	}
+//
+//	if err = s.waitForReadyReplicas(ctx, deployment); err != nil {
+//		return errors.Wrap(err, "no ready replicas available")
+//	}
+//
+//	for i := range vs.Spec.Http {
+//		if len(vs.Spec.Http[i].Route) != 2 {
+//			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
+//		}
+//
+//		vs.Spec.Http[i].Route[0].Weight = 100 // service traffic
+//		vs.Spec.Http[i].Route[1].Weight = 0   // activator traffic
+//	}
+//
+//	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
+//	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
+//		return errors.Wrap(err, "failed to update virtual service")
+//	}
+//
+//	return nil
+//}
+//
+//func (s *RealtimeScaler) routeToActivator(deployment *kapps.Deployment) error {
+//	ctx := context.Background()
+//	vs, err := s.k8s.GetVirtualService(deployment.Name)
+//	if err != nil {
+//		return errors.Wrap(err, "failed to get virtual service")
+//	}
+//
+//	if len(vs.Spec.Http) < 1 {
+//		return errors.ErrorUnexpected("virtual service does not have any http entries")
+//	}
+//
+//	for i := range vs.Spec.Http {
+//		if len(vs.Spec.Http[i].Route) != 2 {
+//			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
+//		}
+//
+//		vs.Spec.Http[i].Route[0].Weight = 0   // service traffic
+//		vs.Spec.Http[i].Route[1].Weight = 100 // activator traffic
+//	}
+//
+//	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
+//	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
+//		return errors.Wrap(err, "failed to update virtual service")
+//	}
+//
+//	return nil
+//}
+//
+//func (s *RealtimeScaler) waitForReadyReplicas(ctx context.Context, deployment *kapps.Deployment) error {
+//	watcher, err := s.k8s.ClientSet().AppsV1().Deployments(s.k8s.Namespace).Watch(
+//		ctx,
+//		kmeta.ListOptions{
+//			FieldSelector: fmt.Sprintf("metadata.name=%s", deployment.Name),
+//			Watch:         true,
+//		},
+//	)
+//	if err != nil {
+//		return errors.Wrap(err, "could not create deployment watcher")
+//	}
+//
+//	defer watcher.Stop()
+//
+//	ctx, cancel := context.WithTimeout(ctx, consts.WaitForReadyReplicasTimeout)
+//	defer cancel()
+//
+//	for {
+//		select {
+//		case event := <-watcher.ResultChan():
+//			deploy, ok := event.Object.(*kapps.Deployment)
+//			if !ok {
+//				continue
+//			}
+//
+//			if deploy.Status.ReadyReplicas > 0 {
+//				return nil
+//			}
+//		case <-ctx.Done():
+//			return ctx.Err()
+//		}
+//	}
+//}

From e27f41458397803bbd781372403e65a236369b89 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Fri, 30 Jul 2021 17:50:27 +0300
Subject: [PATCH 31/42] Fix autoscaler for RealtimeAPI CRD

---
 go.mod                                        |   2 +-
 pkg/autoscaler/realtime_scaler.go             | 117 ++----------------
 .../serverless/v1alpha1/realtimeapi_types.go  |  20 +--
 .../serverless/realtimeapi_controller.go      |  12 +-
 .../realtimeapi_controller_helpers.go         |   2 +-
 pkg/operator/resources/realtimeapi/helpers.go |   3 +-
 6 files changed, 25 insertions(+), 131 deletions(-)

diff --git a/go.mod b/go.mod
index 4381c8a46b..490aac0a00 100644
--- a/go.mod
+++ b/go.mod
@@ -26,7 +26,7 @@ require (
 	github.com/go-ole/go-ole v1.2.5 // indirect
 	github.com/gobwas/glob v0.2.3
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
-	github.com/google/go-cmp v0.5.6 // indirect
+	github.com/google/go-cmp v0.5.6
 	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/uuid v1.2.0
 	github.com/googleapis/gnostic v0.5.5 // indirect
diff --git a/pkg/autoscaler/realtime_scaler.go b/pkg/autoscaler/realtime_scaler.go
index 4ce175d29a..5d1df8ad27 100644
--- a/pkg/autoscaler/realtime_scaler.go
+++ b/pkg/autoscaler/realtime_scaler.go
@@ -49,23 +49,24 @@ func NewRealtimeScaler(k8sClient *k8s.Client, promClient promv1.API, logger *zap
 func (s *RealtimeScaler) Scale(apiName string, request int32) error {
 	ctx := context.Background()
 
-	var api serverless.RealtimeAPI
-	if err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
+	// we use the controller-runtime client to make use of the cache mechanism
+	var realtimeAPI serverless.RealtimeAPI
+	err := s.k8s.Get(ctx, ctrlclient.ObjectKey{
 		Namespace: s.k8s.Namespace,
-		Name:      apiName},
-		&api,
-	); err != nil {
-		return err
+		Name:      apiName,
+	}, &realtimeAPI)
+	if err != nil {
+		return errors.Wrap(err, "failed to get realtimeapi")
 	}
 
-	current := api.Spec.Replicas
+	current := realtimeAPI.Spec.Replicas
 	if current == request {
 		return nil
 	}
+	realtimeAPI.Spec.Replicas = request
 
-	api.Spec.Replicas = request
-	if err := s.k8s.Update(ctx, &api); err != nil {
-		return errors.Wrap(err, "failed to update deployment")
+	if err = s.k8s.Update(ctx, &realtimeAPI); err != nil {
+		return errors.Wrap(err, "failed to update realtimeapi")
 	}
 
 	return nil
@@ -147,7 +148,7 @@ func (s *RealtimeScaler) GetAutoscalingSpec(apiName string) (*userconfig.Autosca
 	return &userconfig.Autoscaling{
 		MinReplicas:                  api.Spec.Autoscaling.MinReplicas,
 		MaxReplicas:                  api.Spec.Autoscaling.MaxReplicas,
-		InitReplicas:                 api.Spec.Autoscaling.MinReplicas, // FIXME: either add init replicas to the CRD autoscaling spec or remove init_replicas (?)
+		InitReplicas:                 api.Spec.Autoscaling.InitReplicas,
 		TargetInFlight:               &targetInFlight,
 		Window:                       api.Spec.Autoscaling.Window.Duration,
 		DownscaleStabilizationPeriod: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration,
@@ -173,97 +174,3 @@ func (s *RealtimeScaler) CurrentRequestedReplicas(apiName string) (int32, error)
 
 	return api.Spec.Replicas, nil
 }
-
-//func (s *RealtimeScaler) routeToService(deployment *kapps.Deployment) error {
-//	ctx := context.Background()
-//	vs, err := s.k8s.GetVirtualService(deployment.Name)
-//	if err != nil {
-//		return errors.Wrap(err, "failed to get virtual service")
-//	}
-//
-//	if len(vs.Spec.Http) < 1 {
-//		return errors.ErrorUnexpected("virtual service does not have any http entries")
-//	}
-//
-//	if err = s.waitForReadyReplicas(ctx, deployment); err != nil {
-//		return errors.Wrap(err, "no ready replicas available")
-//	}
-//
-//	for i := range vs.Spec.Http {
-//		if len(vs.Spec.Http[i].Route) != 2 {
-//			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
-//		}
-//
-//		vs.Spec.Http[i].Route[0].Weight = 100 // service traffic
-//		vs.Spec.Http[i].Route[1].Weight = 0   // activator traffic
-//	}
-//
-//	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
-//	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
-//		return errors.Wrap(err, "failed to update virtual service")
-//	}
-//
-//	return nil
-//}
-//
-//func (s *RealtimeScaler) routeToActivator(deployment *kapps.Deployment) error {
-//	ctx := context.Background()
-//	vs, err := s.k8s.GetVirtualService(deployment.Name)
-//	if err != nil {
-//		return errors.Wrap(err, "failed to get virtual service")
-//	}
-//
-//	if len(vs.Spec.Http) < 1 {
-//		return errors.ErrorUnexpected("virtual service does not have any http entries")
-//	}
-//
-//	for i := range vs.Spec.Http {
-//		if len(vs.Spec.Http[i].Route) != 2 {
-//			return errors.ErrorUnexpected("virtual service does not have the required number of 2 http routes")
-//		}
-//
-//		vs.Spec.Http[i].Route[0].Weight = 0   // service traffic
-//		vs.Spec.Http[i].Route[1].Weight = 100 // activator traffic
-//	}
-//
-//	vsClient := s.k8s.IstioClientSet().NetworkingV1beta1().VirtualServices(s.k8s.Namespace)
-//	if _, err = vsClient.Update(ctx, vs, kmeta.UpdateOptions{}); err != nil {
-//		return errors.Wrap(err, "failed to update virtual service")
-//	}
-//
-//	return nil
-//}
-//
-//func (s *RealtimeScaler) waitForReadyReplicas(ctx context.Context, deployment *kapps.Deployment) error {
-//	watcher, err := s.k8s.ClientSet().AppsV1().Deployments(s.k8s.Namespace).Watch(
-//		ctx,
-//		kmeta.ListOptions{
-//			FieldSelector: fmt.Sprintf("metadata.name=%s", deployment.Name),
-//			Watch:         true,
-//		},
-//	)
-//	if err != nil {
-//		return errors.Wrap(err, "could not create deployment watcher")
-//	}
-//
-//	defer watcher.Stop()
-//
-//	ctx, cancel := context.WithTimeout(ctx, consts.WaitForReadyReplicasTimeout)
-//	defer cancel()
-//
-//	for {
-//		select {
-//		case event := <-watcher.ResultChan():
-//			deploy, ok := event.Object.(*kapps.Deployment)
-//			if !ok {
-//				continue
-//			}
-//
-//			if deploy.Status.ReadyReplicas > 0 {
-//				return nil
-//			}
-//		case <-ctx.Done():
-//			return ctx.Err()
-//		}
-//	}
-//}
diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index e68b2c0629..9de96d79cc 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -34,7 +34,6 @@ import (
 // RealtimeAPISpec defines the desired state of RealtimeAPI
 type RealtimeAPISpec struct {
 	// +kubebuilder:validation:Required
-	// +kubebuilder:default=1
 	// Number of desired replicas
 	Replicas int32 `json:"replicas"`
 
@@ -43,7 +42,6 @@ type RealtimeAPISpec struct {
 	Pod PodSpec `json:"pod"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default={"min_replicas": 1}
 	// Autoscaling configuration
 	Autoscaling AutoscalingSpec `json:"autoscaling"`
 
@@ -52,7 +50,6 @@ type RealtimeAPISpec struct {
 	NodeGroups []string `json:"node_groups"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default={"max_surge": "25%", "max_unavailable": "25%"}
 	// Deployment strategy to use when replacing existing replicas with new ones
 	UpdateStrategy UpdateStrategySpec `json:"update_strategy"`
 
@@ -63,17 +60,14 @@ type RealtimeAPISpec struct {
 
 type PodSpec struct {
 	// +kubebuilder:validation:Required
-	// +kubebuilder:default=8080
 	// Port to which requests will be sent to
 	Port int32 `json:"port"`
 
 	// +kubebuilder:validation:Required
-	// +kubebuilder:default=1
 	// Maximum number of requests that will be concurrently sent into the container
 	MaxConcurrency int32 `json:"max_concurrency"`
 
 	// +kubebuilder:validation:Required
-	// +kubebuilder:default=100
 	// Maximum number of requests per replica which will be queued
 	// (beyond max_concurrency) before requests are rejected with error code 503
 	MaxQueueLength int32 `json:"max_queue_length"`
@@ -143,11 +137,12 @@ type ComputeSpec struct {
 }
 
 type AutoscalingSpec struct {
-	// +kubebuilder:default=1
+	// Init number of replicas
+	InitReplicas int32 `json:"init_replicas,omitempty"`
+
 	// Minimum number of replicas
 	MinReplicas int32 `json:"min_replicas,omitempty"`
 
-	// +kubebuilder:default=100
 	// Maximum number of replicas
 	MaxReplicas int32 `json:"max_replicas,omitempty"`
 
@@ -157,52 +152,43 @@ type AutoscalingSpec struct {
 	TargetInFlight string `json:"target_in_flight,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="60s"
 	// Duration over which to average the API's in-flight requests per replica
 	Window kmeta.Duration `json:"window,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="5m"
 	// The API will not scale below the highest recommendation made during this period
 	DownscaleStabilizationPeriod kmeta.Duration `json:"downscale_stabilization_period,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="1m"
 	// The API will not scale above the lowest recommendation made during this period
 	UpscaleStabilizationPeriod kmeta.Duration `json:"upscale_stabilization_period,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="0.75"
 	// Maximum factor by which to scale down the API on a single scaling event
 	MaxDownscaleFactor string `json:"max_downscale_factor,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="1.5"
 	// Maximum factor by which to scale up the API on a single scaling event
 	MaxUpscaleFactor string `json:"max_upscale_factor,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="0.5"
 	// Any recommendation falling within this factor below the current number of replicas will not trigger a
 	// scale down event
 	DownscaleTolerance string `json:"downscale_tolerance,omitempty"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="0.5"
 	// Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event
 	UpscaleTolerance string `json:"upscale_tolerance,omitempty"`
 }
 
 type UpdateStrategySpec struct {
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="25%"
 	// Maximum number of replicas that can be scheduled above the desired number of replicas during an update;
 	// can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 	// (set to 0 to disable rolling updates)
 	MaxSurge intstr.IntOrString `json:"max_surge"`
 
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="25%"
 	// maximum number of replicas that can be unavailable during an update; can be an absolute number,
 	// e.g. 5, or a percentage of desired replicas, e.g. 10%
 	MaxUnavailable intstr.IntOrString `json:"max_unavailable"`
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index dab7d0e7a4..384ab8b342 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -117,12 +117,6 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// Step 4: Create or Update Resources
-	deployOp, err := r.createOrUpdateDeployment(ctx, api)
-	if err != nil {
-		return ctrl.Result{}, err
-	}
-	log.V(1).Info(fmt.Sprintf("deployment %s", deployOp))
-
 	svcOp, err := r.createOrUpdateService(ctx, api)
 	if err != nil {
 		return ctrl.Result{}, err
@@ -135,6 +129,12 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 	log.V(1).Info(fmt.Sprintf("virtual service %s", vsOp))
 
+	deployOp, err := r.createOrUpdateDeployment(ctx, api)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	log.V(1).Info(fmt.Sprintf("deployment %s", deployOp))
+
 	return ctrl.Result{}, nil
 }
 
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 67a3381ef3..9c92495ecd 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -246,7 +246,7 @@ func (r *RealtimeAPIReconciler) desiredService(api serverless.RealtimeAPI) kcore
 
 func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI) istioclientnetworking.VirtualService {
 	var activatorWeight int32
-	if api.Spec.Replicas == 0 {
+	if api.Spec.Replicas == 0 || api.Status.Ready == 0 {
 		activatorWeight = 100
 	}
 
diff --git a/pkg/operator/resources/realtimeapi/helpers.go b/pkg/operator/resources/realtimeapi/helpers.go
index 938fccc99a..6041907a78 100644
--- a/pkg/operator/resources/realtimeapi/helpers.go
+++ b/pkg/operator/resources/realtimeapi/helpers.go
@@ -30,7 +30,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
@@ -119,6 +119,7 @@ func k8sResourceFromAPIConfig(apiConfig userconfig.API, prevAPI *serverless.Real
 				Containers:     containers,
 			},
 			Autoscaling: v1alpha1.AutoscalingSpec{
+				InitReplicas:                 apiConfig.Autoscaling.InitReplicas,
 				MinReplicas:                  apiConfig.Autoscaling.MinReplicas,
 				MaxReplicas:                  apiConfig.Autoscaling.MaxReplicas,
 				TargetInFlight:               fmt.Sprintf("%f", *apiConfig.Autoscaling.TargetInFlight),

From fb5c08527a4d491a67ae9342eb73b58f04338530 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 01:23:01 +0300
Subject: [PATCH 32/42] RealtimeAPI controller fixes

---
 pkg/activator/activator.go                    | 12 +++++------
 pkg/activator/helpers.go                      | 16 ++++++++++-----
 .../serverless/v1alpha1/realtimeapi_types.go  |  4 +++-
 .../serverless.cortex.dev_realtimeapis.yaml   | 20 -------------------
 .../serverless/realtimeapi_controller.go      |  1 +
 .../realtimeapi_controller_helpers.go         |  2 ++
 6 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/pkg/activator/activator.go b/pkg/activator/activator.go
index 7b68736951..f5400e5fcb 100644
--- a/pkg/activator/activator.go
+++ b/pkg/activator/activator.go
@@ -158,7 +158,7 @@ func (a *activator) getOrCreateReadinessTracker(apiName string) *readinessTracke
 }
 
 func (a *activator) addAPI(obj interface{}) {
-	apiMetadata, err := getAPIMeta(obj)
+	apiMetadata, err := getAPIMeta(obj, true)
 	if err != nil {
 		a.logger.Errorw("error during virtual service informer add callback", zap.Error(err))
 		telemetry.Error(err)
@@ -182,7 +182,7 @@ func (a *activator) addAPI(obj interface{}) {
 }
 
 func (a *activator) updateAPI(oldObj interface{}, newObj interface{}) {
-	apiMetadata, err := getAPIMeta(newObj)
+	apiMetadata, err := getAPIMeta(newObj, true)
 	if err != nil {
 		a.logger.Errorw("error during virtual service informer update callback", zap.Error(err))
 		telemetry.Error(err)
@@ -195,7 +195,7 @@ func (a *activator) updateAPI(oldObj interface{}, newObj interface{}) {
 
 	apiName := apiMetadata.apiName
 
-	oldAPIMetatada, err := getAPIMeta(oldObj)
+	oldAPIMetatada, err := getAPIMeta(oldObj, true)
 	if err != nil {
 		a.logger.Errorw("error during virtual service informer update callback", zap.Error(err))
 		telemetry.Error(err)
@@ -212,7 +212,7 @@ func (a *activator) updateAPI(oldObj interface{}, newObj interface{}) {
 }
 
 func (a *activator) removeAPI(obj interface{}) {
-	apiMetadata, err := getAPIMeta(obj)
+	apiMetadata, err := getAPIMeta(obj, false)
 	if err != nil {
 		a.logger.Errorw("error during virtual service informer delete callback", zap.Error(err))
 		telemetry.Error(err)
@@ -250,7 +250,7 @@ func (a *activator) updateReadinessTracker(obj interface{}) {
 		return
 	}
 
-	api, err := getAPIMeta(obj)
+	api, err := getAPIMeta(obj, false)
 	if err != nil {
 		a.logger.Errorw("error during deployment informer callback", zap.Error(err))
 		telemetry.Error(err)
@@ -271,7 +271,7 @@ func (a *activator) updateReadinessTracker(obj interface{}) {
 }
 
 func (a *activator) removeReadinessTracker(obj interface{}) {
-	api, err := getAPIMeta(obj)
+	api, err := getAPIMeta(obj, false)
 	if err != nil {
 		a.logger.Errorw("error during deployment informer callback", zap.Error(err))
 		telemetry.Error(err)
diff --git a/pkg/activator/helpers.go b/pkg/activator/helpers.go
index 5bce2cb7bf..e3d20b6f0f 100644
--- a/pkg/activator/helpers.go
+++ b/pkg/activator/helpers.go
@@ -31,7 +31,7 @@ type apiMeta struct {
 	maxQueueLength int
 }
 
-func getAPIMeta(obj interface{}) (apiMeta, error) {
+func getAPIMeta(obj interface{}, includeAnnotations bool) (apiMeta, error) {
 	resource, err := meta.Accessor(obj)
 	if err != nil {
 		return apiMeta{}, err
@@ -48,16 +48,22 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 		return apiMeta{}, errors.ErrorUnexpected("got a virtual service without apiName label")
 	}
 
-	maxQueueLength, maxConcurrency, err := userconfig.ConcurrencyFromAnnotations(resource)
-	if err != nil {
-		return apiMeta{}, err
+	var maxQueueLength, maxConcurrency int
+	var annotations map[string]string
+
+	if includeAnnotations {
+		maxQueueLength, maxConcurrency, err = userconfig.ConcurrencyFromAnnotations(resource)
+		if err != nil {
+			return apiMeta{}, err
+		}
+		annotations = resource.GetAnnotations()
 	}
 
 	return apiMeta{
 		apiName:        apiName,
 		apiKind:        userconfig.KindFromString(apiKind),
 		labels:         labels,
-		annotations:    resource.GetAnnotations(),
+		annotations:    annotations,
 		maxConcurrency: maxConcurrency,
 		maxQueueLength: maxQueueLength,
 	}, nil
diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 9de96d79cc..6e25cec237 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -137,12 +137,15 @@ type ComputeSpec struct {
 }
 
 type AutoscalingSpec struct {
+	// +kubebuilder:validation:Optional
 	// Init number of replicas
 	InitReplicas int32 `json:"init_replicas,omitempty"`
 
+	// +kubebuilder:validation:Optional
 	// Minimum number of replicas
 	MinReplicas int32 `json:"min_replicas,omitempty"`
 
+	// +kubebuilder:validation:Optional
 	// Maximum number of replicas
 	MaxReplicas int32 `json:"max_replicas,omitempty"`
 
@@ -244,7 +247,6 @@ func (api RealtimeAPI) GetOrCreateAPIIDs() (deploymentID, podID, specID, apiID s
 
 	var buf bytes.Buffer
 
-	buf.WriteString(api.Name)
 	buf.WriteString(api.Name)
 	buf.WriteString(userconfig.RealtimeAPIKind.String())
 	buf.WriteString(s.Obj(api.Spec.Pod))
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 7346ec219d..1fd70d2d93 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -50,38 +50,30 @@ spec:
             description: RealtimeAPISpec defines the desired state of RealtimeAPI
             properties:
               autoscaling:
-                default:
-                  min_replicas: 1
                 description: Autoscaling configuration
                 properties:
                   downscale_stabilization_period:
-                    default: 5m
                     description: The API will not scale below the highest recommendation
                       made during this period
                     type: string
                   downscale_tolerance:
-                    default: "0.5"
                     description: Any recommendation falling within this factor below
                       the current number of replicas will not trigger a scale down
                       event
                     type: string
                   max_downscale_factor:
-                    default: "0.75"
                     description: Maximum factor by which to scale down the API on
                       a single scaling event
                     type: string
                   max_replicas:
-                    default: 100
                     description: Maximum number of replicas
                     format: int32
                     type: integer
                   max_upscale_factor:
-                    default: "1.5"
                     description: Maximum factor by which to scale up the API on a
                       single scaling event
                     type: string
                   min_replicas:
-                    default: 1
                     description: Minimum number of replicas
                     format: int32
                     type: integer
@@ -91,17 +83,14 @@ spec:
                       which the autoscaler tries to maintain
                     type: string
                   upscale_stabilization_period:
-                    default: 1m
                     description: The API will not scale above the lowest recommendation
                       made during this period
                     type: string
                   upscale_tolerance:
-                    default: "0.5"
                     description: Any recommendation falling within this factor above
                       the current number of replicas will not trigger a scale up event
                     type: string
                   window:
-                    default: 60s
                     description: Duration over which to average the API's in-flight
                       requests per replica
                     type: string
@@ -539,20 +528,17 @@ spec:
                       type: object
                     type: array
                   max_concurrency:
-                    default: 1
                     description: Maximum number of requests that will be concurrently
                       sent into the container
                     format: int32
                     type: integer
                   max_queue_length:
-                    default: 100
                     description: Maximum number of requests per replica which will
                       be queued (beyond max_concurrency) before requests are rejected
                       with error code 503
                     format: int32
                     type: integer
                   port:
-                    default: 8080
                     description: Port to which requests will be sent to
                     format: int32
                     type: integer
@@ -563,14 +549,10 @@ spec:
                 - port
                 type: object
               replicas:
-                default: 1
                 description: Number of desired replicas
                 format: int32
                 type: integer
               update_strategy:
-                default:
-                  max_surge: 25%
-                  max_unavailable: 25%
                 description: Deployment strategy to use when replacing existing replicas
                   with new ones
                 properties:
@@ -578,7 +560,6 @@ spec:
                     anyOf:
                     - type: integer
                     - type: string
-                    default: 25%
                     description: 'Maximum number of replicas that can be scheduled
                       above the desired number of replicas during an update; can be
                       an absolute number, e.g. 5, or a percentage of desired replicas,
@@ -588,7 +569,6 @@ spec:
                     anyOf:
                     - type: integer
                     - type: string
-                    default: 25%
                     description: maximum number of replicas that can be unavailable
                       during an update; can be an absolute number, e.g. 5, or a percentage
                       of desired replicas, e.g. 10%
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index 384ab8b342..a2c788ef8d 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -86,6 +86,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 	// Step 3: Get or create deployment and API ids
 	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
+
 	idsOutdated := api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
 		api.Annotations["cortex.dev/spec-id"] != specID ||
 		api.Annotations["cortex.dev/api-id"] != apiID
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 9c92495ecd..9580bf8b7c 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -452,5 +452,7 @@ func (r *RealtimeAPIReconciler) generateAPIAnnotations(api serverless.RealtimeAP
 		userconfig.MaxUpscaleFactorAnnotationKey:             api.Spec.Autoscaling.MaxUpscaleFactor,
 		userconfig.DownscaleToleranceAnnotationKey:           api.Spec.Autoscaling.DownscaleTolerance,
 		userconfig.UpscaleToleranceAnnotationKey:             api.Spec.Autoscaling.UpscaleTolerance,
+		userconfig.MaxQueueLengthAnnotationKey:               s.Int32(api.Spec.Pod.MaxQueueLength),
+		userconfig.MaxConcurrencyAnnotationKey:               s.Int32(api.Spec.Pod.MaxConcurrency),
 	}
 }

From 835ff98b07d69004a6d3e412f11077f55b448c34 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 01:32:14 +0300
Subject: [PATCH 33/42] Add InitReplicas to RealtimeAPI spec

---
 pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go        | 2 +-
 .../config/crd/bases/serverless.cortex.dev_realtimeapis.yaml  | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 6e25cec237..439711ea5c 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -138,7 +138,7 @@ type ComputeSpec struct {
 
 type AutoscalingSpec struct {
 	// +kubebuilder:validation:Optional
-	// Init number of replicas
+	// Initial number of replicas
 	InitReplicas int32 `json:"init_replicas,omitempty"`
 
 	// +kubebuilder:validation:Optional
diff --git a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
index 1fd70d2d93..7a8c6a755b 100644
--- a/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
+++ b/pkg/crds/config/crd/bases/serverless.cortex.dev_realtimeapis.yaml
@@ -61,6 +61,10 @@ spec:
                       the current number of replicas will not trigger a scale down
                       event
                     type: string
+                  init_replicas:
+                    description: Initial number of replicas
+                    format: int32
+                    type: integer
                   max_downscale_factor:
                     description: Maximum factor by which to scale down the API on
                       a single scaling event

From 7ae8949a84dfa276ffdb7f22220e18bdcad7fe6c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 01:49:27 +0300
Subject: [PATCH 34/42] Simplify GetAutoscalingSpec function

---
 pkg/autoscaler/helpers.go         | 65 +++++++++++++++++++++++++++++++
 pkg/autoscaler/realtime_scaler.go | 40 +------------------
 2 files changed, 66 insertions(+), 39 deletions(-)
 create mode 100644 pkg/autoscaler/helpers.go

diff --git a/pkg/autoscaler/helpers.go b/pkg/autoscaler/helpers.go
new file mode 100644
index 0000000000..9f9e4af039
--- /dev/null
+++ b/pkg/autoscaler/helpers.go
@@ -0,0 +1,65 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package autoscaler
+
+import (
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	libstrings "github.com/cortexlabs/cortex/pkg/lib/strings"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+)
+
+// generateAutoscalingFromServerlessRealtimeAPI converts the autoscaling section of a RealtimeAPI
+// resource into a userconfig.Autoscaling; it errors if a string-encoded float cannot be parsed.
+func generateAutoscalingFromServerlessRealtimeAPI(realtimeAPI serverless.RealtimeAPI) (*userconfig.Autoscaling, error) {
+	autoscalingSpec := realtimeAPI.Spec.Autoscaling
+
+	targetInFlight, parsed := libstrings.ParseFloat64(autoscalingSpec.TargetInFlight)
+	if !parsed {
+		return nil, errors.ErrorUnexpected("failed to parse target-in-flight requests from autoscaling spec")
+	}
+	maxDownscaleFactor, parsed := libstrings.ParseFloat64(autoscalingSpec.MaxDownscaleFactor)
+	if !parsed {
+		return nil, errors.ErrorUnexpected("failed to parse max downscale factor from autoscaling spec")
+	}
+	maxUpscaleFactor, parsed := libstrings.ParseFloat64(autoscalingSpec.MaxUpscaleFactor)
+	if !parsed {
+		return nil, errors.ErrorUnexpected("failed to parse max upscale factor from autoscaling spec")
+	}
+	downscaleTolerance, parsed := libstrings.ParseFloat64(autoscalingSpec.DownscaleTolerance)
+	if !parsed {
+		return nil, errors.ErrorUnexpected("failed to parse downscale tolerance from autoscaling spec")
+	}
+	upscaleTolerance, parsed := libstrings.ParseFloat64(autoscalingSpec.UpscaleTolerance)
+	if !parsed {
+		return nil, errors.ErrorUnexpected("failed to parse upscale tolerance from autoscaling spec")
+	}
+
+	return &userconfig.Autoscaling{
+		InitReplicas:                 autoscalingSpec.InitReplicas,
+		MinReplicas:                  autoscalingSpec.MinReplicas,
+		MaxReplicas:                  autoscalingSpec.MaxReplicas,
+		Window:                       autoscalingSpec.Window.Duration,
+		UpscaleStabilizationPeriod:   autoscalingSpec.UpscaleStabilizationPeriod.Duration,
+		DownscaleStabilizationPeriod: autoscalingSpec.DownscaleStabilizationPeriod.Duration,
+		TargetInFlight:               &targetInFlight,
+		MaxUpscaleFactor:             maxUpscaleFactor,
+		MaxDownscaleFactor:           maxDownscaleFactor,
+		DownscaleTolerance:           downscaleTolerance,
+		UpscaleTolerance:             upscaleTolerance,
+	}, nil
+}
diff --git a/pkg/autoscaler/realtime_scaler.go b/pkg/autoscaler/realtime_scaler.go
index 5d1df8ad27..9aa3e5c8ef 100644
--- a/pkg/autoscaler/realtime_scaler.go
+++ b/pkg/autoscaler/realtime_scaler.go
@@ -24,7 +24,6 @@ import (
 	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	libstrings "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
 	"github.com/prometheus/common/model"
@@ -120,44 +119,7 @@ func (s *RealtimeScaler) GetAutoscalingSpec(apiName string) (*userconfig.Autosca
 		return nil, err
 	}
 
-	targetInFlight, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.TargetInFlight)
-	if !ok {
-		return nil, errors.ErrorUnexpected("failed to parse target-in-flight requests from autoscaling spec")
-	}
-
-	maxDownscaleFactor, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.MaxDownscaleFactor)
-	if !ok {
-		return nil, errors.ErrorUnexpected("failed to parse max downscale factor from autoscaling spec")
-	}
-
-	maxUpscaleFactor, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.MaxUpscaleFactor)
-	if !ok {
-		return nil, errors.ErrorUnexpected("failed to parse max upscale factor from autoscaling spec")
-	}
-
-	downscaleTolerance, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.DownscaleTolerance)
-	if !ok {
-		return nil, errors.ErrorUnexpected("failed to parse downscale tolerance from autoscaling spec")
-	}
-
-	upscaleTolerance, ok := libstrings.ParseFloat64(api.Spec.Autoscaling.UpscaleTolerance)
-	if !ok {
-		return nil, errors.ErrorUnexpected("failed to parse upscale tolerance from autoscaling spec")
-	}
-
-	return &userconfig.Autoscaling{
-		MinReplicas:                  api.Spec.Autoscaling.MinReplicas,
-		MaxReplicas:                  api.Spec.Autoscaling.MaxReplicas,
-		InitReplicas:                 api.Spec.Autoscaling.InitReplicas,
-		TargetInFlight:               &targetInFlight,
-		Window:                       api.Spec.Autoscaling.Window.Duration,
-		DownscaleStabilizationPeriod: api.Spec.Autoscaling.DownscaleStabilizationPeriod.Duration,
-		UpscaleStabilizationPeriod:   api.Spec.Autoscaling.UpscaleStabilizationPeriod.Duration,
-		MaxDownscaleFactor:           maxDownscaleFactor,
-		MaxUpscaleFactor:             maxUpscaleFactor,
-		DownscaleTolerance:           downscaleTolerance,
-		UpscaleTolerance:             upscaleTolerance,
-	}, nil
+	return generateAutoscalingFromServerlessRealtimeAPI(api)
 }
 
 func (s *RealtimeScaler) CurrentRequestedReplicas(apiName string) (int32, error) {

From 5050519a32baf30cd34576a4f85a1d55aedb87d8 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 03:36:59 +0300
Subject: [PATCH 35/42] Add serverless to autoscaler's scheme

---
 cmd/autoscaler/main.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmd/autoscaler/main.go b/cmd/autoscaler/main.go
index 71e8bd034e..24035c2c38 100644
--- a/cmd/autoscaler/main.go
+++ b/cmd/autoscaler/main.go
@@ -28,6 +28,7 @@ import (
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/autoscaler"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/aws"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
@@ -107,6 +108,9 @@ func main() {
 	defer telemetry.Close()
 
 	scheme := runtime.NewScheme()
+	if err := serverless.AddToScheme(scheme); err != nil {
+		exit(log, err, "failed to add k8s serverless to scheme")
+	}
 	if err := clientgoscheme.AddToScheme(scheme); err != nil {
 		exit(log, err, "failed to add k8s client-go-scheme to scheme")
 	}

From 934f1ff4f2a25b02ed95e3e235040de2d5254bdc Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 03:37:15 +0300
Subject: [PATCH 36/42] apiID is required for determining if a pod is
 up-to-date or not

---
 .../controllers/serverless/realtimeapi_controller_helpers.go     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 9580bf8b7c..e4ac81a1a2 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -195,6 +195,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 			Labels: map[string]string{
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
+				"apiID":          api.Annotations["cortex.dev/api-id"],
 				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 				"cortex.dev/api": "true",
 			},

From d0a1b612ce3a1e4f144cd0996bc62c71d834b69e Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 03:37:37 +0300
Subject: [PATCH 37/42] Allow autoscaler to get/update realtimeapis resources

---
 manager/manifests/autoscaler.yaml.j2 | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/manager/manifests/autoscaler.yaml.j2 b/manager/manifests/autoscaler.yaml.j2
index ce875b24c3..2f00afb164 100644
--- a/manager/manifests/autoscaler.yaml.j2
+++ b/manager/manifests/autoscaler.yaml.j2
@@ -42,6 +42,13 @@ rules:
   - get
   - update
   - watch
+- apiGroups:
+  - "serverless.cortex.dev"
+  resources:
+  - realtimeapis
+  verbs:
+  - get
+  - update
 
 ---
 

From cac1a11954e8b125f78a41eca44e754b63fdeaf7 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 08:15:47 +0300
Subject: [PATCH 38/42] Fix cortex logs/refresh cmds for realtime API

---
 .../serverless/v1alpha1/realtimeapi_types.go  |  3 +-
 .../realtimeapi_controller_helpers.go         |  4 +-
 pkg/operator/operator/logging.go              | 34 ----------------
 pkg/operator/resources/realtimeapi/api.go     | 39 +++++++++++++------
 pkg/operator/resources/realtimeapi/helpers.go | 22 ++++++-----
 5 files changed, 44 insertions(+), 58 deletions(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 439711ea5c..31c722a4d8 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -266,8 +266,7 @@ func (api RealtimeAPI) GetOrCreateAPIIDs() (deploymentID, podID, specID, apiID s
 		api.Annotations["cortex.dev/spec-id"] != specID {
 		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
 	}
-
-	return deploymentID, podID, specID, apiID
+	return
 }
 
 //+kubebuilder:object:root=true
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index e4ac81a1a2..9c672458a0 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -183,6 +183,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
 			"apiID":          api.Annotations["cortex.dev/api-id"],
+			"podID":          api.Annotations["cortex.dev/pod-id"],
 			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 			"cortex.dev/api": "true",
 		},
@@ -195,7 +196,7 @@ func (r *RealtimeAPIReconciler) desiredDeployment(api serverless.RealtimeAPI) ka
 			Labels: map[string]string{
 				"apiName":        api.Name,
 				"apiKind":        userconfig.RealtimeAPIKind.String(),
-				"apiID":          api.Annotations["cortex.dev/api-id"],
+				"podID":          api.Annotations["cortex.dev/pod-id"],
 				"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 				"cortex.dev/api": "true",
 			},
@@ -298,6 +299,7 @@ func (r *RealtimeAPIReconciler) desiredVirtualService(api serverless.RealtimeAPI
 			"apiName":        api.Name,
 			"apiKind":        userconfig.RealtimeAPIKind.String(),
 			"apiID":          api.Annotations["cortex.dev/api-id"],
+			"podID":          api.Annotations["cortex.dev/pod-id"],
 			"deploymentID":   api.Annotations["cortex.dev/deployment-id"],
 			"cortex.dev/api": "true",
 		},
diff --git a/pkg/operator/operator/logging.go b/pkg/operator/operator/logging.go
index f49746f64b..51e6ece794 100644
--- a/pkg/operator/operator/logging.go
+++ b/pkg/operator/operator/logging.go
@@ -101,40 +101,6 @@ func initializeLogger(key string, level userconfig.LogLevel, fields map[string]i
 	return sugarLogger, nil
 }
 
-func GetRealtimeAPILogger(apiName string, apiID string) (*zap.SugaredLogger, error) {
-	loggerCacheKey := fmt.Sprintf("apiName=%s,apiID=%s", apiName, apiID)
-	logger := getFromCacheOrNil(loggerCacheKey)
-
-	if logger != nil {
-		return logger, nil
-	}
-
-	apiSpec, err := DownloadAPISpec(apiName, apiID)
-	if err != nil {
-		return nil, err
-	}
-
-	return initializeLogger(loggerCacheKey, userconfig.InfoLogLevel, map[string]interface{}{
-		"apiName": apiSpec.Name,
-		"apiKind": apiSpec.Kind.String(),
-		"apiID":   apiSpec.ID,
-	})
-}
-
-func GetRealtimeAPILoggerFromSpec(apiSpec *spec.API) (*zap.SugaredLogger, error) {
-	loggerCacheKey := fmt.Sprintf("apiName=%s,apiID=%s", apiSpec.Name, apiSpec.ID)
-	logger := getFromCacheOrNil(loggerCacheKey)
-	if logger != nil {
-		return logger, nil
-	}
-
-	return initializeLogger(loggerCacheKey, userconfig.InfoLogLevel, map[string]interface{}{
-		"apiName": apiSpec.Name,
-		"apiKind": apiSpec.Kind.String(),
-		"apiID":   apiSpec.ID,
-	})
-}
-
 func GetJobLogger(jobKey spec.JobKey) (*zap.SugaredLogger, error) {
 	loggerCacheKey := fmt.Sprintf("apiName=%s,jobID=%s", jobKey.APIName, jobKey.ID)
 	logger := getFromCacheOrNil(loggerCacheKey)
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index fe9203a4de..8c58564b14 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -39,7 +39,6 @@ import (
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
-	ktypes "k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -116,21 +115,37 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*spec.API, string, error)
 
 func RefreshAPI(apiName string) (string, error) {
 	ctx := context.Background()
-	api := serverless.RealtimeAPI{
-		ObjectMeta: kmeta.ObjectMeta{
-			Namespace: consts.DefaultNamespace,
-			Name:      apiName,
-		},
-	}
+	var api serverless.RealtimeAPI
+	key := client.ObjectKey{Namespace: consts.DefaultNamespace, Name: apiName}
 
-	// slashes are encoded as ~1 in the json patch
-	patch := []byte(fmt.Sprintf(
-		"[{\"op\": \"replace\", \"path\": \"/metadata/annotations/cortex.dev~1deployment-id\", \"value\": \"%s\" }]",
-		generateDeploymentID()))
-	if err := config.K8s.Patch(ctx, &api, client.RawPatch(ktypes.JSONPatchType, patch)); err != nil {
+	err := config.K8s.Get(ctx, key, &api)
+	if err != nil {
 		return "", errors.Wrap(err, "failed to get realtime api resource")
 	}
 
+	apiSpec, err := operator.DownloadAPISpec(api.Name, api.Annotations["cortex.dev/api-id"])
+	if err != nil {
+		return "", err
+	}
+
+	// generate a new api-id
+	// the deployment-id and spec-id components of the api-id remain unchanged
+	api.Annotations["cortex.dev/api-id"] = ""
+	_, _, _, apiID := api.GetOrCreateAPIIDs()
+	api.Annotations["cortex.dev/api-id"] = apiID
+
+	err = config.K8s.Update(ctx, &api)
+	if err != nil {
+		return "", errors.Wrap(err, "failed to update realtime api resource")
+	}
+
+	apiSpec.ID = apiID
+	apiSpec.Key = spec.Key(apiName, apiID, config.ClusterConfig.ClusterUID)
+
+	if err := config.AWS.UploadJSONToS3(apiSpec, config.ClusterConfig.Bucket, apiSpec.Key); err != nil {
+		return "", errors.Wrap(err, "failed to upload api spec")
+	}
+
 	apiResource := userconfig.Resource{
 		Name: apiName,
 		Kind: userconfig.RealtimeAPIKind,
diff --git a/pkg/operator/resources/realtimeapi/helpers.go b/pkg/operator/resources/realtimeapi/helpers.go
index 6041907a78..979ad90ef3 100644
--- a/pkg/operator/resources/realtimeapi/helpers.go
+++ b/pkg/operator/resources/realtimeapi/helpers.go
@@ -142,6 +142,16 @@ func k8sResourceFromAPIConfig(apiConfig userconfig.API, prevAPI *serverless.Real
 		},
 	}
 
+	if prevAPI != nil {
+		// we should keep the existing number of replicas instead of init_replicas
+		api.Spec.Replicas = prevAPI.Spec.Replicas
+		if prevDeployID := prevAPI.Annotations["cortex.dev/deployment-id"]; prevDeployID != "" {
+			api.Annotations = map[string]string{
+				"cortex.dev/deployment-id": prevDeployID,
+			}
+		}
+	}
+
 	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
 	api.Annotations = map[string]string{
 		"cortex.dev/deployment-id": deploymentID,
@@ -150,14 +160,6 @@ func k8sResourceFromAPIConfig(apiConfig userconfig.API, prevAPI *serverless.Real
 		"cortex.dev/api-id":        apiID,
 	}
 
-	if prevAPI != nil {
-		// we should keep the existing number of replicas instead of init_replicas
-		api.Spec.Replicas = prevAPI.Spec.Replicas
-		if prevDeployID := prevAPI.Annotations["cortex.dev/deployment-id"]; prevDeployID != "" {
-			api.Annotations["cortex.dev/deployment-id"] = prevDeployID
-		}
-	}
-
 	return api
 }
 
@@ -178,6 +180,7 @@ func metadataFromRealtimeAPI(sv *v1alpha1.RealtimeAPI) (*spec.Metadata, error) {
 		},
 		APIID:        sv.Annotations["cortex.dev/api-id"],
 		DeploymentID: sv.Annotations["cortex.dev/deployment-id"],
+		PodID:        sv.Annotations["cortex.dev/pod-id"],
 		LastUpdated:  lastUpdated.Unix(),
 	}, nil
 }
@@ -247,5 +250,6 @@ func addPodToReplicaCounts(pod *v1.Pod, metadata *spec.Metadata, counts *status.
 }
 
 func isPodSpecLatest(pod *v1.Pod, metadata *spec.Metadata) bool {
-	return metadata.APIID == pod.Labels["apiID"]
+	return metadata.DeploymentID == pod.Labels["deploymentID"] &&
+		metadata.PodID == pod.Labels["podID"]
 }

From c31bf2cbdfaffc80f3d7373400864924c16b065b Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 08:22:46 +0300
Subject: [PATCH 39/42] Fix CORTEX_PORT not present on realtime api pods

---
 .../controllers/serverless/realtimeapi_controller_helpers.go  | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 9c672458a0..4cf5637f93 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -363,6 +363,10 @@ func (r *RealtimeAPIReconciler) userContainers(api serverless.RealtimeAPI) ([]kc
 
 		containerEnvVars := workloads.BaseEnvVars
 		containerEnvVars = append(containerEnvVars, workloads.ClientConfigEnvVar())
+		containerEnvVars = append(containerEnvVars, kcore.EnvVar{
+			Name:  "CORTEX_PORT",
+			Value: s.Int32(api.Spec.Pod.Port),
+		})
 		containerEnvVars = append(containerEnvVars, container.Env...)
 
 		containers[i] = kcore.Container{

From 2c8c233282394b829fd884a653a063d0ad9d6d04 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 17:37:17 +0300
Subject: [PATCH 40/42] Use deployment-id instead of api-id for API refresh

---
 pkg/operator/resources/realtimeapi/api.go | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 8c58564b14..52b995bcb7 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -128,11 +128,10 @@ func RefreshAPI(apiName string) (string, error) {
 		return "", err
 	}
 
-	// generate a new api-id
-	// the deployment-id and spec-id components of the api-id remain unchanged
-	api.Annotations["cortex.dev/api-id"] = ""
-	_, _, _, apiID := api.GetOrCreateAPIIDs()
-	api.Annotations["cortex.dev/api-id"] = apiID
+	// create new deployment
+	api.Annotations["cortex.dev/deployment-id"] = ""
+	deploymentID, _, _, apiID := api.GetOrCreateAPIIDs()
+	api.Annotations["cortex.dev/deployment-id"] = deploymentID
 
 	err = config.K8s.Update(ctx, &api)
 	if err != nil {

From 763acf0e29dd513c43dad3230e11a441a983d258 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 17:38:14 +0300
Subject: [PATCH 41/42] Ensure that the `min_replicas`/`max_replicas` range is
 enforced by the controller

---
 .../serverless/realtimeapi_controller.go         |  3 ++-
 .../serverless/realtimeapi_controller_helpers.go | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller.go b/pkg/crds/controllers/serverless/realtimeapi_controller.go
index a2c788ef8d..f422813acf 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller.go
@@ -85,6 +85,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// Step 3: Get or create deployment and API ids
+	desiredReplicasChanged := r.ensureDesiredReplicasRange(ctx, &api)
 	deploymentID, podID, specID, apiID := api.GetOrCreateAPIIDs()
 
 	idsOutdated := api.Annotations["cortex.dev/deployment-id"] != deploymentID ||
@@ -111,7 +112,7 @@ func (r *RealtimeAPIReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		api.Annotations["cortex.dev/api-id"] = apiID
 	}
 
-	if idsOutdated {
+	if idsOutdated || desiredReplicasChanged {
 		if err = r.Update(ctx, &api); err != nil {
 			return ctrl.Result{}, err
 		}
diff --git a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
index 4cf5637f93..909c446475 100644
--- a/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
+++ b/pkg/crds/controllers/serverless/realtimeapi_controller_helpers.go
@@ -77,6 +77,22 @@ func (r *RealtimeAPIReconciler) updateStatus(ctx context.Context, api *serverles
 	return nil
 }
 
+// ensureDesiredReplicasRange clamps api.Spec.Replicas into the autoscaling min/max range and reports whether it changed.
+func (r *RealtimeAPIReconciler) ensureDesiredReplicasRange(ctx context.Context, api *serverless.RealtimeAPI) bool {
+	lower := api.Spec.Autoscaling.MinReplicas
+	upper := api.Spec.Autoscaling.MaxReplicas
+	switch current := api.Spec.Replicas; {
+	case current < lower:
+		api.Spec.Replicas = lower
+		return true
+	case current > upper:
+		api.Spec.Replicas = upper
+		return true
+	default:
+		return false
+	}
+}
+
 func (r *RealtimeAPIReconciler) createOrUpdateDeployment(ctx context.Context, api serverless.RealtimeAPI) (controllerutil.OperationResult, error) {
 	deployment := kapps.Deployment{
 		ObjectMeta: kmeta.ObjectMeta{

From b4b87458784a1545238db4593f87886f2cb10886 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 31 Jul 2021 17:39:06 +0300
Subject: [PATCH 42/42] Revert explicit return expression

---
 pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
index 31c722a4d8..cf0708a735 100644
--- a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
+++ b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -266,7 +266,7 @@ func (api RealtimeAPI) GetOrCreateAPIIDs() (deploymentID, podID, specID, apiID s
 		api.Annotations["cortex.dev/spec-id"] != specID {
 		apiID = fmt.Sprintf("%s-%s-%s", spec.MonotonicallyDecreasingID(), deploymentID, specID)
 	}
-	return
+	return deploymentID, podID, specID, apiID
 }
 
 //+kubebuilder:object:root=true