from io import BytesIO
  from fastapi import FastAPI
  from fastapi.responses import Response
  import torch
  from ray import serve
  from ray.serve.handle import DeploymentHandle
  app = FastAPI()
  @serve.deployment(num_replicas=1)
  @serve.ingress(app)
  class APIIngress:
      def __init__(self, diffusion_model_handle: DeploymentHandle) -> None:
          self.handle = diffusion_model_handle
      @app.get(
          "/imagine",
          responses={200: {"content": {"image/png": {}}}},
          response_class=Response,
      )
      async def generate(self, prompt: str, img_size: int = 512):
          assert len(prompt), "prompt parameter cannot be empty"
          image = await self.handle.generate.remote(prompt, img_size=img_size)
          file_stream = BytesIO()
          image.save(file_stream, "PNG")
          return Response(content=file_stream.getvalue(), media_type="image/png")
  @serve.deployment(
      ray_actor_options={"num_gpus": 1},
      autoscaling_config={"min_replicas": 0, "max_replicas": 2},
  )
  class StableDiffusionV2:
      def __init__(self):
          from diffusers import EulerDiscreteScheduler, StableDiffusionPipeline
          model_id = "stabilityai/stable-diffusion-2"
          scheduler = EulerDiscreteScheduler.from_pretrained(
              model_id, subfolder="scheduler"
          )
          self.pipe = StableDiffusionPipeline.from_pretrained(
              model_id, scheduler=scheduler, revision="fp16", torch_dtype=torch.float16
          )
          self.pipe = self.pipe.to("cuda")
      def generate(self, prompt: str, img_size: int = 512):
          assert len(prompt), "prompt parameter cannot be empty"
          with torch.autocast("cuda"):
              image = self.pipe(prompt, height=img_size, width=img_size).images[0]
              return image
  entrypoint = APIIngress.bind(StableDiffusionV2.bind())