赞
踩
import requests
from starlette.requests import Request
from typing import Dict

from ray import serve


# Step 1: declare the Ray Serve application.
@serve.deployment
class MyModelDeployment:
    """Minimal deployment that answers each request with a fixed message."""

    def __init__(self, msg: str):
        # Stand-in for model state; a real deployment could hold very large
        # neural-network weights here.
        self._message = msg

    def __call__(self, request: Request) -> Dict:
        """Return the stored message; the request itself is not inspected."""
        payload = {"result": self._message}
        return payload


app = MyModelDeployment.bind(msg="Hello world!")

# Step 2: deploy the application on the local machine.
serve.run(app, route_prefix="/")

# Step 3: send a request to the running application and show the reply.
reply = requests.get("http://localhost:8000/")
print(reply.json())
# Expected output: {'result': 'Hello world!'}

server脚本:
# File name: serve_quickstart.py
from starlette.requests import Request

import ray
from ray import serve

from transformers import pipeline


@serve.deployment(
    num_replicas=2,
    ray_actor_options={"num_cpus": 0.2, "num_gpus": 0},
)
class Translator:
    """Serve deployment wrapping a `translation_en_to_fr` pipeline (t5-small)."""

    def __init__(self):
        # Build the translation pipeline when the instance is constructed.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Run the pipeline on `text` and return only the translated string."""
        first_result = self.model(text)[0]
        # Drop everything except the translation text from the raw output.
        return first_result["translation_text"]

    async def __call__(self, http_request: Request) -> str:
        """Read the JSON body of the request and return its translation."""
        body: str = await http_request.json()
        return self.translate(body)


translator_app = Translator.bind()

确保server脚本启动
serve run serve_quickstart:translator_app
服务默认在 http://127.0.0.1:8000/ 运行
client脚本
# File name: model_client.py
import requests

# Seconds to wait for the server before giving up; without a timeout the
# request could block indefinitely if the server never answers.
REQUEST_TIMEOUT_S = 60

english_text = "Hello world!"

response = requests.post(
    "http://127.0.0.1:8000/",
    json=english_text,
    timeout=REQUEST_TIMEOUT_S,
)
# Fail loudly on a 4xx/5xx reply instead of printing an error body as if it
# were the translation.
response.raise_for_status()
french_text = response.text

print(french_text)
测试:
python model_client.py
组合
# File name: serve_quickstart_composed.py
from starlette.requests import Request

import ray
from ray import serve
from ray.serve.handle import DeploymentHandle

from transformers import pipeline


@serve.deployment
class Translator:
    """Serve deployment wrapping a `translation_en_to_fr` pipeline (t5-small)."""

    def __init__(self):
        # Build the translation pipeline when the instance is constructed.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Run the pipeline on `text` and return only the translated string."""
        first_result = self.model(text)[0]
        # Drop everything except the translation text from the raw output.
        return first_result["translation_text"]


@serve.deployment
class Summarizer:
    """Serve deployment that summarizes text, then forwards it for translation."""

    def __init__(self, translator: DeploymentHandle):
        # Handle used to call the downstream Translator deployment.
        self.translator = translator

        # Build the summarization pipeline when the instance is constructed.
        self.model = pipeline("summarization", model="t5-small")

    def summarize(self, text: str) -> str:
        """Run the pipeline on `text` and return only the summary string."""
        first_result = self.model(text, min_length=5, max_length=15)[0]
        # Drop everything except the summary text from the raw output.
        return first_result["summary_text"]

    async def __call__(self, http_request: Request) -> str:
        """Summarize the JSON body of the request and return its translation."""
        body: str = await http_request.json()
        short_text = self.summarize(body)

        # Hand the summary to the Translator deployment and wait for the result.
        return await self.translator.translate.remote(short_text)


app = Summarizer.bind(Translator.bind())

serve run serve_quickstart_composed:app
# File name: composed_client.py
import requests

# Seconds to wait for the server before giving up; without a timeout the
# request could block indefinitely if the server never answers.
REQUEST_TIMEOUT_S = 60

english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)
response = requests.post(
    "http://127.0.0.1:8000/",
    json=english_text,
    timeout=REQUEST_TIMEOUT_S,
)
# Fail loudly on a 4xx/5xx reply instead of printing an error body as if it
# were the translated summary.
response.raise_for_status()
french_text = response.text

print(french_text)
测试:
python composed_client.py
结果:
c'était le meilleur des temps, c'était le pire des temps .
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。