赞
踩
# 如果通过多卡部署 13B 模型,会发现两张卡各占用一个进程(rank=0 和 rank=1);这种情况会导致程序死掉,所以无法直接部署。
class Config(BaseModel):
    """Request body for the ``/llama/`` text-completion endpoint."""

    # Prompts to complete; the default is a short code-completion example.
    prompts: List[str] = [
        "import socket\n"
        "def ping_exponential_backoff(host: str):"
    ]
    # Forwarded as ``max_gen_len`` to ``generator.text_completion``.
    max_gen_len: Optional[int] = None
    # Forwarded as ``temperature`` to ``generator.text_completion``.
    temperature: float = 0.2
    # Forwarded as ``top_p`` to ``generator.text_completion``.
    top_p: float = 0.90
-
# Only rank 0 exposes the HTTP endpoint. Every other rank sits in a loop,
# receives the request parameters broadcast by rank 0, and runs the same
# generation call so that all ranks execute the model together.
if dist.get_rank() == 0:

    @app.post("/llama/")
    def generate(config: Config):
        """Run a text completion for the first prompt of the request.

        The generation arguments are broadcast to the other ranks before
        rank 0 starts generating, so every rank calls
        ``generator.text_completion`` with identical arguments.

        Returns:
            A dict ``{"responses": results}`` where ``results`` is whatever
            ``generator.text_completion`` returned on rank 0.
        """
        prompts = [config.prompts[0]]
        print(prompts)
        max_gen_len = config.max_gen_len
        temperature = config.temperature
        top_p = config.top_p
        # Broadcast exactly what rank 0 is about to run. Sending the full
        # ``config.prompts`` here would make the workers generate for a
        # different batch than rank 0 whenever a request carries more than
        # one prompt.
        dist.broadcast_object_list(
            [prompts, max_gen_len, temperature, top_p], src=0
        )
        results = generator.text_completion(
            prompts,  # type: ignore
            max_gen_len=max_gen_len,
            temperature=temperature,
            top_p=top_p,
        )
        print(results)
        return {"responses": results}

    uvicorn.run(app, host="127.0.0.1", port=5000)
else:
    import traceback

    while True:
        # Placeholders overwritten in place by the broadcast from rank 0:
        # [prompts, max_gen_len, temperature, top_p].
        config = [None] * 4
        try:
            dist.broadcast_object_list(config, src=0)
            generator.text_completion(
                config[0],
                max_gen_len=config[1],
                temperature=config[2],
                top_p=config[3],
            )
        except Exception:
            # Best effort: keep the worker alive for the next request, but
            # report the failure instead of hiding it. KeyboardInterrupt and
            # SystemExit are not caught, so the process can still be stopped.
            traceback.print_exc()
通过判断 rank 进行部署:只在 rank 0 上启动 HTTP 服务,其余 rank 循环接收 rank 0 广播的参数并同步执行推理,这样就不会报错了 ^_^
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。