赞
踩
参考之前的文章:
https://blog.csdn.net/freewebsys/article/details/136921703
# coding=utf-8
"""
代码测试工具:
python3 test_throughput.py --api-address http://localhost:8000 --model-name chatglm3-6b --n-thread 10
"""
import argparse
import json
import requests
import threading
import time
def main():
    """Send concurrent chat-completion requests and print the throughput.

    Reads the module-level ``args`` namespace (``api_address``,
    ``model_name``, ``n_thread``), starts ``args.n_thread`` threads that each
    POST the same prompt to ``<api_address>/v1/chat/completions``, waits for
    all of them, and prints the elapsed time together with the number of
    completion tokens generated per second.

    A request that fails (network error, non-JSON body, or a response without
    ``usage.completion_tokens``) is reported and excluded from the total
    instead of aborting the whole run.
    """
    headers = {"User-Agent": "openai client", "Content-Type": "application/json"}
    ploads = {
        "model": args.model_name,
        "messages": [{"role": "user", "content": "生成一个50字的故事"}],
        "temperature": 0.7,
    }
    thread_api_addr = args.api_address

    def send_request(results, i):
        """POST one request and store its completion-token count in results[i].

        On failure results[i] is left as None so the caller can tell the
        request did not complete.
        """
        print(f"thread {i} goes to {thread_api_addr}")
        try:
            response = requests.post(
                thread_api_addr + "/v1/chat/completions",
                headers=headers,
                json=ploads,
                stream=False,
            )
            print(response.text)
            response_new_words = json.loads(response.text)["usage"]["completion_tokens"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # ValueError covers an invalid JSON body; KeyError/TypeError cover
            # error payloads that carry no "usage" object.
            print(f"=== Thread {i} === failed: {err!r}")
            return
        print(f"=== Thread {i} ===, words: {response_new_words} ")
        results[i] = response_new_words

    # use N threads to prompt the backend
    tik = time.time()
    threads = []
    results = [None] * args.n_thread
    for i in range(args.n_thread):
        t = threading.Thread(target=send_request, args=(results, i))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    print(f"Time (POST): {time.time() - tik} s")

    # Only requests that actually returned a token count contribute to the
    # total; summing the raw list would raise TypeError on a None slot.
    completed = [count for count in results if count is not None]
    n_failed = len(results) - len(completed)
    if n_failed:
        print(f"{n_failed} of {args.n_thread} requests failed and were excluded.")
    n_words = sum(completed)
    time_seconds = time.time() - tik
    # Guard against a zero elapsed time (e.g. --n-thread 0 on a coarse clock).
    throughput = n_words / time_seconds if time_seconds > 0 else 0.0
    print(
        f"Time (Completion): {time_seconds}, n threads: {args.n_thread}, "
        f"throughput: {throughput} words/s."
    )
if __name__ == "__main__":
    # Command-line options as (flag, type, default) rows; the parsed namespace
    # is bound to the module-level name `args`, which main() reads.
    cli_options = (
        ("--api-address", str, "http://127.0.0.1:8000"),
        ("--model-name", str, "chatglm3-6b"),
        ("--n-thread", int, 10),
    )
    parser = argparse.ArgumentParser()
    for flag, value_type, fallback in cli_options:
        parser.add_argument(flag, type=value_type, default=fallback)
    args = parser.parse_args()
    main()
先启动 ollama 服务(监听 8000 端口):
export OLLAMA_HOST="0.0.0.0:8000"
/usr/local/bin/ollama serve
服务启动后,运行待测试的模型:
export OLLAMA_HOST="0.0.0.0:8000"
ollama run qwen:0.5b-chat
ollama run qwen:1.8b-chat
然后分别对两个模型进行吞吐量测试,结果如下:
python3 test_throughput.py --api-address http://192.168.1.115:8000 --model-name qwen:0.5b-chat --n-thread 1
thread 0 goes to http://192.168.1.115:8000
{"id":"chatcmpl-536","object":"chat.completion","created":1711031396,"model":"qwen:0.5b-chat","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"在一个古老的小镇上,住着一位勇敢的冒险家。一天,他得知小镇上的一座古老建筑被邪恶势力侵袭。为了救出这座古建筑,冒险家决定勇往直前。\n\n最终,冒险家成功解除了邪恶势力。当他站在胜利之线上时,他明白只有勇敢面对和挑战,才能真正实现自己的人生目标。\n"},"finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":79,"total_tokens":79}}
=== Thread 0 ===, words: 79
Time (POST): 8.190938234329224 s
Time (Completion): 8.1909818649292, n threads: 1, throughput: 9.644753376667726 words/s.
python3 test_throughput.py --api-address http://192.168.1.115:8000 --model-name qwen:1.8b-chat --n-thread 1
thread 0 goes to http://192.168.1.115:8000
{"id":"chatcmpl-376","object":"chat.completion","created":1711032770,"model":"qwen:1.8b-chat","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"在一个古老的村庄里,住着一个名叫艾伦的男孩。艾伦的家世代以种田为生,经济并不宽裕。\n\n一天,艾伦在山脚下发现了一处神秘的矿洞。他心中充满好奇和激动,决定探秘这个神秘的地方。\n\n经过艰苦的挖掘,艾伦成功地找到了一颗晶莹剔透的宝石。他欣喜若狂,将宝石捧到父母面前,告诉他们自己发现了神奇的矿洞,而且还得到了一颗晶莹剔透的宝石。\n\n从此以后,艾伦的故事在村庄里流传开来,成为了一个传奇般的农村故事。\n"},"finish_reason":"stop"}],"usage":{"prompt_tokens":14,"completion_tokens":130,"total_tokens":144}}
=== Thread 0 ===, words: 130
Time (POST): 45.56362271308899 s
Time (Completion): 45.56366229057312, n threads: 1, throughput: 2.853150810638334 words/s.
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。