赞
踩
本小项目针对由多张关键帧截图拼接而成的广告图片,使用 GPT-4(gpt-4-vision-preview 模型)进行批量识别,并根据提示词分析图片所属的行业和宣传的产品。
1、新建一个 recognition_project 目录,在其中建立 default 子目录并把待识别的图片放进去,然后把脚本 recognition_image.py 放到 recognition_project 目录下
2、配置gpt的key
api_key是openai的key
base_url 是中转站或者 OpenAI 官方的接口地址;如果你购买的是中转站的 key,填中转站的地址即可(末尾不要带 /v1,脚本会自动拼接 /v1/chat/completions)
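3、执行脚本即可(python recognition_image.py)。脚本会逐张识别 default 目录下的 .jpg 图片,并把识别结果作为前缀加到原文件名上;多次重试仍无法得到有效结果的图片会被加上 fail_ 前缀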
recognition_image.py
import base64
import requests
import datetime
import os
import time
from tqdm import tqdm

# Upper bound, in seconds, for one chat-completions request. Without a timeout
# a single stalled connection would block the whole batch indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# Phrases the model uses for "cannot recognise"; they are removed from the
# text that ends up in the file name.
_UNRECOGNISED_PHRASES = ("无法识别", "无法辨认", "无法确认")


def encode_image(image_path):
    """Return the content of the file at ``image_path`` as a base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def recognition_image(image_file, base_url, api_key):
    """Send one image to the chat-completions endpoint and return the reply.

    Args:
        image_file: ``[absolute_path, file_name]`` pair as produced by
            ``list_jpg_files``.
        base_url: Service root; ``/v1/chat/completions`` is appended to it.
        api_key: Bearer token sent in the ``Authorization`` header.

    Returns:
        The text of the first choice on HTTP 200, otherwise the literal
        ``"识别失败"`` (also returned when the request itself fails, so the
        caller's retry loop can try again).
    """
    image_path = image_file[0]
    file = image_file[1]
    base64_image = encode_image(image_path)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"这是一张广告图片,名称是{file},该图片包含多张关键帧截图,根据你的理解,告诉我它的行业,宣传的产品,"
                                f"如果你不能识别,直接告诉我不能识别就行。"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        # Report connection errors / timeouts the same way as a non-200
        # response instead of aborting the retry loop in the caller.
        print(e)
        print(f"{file}: 请求失败")
        return "识别失败"

    # Only a 200 response carries a usable completion.
    if response.status_code == 200:
        response_data = response.json()
        return response_data['choices'][0]['message']['content']

    print(response.status_code)
    print(response.text)
    print(f"{file}: 请求失败")
    return "识别失败"


def check_file_existence(file_path):
    """Return True if ``file_path`` exists."""
    return os.path.exists(file_path)


def list_jpg_files(directory):
    """Recursively collect the ``.jpg`` files below ``directory``.

    Returns:
        A list of ``[absolute_path, file_name]`` pairs; the extension check
        is case-insensitive.
    """
    jpg_files = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith('.jpg'):
                absolute_path = os.path.abspath(os.path.join(root, file))
                jpg_files.append([absolute_path, file])
    return jpg_files


def _format_recognition_text(text):
    """Turn a four-line ``label-value`` reply into ``value-value-value-value``.

    Returns ``None`` when the reply does not have exactly four non-blank
    lines or when a line has no ``-`` separator, so the caller can retry.
    """
    # Ignore blank lines and surrounding whitespace so that a trailing
    # newline in the reply does not invalidate an otherwise good answer.
    rows = [row.strip() for row in text.splitlines() if row.strip()]
    if len(rows) != 4:
        return None

    values = []
    for row in rows:
        # '/' would be read as a path separator once the value is placed in
        # a file name, so it is swapped for '&' first.
        parts = row.replace('/', '&').split("-")
        if len(parts) < 2:
            return None
        values.append(parts[1].strip())

    joined = "-".join(values)
    for phrase in _UNRECOGNISED_PHRASES:
        joined = joined.replace(phrase, "")
    return joined


def process_recognition(file_array, base_url, api_key):
    """Recognise one image, retrying when the reply is not usable.

    Args:
        file_array: ``[absolute_path, file_name]`` pair.
        base_url: Service root passed through to ``recognition_image``.
        api_key: API key passed through to ``recognition_image``.

    Returns:
        The formatted recognition text, or ``"fail"`` when no attempt
        produced a well-formed reply.
    """
    max_retries = 2
    for _ in range(max_retries):
        text = recognition_image(file_array, base_url, api_key)
        result = _format_recognition_text(text)
        if result is not None:
            return result
        # Brief pause before asking again.
        time.sleep(1)
    return "fail"


def recognition_main(file_array, base_url, api_key):
    """Recognise one image and rename it to ``<result>_<original name>``.

    The file stays in its directory; only its name gets the prefix. When
    recognition fails the prefix is ``fail``.
    """
    text = process_recognition(file_array, base_url, api_key)
    source_path = file_array[0]
    new_name = text + '_' + file_array[1]
    new_path = os.path.join(os.path.dirname(source_path), new_name)
    if check_file_existence(source_path):
        os.rename(source_path, new_path)


def main(directory, base_url, api_key):
    """Process every ``.jpg`` below ``directory`` and print the elapsed time."""
    start_time = datetime.datetime.now()
    jpg_files = list_jpg_files(directory)
    for file_array in tqdm(jpg_files):
        # Top-level boundary of the batch: report the error and move on so a
        # single bad image does not stop the remaining ones.
        try:
            recognition_main(file_array, base_url, api_key)
            time.sleep(1)
        except Exception as e:
            print('错误:',e)
    print(datetime.datetime.now() - start_time)


if __name__ == '__main__':
    # The key can be supplied through the OPENAI_API_KEY environment variable
    # so it does not have to be written into the script; the placeholder is
    # used when the variable is not set.
    api_key = os.environ.get("OPENAI_API_KEY", "sk-xxxxx")
    base_url = 'https://xxx.cn'
    directory = './default'
    main(directory, base_url, api_key)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。