赞
踩
达摩院发布的Qwen系列大模型确实不错:在我们针对工业场景/工业图文场景的基础模型判断测评中,它超过了MiniGPT-4、VisualGLM、mPLUG-Owl等一众多模态大模型,因此选定其作为开发基座。
github链接:https://github.com/QwenLM/Qwen-VL (QwenLM/Qwen-VL: The official repo of Qwen-VL (通义千问-VL) chat & pretrained large vision language model proposed by Alibaba Cloud.)
huggingface:https://huggingface.co/Qwen/Qwen-VL
模型部署部分csdn有,出现各种问题可能是transformers版本问题,这里后面再说。
在github中给出了微调数据集的模板:
- [
- {
- "id": "identity_0",
- "conversations": [
- {
- "from": "user",
- "value": "你好"
- },
- {
- "from": "assistant",
- "value": "我是Qwen-VL,一个支持视觉输入的大模型。"
- }
- ]
- },
- {
- "id": "identity_1",
- "conversations": [
- {
- "from": "user",
- "value": "Picture 1: <img>https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg</img>\n图中的狗是什么品种?"
- },
- {
- "from": "assistant",
- "value": "图中是一只拉布拉多犬。"
- },
- {
- "from": "user",
- "value": "框出图中的格子衬衫"
- },
- {
- "from": "assistant",
- "value": "<ref>格子衬衫</ref><box>(588,499),(725,789)</box>"
- }
- ]
- },
- {
- "id": "identity_2",
- "conversations": [
- {
- "from": "user",
- "value": "Picture 1: <img>assets/mm_tutorial/Chongqing.jpeg</img>\nPicture 2: <img>assets/mm_tutorial/Beijing.jpeg</img>\n图中都是哪"
- },
- {
- "from": "assistant",
- "value": "第一张图片是重庆的城市天际线,第二张图片是北京的天际线。"
- }
- ]
- }
- ]
可以选择手撸,为了方便,写了一个简便的标注工具
标注完成后会生成符合上述格式的json文件,同时把对应的图片复制到同一文件夹中。标注工具代码如下(label.py):
- import tkinter as tk
- from tkinter import filedialog, Label, simpledialog
- from PIL import Image, ImageTk
- import json
- import random
- import string
- import os
- import shutil
- import subprocess
- import sys
-
# Build the main application window.
root = tk.Tk()
root.title("Image Viewer with Dialogue")
root.geometry("1000x600")  # default window size

# (user_entry, assistant_entry) widget pairs for the dynamically added turns.
dialogue_entries = []
-
# Load an image chosen by the user and show it in the left-hand frame.
def load_and_display_image():
    """Ask for an image file, display it, and build the annotation controls.

    The selected path is stored in the module-level ``image_path`` (an empty
    string when the dialog is cancelled) so that ``save_to_json`` can copy the
    file later.  The preview is scaled to a height of 400 px with the aspect
    ratio preserved.

    The "Add Dialogue" / "Save" buttons and the two first-turn entry boxes
    (``input_box_1``, ``input_box_2``) are created the first time an image is
    loaded and reused afterwards, so opening another image does not stack
    duplicate widgets.
    """
    global image_label
    global image_path
    global input_box_1, input_box_2

    # A tuple of patterns is portable; the semicolon-separated single string
    # ("*.png;*.jpg;*.jpeg") is not understood by the Tk dialog outside Windows.
    file_path = filedialog.askopenfilename(
        filetypes=[("Image files", ("*.png", "*.jpg", "*.jpeg"))]
    )
    image_path = file_path
    if not file_path:
        return

    # The context manager closes the source file once the resized copy exists.
    with Image.open(file_path) as image:
        # Derive the display width from the original aspect ratio.
        width = max(1, int((400 / image.height) * image.width))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resized = image.resize((width, 400), Image.LANCZOS)
    photo = ImageTk.PhotoImage(resized)

    if 'image_label' in globals():
        image_label.config(image=photo)
    else:
        image_label = tk.Label(image_frame, image=photo)
        image_label.pack(padx=7, pady=7)
    image_label.image = photo  # keep a reference so the photo is not garbage-collected

    # The controls already exist after the first successful load.
    if 'input_box_1' in globals():
        return

    # Button that appends another user/assistant pair of entry boxes.
    add_dialogue_button = tk.Button(root, text="Add Dialogue", command=add_dialogue_boxes)
    add_dialogue_button.pack(side="top", padx=(5, 0), pady=(7, 0))

    # Button that writes the annotation to disk.
    save_button = tk.Button(root, text="Save", command=save_to_json)
    save_button.pack(side="top", pady=(7, 0))

    # Entry boxes for the first turn (the one that refers to the picture).
    input_box_1 = create_labeled_input(root, "Input with the picture:")
    input_box_2 = create_labeled_input(root, "Assistant:")
-
-
-
# Left-hand frame that hosts the image preview.
image_frame = tk.Frame(root)
image_frame.pack(side="left", anchor="nw", padx=7, pady=7)

# Button that opens the file dialog and loads the chosen image.
open_button = tk.Button(root, text="Open Image", command=load_and_display_image)
open_button.pack(side="top", pady=(7, 0))
-
# Helper that stacks a caption and a text entry inside a parent widget.
def create_labeled_input(parent, label_text):
    """Pack a caption label followed by a 50-character entry into *parent*.

    Both widgets are packed with ``side="top"``.  Returns the ``tk.Entry`` so
    the caller can read the typed text later; the label is not returned.
    """
    tk.Label(parent, text=label_text).pack(side="top")
    text_box = tk.Entry(parent, width=50)
    text_box.pack(side="top")
    return text_box
-
-
-
# Callback of the "Add Dialogue" button.
def add_dialogue_boxes():
    """Append one more user/assistant pair of entry boxes to the window.

    The pair is recorded in ``dialogue_entries`` in creation order so that
    ``save_to_json`` can emit the turns in the same order.
    """
    turn = (
        create_labeled_input(root, "User:"),
        create_labeled_input(root, "Assistant:"),
    )
    dialogue_entries.append(turn)
-
-
# Write the current annotation (conversation JSON + image copy), then restart.
def save_to_json():
    """Save the typed conversation and the current image under ``saves/``.

    A random 10-character stem is generated and used for both output files:
    ``saves/<stem>.json`` holds ``{"conversations": [...]}`` and
    ``saves/<stem>.jpg`` is a copy of the image currently referenced by the
    module-level ``image_path``.  The first user turn is prefixed with
    ``Picture 1: <img><stem>.jpg</img>`` and a newline so the text refers to
    the copied image.  Every turn has the shape ``{"from": ..., "value": ...}``.

    After saving, a fresh instance of the running script is started and the
    current process exits, which resets the form for the next image.
    """
    save_dir = "saves"
    os.makedirs(save_dir, exist_ok=True)

    # Random 10-character stem shared by the JSON file and the image copy.
    random_filename = ''.join(random.choices(string.ascii_lowercase + string.digits, k=10))
    json_file_path = os.path.join(save_dir, random_filename + '.json')
    # NOTE: the copy always gets a .jpg name, even when the source is a PNG;
    # the bytes are copied unchanged, not re-encoded.
    image_file_path = os.path.join(save_dir, random_filename + '.jpg')

    conversations = [
        {
            "from": "user",
            "value": f"Picture 1: <img>{random_filename}.jpg</img>\n{input_box_1.get()}",
        },
        {
            "from": "assistant",
            "value": input_box_2.get(),
        },
    ]
    for user_entry, assistant_entry in dialogue_entries:
        user_text = user_entry.get()
        assistant_text = assistant_entry.get()
        # Skip pairs that were added but left completely blank.
        if not user_text.strip() and not assistant_text.strip():
            continue
        conversations.append({"from": "user", "value": user_text})
        # The text must live under "value" like every other turn; storing it
        # under "assistant" produced records without a "value" field.
        conversations.append({"from": "assistant", "value": assistant_text})

    dialogue_data = {"conversations": conversations}

    # Write the conversation to the JSON file.
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(dialogue_data, json_file, ensure_ascii=False, indent=4)

    # Copy the image next to the JSON file.
    if image_path and os.path.isfile(image_path):
        shutil.copy2(image_path, image_file_path)

    # Relaunch this very script with the same interpreter instead of a
    # hard-coded "python test.py", which only works under that exact name.
    subprocess.Popen([sys.executable, os.path.abspath(sys.argv[0])])
    sys.exit()
-
-
-
-
# Start the Tk event loop.
root.mainloop()
标注后,所有的标注文件和图片都保存在saves文件夹下,保存路径可以在代码中自行更改。
另外需要一个脚本来聚合json文件:merge.py
- import os
- import json
- from collections import OrderedDict
- import re
-
# Directory holding the annotation files produced by the labelling step;
# merge.py is best placed one level above it.
directory_path = 'saves/'

# Accumulates one record per annotation file.
merged_data = []

# Prefix put in front of every image file name. Ideally the absolute path of
# the training image directory, so the images can be found during Qwen
# fine-tuning.
image_path_prefix = 'pathtoyourimages/'
-
# Matches one <img>....jpg</img> tag.  The capture excludes "<" so a single
# match can never run across two adjacent tags on the same line.
img_pattern = re.compile(r'<img>([^<]*?\.jpg)</img>')


def update_img_paths(obj, prefix=None):
    """Prefix every ``<img>*.jpg</img>`` path found in *obj*, in place.

    *obj* may be a dict (including ``OrderedDict``) or a list; nested dicts
    and lists are walked recursively and every string value/item is
    rewritten.  Any other object is left untouched.

    Args:
        obj: The container to update in place.
        prefix: Text inserted in front of each image path.  Defaults to the
            module-level ``image_path_prefix``.

    Returns:
        None.
    """
    if prefix is None:
        prefix = image_path_prefix

    def _with_prefix(match):
        # A function replacement keeps backslashes in the prefix (e.g. Windows
        # paths) literal; in a replacement template they would be parsed as
        # escapes.  The closing tag must be </img>, not a second <img>.
        return f'<img>{prefix}{match.group(1)}</img>'

    if isinstance(obj, dict):
        entries = list(obj.items())
    elif isinstance(obj, list):
        entries = list(enumerate(obj))
    else:
        return

    for key, value in entries:
        if isinstance(value, (dict, list)):
            update_img_paths(value, prefix)
        elif isinstance(value, str):
            obj[key] = img_pattern.sub(_with_prefix, value)
-
# Walk the annotation directory in sorted order so the merged file comes out
# the same on every run (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(directory_path)):
    # Only JSON annotation files are merged.
    if not filename.endswith('.json'):
        continue

    file_path = os.path.join(directory_path, filename)
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = json.load(file, object_pairs_hook=OrderedDict)
    except json.JSONDecodeError as e:
        print(f"Error reading {filename}: {e}")
        continue

    # A record can only be built from a JSON object; skip anything else
    # instead of crashing in OrderedDict.update().
    if not isinstance(content, dict):
        print(f"Error reading {filename}: top-level JSON value is not an object")
        continue

    # Build a new OrderedDict so that the 'id' field comes first; the id is
    # the file name without its extension.
    new_content = OrderedDict()
    new_content['id'] = os.path.splitext(filename)[0]
    new_content.update(content)
    update_img_paths(new_content)
    merged_data.append(new_content)

# Name of the merged output file, written to the current working directory.
output_filename = 'merged_data.json'
output_filepath = output_filename

# Write the merged records to the new JSON file.
with open(output_filepath, 'w', encoding='utf-8') as output_file:
    json.dump(merged_data, output_file, ensure_ascii=False, indent=4)

print(f"Merge complete. Combined file created at {output_filepath}")
在以上文件中,建议将label.py和merge.py,saves文件夹放在同一级,输出后的文件merged_data.json也在这一级文件,之后移动图片到训练数据集文件夹,并更改merge.py中的image_path_prefix路径,即可完成所有标注文件储存。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。