Processing the color FERET Database: every .ppm file is bz2-compressed, so decompress it first and then convert the format.
Python processing code:
import bz2
import os
from PIL import Image

# Root folder of the extracted colorferet archive
rootdir = r'C:\Users\xxx\Downloads\colorFERETDatabase\colorferet'

for folder_path, dirs, fns in os.walk(rootdir):
    # Print leaf folders as a simple progress indicator
    if len(dirs) == 0:
        print(folder_path)
    for filename in fns:
        # Only process the bz2-compressed files
        if filename.endswith('.bz2'):
            # Full path of the compressed file
            file_path = os.path.join(folder_path, filename)
            # Read the decompressed data
            with bz2.open(file_path, 'rb') as f:
                data = f.read()
            # Write the decompressed .ppm next to the archive
            output_path = os.path.join(folder_path, filename[:-4])
            with open(output_path, 'wb') as f:
                f.write(data)
            # Convert the .ppm image to .png
            ppm = Image.open(output_path)
            ppm.save(output_path[:-3] + 'png')
            # Remove the intermediate .ppm and the original .bz2
            os.remove(output_path)
            os.remove(file_path)
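A minimal sanity check after the conversion, assuming the same rootdir as above; the expectation that no .bz2 or .ppm files remain (because the script deletes them) is a sketch of my own, not part of the dataset documentation:

import os

# Assumed to be the same root folder used in the conversion script above
rootdir = r'C:\Users\xxx\Downloads\colorFERETDatabase\colorferet'

remaining_bz2, remaining_ppm, png_count = 0, 0, 0
for folder_path, dirs, fns in os.walk(rootdir):
    for filename in fns:
        if filename.endswith('.bz2'):
            remaining_bz2 += 1
        elif filename.endswith('.ppm'):
            remaining_ppm += 1
        elif filename.endswith('.png'):
            png_count += 1

# After a successful run, the two "leftover" counters should be zero
print(f"png: {png_count}, leftover .bz2: {remaining_bz2}, leftover .ppm: {remaining_ppm}")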
IMDb-Face is a new large-scale noise-controlled dataset for face recognition research. The dataset contains about 1.7 million faces of 59k identities, manually cleaned from 2.0 million raw images. All images were obtained from the IMDb website. A detailed introduction to IMDb-Face can be found in the paper (https://arxiv.org/abs/1807.11649).
Python processing code:
import os
import csv
import requests
from multiprocessing.dummy import Pool as ThreadPool
from PIL import Image


def remove_empty_directories(directory):
    # Remove identity folders whose downloads all failed
    for root, dirs, files in os.walk(directory, topdown=False):
        for d in dirs:
            folder_path = os.path.join(root, d)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Deleted empty folder: {folder_path}")


def download_image(image_url, save_path, bbox):
    # Download one image; bbox is carried along so the same tuple list can feed a crop pass
    try:
        response = requests.get(image_url, stream=True)
        response.raise_for_status()
        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        print(f"Downloaded {save_path}")
    except Exception as e:
        print(f"Failed to download {save_path}: {str(e)}")


def crop_and_save_image(image_url, save_path, bbox):
    # Crop a downloaded image in place to its annotated face rectangle (x1, y1, x2, y2)
    try:
        with Image.open(save_path) as img:
            cropped_img = img.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
            cropped_img.save(save_path)
            print(f"Saved {save_path}")
    except Exception as e:
        print(f"Failed to save {save_path}: {str(e)}")


def download_dataset(csv_file, data_dir, num_processes=8):
    os.makedirs(data_dir, exist_ok=True)
    url_list = []
    with open(csv_file, "r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            url = row['url']
            image_id = row['name']      # identity name, used as the folder name
            image_num = row['image']    # image file name within the identity folder
            bbox = [int(v) for v in row['rect'].split(' ')]
            save_dir = os.path.join(data_dir, image_id)
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, image_num)
            url_list.append((url, save_path, bbox))
    print('csv loaded')
    print('len : ', len(url_list))

    # Download with a thread pool; the work is network-bound, so threads are sufficient
    pool = ThreadPool(num_processes)
    pool.starmap(download_image, url_list)
    pool.close()
    pool.join()

    remove_empty_directories(data_dir)


csv_file = "IMDb-Face.csv"
save_dir = 'IMDB_temp'
download_dataset(csv_file, save_dir, num_processes=8)
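The script above only saves the full images; crop_and_save_image is defined but never called. A minimal standalone sketch of running the crop as a second pass, reusing the same CSV columns and output layout as the downloader (the helper names crop_one and crop_dataset are my own, and each image is overwritten by its cropped face):

import os
import csv
from multiprocessing.dummy import Pool as ThreadPool
from PIL import Image

def crop_one(save_path, bbox):
    # Crop a downloaded image in place to its annotated face rectangle
    try:
        with Image.open(save_path) as img:
            img.crop((bbox[0], bbox[1], bbox[2], bbox[3])).save(save_path)
    except Exception as e:
        print(f"Failed to crop {save_path}: {e}")

def crop_dataset(csv_file="IMDb-Face.csv", data_dir="IMDB_temp", num_processes=8):
    jobs = []
    with open(csv_file, "r") as f:
        for row in csv.DictReader(f):
            save_path = os.path.join(data_dir, row['name'], row['image'])
            if os.path.exists(save_path):  # skip images whose download failed
                bbox = [int(v) for v in row['rect'].split(' ')]
                jobs.append((save_path, bbox))
    pool = ThreadPool(num_processes)
    pool.starmap(crop_one, jobs)
    pool.close()
    pool.join()

crop_dataset()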
http://vcipl-okstate.org/pbvs/bench/Data/07/download.html
Ready to use directly after downloading.
import cv2
import os

# Folder containing the downloaded video files
video_path = "video"
# Folder where the extracted PNG frames will be written
save_folder = "png"
os.makedirs(save_folder, exist_ok=True)

for mp4 in os.listdir(video_path):
    # Open one video file
    cap = cv2.VideoCapture(os.path.join(video_path, mp4))
    if not cap.isOpened():
        print(f"Failed to open {mp4}")
        continue
    # Total number of frames, used to subsample to at most ~1200 frames per video
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(mp4, frame_count)
    step = frame_count // 1200 + 1

    # One sub-folder per video, named after the file without its extension
    out_dir = os.path.join(save_folder, os.path.splitext(mp4)[0])
    os.makedirs(out_dir, exist_ok=True)

    i = 0
    while True:
        # Read frames until the video ends
        ret, frame = cap.read()
        if not ret:
            break
        i += 1
        # Save every step-th frame as a zero-padded PNG
        if i % step == 0:
            cv2.imwrite(os.path.join(out_dir, f"{i:06d}.png"), frame)
    cap.release()
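A small sketch to check how many frames were actually written per video, assuming the png output layout produced by the script above (the ~1200-frame cap per video comes from the sampling step):

import os

save_folder = "png"  # same output folder as in the extraction script above

# Count extracted frames per video; each sub-folder should hold at most ~1200 PNGs
for name in sorted(os.listdir(save_folder)):
    sub = os.path.join(save_folder, name)
    if os.path.isdir(sub):
        n = len([f for f in os.listdir(sub) if f.endswith('.png')])
        print(f"{name}: {n} frames")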