赞
踩
论文:
Image Warping with Scattered Data Interpolation
局部变形算法:液化,膨胀
全局变形算法:IDW,MLS,特征线变形
算法思路:
算法优缺点:
优点:实现简单,cpu实现,gpu实现都友好
缺点:耗时与控制点的个数、图片的高、图片的宽这3个指标成正比:控制点越多速度越慢,图片越大速度越慢。如果控制点太少,形变会不平滑。
应用场景:
大脸,瘦脸,大眼,等任何形变场景
基本实现:
好处,更容易结合公式看清原理,缺点,速度很慢。
class IDW(object):
    """Readable reference implementation of an inverse-distance-weighted warp.

    Every pixel is shifted by a weighted average of the control-point
    displacements ``output_points - input_points``; the weight of control
    point ``i`` for pixel ``p`` is ``1 / |p - p_i| ** (2 * power)``.
    The whole ``(height, width, n_points)`` weight volume is materialised, so
    time and memory grow with image size times the number of control points.
    """

    def __init__(self):
        pass

    @staticmethod
    def _build_maps(height, width, input_points, output_points, power=2):
        """Return the float32 ``(map_x, map_y)`` sampling grids for ``cv2.remap``."""
        src = np.asarray(input_points, dtype=np.float64)
        dst = np.asarray(output_points, dtype=np.float64)
        cols = np.arange(width, dtype=np.float64)
        rows = np.arange(height, dtype=np.float64)

        if src.size == 0:
            # No control points: identity mapping instead of a 0/0 division.
            map_x = np.tile(cols, (height, 1))
            map_y = np.repeat(rows[:, np.newaxis], width, axis=1)
            return map_x.astype(np.float32), map_y.astype(np.float32)

        # Broadcasting (1, w, 1) and (h, 1, 1) against (n,) yields (h, w, n)
        # without allocating per-coordinate copies of the control points.
        grid_x = cols.reshape(1, width, 1)
        grid_y = rows.reshape(height, 1, 1)
        dx = grid_x - src[:, 0]
        dy = grid_y - src[:, 1]
        dist = np.power(dx * dx + dy * dy, power)

        # A pixel lying exactly on a control point has distance 0.  Such a
        # pixel takes that control point's displacement exactly (the limit of
        # the weighting); this works for float, integer and out-of-image
        # control points alike because no array indexing by point is needed.
        hit = dist == 0.0
        dist[hit] = 1.0  # placeholder so the reciprocal below is finite
        weights = 1.0 / dist
        on_point = hit.any(axis=2)
        weights[on_point] = hit[on_point]

        norm = weights.sum(axis=2)
        displ = dst - src
        map_x = grid_x[:, :, 0] + (weights * displ[:, 0]).sum(axis=2) / norm
        map_y = grid_y[:, :, 0] + (weights * displ[:, 1]).sum(axis=2) / norm
        return map_x.astype(np.float32), map_y.astype(np.float32)

    def IDW(self, srcImg, input_points, output_points):
        """Warp ``srcImg`` so that ``input_points`` are displaced towards ``output_points``.

        Args:
            srcImg: image array; only its first two dimensions (height, width)
                are read here, so 2-D grayscale input is accepted as well.
            input_points: ``(n, 2)`` array-like of ``(x, y)`` control points.
            output_points: ``(n, 2)`` array-like of displaced control points.

        Returns:
            The result of ``cv2.remap`` with bilinear interpolation, where
            each output pixel samples ``srcImg`` at its own coordinate plus
            the weighted displacement.

        Prints the elapsed wall-clock time.
        """
        start = time.time()
        height, width = srcImg.shape[:2]
        map_x, map_y = self._build_maps(height, width, input_points, output_points, power=2)

        copyImg = cv2.remap(srcImg, map_x, map_y, interpolation=cv2.INTER_LINEAR)
        end = time.time()
        print("IDW time cost:{} s".format(end - start))
        return copyImg
numba向量化加速实现:
好处,基于向量化实现,速度更快,缺点,没有足够快
- from numba import jit, vectorize, int64, float64, njit, prange
def IDW(self, srcImg, input_points, output_points):
    """Inverse-distance-weighted warp using numba ``@vectorize`` kernels.

    Args:
        srcImg: image array; only its first two dimensions (height, width)
            are read here.
        input_points: ``(n, 2)`` array-like of ``(x, y)`` control points.
        output_points: ``(n, 2)`` array-like of displaced control points.

    Returns:
        The result of ``cv2.remap`` with bilinear interpolation.

    Prints the elapsed wall-clock time (kernel definition is not timed).
    """

    # NOTE: both kernels are defined inside the function, as in the original,
    # so they are rebuilt on every call; hoisting them to module level would
    # avoid that if the surrounding file layout allows it.
    @vectorize([float64(float64, float64, float64, float64, int64)], target='parallel')
    def l2_power_numba(UX, input_points_X, UY, input_points_Y, u):
        tmp_x = UX - input_points_X
        tmp_y = UY - input_points_Y
        sq_dist = tmp_x * tmp_x + tmp_y * tmp_y
        if u == 1:
            return sq_dist
        else:
            return np.power(sq_dist, u)

    @vectorize([float64(float64)], target='parallel')
    def div_numba(v_tmp):
        return 1 / v_tmp

    start = time.time()
    height, width = srcImg.shape[:2]
    src = np.asarray(input_points, dtype=np.float64)
    dst = np.asarray(output_points, dtype=np.float64)

    u = 2

    # (1, w, 1) and (h, 1, 1) grids broadcast against the (n,) point
    # coordinates inside the ufunc, giving an (h, w, n) result.
    UX = np.arange(width, dtype=np.float64).reshape(1, width, 1)
    UY = np.arange(height, dtype=np.float64).reshape(height, 1, 1)

    v_tmp = l2_power_numba(UX, src[:, 0], UY, src[:, 1], u)

    # Pixels lying exactly on a control point have distance 0.  They take that
    # point's displacement exactly; masking (rather than copying a neighbour
    # pixel's distances) needs no integer / in-bounds control points and can
    # never leave a zero behind.
    hit = v_tmp == 0.0
    v_tmp[hit] = 1.0  # placeholder so the reciprocal below is finite
    v = div_numba(v_tmp)
    on_point = hit.any(axis=2)
    v[on_point] = hit[on_point]

    norm = np.sum(v, axis=2)
    displ = dst - src

    # Index instead of np.squeeze so 1-pixel-wide/high images keep two dims.
    UX = (UX[:, :, 0] + np.sum(v * displ[:, 0], axis=2) / norm).astype(np.float32)
    UY = (UY[:, :, 0] + np.sum(v * displ[:, 1], axis=2) / norm).astype(np.float32)

    copyImg = cv2.remap(srcImg, UX, UY, interpolation=cv2.INTER_LINEAR)
    end = time.time()
    print("IDW FAST time cost:{} s".format(end - start))

    return copyImg
cpu最佳加速实现:
好处,基于scipy加速计算距离,公式合并,减少一次乘法,2d矩阵变1d矩阵。
- from scipy.spatial.distance import cdist
class IDW(object):
    """Inverse-distance-weighted deformation of arbitrary points.

    Args:
        original_control_points: ``(n, d)`` array of control points before
            deformation. Defaults to the 8 corners of the unit cube.
        deformed_control_points: ``(n, d)`` array of the same control points
            after deformation. Defaults to the 8 corners of the unit cube.
        power: exponent applied to the *squared* Euclidean distance, so the
            weight of a control point is ``1 / distance ** (2 * power)``.
    """

    _UNIT_CUBE = (
        (0., 0., 0.), (0., 0., 1.),
        (0., 1., 0.), (1., 0., 0.),
        (0., 1., 1.), (1., 0., 1.),
        (1., 1., 0.), (1., 1., 1.),
    )

    def __init__(self,
                 original_control_points=None,
                 deformed_control_points=None,
                 power=1):
        if original_control_points is None:
            original_control_points = np.array(self._UNIT_CUBE)
        if deformed_control_points is None:
            deformed_control_points = np.array(self._UNIT_CUBE)

        self.original_control_points = original_control_points
        self.deformed_control_points = deformed_control_points
        self.power = power

    def __call__(self, src_pts):
        """Return ``src_pts`` displaced by the weighted control-point offsets.

        Args:
            src_pts: ``(m, d)`` array of points to deform.

        Returns:
            ``(m, d)`` array ``src_pts + offset``.
        """
        src_pts = np.asarray(src_pts)
        displ = self.deformed_control_points - self.original_control_points

        dist = cdist(src_pts, self.original_control_points, metric='sqeuclidean')
        dist = dist ** self.power

        # Weights are the reciprocal of the powered distance.  A point that
        # coincides with a control point (distance 0) takes that control
        # point's displacement exactly: its row of weights becomes the 0/1
        # indicator of the coinciding control point(s).
        hit = dist == 0.0
        dist[hit] = 1.0  # placeholder so the reciprocal below is finite
        weights = 1. / dist
        on_point = hit.any(axis=1)
        weights[on_point] = hit[on_point]

        offset = np.dot(weights, displ) / np.sum(weights, axis=1, keepdims=True)
        return src_pts + offset
-
-
def IDW_cpu(self, srcImg, input_points, output_points, power=3):
    """Warp ``srcImg`` with the scipy-based ``IDW`` deformer.

    Args:
        self: not used by this function.
        srcImg: image array; only its first two dimensions (height, width)
            are read here, so 2-D grayscale input is accepted as well.
        input_points: ``(n, 2)`` array-like of ``(x, y)`` control points.
        output_points: ``(n, 2)`` array-like of displaced control points.
        power: exponent forwarded to ``IDW``; defaults to the previously
            hard-coded value 3.

    Returns:
        The result of ``cv2.remap`` with bilinear interpolation.

    Prints the elapsed wall-clock time.
    """
    start = time.time()

    idw = IDW(original_control_points=np.asarray(input_points, dtype=np.float64),
              deformed_control_points=np.asarray(output_points, dtype=np.float64),
              power=power)

    h, w = srcImg.shape[:2]
    # Flattened row-major pixel grid: x cycles fastest, y is constant per row.
    xs = np.tile(np.arange(w, dtype=np.float64), h)
    ys = np.repeat(np.arange(h, dtype=np.float64), w)
    mesh = np.column_stack((xs, ys))

    new_mesh = idw(mesh)
    UX = new_mesh[:, 0].reshape(h, w).astype(np.float32)
    UY = new_mesh[:, 1].reshape(h, w).astype(np.float32)
    copyImg = cv2.remap(srcImg, UX, UY, interpolation=cv2.INTER_LINEAR)

    end = time.time()
    print("IDW time cost:{} s".format(end - start))
    return copyImg
pytorch基于cuda加速实现:
好处:沿用最佳cpu版本的思路,用pytorch在GPU上加速,速度非常快。缺点:图片太大时显存开销很大,需要注意。解决思路:使用pytorch 1.8及以上版本,对进程可用显存进行限制(见下)。当然也可以考虑用tensorflow基于静态图做优化。
- torch.cuda.set_per_process_memory_fraction(0.5, 0)
- 参数1:fraction 限制的上限比例,如0.5 就是总GPU显存的一半,可以是0~1的任意float大小;
- 参数2:device 设备号; 如0 表示GPU卡 0号;
class IDW_TORCH(object):
    """Inverse-distance-weighted deformation of points, computed with PyTorch.

    Tensors are moved to ``"cuda"`` when ``torch.cuda.is_available()`` and to
    ``"cpu"`` otherwise; the chosen device is stored in ``self.device``.

    Args:
        original_control_points: ``(n, d)`` tensor of control points before
            deformation. Defaults to the 8 corners of the unit cube.
        deformed_control_points: ``(n, d)`` tensor of the same control points
            after deformation. Defaults to the 8 corners of the unit cube.
        power: the weight of a control point is ``1 / distance ** (2 * power)``.
    """

    def __init__(self,
                 original_control_points=None,
                 deformed_control_points=None,
                 power=1):
        unit_cube = [[0., 0., 0.], [0., 0., 1.],
                     [0., 1., 0.], [1., 0., 0.],
                     [0., 1., 1.], [1., 0., 1.],
                     [1., 1., 0.], [1., 1., 1.]]

        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Previously a missing argument crashed on ``None.to(device)``.
        if original_control_points is None:
            original_control_points = torch.tensor(unit_cube)
        if deformed_control_points is None:
            deformed_control_points = torch.tensor(unit_cube)

        self.original_control_points = original_control_points.to(device)
        self.deformed_control_points = deformed_control_points.to(device)

        self.power = power
        self.device = device

    def __call__(self, src_pts):
        """Return ``src_pts`` displaced by the weighted control-point offsets.

        Args:
            src_pts: ``(m, d)`` tensor of points to deform; it is moved to
                ``self.device`` first.

        Returns:
            ``(m, d)`` tensor ``src_pts + offset`` on ``self.device``.
        """
        src_pts = src_pts.to(self.device)
        displ = self.deformed_control_points - self.original_control_points

        dist = torch.cdist(src_pts, self.original_control_points)
        dist = dist ** (self.power * 2)

        # Weights are the reciprocal of the powered distance.  A point that
        # coincides with a control point (distance 0) takes that control
        # point's displacement exactly: its row of weights becomes the 0/1
        # indicator of the coinciding control point(s).
        hit = dist == 0.0
        dist[hit] = 1.0  # placeholder so the reciprocal below is finite
        weights = 1. / dist
        on_point = hit.any(dim=1)
        weights[on_point] = hit[on_point].to(weights.dtype)

        offset = torch.matmul(weights, displ) / torch.sum(weights, dim=1, keepdim=True)
        return src_pts + offset
-
def IDW_torch(self, srcImg, input_points, output_points, power=3):
    """Warp ``srcImg`` with the PyTorch-based ``IDW_TORCH`` deformer.

    Args:
        srcImg: image array; only its first two dimensions (height, width)
            are read here, so 2-D grayscale input is accepted as well.
        input_points: ``(n, 2)`` array-like of ``(x, y)`` control points.
        output_points: ``(n, 2)`` array-like of displaced control points.
        power: exponent forwarded to ``IDW_TORCH``; defaults to the
            previously hard-coded value 3.

    Returns:
        The result of ``cv2.remap`` with bilinear interpolation.

    Side effects:
        Stores the float32 sampling maps on ``self.idwUX`` / ``self.idwUY``
        and prints the elapsed wall-clock time.
    """
    start = time.time()
    idw = IDW_TORCH(
        original_control_points=torch.from_numpy(np.asarray(input_points).astype(np.float32)),
        deformed_control_points=torch.from_numpy(np.asarray(output_points).astype(np.float32)),
        power=power)

    h, w = srcImg.shape[:2]
    # Flattened row-major pixel grid built directly on the compute device:
    # x cycles fastest, y is constant within each row.
    xs = torch.arange(w, dtype=torch.float32, device=idw.device).repeat(h)
    ys = torch.arange(h, dtype=torch.float32, device=idw.device).repeat_interleave(w)
    mesh = torch.stack([xs, ys], dim=1)

    new_mesh = idw(mesh).cpu().numpy()

    UX = new_mesh[:, 0].reshape(h, w).astype(np.float32)
    UY = new_mesh[:, 1].reshape(h, w).astype(np.float32)

    self.idwUX = UX
    self.idwUY = UY

    copyImg = cv2.remap(srcImg, UX, UY, interpolation=cv2.INTER_LINEAR)

    end = time.time()
    print("IDW TORCH time cost:{} s".format(end - start))
    return copyImg
上面所有代码中,input_points、output_points 这2个矩阵的维度都是(n,2),n表示点的个数,每一行是一个点的x,y坐标。只需要拿到人脸图片上变换前的坐标位置和变换后的坐标位置,调用上面的函数就可以。生成图片使用opencv自带函数cv2.remap,更加高效。
大眼瘦脸运行效果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。