赞
踩
模型地址:https://huggingface.co/CompVis/stable-diffusion-v1-4/tree/main/vae
主要参考:Using-Stable-Diffusion-VAE-to-encode-satellite-images
将上述 VAE 目录中的模型文件下载到本地,然后运行下面的代码:
"""Round-trip an image through a Stable Diffusion VAE.

The script encodes an image to latents, decodes the latents back to an image,
saves the reconstruction, and saves an original/reconstruction side-by-side
picture for visual comparison.
"""
from diffusers import AutoencoderKL
from PIL import Image
import torch
import torchvision.transforms as T

# Change "./huggingface/stable-diffusion-v1-4/vae" to any local directory that
# holds the downloaded VAE config and weights.
vae = AutoencoderKL.from_pretrained(
    "./huggingface/stable-diffusion-v1-4/vae", variant="fp16"
)

# Latents are multiplied by this factor after encoding and divided by it
# again before decoding.
LATENT_SCALE = 0.18215


def encode_img(input_img):
    """Encode a single image into a batch holding one scaled latent.

    Args:
        input_img: Anything ``torchvision.transforms.ToTensor`` accepts (the
            script passes a PIL image). PIL images that are not in RGB mode
            are converted to RGB first so the tensor has three channels.

    Returns:
        A latent tensor sampled from the encoder's output distribution and
        multiplied by ``LATENT_SCALE``. The original author notes a shape of
        (1, 4, 64, 64) for their input; the spatial size depends on the image.
    """
    if isinstance(input_img, Image.Image) and input_img.mode != "RGB":
        # Grayscale / RGBA / CMYK inputs would otherwise produce a tensor
        # with the wrong number of channels for the encoder.
        input_img = input_img.convert("RGB")

    pixels = T.ToTensor()(input_img)
    if pixels.dim() < 4:
        # Add the batch axis expected by the encoder.
        pixels = pixels.unsqueeze(0)

    with torch.no_grad():
        # Rescale pixel values from [0, 1] to [-1, 1] before encoding.
        posterior = vae.encode(pixels * 2 - 1)
    return LATENT_SCALE * posterior.latent_dist.sample()


def decode_img(latents):
    """Decode a batch containing one latent back into a PIL image.

    Args:
        latents: Latent tensor as returned by ``encode_img`` (batch size 1).

    Returns:
        The reconstructed image as a ``PIL.Image.Image``.
    """
    latents = (1 / LATENT_SCALE) * latents  # undo the scaling from encode_img
    with torch.no_grad():
        decoded = vae.decode(latents).sample

    # Map the decoder output from [-1, 1] back to [0, 1].
    decoded = (decoded / 2 + 0.5).clamp(0, 1)
    decoded = decoded.detach().cpu()
    # Drop only the batch axis; a bare squeeze() would also remove any other
    # dimension that happens to have size 1.
    return T.ToPILImage()(decoded.squeeze(0))


def main():
    """Reconstruct ``huge.jpg`` and save it alone and next to the original."""
    # Load the example image; convert() forces the pixel data to be read so
    # the file handle can be closed right away.
    with Image.open("huge.jpg") as source:
        input_img = source.convert("RGB")
    original_size = input_img.size
    print('original_size', original_size)

    # Encode and decode the image.
    latents = encode_img(input_img)
    reconstructed_img = decode_img(latents)

    # Save the reconstructed image exactly as the decoder produced it.
    reconstructed_img.save("reconstructed_example2.jpg")

    # The canvas below is laid out from the original size, so make sure the
    # right-hand picture matches it. NOTE(review): the decoder output can
    # differ in size from the input, e.g. when the input dimensions are not
    # multiples of the VAE's downsampling factor -- confirm for your model.
    right_panel = reconstructed_img
    if right_panel.size != original_size:
        right_panel = right_panel.resize(original_size)

    # Concatenate the original and reconstructed images horizontally.
    concatenated_img = Image.new('RGB', (original_size[0] * 2, original_size[1]))
    concatenated_img.paste(input_img, (0, 0))
    concatenated_img.paste(right_panel, (original_size[0], 0))

    # Save the concatenated image.
    concatenated_img.save("concatenated_example2.jpg")


if __name__ == '__main__':
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。