当前位置:   article > 正文

(病理图像读写)病理图像(whole slide images,WSI)的读写(.svs, .tiff),使用openslide,和pyvips以及matlab_svs格式图处理python

svs格式图处理python

更新!感谢ONE_SIX_MIX 的分享。我找到了python写SVS文件的方法。

代码跑不通的可以直接作用:

mrxs转svs

import time
import numpy as np
from math import ceil
import tifffile
from tqdm import tqdm
import openslide
import cv2

# 设置tile的大小。不想设置的太小,所以是512
TILE_SIZE = 512
# 为了保证图像的整体大小是tile大小的整倍数,不然会出错。
def up_to512_manifi(hw):
    return int(ceil(hw[0]/TILE_SIZE)*TILE_SIZE), int(ceil(hw[1]/TILE_SIZE)*TILE_SIZE)
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):

    """
        这个函数用来按照输入的size和patch大小,生成每个patch所在原始的size上的位置

        keep_last_size:表示当size不能整除patch的size的时候,最后一个patch要不要保持输入的img_size
        
        返回:
            一个np数组,每个成员表示当前patch所在的x和y的起点和终点如:
                [[x_begin,x_end,y_begin,y_end],...]
    """
    height, width = ori_size[:2]
    index = []
    if height<img_size or width<img_size: 
        warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
        return index
        
    for h in range(0, height+1, stride):
        xe = h+img_size
        if h+img_size>height:
            xe = height
            h = xe-img_size if keep_last_size else h

        for w in range(0, width+1, stride):
            ye = w+img_size
            if w+img_size>width:
                ye = width
                w = ye-img_size if keep_last_size else w
            index.append(np.array([h, xe, w, ye]))

            if ye==width:
                break
        if xe==height:
            break
    return index
# 构建图像的迭代器。这是因为这个库要求输入是一个迭代器。每次迭代器输出一个tile的图像。
def gen_im(wsi, index):
    ind = 0
    while True:
    	_ind = index[ind]
        temp_img = wsi[ind[0]:ind[1], ind[2]:ind[3]]
        ind+=1
        yield temp_img
# 获取每个tile的索引输出是索引的数组。可以通过img = image[ind[0]:ind[1], ind[2]:ind[3]]的形式获取单个tile图像
def gen_patches_index(ori_size, *, img_size=224, stride = 224,keep_last_size = False):
    height, width = ori_size[:2]
    index = []
    if height<img_size or width<img_size: 
        warn("input size is ({} {}), small than img_size:{}".format(height, width, img_size))
        return index
        
    for h in range(0, height+1, stride):
        xe = h+img_size
        if h+img_size>height:
            xe = height
            h = xe-img_size if keep_last_size else h
        for w in range(0, width+1, stride):
            ye = w+img_size
            if w+img_size>width:
                ye = width
                w = ye-img_size if keep_last_size else w
            index.append(np.array([h, xe, w, ye]))

            if ye==width:
                break
        if xe==height:
            break
    return index

def gen_pyramid_tiff(image , out_file):
    svs_desc = 'Aperio Image Library Fake\nABC |AppMag = {mag}|Filename = {filename}|MPP = {mpp}'
    label_desc = 'Aperio Image Library Fake\nlabel {W}x{H}'
    macro_desc = 'Aperio Image Library Fake\nmacro {W}x{H}'
    odata = openslide.open_slide(in_file)
    # 指定mpp值
    mpp = 0.25
    # 指定缩放倍率
    mag = 40
    # 换算mpp值到分辨率
    resolution = [10000 / mpp, 10000 / mpp, 'CENTIMETER']
    # 指定图像名字
    filename = odata.properties['aperio.Filename']

    # tile 大小.tiff图像是分成多个tile存储的,所以需要设置我们想要的tile大小
    tile_hw = np.int64([TILE_SIZE, TILE_SIZE])
    width, height = image.shape[0:2]
    # 要需要的金字塔分辨率
    multi_hw = np.int64([(width, height), (width//2, height//2), 
                         (width//4, height//4), (width//8, height//8),
                         (width//16, height//16),
                         (width//32, height//32),
                         (width//64, height//64)])

    # 尝试写入 svs 格式
    with tifffile.TiffWriter(out_file, bigtiff=True) as tif:
        thw = tile_hw.tolist()
        # outcolorspace 要保持为默认的 YCbCr,不能使用rgb,否则颜色会异常
        # 95 是默认JPEG质量,值域是 0-100,值越大越接近无损
        compression = ['JPEG', 90, dict(outcolorspace='YCbCr')]
        kwargs = dict(subifds=0, photometric='rgb', planarconfig='CONTIG', compression=compression, dtype=np.uint8, metadata=None)

        for i, hw in enumerate(multi_hw):
            hw =  up_to16_manifi(hw)
            temp_wsi = cv2.resize(image, (hw[1], hw[0]))
            # 将需要存入的图像扩展到TILE_SIZE的倍数
            new_x, new_y = up_to512_manifi(hw)
            new_wsi = np.ones((new_x, new_y, 3),dtype=np.uint8)*255
            new_wsi[0:hw[0], 0:hw[1],:] = temp_wsi
            # 设置每个tile的索引
            index = gen_patches_index((new_x, new_y),img_size=TILE_SIZE,stride=TILE_SIZE)
            # 构建图像的迭代器。
            gen = gen_im(new_wsi, index)

            if i == 0:
                desc = svs_desc.format(mag=mag, filename=filename, mpp=mpp)
                # tile是每个小块的大小,它的长宽必须能被16整除
                tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description=desc, **kwargs)
            else:
                tif.write(data=gen, shape=(*hw, 3), tile=thw[::-1], resolution=resolution, description='', **kwargs)

# img是一个npmpy图像(h,w,3),save_path是存储的路径 .../a.svs
gen_pyramid_tiff(img, save_path)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135

以下是原文:
今天对病理图像的读写进行一个小介绍。先说结论,如果不需要读很小倍率的图像,pyvips比openslide快很多,写SVS的话用matlab。

病理图像的存储有很多种格式,可以看如下图:
在这里插入图片描述
在这里插入图片描述

常见的是.svs, .tif,.mrxs。今天就只讲.svs和.tif。.mrxs手上没有数据。但依然可以用openslide读取,以后有机会再展开。
在python上读取.svs和.tif的方式是一样的,因为.svs本质上就是一个tif,采用的数据格式和压缩方式都是一样的。示意图图下:
在这里插入图片描述

简单的读取可以如下:

from openslide import OpenSlide
filePath = 'file.tif'
slide   = OpenSlide(filePath)
#查看文件的金字塔结构,从中选取需要读取的层。
print(slide.level_dimensions)
# 输出结果如下:
# ((90624, 214528), (45312, 107264), (22656, 53632), (11328, 26816), (5664, 13408), (2832, 6704), (1416, 3352), (708, 1676), (354, 838), (177, 419))
#假如说需要读取40X的层,那就是第0层,代码如下:

img_40X = np.array(location=slide.read_region((0,0),level=0,size=slide.level_dimensions[0]),dtype = np.uint8)[:,:,0:3]
# level表示要读的层数,我们可以从slide.level_dimensions查看需要的层
# location表示读取时图像的起始坐标,需要注意的是,不管我们读取哪个level的图像,这个点都填的是40X的坐标。
#比如说我们读取40X下起始点是(100,100)终点是(324,324)大小的patch,那location=(100,100),size=(224,224),level=0
img_40X_small = np.array(location=slide.read_region((100,100),level=0,size=(324,324)),dtype = np.uint8)[:,:,0:3]
#如果需要读取10X的一样视野的图像那个,size就应该是(224//4,224//4), level=2, 但location依然是(100,100)
img_10X_small = np.array(location=slide.read_region((100,100),level=2,size=(224//4,224//4)),dtype = np.uint8)[:,:,0:3]
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

location的好处是允许我们自定义读取的起始点,有时候我们需要排除wsi中的空白,就可以根据所画的ROI来决定起始点和size。
另一种方式是使用pyvips来读取,这个就简单很多:

import pyvips
svs_file = 'file.svs'
# level 表示要读取的层数
img = pyvips.Image.new_from_file(svs_file, level=0)
# crop(起始点x,起始点y,长,宽)
img2 = img.crop(x1, y1, h, w)
img2 = np.asarray(img2, dtype=np.uint8)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7

pyvips的读取速度要比openslide快很多,如果不是需要读取level很高的图像,我都推荐用pyvips。我用工作站读取了一个(49821, 93298)大小的WSI,openslide花费162.94秒,pyvips花费42.79秒。越大的图像pyvips就会越快。

如果要写tif的话,我个人推荐用matlab。在python上写多层的tif文件我没有成功过,可能是因为python对tif的支持不好。写的代码如下:

function writesvs(imgdata, file_name)
    size2 = fix(size(imgdata));
    img_fist = imresize(imgdata, [size2(1) size2(2)]);
    
    size2 = fix(size(imgdata)/2);
    img_half = imresize(imgdata, [size2(1) size2(2)]);
    %
    size3 = fix(size(imgdata)/4);
    img_third = imresize(imgdata, [size3(1) size3(2)]);

    size4 = fix(size(imgdata)/8);
    img_four = imresize(imgdata, [size4(1) size4(2)]);
    %img_four = imgdata(1:64:end,1:64:end,:);

    t = Tiff(file_name,'w');
	%写40X的图像
	tagstruct.ImageDescription = "Aperio Image Library |AppMag = 40|MPP = 0.265018";
	tagstruct.ImageLength = size(img_fist ,1);
	tagstruct.ImageWidth = size(img_fist ,2);
	tagstruct.Photometric = Tiff.Photometric.RGB;
	tagstruct.BitsPerSample = 8;
	tagstruct.SamplesPerPixel = 3;
	tagstruct.RowsPerStrip = 16;
	tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
	tagstruct.Software = 'MATLAB';
	tagstruct.TileWidth = 240;
	tagstruct.TileLength = 240;
	tagstruct.Compression = 7;
	tagstruct.JPEGQuality = 80;
	setTag(t,tagstruct)

	write(t,img_fist );
	writeDirectory(t);

	%写20X的图像
    tagstruct2.ImageDescription = "Aperio Image Library |AppMag = 20|MPP = 0.51";
    tagstruct2.ImageLength = size(img_half,1);
    tagstruct2.ImageWidth = size(img_half,2);
    tagstruct2.Photometric = Tiff.Photometric.RGB;
    tagstruct2.BitsPerSample = 8;
    tagstruct2.SamplesPerPixel = 3;
    tagstruct2.RowsPerStrip = 16;
    tagstruct2.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct2.Software = 'MATLAB';
    tagstruct2.TileWidth = 240;
    tagstruct2.TileLength = 240;
    tagstruct2.Compression = 7;
    tagstruct2.JPEGQuality = 80;
    setTag(t,tagstruct2)

    write(t,img_half);
    writeDirectory(t);
    
	%写10X的图像
    tagstruct3.ImageDescription = "Aperio Image Library";
    tagstruct3.ImageLength = size(img_third,1);
    tagstruct3.ImageWidth = size(img_third,2);
    tagstruct3.Photometric = Tiff.Photometric.RGB;
    tagstruct3.BitsPerSample = 8;
    tagstruct3.SamplesPerPixel = 3;
    tagstruct3.RowsPerStrip = 16;
    tagstruct3.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct3.Software = 'MATLAB';
    tagstruct3.TileWidth = 240;
    tagstruct3.TileLength = 240;
    tagstruct3.Compression = 7;
    tagstruct3.JPEGQuality = 80;
    setTag(t,tagstruct3)

    write(t,img_third);
    writeDirectory(t);
    
	%写5X的图像
    tagstruct4.ImageDescription = "Aperio Image Library";
    tagstruct4.ImageLength = size(img_four,1);
    tagstruct4.ImageWidth = size(img_four,2);
    tagstruct4.Photometric = Tiff.Photometric.RGB;
    tagstruct4.BitsPerSample = 8;
    tagstruct4.SamplesPerPixel = 3;
    tagstruct4.RowsPerStrip = 16;
    tagstruct4.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky;
    tagstruct4.Software = 'MATLAB';
    tagstruct4.TileWidth = 240;
    tagstruct4.TileLength = 240;
    tagstruct4.Compression = 7;
    tagstruct4.JPEGQuality = 80;
    setTag(t,tagstruct4)

    write(t,img_four);
    close(t);
end
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号