
kgsl_ioctl_gpumem_alloc

IOCTL_KGSL_GPUMEM_ALLOC is dispatched through the kgsl_ioctl_funcs table:
static const struct kgsl_ioctl kgsl_ioctl_funcs[] = {
    ...
    // ioctl command: IOCTL_KGSL_GPUMEM_ALLOC
    // ioctl handler: kgsl_ioctl_gpumem_alloc
	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC,
			kgsl_ioctl_gpumem_alloc),
    ...
};

1. kgsl_gpumem_alloc

struct kgsl_gpumem_alloc {
    // returned GPU memory address
	unsigned long gpuaddr; /* output param */
    // requested allocation size
	__kernel_size_t size;
    // allocation flags
	unsigned int flags;
};

// ioctl argument: struct kgsl_gpumem_alloc
#define IOCTL_KGSL_GPUMEM_ALLOC \
	_IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc)
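
For reference, here is how the allocation is driven from user space. A minimal sketch, assuming the UAPI header linux/msm_kgsl.h and the common device node /dev/kgsl-3d0 (both vary by platform):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>	/* IOCTL_KGSL_GPUMEM_ALLOC, struct kgsl_gpumem_alloc */

int main(void)
{
	struct kgsl_gpumem_alloc param = {
		.size  = 4096,	/* one page */
		.flags = 0,	/* default cache mode, no special flags */
	};
	int fd = open("/dev/kgsl-3d0", O_RDWR);

	if (fd < 0)
		return 1;

	/* on success the kernel fills in gpuaddr and the final size/flags */
	if (ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC, &param) == 0)
		printf("gpuaddr=0x%lx size=%zu flags=0x%x\n",
		       param.gpuaddr, param.size, param.flags);

	close(fd);
	return 0;
}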

2. kgsl_ioctl_gpumem_alloc

long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv,
		unsigned int cmd, void *data)
{
	struct kgsl_device *device = dev_priv->device;
    // ioctl command argument
	struct kgsl_gpumem_alloc *param = data;
    
    // kgsl_mem_entry describes a userspace memory allocation [see section 2.1]
	struct kgsl_mem_entry *entry;
	uint64_t flags = param->flags;

	/*
	 * On 64 bit kernel, secure memory region is expanded and
	 * moved to 64 bit address, 32 bit apps can not access it from
	 * this IOCTL.
	 */
	if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task()
			&& test_bit(KGSL_MMU_64BIT, &device->mmu.features))
		return -EOPNOTSUPP;

	/* Legacy functions don't support these advanced features */
	flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	if (is_compat_task())
		flags |= KGSL_MEMFLAGS_FORCE_32BIT;

    // create the kgsl_mem_entry [see section 2.2]
	entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags);

	if (IS_ERR(entry))
		return PTR_ERR(entry);

    // write the results back into the ioctl argument
	param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
	param->size = (size_t) entry->memdesc.size;
	param->flags = (unsigned int) entry->memdesc.flags;

	/* Put the extra ref from kgsl_mem_entry_create() */
    // drop the reference; if the count reaches 0 the kgsl_mem_entry is freed via kgsl_mem_entry_destroy
	kgsl_mem_entry_put(entry);

	return 0;
}

2.1 kgsl_mem_entry

/*
 * struct kgsl_mem_entry - a userspace memory allocation
 */
struct kgsl_mem_entry {
    // Currently userspace can only hold a single reference count but the kernel may hold more
	struct kref refcount;
    // description of the memory[见2.1.1节]
	struct kgsl_memdesc memdesc;
    // type-specific data, such as the dma-buf attachment pointer
	void *priv_data;
    // rb_node for the gpu address lookup rb tree
	struct rb_node node;
    // idr index for this entry, can be used to find memory that does not have a valid GPU address
	unsigned int id;
    // the process that owns this memory
	struct kgsl_process_private *priv;
    // if !0, userspace requested that this memory be freed, but there are still references to it
	int pending_free;
    // String containing user specified metadata for the entry
	char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1];
    // used to schedule a kgsl_mem_entry_put in atomic contexts
	struct work_struct work;
	/**
	 * @map_count: Count how many vmas this object is mapped in - used for
	 * debugfs accounting
	 */
    // number of VMAs this entry is mapped into
	atomic_t map_count;
};

2.1.1 kgsl_memdesc

/**
 * struct kgsl_memdesc - GPU memory object descriptor
 */
struct kgsl_memdesc {
    // Pointer to the pagetable that the object is mapped in
	struct kgsl_pagetable *pagetable;
    // Kernel virtual address
	void *hostptr;
    // Number of threads using hostptr
	unsigned int hostptr_count;
    // GPU virtual address
	uint64_t gpuaddr;
    // Physical address of the memory object
	phys_addr_t physaddr;
    // Size of the memory object
	uint64_t size;
    // Internal flags and settings
	unsigned int priv;
	struct sg_table *sgt;
    // Function hooks for the memdesc memory type[见2.1.2节]
	const struct kgsl_memdesc_ops *ops;
    // Flags set from userspace
	uint64_t flags;
	struct device *dev;
    // dma attributes for this memory
	unsigned long attrs;
    // An array of pointers to allocated pages
	struct page **pages;
    // Total number of pages allocated
	unsigned int page_count;
	/*
	 * @lock: Spinlock to protect the gpuaddr from being accessed by
	 * multiple entities trying to map the same SVM region at once
	 */
	spinlock_t lock;
	/** @shmem_filp: Pointer to the shmem file backing this memdesc */
    // file backing the shared-memory (shmem) allocation
	struct file *shmem_filp;
	/** @ranges: rbtree base for the interval list of vbo ranges */
	struct rb_root_cached ranges;
	/** @ranges_lock: Mutex to protect the range database */
	struct mutex ranges_lock;
	/** @gmuaddr: GMU VA if this is mapped in GMU */
	u32 gmuaddr;
};

2.1.2 kgsl_memdesc_ops

struct kgsl_memdesc_ops {
	unsigned int vmflags;
	vm_fault_t (*vmfault)(struct kgsl_memdesc *memdesc,
		struct vm_area_struct *vma, struct vm_fault *vmf);
	void (*free)(struct kgsl_memdesc *memdesc);
	int (*map_kernel)(struct kgsl_memdesc *memdesc);
	void (*unmap_kernel)(struct kgsl_memdesc *memdesc);
	/**
	 * @put_gpuaddr: Put away the GPU address and unmap the memory
	 * descriptor
	 */
	void (*put_gpuaddr)(struct kgsl_memdesc *memdesc);
};

2.2 gpumem_alloc_entry

struct kgsl_mem_entry *gpumem_alloc_entry(
		struct kgsl_device_private *dev_priv,
		uint64_t size, uint64_t flags)
{
	int ret;
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_mem_entry *entry;
	struct kgsl_device *device = dev_priv->device;
	u32 cachemode;

	/* For 32-bit kernel world nothing to do with this flag */
	if (BITS_PER_LONG == 32)
		flags &= ~((uint64_t) KGSL_MEMFLAGS_FORCE_32BIT);

	if (flags & KGSL_MEMFLAGS_VBO)
		return gpumem_alloc_vbo_entry(dev_priv, size, flags);

	flags &= KGSL_MEMFLAGS_GPUREADONLY
		| KGSL_CACHEMODE_MASK
		| KGSL_MEMTYPE_MASK
		| KGSL_MEMALIGN_MASK
		| KGSL_MEMFLAGS_USE_CPU_MAP
		| KGSL_MEMFLAGS_SECURE
		| KGSL_MEMFLAGS_FORCE_32BIT
		| KGSL_MEMFLAGS_IOCOHERENT
		| KGSL_MEMFLAGS_GUARD_PAGE;

	/* Return not supported error if secure memory isn't enabled */
	if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device))
		return ERR_PTR(-EOPNOTSUPP);

	flags = cap_alignment(device, flags);

	/* For now only allow allocations up to 4G */
	if (size == 0 || size > UINT_MAX)
		return ERR_PTR(-EINVAL);

    // filter the cache mode flags
	flags = kgsl_filter_cachemode(flags);

    // the code above validates and adjusts the flags;
    // now create the kgsl_mem_entry [see section 2.2.1]
	entry = kgsl_mem_entry_create();
	if (entry == NULL)
		return ERR_PTR(-ENOMEM);

    // if the buffer is cached, default it to I/O coherency when so configured
	if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) &&
		kgsl_cachemode_is_cached(flags))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

    // allocate the user memory [see section 2.2.2]
	ret = kgsl_allocate_user(device, &entry->memdesc,
		size, flags, 0);
	if (ret != 0)
		goto err;

    // attach and map the allocation [see section 2.2.7]
	ret = kgsl_mem_entry_attach_and_map(device, private, entry);
	if (ret != 0) {
		kgsl_sharedmem_free(&entry->memdesc);
		goto err;
	}

    // get the cache mode
	cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc);
	/*
	 * Secure buffers cannot be reclaimed. Avoid reclaim of cached buffers
	 * as we could get request for cache operations on these buffers when
	 * they are reclaimed.
	 */
    // decide whether the memdesc's pages can be reclaimed directly
	if (!(flags & KGSL_MEMFLAGS_SECURE) &&
			!(cachemode == KGSL_CACHEMODE_WRITEBACK) &&
			!(cachemode == KGSL_CACHEMODE_WRITETHROUGH))
		entry->memdesc.priv |= KGSL_MEMDESC_CAN_RECLAIM;

    // determine the buffer type of the kgsl_memdesc, then account its size
    // in the stats array of kgsl_process_private
	kgsl_process_add_stats(private,
			kgsl_memdesc_usermem_type(&entry->memdesc),
			entry->memdesc.size);
	trace_kgsl_mem_alloc(entry);

    // publish the kgsl_mem_entry to kgsl_process_private so other operations can access it
	kgsl_mem_entry_commit_process(entry);
	return entry;
err:
	kfree(entry);
	return ERR_PTR(ret);
}

2.2.1 kgsl_mem_entry_create

static struct kgsl_mem_entry *kgsl_mem_entry_create(void)
{
    // allocate a zeroed kgsl_mem_entry
	struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

	if (entry != NULL) {
        // initialize the refcount to 1
		kref_init(&entry->refcount);
		/* put this ref in userspace memory alloc and map ioctls */
        // take an extra reference; it is dropped at the end of the alloc/map ioctls
		kref_get(&entry->refcount);
        // no VMA mappings yet
		atomic_set(&entry->map_count, 0);
	}

	return entry;
}
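
The two references set up here define the entry's lifetime: it leaves kgsl_mem_entry_create() with a refcount of 2, kgsl_ioctl_gpumem_alloc() drops one at the end, and the final reference is only dropped when the memory is freed. A toy userspace sketch of the same pattern (hypothetical names, a plain counter instead of the kernel's kref API):

#include <stdio.h>
#include <stdlib.h>

struct entry {
	int refcount;
};

/* analogue of kgsl_mem_entry_create(): one ref because the object is
 * alive, plus one temporary ref for the duration of the alloc ioctl */
static struct entry *entry_create(void)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (e) {
		e->refcount = 1;	/* kref_init() */
		e->refcount++;		/* kref_get() */
	}
	return e;
}

/* analogue of kgsl_mem_entry_put(): destroy on the last reference */
static void entry_put(struct entry *e)
{
	if (--e->refcount == 0) {
		printf("last reference dropped, destroying entry\n");
		free(e);
	}
}

int main(void)
{
	struct entry *e = entry_create();

	entry_put(e);	/* end of the alloc ioctl: 2 -> 1 */
	entry_put(e);	/* later free path: 1 -> 0, entry destroyed */
	return 0;
}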

2.2.2 kgsl_allocate_user

enum kgsl_mmutype {
    // IOMMU supported
	KGSL_MMU_TYPE_IOMMU = 0,
	KGSL_MMU_TYPE_NONE
};

int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc,
		u64 size, u64 flags, u32 priv)
{
    // without an IOMMU the allocation must be physically contiguous
	if (device->mmu.type == KGSL_MMU_TYPE_NONE)
		return kgsl_alloc_contiguous(device, memdesc, size, flags,
			priv);
	else if (flags & KGSL_MEMFLAGS_SECURE)
		return kgsl_allocate_secure(device, memdesc, size, flags, priv);

    // page-based allocation [see section 2.2.3]
	return kgsl_alloc_pages(device, memdesc, size, flags, priv);
}

2.2.3 kgsl_alloc_pages

static int kgsl_alloc_pages(struct kgsl_device *device,
		struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv)
{
	struct page **pages;
	int count;

    // page-align the size
	size = PAGE_ALIGN(size);

    // validate the size
	if (!size || size > UINT_MAX)
		return -EINVAL;

    // initialize the kgsl_memdesc from the flags [see section 2.2.4]
	kgsl_memdesc_init(device, memdesc, flags);
    // priv is 0 on this call path
	memdesc->priv |= priv;

    // #define KGSL_MEMDESC_SYSMEM BIT(9)
	if (priv & KGSL_MEMDESC_SYSMEM) {
		memdesc->ops = &kgsl_system_ops;
		count = kgsl_system_alloc_pages(size, &pages, device->dev);
	} else {
        // install the kgsl_memdesc_ops for page-based allocations [see section 2.2.5]
		memdesc->ops = &kgsl_page_ops;
        // allocate the pages and return them via the pages array [see section 2.2.6]
		count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev);
	}

	if (count < 0)
		return count;

    // array of allocated page pointers
	memdesc->pages = pages;
    // allocation size
	memdesc->size = size;
    // number of allocated pages
	memdesc->page_count = count;

    // update the global kgsl memory statistics: add the allocation size to kgsl_driver.stats.page_alloc
	KGSL_STATS_ADD(size, &kgsl_driver.stats.page_alloc,
		&kgsl_driver.stats.page_alloc_max);

	return 0;
}

2.2.4 kgsl_memdesc_init

void kgsl_memdesc_init(struct kgsl_device *device,
			struct kgsl_memdesc *memdesc, uint64_t flags)
{
	struct kgsl_mmu *mmu = &device->mmu;
	unsigned int align;

    // zero-initialize the kgsl_memdesc
	memset(memdesc, 0, sizeof(*memdesc));
	/* Turn off SVM if the system doesn't support it */
    // SVM requires per-process pagetables (KGSL_MMU_IOPGTABLE)
	if (!kgsl_mmu_is_perprocess(mmu))
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Secure memory disables advanced addressing modes */
	if (flags & KGSL_MEMFLAGS_SECURE)
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Disable IO coherence if it is not supported on the chip */
    // check whether the target supports I/O coherency
	if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) {
		flags &= ~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT);

		WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT),
			"I/O coherency is not supported on this target\n");
	} else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

	/*
	 * We can't enable I/O coherency on uncached surfaces because of
	 * situations where hardware might snoop the cpu caches which can
	 * have stale data. This happens primarily due to the limitations
	 * of dma caching APIs available on arm64
	 */
	if (!kgsl_cachemode_is_cached(flags))
		flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT);

	if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) ||
		(flags & KGSL_MEMFLAGS_GUARD_PAGE))
		memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE;

	if (flags & KGSL_MEMFLAGS_SECURE)
		memdesc->priv |= KGSL_MEMDESC_SECURE;

    // record the final flags
	memdesc->flags = flags;
    // record the device that owns this memory
	memdesc->dev = &device->pdev->dev;

    // alignment: at least one page
	align = max_t(unsigned int,
		kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE));
    // encode the alignment back into the memdesc flags
	kgsl_memdesc_set_align(memdesc, align);

	spin_lock_init(&memdesc->lock);
}
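
The alignment ends up packed into the flags word as a log2 value. A small sketch of that encoding, assuming the UAPI values KGSL_MEMALIGN_SHIFT = 16 and KGSL_MEMALIGN_MASK = 0x00FF0000 from msm_kgsl.h (verify against your kernel tree):

#include <stdio.h>
#include <stdint.h>

/* assumed values from include/uapi/linux/msm_kgsl.h */
#define KGSL_MEMALIGN_SHIFT	16
#define KGSL_MEMALIGN_MASK	0x00FF0000U

/* pack a log2 alignment into the flags word, in the spirit of
 * kgsl_memdesc_set_align() */
static uint64_t set_align(uint64_t flags, unsigned int align_log2)
{
	flags &= ~(uint64_t)KGSL_MEMALIGN_MASK;
	return flags | (((uint64_t)align_log2 << KGSL_MEMALIGN_SHIFT) &
			KGSL_MEMALIGN_MASK);
}

int main(void)
{
	/* request 64KB alignment: log2(64K) = 16 */
	uint64_t flags = set_align(0, 16);
	unsigned int align_log2 =
		(flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;

	printf("flags=0x%llx -> alignment=%u bytes\n",
	       (unsigned long long)flags, 1u << align_log2);
	return 0;
}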

2.2.5 kgsl_page_ops

static const struct kgsl_memdesc_ops kgsl_page_ops = {
	.free = kgsl_free_pages,
	.vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP,
	.vmfault = kgsl_paged_vmfault,
	.map_kernel = kgsl_paged_map_kernel,
	.unmap_kernel = kgsl_paged_unmap_kernel,
	.put_gpuaddr = kgsl_unmap_and_put_gpuaddr,
};

2.2.6 _kgsl_alloc_pages

static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc,
		u64 size, struct page ***pages, struct device *dev)
{
	int count = 0;
    // convert the size into a page count
	int npages = size >> PAGE_SHIFT;
    // kvcalloc first attempts a physically contiguous (kmalloc) allocation
    // and falls back to a non-contiguous (vmalloc) allocation on failure
	struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL);
	u32 page_size, align;
	u64 len = size;

	if (!local)
		return -ENOMEM;

    // returns 0 when the shmem file is set up or CONFIG_QCOM_KGSL_USE_SHMEM is not configured [see section 2.2.6.1]
	count = kgsl_memdesc_file_setup(memdesc, size);
	if (count) {
		kvfree(local);
		return count;
	}

	/* Start with 1MB alignment to get the biggest page we can */
	align = ilog2(SZ_1M);

    // compute the page size from the remaining length
	page_size = kgsl_get_page_size(len, align);

	while (len) {
        // allocate via kgsl_pool_alloc_page and return the page through the local array
		int ret = kgsl_alloc_page(&page_size, &local[count],
			npages, &align, count, memdesc->shmem_filp, dev);

		if (ret == -EAGAIN)
			continue;
		else if (ret <= 0) {
			int i;

			for (i = 0; i < count; ) {
				int n = 1 << compound_order(local[i]);

				kgsl_free_page(local[i]);
				i += n;
			}
			kvfree(local);

			if (!kgsl_sharedmem_noretry_flag)
				pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n",
					(size - len) >> 10, size >> 10);

			if (memdesc->shmem_filp)
				fput(memdesc->shmem_filp);

			return -ENOMEM;
		}

		count += ret;
		npages -= ret;
		len -= page_size;

		page_size = kgsl_get_page_size(len, align);
	}

    // return the page array to the caller
	*pages = local;

	return count;
}
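
The loop covers the remaining length with the largest page size available and steps down as the remainder shrinks, which is how large allocations end up backed mostly by big pages. A standalone sketch of the descending-size strategy, assuming hypothetical pool page sizes of 1MB/64KB/8KB/4KB (the real selection lives in kgsl_get_page_size, not shown here):

#include <stdio.h>
#include <stddef.h>

/* assumed pool page sizes, largest first */
static const size_t pool_sizes[] = { 1 << 20, 1 << 16, 1 << 13, 1 << 12 };

/* pick the largest pool size that still fits the remaining length,
 * playing the role of kgsl_get_page_size() in the loop above */
static size_t pick_page_size(size_t len)
{
	for (size_t i = 0; i < sizeof(pool_sizes) / sizeof(pool_sizes[0]); i++)
		if (len >= pool_sizes[i])
			return pool_sizes[i];
	return 1 << 12;	/* 4KB minimum */
}

int main(void)
{
	size_t len = (1 << 20) + (1 << 16) + 4096;	/* 1MB + 64KB + 4KB */

	while (len) {
		size_t chunk = pick_page_size(len);

		printf("allocate a %zu-byte chunk\n", chunk);
		len -= chunk;
	}
	return 0;
}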
2.2.6.1 kgsl_memdesc_file_setup
// when CONFIG_QCOM_KGSL_USE_SHMEM is set, kgsl backs allocations with shmem
#ifdef CONFIG_QCOM_KGSL_USE_SHMEM
static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size)
{
	int ret;

    // create an unlinked shmem file named "kgsl-3d0" of the requested size to back this allocation
	memdesc->shmem_filp = shmem_file_setup("kgsl-3d0", size,
			VM_NORESERVE);
	if (IS_ERR(memdesc->shmem_filp)) {
		ret = PTR_ERR(memdesc->shmem_filp);
		pr_err("kgsl: unable to setup shmem file err %d\n",
				ret);
		memdesc->shmem_filp = NULL;
		return ret;
	}

	return 0;
}
#else
static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size)
{
	return 0;
}
#endif

2.2.7 kgsl_mem_entry_attach_and_map

/*
 * Attach the memory object to a process by (possibly) getting a GPU address and
 * (possibly) mapping it
 */
static int kgsl_mem_entry_attach_and_map(struct kgsl_device *device,
		struct kgsl_process_private *process,
		struct kgsl_mem_entry *entry)
{
	struct kgsl_memdesc *memdesc = &entry->memdesc;
	int ret;

    // attach the entry to the process [see section 2.2.7.1]
	ret = kgsl_mem_entry_attach_to_process(device, process, entry);
	if (ret)
		return ret;

	if (memdesc->gpuaddr) {
		/*
		 * Map the memory if a GPU address is already assigned, either
		 * through kgsl_mem_entry_attach_to_process() or via some other
		 * SVM process
		 */
		ret = kgsl_mmu_map(memdesc->pagetable, memdesc);

		if (ret) {
			kgsl_mem_entry_detach_process(entry);
			return ret;
		}
	}

	kgsl_memfree_purge(memdesc->pagetable, memdesc->gpuaddr,
		memdesc->size);

	return ret;
}
2.2.7.1 kgsl_mem_entry_attach_to_process
static int kgsl_mem_entry_attach_to_process(struct kgsl_device *device,
		struct kgsl_process_private *process,
		struct kgsl_mem_entry *entry)
{
	struct kgsl_memdesc *memdesc = &entry->memdesc;
	int ret, id;

    // take a reference on the kgsl_process_private
	ret = kgsl_process_private_get(process);
	if (!ret)
		return -EBADF;

	/* Assign a gpu address */
    // check whether the allocation reuses the CPU mapping [see section 2.2.7.2] and whether an IOMMU is present
	if (!kgsl_memdesc_use_cpu_map(memdesc) &&
		kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_NONE) {
        // GPU pagetable [see section 2.2.7.3]
		struct kgsl_pagetable *pagetable;

        // pick the secure pagetable or the per-process pagetable
		pagetable = kgsl_memdesc_is_secured(memdesc) ?
			device->mmu.securepagetable : process->pagetable;

        // assign a GPU virtual address [see section 2.2.7.4]
		ret = kgsl_mmu_get_gpuaddr(pagetable, memdesc);
		if (ret) {
			kgsl_process_private_put(process);
			return ret;
		}
	}

	idr_preload(GFP_KERNEL);
	spin_lock(&process->mem_lock);
	/* Allocate the ID but don't attach the pointer just yet */
	id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT);
	spin_unlock(&process->mem_lock);
	idr_preload_end();

	if (id < 0) {
		if (!kgsl_memdesc_use_cpu_map(memdesc))
			kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc);
		kgsl_process_private_put(process);
		return id;
	}

    // record the id and the owning process in the kgsl_mem_entry
	entry->id = id;
	entry->priv = process;

	return 0;
}
2.2.7.2 kgsl_memdesc_use_cpu_map
#define KGSL_MEMFLAGS_USE_CPU_MAP (1ULL << 28)
#define KGSL_MEMFLAGS_SPARSE_PHYS (1ULL << 29)
#define KGSL_MEMFLAGS_SPARSE_VIRT (1ULL << 30)
#define KGSL_MEMFLAGS_IOCOHERENT  (1ULL << 31)
#define KGSL_MEMFLAGS_GUARD_PAGE  (1ULL << 33)
#define KGSL_MEMFLAGS_VBO         (1ULL << 34)

/*
 * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU?
 * @memdesc: the memdesc
 *
 * Return: true if the memdesc is using SVM mapping
 */
// based on the flags passed in from user space, decide whether the CPU and GPU share the same virtual mapping
static inline bool
kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
{
	return memdesc && (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP);
}
2.2.7.3 kgsl_pagetable
struct kgsl_pagetable {
	spinlock_t lock;
	struct kref refcount;
	struct list_head list;
	unsigned int name;
	struct kobject *kobj;
	struct work_struct destroy_ws;

	struct {
		atomic_t entries;
		atomic_long_t mapped;
		atomic_long_t max_mapped;
	} stats;
    // pagetable operation hooks (kgsl_mmu_pt_ops)
	const struct kgsl_mmu_pt_ops *pt_ops;
	uint64_t fault_addr;
	struct kgsl_mmu *mmu;
	/** @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr */
	struct rb_root rbtree;
	/** @va_start: Start of virtual range used in this pagetable */
	u64 va_start;
	/** @va_end: End of virtual range */
	u64 va_end;
	/**
	 * @svm_start: Start of shared virtual memory range. Addresses in this
	 * range are also valid in the process's CPU address space.
	 */
	u64 svm_start;
	/** @svm_end: end of 32 bit compatible range */
	u64 svm_end;
	/**
	 * @compat_va_start - Start of the "compat" virtual address range for
	 * forced 32 bit allocations
	 */
	u64 compat_va_start;
	/**
	 * @compat_va_end - End of the "compat" virtual address range for
	 * forced 32 bit allocations
	 */
	u64 compat_va_end;
	u64 global_base;
};
2.2.7.4 kgsl_mmu_get_gpuaddr
#define PT_OP_VALID(_pt, _field) \
	(((_pt) != NULL) && \
	 ((_pt)->pt_ops != NULL) && \
	 ((_pt)->pt_ops->_field != NULL))

/**
 * kgsl_mmu_get_gpuaddr - Assign a GPU address to the memdesc
 * @pagetable: GPU pagetable to assign the address in
 * @memdesc: mem descriptor to assign the memory to
 *
 * Return: 0 on success or negative on failure
 */
static inline int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable,
		 struct kgsl_memdesc *memdesc)
{
    // dispatch to pagetable->pt_ops->get_gpuaddr to assign the GPU address [see section 2.2.7.5]
	if (PT_OP_VALID(pagetable, get_gpuaddr))
		return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc);

	return -ENOMEM;
}
2.2.7.5 kgsl_iommu_get_gpuaddr
static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable,
		struct kgsl_memdesc *memdesc)
{
	int ret = 0;
	uint64_t addr, start, end, size;
	unsigned int align;

	if (WARN_ON(kgsl_memdesc_use_cpu_map(memdesc)))
		return -EINVAL;

	if (memdesc->flags & KGSL_MEMFLAGS_SECURE &&
			pagetable->name != KGSL_MMU_SECURE_PT)
		return -EINVAL;

    // size of the mapped region described by the kgsl_memdesc
	size = kgsl_memdesc_footprint(memdesc);

	align = max_t(uint64_t, 1 << kgsl_memdesc_get_align(memdesc),
			PAGE_SIZE);

	if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) {
		start = pagetable->compat_va_start;
		end = pagetable->compat_va_end;
	} else {
        // Start of virtual range used in this pagetable
		start = pagetable->va_start;
        // End of virtual range
		end = pagetable->va_end;
	}

	spin_lock(&pagetable->lock);
    // find an unmapped virtual address range [see section 2.2.7.6]
	addr = _get_unmapped_area(pagetable, start, end, size, align);

	if (addr == (uint64_t) -ENOMEM) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * This path is only called in a non-SVM path with locks so we can be
	 * sure we aren't racing with anybody so we don't need to worry about
	 * taking the lock
	 */
    // record the virtual address range in the pagetable [see section 2.2.7.7]
	ret = _insert_gpuaddr(pagetable, addr, size);
	if (ret == 0) {
        // record the assigned GPU virtual address
		memdesc->gpuaddr = addr;
        // record the pagetable
		memdesc->pagetable = pagetable;
	}

out:
	spin_unlock(&pagetable->lock);
	return ret;
}
2.2.7.6 _get_unmapped_area
/*
 * struct kgsl_iommu_addr_entry - entry in the kgsl_pagetable rbtree.
 * @base: starting virtual address of the entry
 * @size: size of the entry
 * @node: the rbtree node
 */
struct kgsl_iommu_addr_entry {
    // starting virtual address of the entry
	uint64_t base;
	uint64_t size;
	struct rb_node node;
};

static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable,
		uint64_t bottom, uint64_t top, uint64_t size,
		uint64_t align)
{
    // first (lowest-address) node of the pagetable's red-black tree
	struct rb_node *node = rb_first(&pagetable->rbtree);
	uint64_t start;

	bottom = ALIGN(bottom, align);
	start = bottom;

	while (node != NULL) {
		uint64_t gap;
        // recover the containing kgsl_iommu_addr_entry from the rb_node
		struct kgsl_iommu_addr_entry *entry = rb_entry(node,
			struct kgsl_iommu_addr_entry, node);

		/*
		 * Skip any entries that are outside of the range, but make sure
		 * to account for some that might straddle the lower bound
		 */
		if (entry->base < bottom) {
			if (entry->base + entry->size > bottom)
				start = ALIGN(entry->base + entry->size, align);
			node = rb_next(node);
			continue;
		}

		/* Stop if we went over the top */
		if (entry->base >= top)
			break;

		/* Make sure there is a gap to consider */
		if (start < entry->base) {
			gap = entry->base - start;

			if (gap >= size)
				return start;
		}

		/* Stop if there is no more room in the region */
		if (entry->base + entry->size >= top)
			return (uint64_t) -ENOMEM;

		/* Start the next cycle at the end of the current entry */
		start = ALIGN(entry->base + entry->size, align);
		node = rb_next(node);
	}

    // return the starting virtual address if the tail gap after the last entry fits
	if (start + size <= top)
		return start;

	return (uint64_t) -ENOMEM;
}
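
This is a first-fit search: walk the entries in ascending address order, slide an aligned cursor past each mapped range, and return the first gap large enough. A self-contained sketch of the same algorithm over a sorted array (hypothetical helper, not kernel code):

#include <stdio.h>
#include <stdint.h>

struct range {
	uint64_t base, size;
};

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/* first-fit over address-sorted ranges, mirroring _get_unmapped_area() */
static uint64_t find_gap(const struct range *r, int n, uint64_t bottom,
		uint64_t top, uint64_t size, uint64_t align)
{
	uint64_t start = ALIGN_UP(bottom, align);

	for (int i = 0; i < n; i++) {
		if (r[i].base + r[i].size <= start)
			continue;		/* entirely below the cursor */
		if (r[i].base >= top)
			break;			/* past the search window */
		if (start + size <= r[i].base)
			return start;		/* the gap before this entry fits */
		start = ALIGN_UP(r[i].base + r[i].size, align);
	}
	/* the tail gap after the last entry may still fit */
	return (start + size <= top) ? start : (uint64_t)-1;
}

int main(void)
{
	struct range used[] = { { 0x1000, 0x1000 }, { 0x4000, 0x2000 } };

	/* the gap [0x2000, 0x4000) fits 0x1000 bytes -> prints 0x2000 */
	printf("0x%llx\n", (unsigned long long)
	       find_gap(used, 2, 0x1000, 0x10000, 0x1000, 0x1000));
	return 0;
}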
2.2.7.7 _insert_gpuaddr
static int _insert_gpuaddr(struct kgsl_pagetable *pagetable,
		uint64_t gpuaddr, uint64_t size)
{
	struct rb_node **node, *parent = NULL;
    // allocate a kgsl_iommu_addr_entry
	struct kgsl_iommu_addr_entry *new =
		kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC);

	if (new == NULL)
		return -ENOMEM;

    // starting virtual address of the new range
	new->base = gpuaddr;
    // size of the new range
	new->size = size;

    // root of the pagetable's red-black tree
	node = &pagetable->rbtree.rb_node;

	while (*node != NULL) {
		struct kgsl_iommu_addr_entry *this;

		parent = *node;
		this = rb_entry(parent, struct kgsl_iommu_addr_entry, node);

		if (new->base < this->base)
			node = &parent->rb_left;
		else if (new->base > this->base)
			node = &parent->rb_right;
		else {
			/* Duplicate entry */
			WARN(1, "duplicate gpuaddr: 0x%llx\n", gpuaddr);
			kmem_cache_free(addr_entry_cache, new);
			return -EEXIST;
		}
	}

    // link the new rb_node into the pagetable's red-black tree and rebalance
	rb_link_node(&new->node, parent, node);
	rb_insert_color(&new->node, &pagetable->rbtree);

	return 0;
}