当前位置:   article > 正文

linux进程disk sleep状态的定位方法

disk sleep


在调试设备驱动时,可能会遇到进程一直阻塞、用 kill -9 也杀不掉的情况,此时查看进程状态为 D (disk sleep),即不可中断睡眠。本文通过一个示例驱动演示如何定位其原因。

一.复现过程

1.驱动源码(drv.c)

#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/delay.h>

#define MISC_DRV_DEMO_NAME "miscdemo"
 
/*
 * Per-device state for the demo driver.
 *
 * value: placeholder field; nothing in this file reads or writes it.
 */
struct misc_demo_dev {
	int value;
};

/*
 * Single module-wide instance handed to every opener through
 * filp->private_data. It is only referenced from this file, so it is kept
 * static to stay out of the kernel's global namespace.
 */
static struct misc_demo_dev misc_demo;
 
static int misc_demo_open(struct inode *inode, struct file *filp)
{
    filp->private_data = &misc_demo;
    return 0;
}
 
/*
 * release() handler for the miscdemo device.
 *
 * Performs no work and always returns 0.
 */
static int misc_demo_release(struct inode *inode, struct file *filp)
{
	return 0;
}
 
/*
 * write() handler that deliberately reproduces the "D (disk sleep)" hang
 * analysed in this article.
 *
 * The user buffer, count and file position are ignored. The handler calls
 * msleep(10) 100000 times (at least ~1000 seconds in total) and then
 * returns 0.
 *
 * While the calling task sits in msleep() it shows up as state D and
 * "killall -9" has no effect, which is exactly the symptom being
 * demonstrated. Do not copy this pattern into a real driver.
 */
static ssize_t misc_demo_write(struct file *filp, 
								const char __user *buf,
                              	size_t count, loff_t *ppos)
{
	printk("misc_demo_write in\n");
	int counter = 0;
	do {
		/* The commented-out call below is the interruptible variant the article switches to as the fix. */
		//msleep_interruptible(10);
		msleep(10);	/* this is the frame reported in the blocked-task stack trace */
		counter++;
		/* Leave the loop after 100000 sleeps. */
		if (counter == 100000) 
		{
			break;
		}
	} while (1);
	printk("misc_demo_write out\n");
    return 0;	/* always reports 0 bytes written, regardless of count */
}
 
struct file_operations misc_demo_fops = {
    .owner = THIS_MODULE,
    .open = misc_demo_open,
    .release = misc_demo_release,
    .write = misc_demo_write,
};

/*
 * Misc-device descriptor passed to misc_register()/misc_deregister().
 * The minor number is left to the misc core (MISC_DYNAMIC_MINOR) and the
 * device is named MISC_DRV_DEMO_NAME.
 */
static struct miscdevice demo_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = MISC_DRV_DEMO_NAME,
	.fops  = &misc_demo_fops,
};

/*
 * Module entry point: registers the misc device and logs the result.
 *
 * Returns whatever misc_register() returned, so a registration failure
 * also fails the module load.
 */
static int __init misc_demo_init(void)
{
	int ret;

	ret = misc_register(&demo_miscdev);
	printk("misc_demo_init:%d\n", ret);

	return ret;
}
 
/*
 * Module exit point: unregisters the misc device, then logs that the
 * module is going away.
 */
static void __exit misc_demo_exit(void)
{
	misc_deregister(&demo_miscdev);

	printk("misc_demo_exit\n");
}
module_init(misc_demo_init);
module_exit(misc_demo_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("miscdemo");
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73

2.驱动Makefile

# Out-of-tree kbuild makefile: builds drv.c into the drv.ko module.
obj-m := drv.o

# Kernel build tree to compile against; defaults to the running kernel's
# tree and can be overridden, e.g. `make KERNEL_DIR=/path/to/build`.
KERNEL_DIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

# all/clean are commands, not files; without this a stray file named
# "all" or "clean" in the directory would stop the target from running.
.PHONY: all clean

all:
	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) modules
clean:
	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) clean
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9

3.应用层代码(main.c)

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Test program for the miscdemo driver.
 *
 * Opens /dev/miscdemo and issues a single 32-byte write(). With the demo
 * driver loaded, that write() blocks inside the driver, which is the
 * situation the article goes on to diagnose.
 *
 * Returns 0 on success, 1 if the device cannot be opened or the write
 * reports an error.
 */
int main(void)
{
	char buf[32] = {0};
	int rc = 0;
	int fd = open("/dev/miscdemo", O_RDWR);

	/* Without the module loaded the node does not exist; say so instead
	 * of silently calling write() on an invalid descriptor. */
	if (fd < 0) {
		perror("open /dev/miscdemo");
		return 1;
	}

	if (write(fd, buf, sizeof(buf)) < 0) {
		perror("write");
		rc = 1;
	}

	close(fd);
	return rc;
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10

4.复现过程

#编译并加载驱动
make
insmod drv.ko

#编译并运行应用程序
gcc -o demo main.c
./demo

#程序阻塞
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9

二.分析过程

1.获取进程状态

cat /proc/`pidof demo`/status | grep "State"
  • 1

输出

State:  D (disk sleep)
  • 1

2.尝试kill进程【失败】

killall -9 demo
pidof  demo
  • 1
  • 2

3.查看调用栈

dmesg  -C
echo 1 > /proc/sys/kernel/sysrq
dmesg  -C
echo w > /proc/sysrq-trigger
dmesg
  • 1
  • 2
  • 3
  • 4
  • 5

输出

[18914.852891] sysrq: SysRq : Show Blocked State
[18914.852896]   task                        PC stack   pid father
[18914.877159] ad              D    0 20469   7950 0x00000084
[18914.877161] Call Trace:
[18914.877167]  ? __schedule+0x25d/0x840
[18914.877169]  schedule+0x28/0x70
[18914.877170]  schedule_timeout+0x179/0x380
[18914.877174]  ? __next_timer_interrupt+0xc0/0xc0
[18914.877177]  ? printk+0x52/0x6e
[18914.877179]  msleep+0x29/0x30
[18914.877183]  misc_demo_write+0x23/0x3a [drv]
[18914.877188]  vfs_write+0xad/0x1a0
[18914.877190]  ksys_write+0x52/0xc0
[18914.877193]  do_syscall_64+0x5b/0x1b0
[18914.877195]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[18914.877196] RIP: 0033:0x7f46df99aba0
[18914.877199] Code: Bad RIP value.
[18914.877199] RSP: 002b:00007ffeda277cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[18914.877201] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f46df99aba0
[18914.877201] RDX: 0000000000000020 RSI: 00007ffeda277d00 RDI: 0000000000000003
[18914.877202] RBP: 00007ffeda277d30 R08: 00007f46dfc73e80 R09: 0000000000000000
[18914.877202] R10: 00007ffeda2770e0 R11: 0000000000000246 R12: 00000000004004d0
[18914.877203] R13: 00007ffeda277e10 R14: 0000000000000000 R15: 0000000000000000
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23

发现阻塞位置:misc_demo_write+0x23/0x3a [drv],即 drv 模块中 misc_demo_write 函数起始地址偏移 0x23 处(该函数总长度为 0x3a 字节)。

4.导出驱动的符号,查询misc_demo_write函数的基址

objdump -S -d drv.ko > obj.txt
cat obj.txt  | grep "<misc_demo_write>:"
  • 1
  • 2

输出

0000000000000030 <misc_demo_write>:
  • 1

函数基址为: 0x30

5.计算阻塞点的地址并转换成代码行号 (misc_demo_write基址+偏移量=0x30+0x23=0x53)

ADDR=$(echo "ibase=16;30+23" | bc)
ADDR=$(echo "obase=16;$ADDR" | bc)
echo $ADDR
addr2line -e drv.ko "0x$ADDR"
  • 1
  • 2
  • 3
  • 4

输出

/mnt/drv.c:36
  • 1

即阻塞点对应 drv.c 文件的第36行。调用栈中记录的是返回地址,因此定位到的通常是 msleep 调用之后的语句,实际阻塞发生在其前面第34行的 msleep(10)。

6.查看有问题的代码

26 static ssize_t misc_demo_write(struct file *filp, const char __user *buf,
27                               size_t count, loff_t *ppos)
28 {
29     //struct misc_demo_dev *dev = filp->private_data;
30         printk("misc_demo_write in\n");
31         int counter = 0;
32         do {
33                 //msleep_interruptible(10);
34                 msleep(10);
35                 counter++;
36                 if (counter == 100000)
37                 {
38                         break;
39                 }
40         } while (1);
41         printk("misc_demo_write out\n");
42     return 0;
43 }
44
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19

7.修改方法

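将msleep(10);修改为msleep_interruptible(10);。原因:msleep() 使进程进入不可中断睡眠(TASK_UNINTERRUPTIBLE,即 D 状态),睡眠期间不响应任何信号(包括 SIGKILL);msleep_interruptible() 在有信号挂起时会提前返回非零的剩余毫秒数,因此循环中还应检查其返回值并及时退出,例如:if (msleep_interruptible(10)) return -EINTR;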
  • 1
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/笔触狂放9/article/detail/535423
推荐阅读
相关标签
  

闽ICP备14008679号