您的位置 首页 > 腾讯云社区

内核rwsem原理---CD

/*
 * Bias constants applied to rw_semaphore.count.
 *
 * The mask is 32 bits wide when CONFIG_X86_64 is set and 16 bits wide
 * otherwise; the derived values quoted below are for the 64-bit case.
 * Presumably the masked low part of count tallies active lockers and the
 * part above it tracks waiters/writers (inferred from the names -- confirm
 * against the kernel source).
 */
#ifdef CONFIG_X86_64

# define RWSEM_ACTIVE_MASK 0xffffffffL

#else

# define RWSEM_ACTIVE_MASK 0x0000ffffL

#endif

/* Per its name, the count value of an unlocked semaphore. */
#define RWSEM_UNLOCKED_VALUE 0x00000000L

/* Unit added to count for one active locker. */
#define RWSEM_ACTIVE_BIAS 0x00000001L

/* -(mask) - 1: -0x100000000, i.e. 0xffffffff00000000 as a 64-bit long. */
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)

/* A reader contributes exactly one active unit. */
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS

/* Waiting bias plus one active unit: 0xffffffff00000001 as a 64-bit long. */
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

down_read和down_write的反汇编如下:

crash> disas down_read

Dump of assembler code for function down_read:

0xffffffff81463e00 <+0>: mov %rdi,%rax //将参数传入的信号量结构体地址保存到rax;

0xffffffff81463e03 <+3>: lock incq (%rax)//信号量成员count值加1,如果操作数结果为负数,则SF符号标志位被置位为1

0xffffffff81463e07 <+7>: jns 0xffffffff81463e0e <down_read+14>//如果SF(符号标志位)没有被置位为1就跳转,在这里就是读写信号量成员count加1之后的值大于等于0(即没有写者持有锁或在等待队列中)则跳转;

0xffffffff81463e09 <+9>: callq 0xffffffff81262f80 <call_rwsem_down_read_failed>//读写信号量成员count小于0代表有进程已申请rw信号量写锁(已经获取写锁成功或申请写锁失败被放入等待队列都会将count设置为小于0)

0xffffffff81463e0e <+14>: retq

crash> disas down_write

Dump of assembler code for function down_write:

0xffffffff81463e10 <+0>: mov %rdi,%rax //将参数传入的信号量结构体地址保存到rax;

0xffffffff81463e13 <+3>: movabs $0xffffffff00000001,%rdx//将常数0xffffffff00000001(即RWSEM_ACTIVE_WRITE_BIAS)保存到rdx

0xffffffff81463e1d <+13>: lock xadd %rdx,(%rax) //将rax寄存器代表的地址指向的值(也就是信号量成员值count)先与rdx寄存器交换,然后两个值相加后放置到rax寄存器值为地址的内存中(前面三条指令合起来就是将函数传入的参数sem结构体成员值count保存到rdx中,然后再将常数$0xffffffff00000001与信号量成员值count相加后赋值给参数sem结构体成员count;)

0xffffffff81463e22 <+18>: test %rdx,%rdx //test指令为按位与,结果为0则设置ZF标志位为1,在这里是判断传入的信号量值sem成员值count是否为0;set ZF to 1 if rdx == 0;

0xffffffff81463e25 <+21>: je 0xffffffff81463e2c <down_write+28>//ZF(zero flag)标志位为1则跳转,则代表先前未有进程获取该锁

0xffffffff81463e27 <+23>: callq 0xffffffff81262fb0 <call_rwsem_down_write_failed>//获取锁失败;

0xffffffff81463e2c <+28>: retq

另外rwsem_down_failed_common函数还会对rw信号量的值count做修改:

/*
 * Read-side failure path: hand the semaphore to the common wait routine,
 * tagged as a reader, together with the correction to apply to sem->count.
 *
 * @sem: the semaphore the caller failed to read-lock.
 *
 * Returns whatever rwsem_down_failed_common() returns.
 */
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	/*
	 * Take back the active-reader unit: -RWSEM_ACTIVE_READ_BIAS is -0x1,
	 * i.e. 0xffffffffffffffff as a 64-bit long.
	 */
	signed long adjustment = -RWSEM_ACTIVE_READ_BIAS;

	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, adjustment);
}

/*
 * Write-side failure path: hand the semaphore to the common wait routine,
 * tagged as a writer, together with the correction to apply to sem->count.
 *
 * @sem: the semaphore the caller failed to write-lock.
 *
 * Returns whatever rwsem_down_failed_common() returns.
 */
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
	/*
	 * Take back the write bias: with the 32-bit active mask,
	 * -RWSEM_ACTIVE_WRITE_BIAS is -0xffffffff00000001, i.e.
	 * 0x00000000ffffffff as a 64-bit long.
	 */
	signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS;

	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, adjustment);
}

static struct rw_semaphore __sched *

rwsem_down_failed_common(struct rw_semaphore *sem,

unsigned int flags, signed long adjustment)

{

........

if (list_empty(&sem->wait_list))

adjustment += RWSEM_WAITING_BIAS;//RWSEM_WAITING_BIAS=0xffffffff00000000

..........

/* we're now waiting on the lock, but no longer actively locking */

count = rwsem_atomic_update(adjustment, sem);

//先一个进程拿读锁然后另一进程申请写锁值计算逻辑为:

adjustment+sem->count=(-RWSEM_ACTIVE_WRITE_BIAS+RWSEM_WAITING_BIAS+0xffffffff00000002)=0xffffffff00000001

计算后0xffffffff00000001赋值到sem->count;

0xffffffff00000002是由前面down_write计算后填充到sem->count中的值;

//接着有写进程申请写锁则值计算逻辑为:

adjustment+sem->count=(-RWSEM_ACTIVE_WRITE_BIAS+(0xffffffff00000001+0xffffffff00000001))=0xffffffff00000001

sem->count在调用down_write时已被修改为 (0xffffffff00000001+0xffffffff00000001);

或接着有读进程申请读锁值计算逻辑为:

adjustment+sem->count=(-RWSEM_ACTIVE_READ_BIAS+(0xffffffff00000001+0x1))=0xffffffff00000001

sem->count在调用down_read时已被修改为 (0xffffffff00000001+0x1);

.........

}

/*
 * Exchange-and-add on the semaphore counter.
 *
 * @delta: signed amount to add to sem->count.
 * @sem:   semaphore whose count field is updated.
 *
 * xadd stores the sum into sem->count and leaves the previous count in
 * tmp, so the value returned (tmp + delta) is the count after the update.
 */
static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)

{

long tmp = delta;

/* LOCK_PREFIX is defined elsewhere; presumably it emits the x86 lock prefix. */
asm volatile(LOCK_PREFIX "xadd %0,%1"

/* Both operands are read-write: tmp in a register, sem->count in memory. */
: "+r" (tmp), "+m" (sem->count)

/* No input-only operands; "memory" clobber keeps accesses from being reordered across the asm. */
: : "memory");

/* tmp now holds the old count; old + delta is the new count. */
return tmp + delta;

}

通过在crash中反汇编 rwsem_down_failed_common找到rwsem_atomic_update对应的汇编指令如下:

crash> dis rwsem_down_failed_common

0xffffffff814677ab <rwsem_down_failed_common+123>: xadd %rax,%ds:(%r12) //r12存放的是sem->count的地址,(%r12)为sem->count的值;执行后rax保存的是sem->count的旧值,sem->count被更新为旧值加上adjustment;

0xffffffff814677b1 <rwsem_down_failed_common+129>: lea (%r14,%rax,1),%rax //rax=r14+rax*1, 计算后rax的值作为rwsem_atomic_update函数返回值

编写测试代码验证rw信号量count值在进程申请锁失败时变化情况如下:

1)先有进程拿读锁,则count值加1变为0x0000000000000001,这时如果再有进程申请读锁则值变为0x0000000000000002

2)先有进程拿读锁,则count值加1变为0x0000000000000001,这时如果有进程申请写锁则值变为0xffffffff00000001,然后后面如果再有读锁申请或写锁申请则count值都保持不变为0xffffffff00000001;

3)先有进程拿写锁,则count值被设置为0xffffffff00000001,这时如果有进程申请读锁则值高位减1变为0xfffffffe00000001,然后如果接着有进程申请

写锁则值保持不变为0xfffffffe00000001,接着再有进程申请读锁也还是保持不变为0xfffffffe00000001;

4)先有进程拿写锁,则count值被设置为0xffffffff00000001,这时如果有进程申请写锁则值高位减1变为0xfffffffe00000001

总结:从测试数据看,无论是先拿读锁再申请写锁,还是先拿写锁再申请读锁,一旦出现读写锁交叉申请后,再有其他进程无论是申请写锁还是读锁count的值都不变。

若有vmcore,可以通过遍历rw_semaphore等待队列找到所有阻塞在读写信号量锁上的进程,比如以struct task_struct成员struct mm_struct中的struct rw_semaphore mmap_sem为例:

1)找到wait_list成员值:

crash> struct task_struct ffff88154fe0a240 | grep mm

mm = 0xffff88201e690100,

crash> struct mm_struct 0xffff88201e690100 | grep mmap_sem -A14 | grep wait_list -A3

wait_list = {

next = 0xffff88152e499e90,

prev = 0xffff88100f1b3e90

}

2)列出等待队列中的struct rwsem_waiter成员:

crash> list 0xffff88152e499e90

ffff88152e499e90

ffff88029d173d38

ffff881569b7be90

ffff8815ba4dbe90

ffff880e10767e90

ffff880211645c28

ffff881dd9615e90

ffff881008615e90

ffff8810d20c5d38

ffff88101ee77d40

ffff8810103ffd38

ffff88111c435d38

ffff88117291fd38

ffff88100f1b3e90

ffff88201e690170

crash>

crash> list list_head.next 0xffff88152e499e90

ffff88152e499e90

ffff88029d173d38

ffff881569b7be90

ffff8815ba4dbe90

ffff880e10767e90

ffff880211645c28

ffff881dd9615e90

ffff881008615e90

ffff8810d20c5d38

ffff88101ee77d40

ffff8810103ffd38

ffff88111c435d38

ffff88117291fd38

ffff88100f1b3e90

ffff88201e690170

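注:list_head.next中的list_head为 struct rw_semaphore结构体成员wait_list的类型。另外,由于wait_list是双向循环链表,list命令输出的最后一个地址ffff88201e690170是链表头本身(即mm->mmap_sem.wait_list的地址,它的前一项正是wait_list.prev=0xffff88100f1b3e90),并不是struct rwsem_waiter实例,因此下面按rwsem_waiter解析该地址得到的task和flags值没有意义。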

3)列出链表struct list_head wait_list中所有的struct rwsem_waiter实例地址及其task、flags成员:

crash> list rwsem_waiter.list -s rwsem_waiter.task,flags 0xffff88152e499e90

ffff88152e499e90

task = 0xffff88152e496600

flags = 0x2 //0x2代表申请写锁;

ffff88029d173d38

task = 0xffff8800455505c0

flags = 0x1//0x1代表申请读锁

ffff881569b7be90

task = 0xffff881569b780c0

flags = 0x2

ffff8815ba4dbe90

task = 0xffff8815c4e8e5c0

flags = 0x2

ffff880e10767e90

task = 0xffff880e0a182140

flags = 0x2

ffff880211645c28

task = 0xffff880006814300

flags = 0x1

ffff881dd9615e90

task = 0xffff88201ecfe640

flags = 0x2

ffff881008615e90

task = 0xffff88100e99a080

flags = 0x2

ffff8810d20c5d38

task = 0xffff881174ba4340

flags = 0x1

ffff88101ee77d40

task = 0xffff88101ee74480

flags = 0x1

ffff8810103ffd38

task = 0xffff88013a31c240

flags = 0x1

ffff88111c435d38

task = 0xffff881096094140

flags = 0x1

ffff88117291fd38

task = 0xffff881182c30540

flags = 0x1

ffff88100f1b3e90

task = 0xffff88100e904480

flags = 0x2

ffff88201e690170

task = 0xffff88201e690180

flags = 0x1e690180

crash>

crash> list list_head.next -s rwsem_waiter.task,flags 0xffff88152e499e90

ffff88152e499e90

task = 0xffff88152e496600

flags = 0x2

ffff88029d173d38

task = 0xffff8800455505c0

flags = 0x1

ffff881569b7be90

task = 0xffff881569b780c0

flags = 0x2

ffff8815ba4dbe90

task = 0xffff8815c4e8e5c0

flags = 0x2

ffff880e10767e90

task = 0xffff880e0a182140

flags = 0x2

ffff880211645c28

task = 0xffff880006814300

flags = 0x1

ffff881dd9615e90

task = 0xffff88201ecfe640

flags = 0x2

ffff881008615e90

task = 0xffff88100e99a080

flags = 0x2

ffff8810d20c5d38

task = 0xffff881174ba4340

flags = 0x1

ffff88101ee77d40

task = 0xffff88101ee74480

flags = 0x1

ffff8810103ffd38

task = 0xffff88013a31c240

flags = 0x1

ffff88111c435d38

task = 0xffff881096094140

flags = 0x1

ffff88117291fd38

task = 0xffff881182c30540

flags = 0x1

ffff88100f1b3e90

task = 0xffff88100e904480

flags = 0x2

ffff88201e690170

task = 0xffff88201e690180

flags = 0x1e690180

crash>

说明:

struct list_head wait_list是struct rw_semaphore 结构体里定义的一个链表,

在内核代码使用过程中定义的struct rwsem_waiter waiter实例 会添加到struct list_head wait_list列表中。

rwsem_waiter.list这里rwsem_waiter是结构体类型名,也就是struct rwsem_waiter,0xffff88152e499e90是wait_list中其中一个struct rwsem_waiter实例地址。rwsem_waiter.list中的list是结构体struct rwsem_waiter中的一个struct list_head类型成员名,

list list_head.next -s rwsem_waiter.task,flags 0xffff88152e499e90 和 list rwsem_waiter.list -s rwsem_waiter.task,flags 0xffff88152e499e90命令可以总结为:

0xffff88152e499e90是struct rwsem_waiter的结构体地址值,该类型结构体地址添加到struct rw_semaphore中定义的双向循环链表wait_list中:

/*
 * Read/write semaphore as quoted by the article.
 */
struct rw_semaphore {

/* Lock-state counter; adjusted with the RWSEM_*_BIAS constants. */
long count;

/* Spinlock; presumably guards wait_list -- confirm against the kernel source. */
spinlock_t wait_lock;

/* Head of the list onto which struct rwsem_waiter entries are queued. */
struct list_head wait_list;

/* Present only when CONFIG_DEBUG_LOCK_ALLOC is set. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC

struct lockdep_map dep_map;

#endif

};

/*
 * One entry on rw_semaphore.wait_list.
 */
struct rwsem_waiter {

/* Link into the semaphore's wait_list; first member, so its address equals the waiter's. */
struct list_head list;

/* Task associated with this entry; presumably the blocked task -- confirm. */
struct task_struct *task;

/* One of the RWSEM_WAITING_FOR_* values below. */
unsigned int flags;

/* Entry was queued by the read-lock failure path. */
#define RWSEM_WAITING_FOR_READ 0x00000001

/* Entry was queued by the write-lock failure path. */
#define RWSEM_WAITING_FOR_WRITE 0x00000002

};

/* Doubly linked list node: links to the following and preceding nodes. */
struct list_head {
	struct list_head *next;	/* following node */
	struct list_head *prev;	/* preceding node */
};

测试源码附录,根据验证场景对源码进行调整:

#include <linux/init.h>

#include <linux/module.h>

#include <linux/sched.h>

#include <linux/delay.h>

#include <linux/kthread.h>

#include <linux/rwsem.h>

MODULE_LICENSE("Dual BSD/GPL");

static struct rw_semaphore sem;

/*
 * Log the current value of sem->count at debug level, zero-padded to
 * 16 hex digits.
 *
 * @sem: semaphore to inspect. The field is read without taking any lock,
 *       so the value is only a snapshot.
 */
static void print_sem(struct rw_semaphore *sem)
{
	/* End the record with a real newline; the format previously ended in a literal 'n'. */
	printk(KERN_DEBUG "sem.count=0x%016lx\n", sem->count);
}

/*
 * Take @sem for reading, logging before the attempt and after success.
 *
 * @sem:  semaphore to read-lock; down_read() is called on it.
 * @name: label of the calling thread, used only in the log messages.
 */
static void get_read_lock(struct rw_semaphore *sem, char *name)
{
	/* Messages end in "\n"; the formats previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s is waiting for read\n", name);
	down_read(sem);
	printk(KERN_DEBUG "%s got read lock\n", name);
}

/*
 * Log the release and then drop the read lock on @sem.
 *
 * @sem:  semaphore passed to up_read().
 * @name: label of the calling thread, used only in the log message.
 */
static void release_read_lock(struct rw_semaphore *sem, char *name)
{
	/* Message ends in "\n"; the format previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s releses read lock\n", name);
	up_read(sem);
}

/*
 * Take @sem for writing, logging before the attempt and after success.
 *
 * @sem:  semaphore to write-lock; down_write() is called on it.
 * @name: label of the calling thread, used only in the log messages.
 */
static void get_write_lock(struct rw_semaphore *sem, char *name)
{
	/* Messages end in "\n"; the formats previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s is waiting for write\n", name);
	down_write(sem);
	printk(KERN_DEBUG "%s got write lock\n", name);
}

/*
 * Log the release and then drop the write lock on @sem.
 *
 * @sem:  semaphore passed to up_write().
 * @name: label of the calling thread, used only in the log message.
 */
static void release_write_lock(struct rw_semaphore *sem, char *name)
{
	/* Message ends in "\n"; the format previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s releses write lock\n", name);
	up_write(sem);
}

/*
 * Thread function for a test reader.
 *
 * Sleeps 100 ms, takes the global sem for reading, logs its count, holds
 * the lock for 200 ms, releases it and terminates the thread.
 *
 * @data: NUL-terminated label for this thread, used in log messages.
 *
 * Does not return: the thread ends through do_exit(0).
 */
int reader(void *data)
{
	char *name = data;

	/* Messages end in "\n"; the formats previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s is running\n", name);

	msleep(100);
	get_read_lock(&sem, name);
	print_sem(&sem);

	printk(KERN_DEBUG "%s is sleeping 200\n", name);
	msleep(200);

	release_read_lock(&sem, name);
	do_exit(0);
}

/*
 * Thread function for a test writer.
 *
 * Sleeps 10 ms, takes the global sem for writing, logs its count, holds
 * the lock for 200 ms, logs the count again (to show changes made by
 * lockers that queued up meanwhile), releases it and terminates the thread.
 *
 * @data: NUL-terminated label for this thread, used in log messages.
 *
 * Does not return: the thread ends through do_exit(0).
 */
int writer(void *data)
{
	char *name = data;

	/* Messages end in "\n"; the formats previously ended in a literal 'n'. */
	printk(KERN_DEBUG "%s is running\n", name);

	msleep(10);
	get_write_lock(&sem, name);
	print_sem(&sem);

	printk(KERN_DEBUG "%s is sleeping 200\n", name);
	msleep(200);
	print_sem(&sem);

	release_write_lock(&sem, name);
	do_exit(0);
}

/*
 * Create a kernel thread that runs @fn with @tag as its argument, and
 * start it.
 *
 * @fn:   thread function.
 * @tag:  label handed to @fn as its data pointer.
 * @name: name given to the new thread.
 *
 * Returns 0 on success, or the negative errno encoded in the pointer
 * returned by kthread_create().
 */
static int start_test_thread(int (*fn)(void *), void *tag, const char *name)
{
	/* Pass the name through "%s" so it is never interpreted as a format. */
	struct task_struct *task = kthread_create(fn, tag, "%s", name);

	if (IS_ERR(task)) {
		printk(KERN_DEBUG "failed to create task\n");
		return PTR_ERR(task);
	}

	wake_up_process(task);
	return 0;
}

/*
 * Module entry point: initialise the global sem and launch the test
 * threads -- reader1, reader2 and writer1 back to back, then writer2
 * after a 100 ms pause.
 *
 * Returns 0 on success or a negative errno if a thread could not be
 * created (previously a positive 1, which does not follow the 0/-errno
 * convention for module init functions).
 *
 * NOTE(review): threads started before a failure are left running even
 * though a failing init leads to the module being unloaded -- confirm
 * whether that matters for this test module.
 */
static int hello_init(void)
{
	int err;

	printk(KERN_ALERT "Hello, world\n");

	init_rwsem(&sem);

	err = start_test_thread(reader, "reader_1", "reader1");
	if (err)
		return err;

	err = start_test_thread(reader, "reader_2", "reader2");
	if (err)
		return err;

	err = start_test_thread(writer, "writer_1", "writer1");
	if (err)
		return err;

	/* Stagger the second writer behind the first three threads. */
	msleep(100);

	return start_test_thread(writer, "writer_2", "writer2");
}

/*
 * Module exit point: only logs a farewell message.
 *
 * NOTE(review): nothing here waits for the reader/writer threads; the
 * module presumably must not be unloaded until they have finished --
 * confirm.
 */
static void hello_exit(void)
{
	/* Message ends in "\n"; the format previously ended in a literal 'n'. */
	printk(KERN_ALERT "Goodbye, cruel world\n");
}

module_init(hello_init);

module_exit(hello_exit);

---来自腾讯云社区的---CD

关于作者: 瞎采新闻

这里可以显示个人介绍!这里可以显示个人介绍!

热门文章

留言与评论(共有 0 条评论)
   
验证码: