/*
 * Bias constants applied to rw_semaphore.count.
 *
 * RWSEM_ACTIVE_MASK selects the low part of the count word; the waiting
 * bias is the negative value just below that field.  With CONFIG_X86_64
 * the resulting 64-bit patterns are:
 *   RWSEM_WAITING_BIAS      = 0xffffffff00000000
 *   RWSEM_ACTIVE_WRITE_BIAS = 0xffffffff00000001
 */
#ifndef CONFIG_X86_64
# define RWSEM_ACTIVE_MASK		0x0000ffffL
#else
# define RWSEM_ACTIVE_MASK		0xffffffffL
#endif

/* Count value of a semaphore nobody holds or waits on. */
#define RWSEM_UNLOCKED_VALUE		0x00000000L

/* Contribution of one active locker. */
#define RWSEM_ACTIVE_BIAS		0x00000001L
#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS

/* Negative bias placed above the active field. */
#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK - 1)

/* What a writer adds: the waiting bias plus one active locker. */
#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_ACTIVE_BIAS + RWSEM_WAITING_BIAS)
down_read和down_write的反汇编如下:
crash> disas down_read
Dump of assembler code for function down_read:
0xffffffff81463e00 <+0>: mov %rdi,%rax //将参数传入的信号量结构体地址保存到rax;
0xffffffff81463e03 <+3>: lock incq (%rax)//信号量成员count值加1,如果操作数结果为负数,则SF符号标志位被置位为1
0xffffffff81463e07 <+7>: jns 0xffffffff81463e0e <down_read+14>//如果SF(符号标志位)没有被置位为1就跳转;SF反映的是incq执行后的结果,所以在这里就是读写信号量成员count加1之后的值大于等于0则跳转;
0xffffffff81463e09 <+9>: callq 0xffffffff81262f80 <call_rwsem_down_read_failed>//读写信号量成员count小于0代表有进程已申请rw信号量写锁(已经获取写锁成功或申请写锁失败被放入等待队列都会将count设置为小于0)
0xffffffff81463e0e <+14>: retq
crash> disas down_write
Dump of assembler code for function down_write:
0xffffffff81463e10 <+0>: mov %rdi,%rax //将参数传入的信号量结构体地址保存到rax;
0xffffffff81463e13 <+3>: movabs $0xffffffff00000001,%rdx//将常数0xffffffff00000001(即RWSEM_ACTIVE_WRITE_BIAS)保存到rdx
0xffffffff81463e1d <+13>: lock xadd %rdx,(%rax) //将rax寄存器代表的地址指向的值(也就是信号量成员值count)先与rdx寄存器交换,然后两个值相加后放置到rax寄存器值为地址的内存中(前面三条指令合起来就是将函数传入的参数sem结构体成员值count保存到rdx中,然后再将常数$0xffffffff00000001与信号量成员值count相加后赋值给参数sem结构体成员count;)
0xffffffff81463e22 <+18>: test %rdx,%rdx //test指令为按位与,结果为0则设置ZF标志位为1,在这里是判断传入的信号量值sem成员值count是否为0;set ZF to 1 if rdx == 0;
0xffffffff81463e25 <+21>: je 0xffffffff81463e2c <down_write+28>//ZF(zero flag)标志位为1则跳转,则代表先前未有进程获取该锁
0xffffffff81463e27 <+23>: callq 0xffffffff81262fb0 <call_rwsem_down_write_failed>//获取锁失败;
0xffffffff81463e2c <+28>: retq
另外rwsem_down_failed_common函数还会对rw信号量的值count做修改:
/*
 * Handle a read-lock request that could not be granted: hand the semaphore
 * to rwsem_down_failed_common() as a reader, together with the count
 * adjustment that takes back one read bias.
 */
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	/* -RWSEM_ACTIVE_READ_BIAS == -0x1, i.e. 0xffffffffffffffff in 64 bits. */
	const signed long undo_read_bias = -RWSEM_ACTIVE_READ_BIAS;

	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
					undo_read_bias);
}
/*
 * Handle a write-lock request that could not be granted: hand the semaphore
 * to rwsem_down_failed_common() as a writer, together with the count
 * adjustment that takes back one write bias.
 */
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
	/*
	 * With CONFIG_X86_64: -RWSEM_ACTIVE_WRITE_BIAS == -0xffffffff00000001,
	 * i.e. 0x00000000ffffffff in 64 bits.
	 */
	const signed long undo_write_bias = -RWSEM_ACTIVE_WRITE_BIAS;

	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
					undo_write_bias);
}
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
unsigned int flags, signed long adjustment)
{
........
if (list_empty(&sem->wait_list))
adjustment += RWSEM_WAITING_BIAS;//RWSEM_WAITING_BIAS=0xffffffff00000000
..........
/* we're now waiting on the lock, but no longer actively locking */
count = rwsem_atomic_update(adjustment, sem);
//先一个进程拿读锁然后另一进程申请写锁值计算逻辑为:
adjustment+sem->count=(-RWSEM_ACTIVE_WRITE_BIAS+RWSEM_WAITING_BIAS+0xffffffff00000002)=0xffffffff00000001
计算后0xffffffff00000001赋值到sem->count;
0xffffffff00000002是由前面down_write计算后填充到sem->count中的值;
//接着有写进程申请写锁则值计算逻辑为:
adjustment+sem->count=(-RWSEM_ACTIVE_WRITE_BIAS+(0xffffffff00000001+0xffffffff00000001))=0xffffffff00000001
sem->count在调用down_write时已被修改为 (0xffffffff00000001+0xffffffff00000001);
或接着有读进程申请读锁值计算逻辑为:
adjustment+sem->count=(-RWSEM_ACTIVE_READ_BIAS+(0xffffffff00000001+0x1))=0xffffffff00000001
sem->count在调用down_read时已被修改为 (0xffffffff00000001+0x1);
.........
}
/*
 * implement exchange and add functionality
 *
 * Atomically adds @delta to sem->count using a locked xadd and returns the
 * value of sem->count after the addition.
 *
 * @delta: signed amount to add to the count
 * @sem:   semaphore whose count is updated
 */
static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
long tmp = delta;
/*
 * xadd swaps its operands and stores their sum in the memory operand:
 * afterwards tmp holds the previous count and sem->count holds
 * previous count + delta.
 */
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
: : "memory");
/* previous count + delta == the new count */
return tmp + delta;
}
通过在crash中反汇编 rwsem_down_failed_common找到rwsem_atomic_update对应的汇编指令如下:
crash> dis rwsem_down_failed_common
0xffffffff814677ab <rwsem_down_failed_common+123>: xadd %rax,%ds:(%r12) //r12存放的是sem->count的地址,
(r12)为sem->count的值;执行后rax中保存的是sem->count的旧值,(r12)中是旧值与adjustment相加后的新值;
0xffffffff814677b1 <rwsem_down_failed_common+129>: lea (%r14,%rax,1),%rax //rax=r14+rax*1, 计算后rax的值作为rwsem_atomic_update函数返回值
编写测试代码验证rw信号量count值在进程申请锁失败时变化情况如下:
1)先有进程拿读锁,则count值加1变为0x0000000000000001,这时如果再有进程申请读锁则值变为0x0000000000000002
2)先有进程拿读锁,则count值加1变为0x0000000000000001,这时如果有进程申请写锁则值变为0xffffffff00000001,然后后面如果再有读锁申请或写锁申请则count值都保持不变为0xffffffff00000001;
3)先有进程拿写锁,则count值被设置为0xffffffff00000001,这时如果有进程申请读锁则值高位减1变为0xfffffffe00000001,然后如果接着有进程申请
写锁则值保持不变为0xfffffffe00000001,接着再有进程申请读锁也还是保持不变为0xfffffffe00000001;
4)先有进程拿写锁,则count值被设置为0xffffffff00000001,这时如果有进程申请写锁则值高位减1变为0xfffffffe00000001
总结:从测试数据看,无论是先拿读锁再申请写锁,还是先拿写锁再申请读锁,一旦出现读写锁交叉申请后,再有其他进程无论是申请写锁还是读锁count的值都不变。
若有vmcore,可以通过遍历rw_semaphore等待队列找到所有阻塞在读写信号量锁上的进程,比如以struct task_struct成员struct mm_struct中的struct rw_semaphore mmap_sem为例:
1)找到wait_list成员值:
crash> struct task_struct ffff88154fe0a240 | grep mm
mm = 0xffff88201e690100,
crash> struct mm_struct 0xffff88201e690100 | grep mmap_sem -A14 | grep wait_list -A3
wait_list = {
next = 0xffff88152e499e90,
prev = 0xffff88100f1b3e90
}
2)列出等待队列中的struct rwsem_waiter成员:
crash> list 0xffff88152e499e90
ffff88152e499e90
ffff88029d173d38
ffff881569b7be90
ffff8815ba4dbe90
ffff880e10767e90
ffff880211645c28
ffff881dd9615e90
ffff881008615e90
ffff8810d20c5d38
ffff88101ee77d40
ffff8810103ffd38
ffff88111c435d38
ffff88117291fd38
ffff88100f1b3e90
ffff88201e690170
crash>
或
crash> list list_head.next 0xffff88152e499e90
ffff88152e499e90
ffff88029d173d38
ffff881569b7be90
ffff8815ba4dbe90
ffff880e10767e90
ffff880211645c28
ffff881dd9615e90
ffff881008615e90
ffff8810d20c5d38
ffff88101ee77d40
ffff8810103ffd38
ffff88111c435d38
ffff88117291fd38
ffff88100f1b3e90
ffff88201e690170
注:list_head.next中的list_head为 struct rw_semaphore结构体成员wait_list的类型。
3)列出链表struct list_head wait_list中所有的struct rwsem_waiter实例地址及其task、flags成员:
crash> list rwsem_waiter.list -s rwsem_waiter.task,flags 0xffff88152e499e90
ffff88152e499e90
task = 0xffff88152e496600
flags = 0x2 //0x2代表申请写锁;
ffff88029d173d38
task = 0xffff8800455505c0
flags = 0x1//0x1代表申请读锁
ffff881569b7be90
task = 0xffff881569b780c0
flags = 0x2
ffff8815ba4dbe90
task = 0xffff8815c4e8e5c0
flags = 0x2
ffff880e10767e90
task = 0xffff880e0a182140
flags = 0x2
ffff880211645c28
task = 0xffff880006814300
flags = 0x1
ffff881dd9615e90
task = 0xffff88201ecfe640
flags = 0x2
ffff881008615e90
task = 0xffff88100e99a080
flags = 0x2
ffff8810d20c5d38
task = 0xffff881174ba4340
flags = 0x1
ffff88101ee77d40
task = 0xffff88101ee74480
flags = 0x1
ffff8810103ffd38
task = 0xffff88013a31c240
flags = 0x1
ffff88111c435d38
task = 0xffff881096094140
flags = 0x1
ffff88117291fd38
task = 0xffff881182c30540
flags = 0x1
ffff88100f1b3e90
task = 0xffff88100e904480
flags = 0x2
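ffff88201e690170 //注意:该地址应为链表头本身(位于mm_struct 0xffff88201e690100内的mmap_sem.wait_list;wait_list.prev=0xffff88100f1b3e90才是最后一个等待者),并不是struct rwsem_waiter实例,下面的task和flags值无意义
task = 0xffff88201e690180
flags = 0x1e690180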
crash>
或
crash> list list_head.next -s rwsem_waiter.task,flags 0xffff88152e499e90
ffff88152e499e90
task = 0xffff88152e496600
flags = 0x2
ffff88029d173d38
task = 0xffff8800455505c0
flags = 0x1
ffff881569b7be90
task = 0xffff881569b780c0
flags = 0x2
ffff8815ba4dbe90
task = 0xffff8815c4e8e5c0
flags = 0x2
ffff880e10767e90
task = 0xffff880e0a182140
flags = 0x2
ffff880211645c28
task = 0xffff880006814300
flags = 0x1
ffff881dd9615e90
task = 0xffff88201ecfe640
flags = 0x2
ffff881008615e90
task = 0xffff88100e99a080
flags = 0x2
ffff8810d20c5d38
task = 0xffff881174ba4340
flags = 0x1
ffff88101ee77d40
task = 0xffff88101ee74480
flags = 0x1
ffff8810103ffd38
task = 0xffff88013a31c240
flags = 0x1
ffff88111c435d38
task = 0xffff881096094140
flags = 0x1
ffff88117291fd38
task = 0xffff881182c30540
flags = 0x1
ffff88100f1b3e90
task = 0xffff88100e904480
flags = 0x2
ffff88201e690170
task = 0xffff88201e690180
flags = 0x1e690180
crash>
说明:
struct list_head wait_list是struct rw_semaphore 结构体里定义的一个链表,
在内核代码使用过程中定义的struct rwsem_waiter waiter实例 会添加到struct list_head wait_list列表中。
rwsem_waiter.list这里rwsem_waiter是结构体类型名,也就是struct rwsem_waiter,0xffff88152e499e90是wait_list中其中一个struct rwsem_waiter实例地址。rwsem_waiter.list中的list是结构体struct rwsem_waiter中的一个struct list_head类型成员名,
list list_head.next -s rwsem_waiter.task,flags 0xffff88152e499e90 和 list rwsem_waiter.list -s rwsem_waiter.task,flags 0xffff88152e499e90命令可以总结为:
0xffff88152e499e90是struct rwsem_waiter的结构体地址值,该类型结构体地址添加到struct rw_semaphore中定义的双向循环链表wait_list中:
/*
 * Read/write semaphore.  Tasks that cannot take the lock immediately are
 * queued on wait_list as struct rwsem_waiter entries.
 */
struct rw_semaphore {
long count; /* lock state word: down_read increments it, down_write adds RWSEM_ACTIVE_WRITE_BIAS */
spinlock_t wait_lock; /* presumably protects wait_list - TODO confirm against the rwsem slow path */
struct list_head wait_list; /* head of the circular list of struct rwsem_waiter.list nodes */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map; /* only present when CONFIG_DEBUG_LOCK_ALLOC is set */
#endif
};
/*
 * One entry on rw_semaphore.wait_list, describing a task blocked on the
 * semaphore and the kind of lock it is waiting for.
 */
struct rwsem_waiter {
struct list_head list; /* node linked into rw_semaphore.wait_list */
struct task_struct *task; /* the task that is waiting */
unsigned int flags; /* one of the RWSEM_WAITING_FOR_* values below */
#define RWSEM_WAITING_FOR_READ 0x00000001 /* waiter requested the read lock */
#define RWSEM_WAITING_FOR_WRITE 0x00000002 /* waiter requested the write lock */
};
/*
 * Node of a doubly linked list; embedded in the structures being linked
 * (e.g. rw_semaphore.wait_list and rwsem_waiter.list).
 */
struct list_head {
	struct list_head *next;	/* following node */
	struct list_head *prev;	/* preceding node */
};
测试源码附录,根据验证场景对源码进行调整:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
MODULE_LICENSE("Dual BSD/GPL");
/*
 * The single rw semaphore contended by the reader() and writer() threads;
 * initialised with init_rwsem() in hello_init().
 */
static struct rw_semaphore sem;
/*
 * Log the raw count word of @sem as a zero-padded 16-digit hex value.
 *
 * Fix: the format string ended in a literal 'n' instead of "\n".
 */
static void print_sem(struct rw_semaphore *sem)
{
	printk(KERN_DEBUG "sem.count=0x%016lx\n", sem->count);
}
/*
 * Take @sem for reading, logging before the attempt and after it succeeds.
 *
 * @sem:  semaphore to read-lock
 * @name: label of the calling thread, used only in the log messages
 *
 * Fix: the log messages ended in a literal 'n' instead of "\n".
 */
static void get_read_lock(struct rw_semaphore *sem, char *name)
{
	printk(KERN_DEBUG "%s is waiting for read\n", name);
	down_read(sem);
	printk(KERN_DEBUG "%s got read lock\n", name);
}
/*
 * Log the release and drop the read lock on @sem.
 *
 * @sem:  semaphore currently read-locked by the caller
 * @name: label of the calling thread, used only in the log message
 *
 * Fix: the message ended in a literal 'n' instead of "\n" and misspelled
 * "releases".
 */
static void release_read_lock(struct rw_semaphore *sem, char *name)
{
	printk(KERN_DEBUG "%s releases read lock\n", name);
	up_read(sem);
}
/*
 * Take @sem for writing, logging before the attempt and after it succeeds.
 *
 * @sem:  semaphore to write-lock
 * @name: label of the calling thread, used only in the log messages
 *
 * Fix: the log messages ended in a literal 'n' instead of "\n".
 */
static void get_write_lock(struct rw_semaphore *sem, char *name)
{
	printk(KERN_DEBUG "%s is waiting for write\n", name);
	down_write(sem);
	printk(KERN_DEBUG "%s got write lock\n", name);
}
/*
 * Log the release and drop the write lock on @sem.
 *
 * @sem:  semaphore currently write-locked by the caller
 * @name: label of the calling thread, used only in the log message
 *
 * Fix: the message ended in a literal 'n' instead of "\n" and misspelled
 * "releases".
 */
static void release_write_lock(struct rw_semaphore *sem, char *name)
{
	printk(KERN_DEBUG "%s releases write lock\n", name);
	up_write(sem);
}
/*
 * Thread function for a reader: waits 100 ms, takes the read lock on the
 * global sem, prints its count, holds the lock for 200 ms, then releases it.
 *
 * @data: NUL-terminated label for this thread, used in log messages
 *
 * Returns 0.  Returning from the thread function is used instead of calling
 * do_exit(): the kthread core exits the thread with the returned value, and
 * do_exit() is not available to modules on recent kernels.
 *
 * Fix: log messages ended in a literal 'n' instead of "\n".
 */
int reader(void *data)
{
	char *name = data;

	printk(KERN_DEBUG "%s is running\n", name);
	msleep(100);

	get_read_lock(&sem, name);
	print_sem(&sem);

	printk(KERN_DEBUG "%s is sleeping 200\n", name);
	msleep(200);

	release_read_lock(&sem, name);
	return 0;
}
/*
 * Thread function for a writer: waits 10 ms, takes the write lock on the
 * global sem, prints its count, holds the lock for 200 ms, prints the count
 * again (so changes made by tasks that queued up meanwhile are visible),
 * then releases the lock.
 *
 * @data: NUL-terminated label for this thread, used in log messages
 *
 * Returns 0.  Returning from the thread function is used instead of calling
 * do_exit(): the kthread core exits the thread with the returned value, and
 * do_exit() is not available to modules on recent kernels.
 *
 * Fix: log messages ended in a literal 'n' instead of "\n".
 */
int writer(void *data)
{
	char *name = data;

	printk(KERN_DEBUG "%s is running\n", name);
	msleep(10);

	get_write_lock(&sem, name);
	print_sem(&sem);

	printk(KERN_DEBUG "%s is sleeping 200\n", name);
	msleep(200);
	print_sem(&sem);

	release_write_lock(&sem, name);
	return 0;
}
/*
 * Create and start one test thread.
 *
 * @threadfn: thread entry point (reader or writer)
 * @data:     label string handed to @threadfn as its argument
 * @comm:     name given to the kernel thread
 *
 * Returns 0 on success, 1 if the thread could not be created.
 */
static int rwsem_test_spawn(int (*threadfn)(void *), void *data,
			    const char *comm)
{
	/* kthread_run() creates the thread and wakes it when creation succeeds. */
	struct task_struct *task = kthread_run(threadfn, data, "%s", comm);

	if (IS_ERR(task)) {
		printk(KERN_DEBUG "failed to create task\n");
		return 1;
	}
	return 0;
}

/*
 * Module entry point: initialises the global sem and starts two readers and
 * one writer, then a second writer 100 ms later.  Edit the spawn sequence to
 * set up a different contention scenario.
 *
 * Returns 0 on success, 1 if any thread could not be created.
 *
 * NOTE(review): a positive return does not follow the 0/-errno convention
 * for module init functions.  It is left unchanged because threads started
 * earlier may still be running module code, so failing the load would first
 * require stopping them - confirm the desired behaviour.
 *
 * Fix: log messages ended in a literal 'n' instead of "\n".
 */
static int hello_init(void)
{
	printk(KERN_ALERT "Hello, world\n");
	init_rwsem(&sem);

	if (rwsem_test_spawn(reader, "reader_1", "reader1") ||
	    rwsem_test_spawn(reader, "reader_2", "reader2") ||
	    rwsem_test_spawn(writer, "writer_1", "writer1"))
		return 1;

	/* Give the first three threads a head start before the second writer. */
	msleep(100);

	if (rwsem_test_spawn(writer, "writer_2", "writer2"))
		return 1;

	return 0;
}
/*
 * Module exit point: only logs a farewell message.
 *
 * NOTE(review): nothing here waits for the threads started at load time;
 * presumably the module must not be unloaded until they have finished -
 * confirm.
 *
 * Fix: the message ended in a literal 'n' instead of "\n".
 */
static void hello_exit(void)
{
	printk(KERN_ALERT "Goodbye, cruel world\n");
}
/* Register hello_init() and hello_exit() as the module's load and unload handlers. */
module_init(hello_init);
module_exit(hello_exit);
---来自腾讯云社区的---CD
微信扫一扫打赏
支付宝扫一扫打赏