futex_queues做为一个桥梁作用,glibc系统调用进入内核时,第一件事情就是通过它来找到内核的的rt_mutex对象;利用当前task_struct->mm->mmap_sem和用户态lock字段的地址生成key,用此key哈希到此变量数组的某个成员,从而建立起二者的联系。
数据结构查找关系:
futex_lock_pi():
1.uaddr(即用户态lock字段的地址), mmap_sem->key;
2.栈上分配futex_q, 将futex_q挂入futex_queues[hash(key)]中;
3.futex_q->futex_pi_state->rt_mutex,查找或分配futex_pi_state进而获得rt_mutex;
4.栈上分配rt_mutex_waiter,将rt_mutex_waiter挂入当前task和步骤3的rt_mutex中。
futex_unlock_pi():
1.key->futex_queues[hash(key)]->futex_q->futex_pi_state->rt_mutex->rt_mutex_waiter->task,从而找到等待唤醒的任务。
数据结构关系图:
数据结构描述(同种颜色相互对应):
struct task_struct {
spinlock_t pi_lock;
struct plist_head pi_waiters; /* 只链入某rtmutex中优先级最高的rt_mutex_waiter */
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
}
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
struct futex_hash_bucket {
spinlock_t lock;
struct plist_head chain;
};
struct futex_q {
struct plist_node list; /* 根据当前线程的nomal_prio值链入 futex_hash_bucket
的chain中:plist_add(&q->list, &hb->chain); */
wait_queue_head_t waiters;
spinlock_t *lock_ptr; /* 指向futex_hash_bucket中的lock */
union futex_key key; /* 选择futex_hash_bucket */
/* For fd, sigio sent using these: */
int fd;
struct file *filp;
/* Optional priority inheritance state: */
struct futex_pi_state *pi_state; /*指向task_struct中第一次预先分配好的pi_state_cache */
struct task_struct *task;
struct rt_mutex_waiter waiter;
};
struct futex_pi_state {
struct list_head list; /* 链入task_struct中的pi_state_list */
struct rt_mutex pi_mutex;
struct task_struct *owner; /* 锁的当前拥有者 */
atomic_t refcount;
union futex_key key; /* 拷贝futex_q中的 key */
};
struct rt_mutex {
spinlock_t wait_lock;
struct plist_head wait_list; /* 所有的该锁的rt_mutex_waiter */
struct task_struct *owner;
}
struct rt_mutex_waiter {
struct plist_node list_entry; /* 链入rt_mutex中的wait_list */
struct plist_node pi_list_entry; /* 链入task_struct中的pi_waiters */
struct task_struct *task;
struct rt_mutex *lock;
}
struct rt_mutex锁的owner的低2位作为如下标志使用:
#define RT_MUTEX_OWNER_PENDING 1UL /* 唤醒时会将实时锁置上此标志,
用于try_to_steal_lock() */
#define RT_MUTEX_HAS_WAITERS 2UL /* 有线程等待实时锁置上此标志 */
#define RT_MUTEX_OWNER_MASKALL 3UL
futex_lock_pi()流程:
栈上分配struct futex_q q;
分配当前线程的pi_state_cache内存;
根据glibc中的用户态futex锁uaddr的地址和task_struct->mm-> mmap_sem生成q. key;
根据key找到hash_bucket, 初始化q.lock_ptr,并上锁;
如果锁的拥有都为当前线程,释放q.lock_ptr锁后退出;
将uaddr置位FUTEX_WAITERS;
正在持有futex锁uaddr的线程p,第一次会以uaddr&0x0fff_ffff为pid找到;
lookup_pi_state():遍历hash_bucket-> chain,根据q.key值找到相应的pi_state;如果找不到则返回之前分配好的当前线程的pi_state_cache,并初始化pi_state:拷贝q.key,owner指向线程p,初始化pi_mutex(初始化wait_lock,将owner指向线程p),将list链入线程p的pi_state_list(注pi_state始终链入锁的拥有者线程中);
__queue_me():初始化q,将q.list链入hash_bucket->chain中,q.task指向当前线程,并释放q.lock_ptr锁;
如果是高优先级的线程则可以偷锁,偷锁成功后会调用fixup_pi_state_owner()对pi_state以及uaddr的值进行修正;
rt_mutex_timed_lock()->rt_mutex_slowlock()->task_blocks_on_rt_mutex(lock, &waiter, detect):
其中waiter为栈上分配的rt_mutex_waiter结构;设定当前线程为TASK_INTERRUPTIBLE状态;初始化waiter,task指向当前线程,lock指向当前rt_mutex锁,并使用当前线程的prio初始化两个plist_node结构;waiter->list_entry链入lock->wait_list, waiter->pi_list_entry链入owner ->pi_waiters(owner为拥有锁的线程即p);当前线程的pi_blocked_on指向waiter(用于检测死锁和PI处理,会在被唤醒时检查并置NULL),最后__rt_mutex_adjust_prio(owner)进行优先级继承操作,如果存在锁链(一个低优先级rtmutex1的拥有者会继承高优先级的waiter的优先级, 谓之PI;如果此线程阻塞在另外的rtmutex2,这个优先级会传播到rtmutex2的拥有者,依次类推传播,默认允许传播1024个;此种情形暂称之为存在锁链),则调用rt_mutex_adjust_prio_chain()进行优先级的传播;涉及到的锁为:lock->wait_lock和task->pi_lock;
schedule();/* unlock pi时会调用wakeup_next_waiter()唤醒此线程 */
正常流程调用try_to_steal_lock()成功返回:当成功后清除RT_MUTEX_OWNER_PENDING标志。