- 论坛徽章:
- 11
|
测试了高并发、低并发下几种锁的性能差异:
- #define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "lkf.h"
- struct timespec base; /* reference instant: written by timer_init(), subtracted from the current time in now() */
- int timer_init(){
- int n = clock_gettime(CLOCK_MONOTONIC, &base);
- if(0 != n){
- return -1;
- }
- return 0;
- }
- unsigned int now(){
- struct timespec tp;
- int n = clock_gettime(CLOCK_MONOTONIC, &tp);
- if(0 != n){
- assert(0);
- }
- uint64_t ms = (tp.tv_sec - base.tv_sec) * 1000 + 1e-6 * (tp.tv_nsec - base.tv_nsec);
- return (unsigned int) ms;
- }
- static LKF_LIST(head); /* node list shared by all threads: filled in main(), drained and refilled in thread_func(); LKF_LIST is presumably provided by lkf.h -- confirm */
- struct ctx { /* per-node payload wrapper; main() allocates one per list entry */
- struct lkf_node entry; /* list linkage; &entry is what gets put on `head` */
- int n; /* not read or written anywhere in this listing */
- };
- pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; /* wraps lkf_node_get_one() in the currently disabled #else branch of thread_func() */
/*
 * Yielding lock: try to flip *lkp from 0 to 1 with a compare-and-swap;
 * after every failed attempt call sched_yield() and try again.
 */
#define lock1(lkp) do{ \
    for (;;) { \
        if (__sync_bool_compare_and_swap(lkp, 0, 1)) { \
            break; \
        } \
        sched_yield(); \
    } \
}while(0)
/*
 * Spin lock: busy-wait until a compare-and-swap flips *lkp from 0 to 1.
 * The swap succeeded exactly when the previous value it returns was 0.
 */
#define lock2(lkp) do{ \
    while (__sync_val_compare_and_swap(lkp, 0, 1) != 0) { \
        /* keep spinning */ \
    } \
}while(0)
/*
 * Release a lock word taken with lock1()/lock2() by storing 0 into *lkp.
 *
 * __sync_lock_release() performs the store with release semantics, so writes
 * made inside the critical section cannot be moved past the unlock by the
 * compiler or the CPU. A plain `*(lkp) = 0` gives no such guarantee.
 */
#define unlock(lkp) do{ \
    __sync_lock_release(lkp); \
}while(0)
- int lck = 0; /* lock word for lock1()/lock2()/unlock(): 0 = free, 1 = held; only referenced from commented-out lines in thread_func() */
- void* thread_func(void* any)
- {
- int n2 = 0;
- int n3 = 0;
- unsigned t1, t2 = now();
- while (1) {
- struct ctx* ctx = NULL;
- #if 1
- struct lkf_node* node = lkf_node_get(&head);
- if (node == NULL) {
- __sync_add_and_fetch(&n2, 1);
- continue;
- }
- struct lkf_node* nd = NULL;
- do {
- nd = lkf_node_next(node);
- if (nd == NULL) {
- continue;
- }
- __sync_add_and_fetch(&n3, 1);
- lkf_node_put(&head, nd);
- if (n3 % 10000 == 0) {
- t1 = now();
- printf("%d, %d \n", n2, t1 - t2);
- t2 = t1;
- }
- } while (nd != node);
- #else
- //lock1(&lck);
- // lock2(&lck);
- pthread_mutex_lock(&mutex);
- struct lkf_node* node = lkf_node_get_one(&head);
- pthread_mutex_unlock(&mutex);
- //unlock(&lck);
- if (node == NULL) {
- __sync_add_and_fetch(&n2, 1);
- continue;
- }
- struct lkf_node* nd = node;
- __sync_add_and_fetch(&n3, 1);
- lkf_node_put(&head, nd);
- if (n3 % 10000 == 0) {
- t1 = now();
- printf("%d, %d \n", n2, t1 - t2);
- t2 = t1;
- }
- #endif
- n2 = 0;
- }
- return NULL;
- }
- int main(void)
- {
- timer_init();
- for (int i = 0; i < 2000; ++i) {
- struct ctx* ctx = malloc(sizeof(struct ctx));
- ctx->entry.next = NULL;
- lkf_node_put(&head, &ctx->entry);
- }
- pthread_t tid;
- for (int i = 0; i < 8; ++i) {
- pthread_create(&tid, NULL, thread_func, NULL);
- }
- pause();
- return 0;
- }
得到数据如下:
800 threads:
lock less batch 2,30 seconds
lock less single 80.x --- 100.x seconds
spin lock infinite
yield lock 1.x seconds
mutex 5.x seconds
4 threads:
lockless batch 3ms
lock less single 12ms
yield lock 11ms
spin lock 16ms
mutex 3,40 ms
8 threads:
lockless batch 20% 15ms, 80% 5ms
lock less single 20% 140s, 80% 60ms
spin lock 3% 120ms, 97% 50ms
yield lock 20ms
mutex 60ms
机器是 azure 上的一台 a3 4 核 的虚拟机
无锁使用的是之前我发的那份
基本结论如下:
线程数小于或接近 CPU 个数的情况下, 我自己的那个无锁链表批取模式性能最高; 单取性能还算可以, 比 sched_yield 锁稍差一点, 比 spin lock 稍高一点, 但到了 8 线程, 就已经被 spin lock 超越了
高并发下, 还是 同步锁性能最高, spin lock 是找死, 用无锁也是脑残的行为
在任何负载下, 通过 CAS 加轻量的 sched_yield 实现的 yield 锁性能都高于 mutex; futex 没有测试, 估计性能位于 yield 锁和 mutex 之间
mutex 基本上似乎都表现中庸, 似乎在任何情况下, 都是第二选择
|
|