- 论坛徽章:
- 0
|
下面的测试程序中,两个结构体(struct buf 和 struct buf_align)的性能表现不一样:
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <sys/time.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#ifndef __aligned
#define __aligned(x) __attribute__((aligned(x)))
#endif
#define cache_line_size 64
下面这两个结构体的性能表现不一样(二者只有数组 buf 的长度不同):
/*
 * Per-thread working set whose filler array spans three cache lines.
 *
 * The timed loops only write buf1 and buf2; buf and buf3 merely take up
 * space. With cache_line_size == 64 the members add up to
 * 20 + 20 + 192 + 20 = 252 bytes, which is not a multiple of 64, so
 * consecutive elements of an array of this struct do not all start at the
 * same offset within a cache line.
 */
struct buf {
    char buf1[20];                   /* written by the timed loop */
    char buf2[20];                   /* written by the timed loop */
    char buf[cache_line_size * 3];   /* filler: three cache lines */
    char buf3[20];                   /* trailing filler, never accessed */
};
/*
 * Variant of struct buf whose filler array spans four cache lines.
 *
 * As with struct buf, only buf1 and buf2 are written by the timed loops.
 * With cache_line_size == 64 the members add up to
 * 20 + 20 + 256 + 20 = 316 bytes. Despite the name, no alignment attribute
 * is applied to this type, and 316 is not a multiple of 64 either.
 */
struct buf_align {
    char buf1[20];                   /* written by the timed loop */
    char buf2[20];                   /* written by the timed loop */
    char buf[cache_line_size * 4];   /* filler: four cache lines */
    char buf3[20];                   /* trailing filler, never accessed */
};
/*
 * Benchmark targets. Each worker thread indexes both arrays with the CPU
 * number it receives, so at most three workers (indices 0..2) are supported.
 */
struct buf data[3];
struct buf_align data_align[3];
/*
 * Number of iterations of each timed loop in thread().
 * NOTE(review): this object-like macro has the same name as the standard
 * time() function; any later use of time(...) in this file would be
 * rewritten by the preprocessor. Consider renaming it together with its
 * users.
 */
#define time 100000000
/*
 * Microseconds elapsed between two gettimeofday() samples.
 * The arithmetic is done in long long so the result can be printed with one
 * known format regardless of how wide time_t / suseconds_t are.
 */
static long long elapsed_usec(const struct timeval *begin,
                              const struct timeval *end)
{
    return (long long)(end->tv_sec - begin->tv_sec) * 1000000LL
           + (long long)(end->tv_usec - begin->tv_usec);
}

/*
 * Worker thread: pins itself to one CPU, reports the resulting affinity,
 * then times two identical write loops -- one on data[] and one on
 * data_align[] -- and prints both durations in microseconds.
 *
 * cpu: points to an int holding the CPU number to pin to. The same number
 *      selects the element of data[] / data_align[] this thread writes.
 *
 * Returns NULL in all cases; results and errors are reported on stdout.
 * The void *(void *) signature is the one pthread_create() calls through.
 */
void *thread(void *cpu)
{
    const int tmp_cpu = *(const int *)cpu;
    struct timeval tv_begin, tv_end;
    int i;
    int rc;

    /* The CPU number doubles as an array index, so reject anything that
     * would write outside data[] or data_align[]. */
    if (tmp_cpu < 0
        || (size_t)tmp_cpu >= sizeof data / sizeof data[0]
        || (size_t)tmp_cpu >= sizeof data_align / sizeof data_align[0]) {
        printf("thread index %d is out of range\n", tmp_cpu);
        return NULL;
    }

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(tmp_cpu, &mask);
    /* The pthread affinity calls return 0 on success and a positive error
     * number on failure; they never return a negative value. */
    rc = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
    if (rc != 0) {
        printf("set thread affinity failed (error %d)\n", rc);
    }

    cpu_set_t get;
    CPU_ZERO(&get);
    rc = pthread_getaffinity_np(pthread_self(), sizeof(get), &get);
    if (rc != 0) {
        printf("get thread affinity failed (error %d)\n", rc);
    } else {
        /* Report every CPU in the effective mask, not just the first four. */
        for (int j = 0; j < CPU_SETSIZE; j++) {
            if (CPU_ISSET(j, &get)) {
                printf("thread%d is core%d\n", tmp_cpu, j);
            }
        }
    }

    /* Timed loop on the struct buf element. strncpy copies exactly 20
     * bytes into a 20-byte field (no terminator is stored); the field is
     * then cleared again. */
    gettimeofday(&tv_begin, NULL);
    for (i = 0; i < time; i++) {
        strncpy((data[tmp_cpu].buf1), "01234567890123456789", 20);
        memset(&(data[tmp_cpu].buf1), 0, 20);
        strncpy((data[tmp_cpu].buf2), "01234567890123456789", 20);
        memset(&(data[tmp_cpu].buf2), 0, 20);
    }
    gettimeofday(&tv_end, NULL);
    printf("time:%lld\n", elapsed_usec(&tv_begin, &tv_end));

    /* Same workload on the struct buf_align element. */
    gettimeofday(&tv_begin, NULL);
    for (i = 0; i < time; i++) {
        strncpy((data_align[tmp_cpu].buf1), "01234567890123456789", 20);
        memset(&(data_align[tmp_cpu].buf1), 0, 20);
        strncpy((data_align[tmp_cpu].buf2), "01234567890123456789", 20);
        memset(&(data_align[tmp_cpu].buf2), 0, 20);
    }
    gettimeofday(&tv_end, NULL);
    printf("align time:%lld\n", elapsed_usec(&tv_begin, &tv_end));

    return NULL;
}
/*
 * Number of worker threads main() starts. Thread i receives the value i,
 * which it uses both as the CPU to pin to and as its index into data[] and
 * data_align[]; those arrays and main()'s thread tables hold 3 entries.
 */
int cpu_num = 1;
int main(void)
{
pthread_t tid[3];
int tmp[3];
int i,ret;
memset(&data, 0, sizeof(struct buf)*3);
printf("size %d, align_size:%d\n",sizeof(struct buf),sizeof(struct buf_align));
for(i = 0; i < cpu_num; i++){
tmp[i] = i;
ret=pthread_create(&tid[i],NULL,(void *) thread, &tmp[i]);
if(ret!=0){
printf ("Create pthread error!\n");
return 1;
}
}
for(i = 0; i < cpu_num; ++i)
{
pthread_join(tid[i],NULL);
}
return 0;
}
编译:gcc wlong.c -lpthread -o no
[dev:~/cache/wlong]$ ./no
size 252, align_size:316
thread0 is core0
time:672585
align time:620361
实际上,测试中只读写了两个结构体的 buf1 和 buf2,为什么后面数组 buf 的长度会对性能产生影响?请大神指导!
|
|