当前位置:   article > 正文

使用memcpy函数的耗时测试(拷贝不同大小数据量耗时不同)_memcpy耗时

memcpy耗时

今天公司里的一个大神给我普及了一下知识,使用memcpy函数的耗时在拷贝不同大小数据的时候,速度是不一样的,于是我写了个程序测试了一下,具体如下:
目标:比较使用memcpy()拷贝1K、4K、16K、512K、2M、4M、8M、16M、128M、500M数据时的耗时。

主要代码如下(编译时会自动区分当前是什么系统):

#include "mainwindow.h"
#include "ui_mainwindow.h"

#include <cstring>
#include <new>
#include <vector>

#include <QDebug>
#if defined(Q_OS_LINUX)
#include "time.h"
#else
#include <windows.h>
class chronograph
{
public:
    chronograph()
    {
        QueryPerformanceFrequency(&m_freq);
        QueryPerformanceCounter(&m_bgn);
    }
    void start()
    {
        QueryPerformanceCounter(&m_bgn);
    }

double duration()
{
    QueryPerformanceCounter(&m_end);
    return (m_end.QuadPart - m_bgn.QuadPart) * 1000.0 / m_freq.QuadPart;
}
LARGE_INTEGER now()
{
    LARGE_INTEGER now;
    QueryPerformanceCounter(&now);
    return now;
}
double DoubleNow()
{
    LARGE_INTEGER now;
    QueryPerformanceCounter(&now);
    return now.QuadPart*1000.0 / m_freq.QuadPart;
}
private:
    LARGE_INTEGER m_freq;
    LARGE_INTEGER m_bgn;
    LARGE_INTEGER m_end;
};
#endif
MainWindow::MainWindow(QWidget *parent) :
    QMainWindow(parent),
    ui(new Ui::MainWindow)
{
    ui->setupUi(this);
    #if defined(Q_OS_LINUX)
    //在linux下测试memcpy的耗时
    double usetime1k,usetime4k,usetime16k,usetime512k,usetime2M,usetime4M,usetime8M,usetime16M,usetime128M,usetime500M;
    usetime1k = 0;
    usetime4k = 0;
    usetime16k = 0;
    usetime512k = 0;
    usetime2M = 0;
    usetime4M = 0;
    usetime8M = 0;
    usetime16M = 0;
    usetime128M = 0;
    usetime500M = 0;

char *cData1k = new char[1024];//1k
char *cData4k = new char[1024*4];//4k
char *cData16k = new char[1024*16];//16k
char *cData512k = new char[1024*512];//512k
char *cData2M = new char[1024*1024*2];//2M  //(char*)malloc(1024*1024*1024*2); //
char *cData4M = new char[1024*1024*4];//4M
char *cData8M = new char[1024*1024*8];//8M
char *cData16M = new char[1024*1024*16];//64M
char *cData128M = new char[1024*1024*128];//128M
char *cData500M = new char[1024*1024*500];//128M

char *cData1kCP = new char[1024];//1k
char *cData4kCP = new char[1024*4];//4k
char *cData16kCP = new char[1024*16];//16k
char *cData512kCP = new char[1024*512];//512k
char *cData2MCP = new char[1024*1024*2];//2M  //(char*)malloc(1024*1024*1024*2);//
char *cData4MCP = new char[1024*1024*4];//4M
char *cData8MCP = new char[1024*1024*8];//8M
char *cData16MCP = new char[1024*1024*16];//64M
char *cData128MCP = new char[1024*1024*128];//128M
char *cData500MCP = new char[1024*1024*500];//128M

memset(cData1kCP,1,1024);
memset(cData4kCP,1,1024*4);
memset(cData16kCP,1,1024*16);
memset(cData512kCP,1,1024*512);
memset(cData2MCP,1,1024*1024*2);
memset(cData4MCP,1,1024*1024*4);
memset(cData8MCP,1,1024*1024*8);
memset(cData16MCP,1,1024*1024*16);
memset(cData128MCP,1,1024*1024*128);
memset(cData500MCP,1,1024*1024*500);


struct timespec time1,time2;
for(int i = 0;i<100;i++)
{
    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData1k,cData1kCP,1024*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime1k += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData4k,cData4kCP,1024*4*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime4k += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData16k,cData16kCP,1024*16*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime16k += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData512k,cData512kCP,1024*512*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime512k += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms
    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData2M,cData2MCP,1024*1024*2*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime2M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData4M,cData4MCP,1024*1024*4*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime4M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData8M,cData8MCP,1024*1024*8*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime8M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData16M,cData16MCP,1024*1024*16*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime16M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData128M,cData128MCP,1024*1024*128*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime128M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms

    clock_gettime(CLOCK_MONOTONIC,&time1);//start time
    memcpy(cData500M,cData500MCP,1024*1024*500*sizeof(char));
    clock_gettime(CLOCK_MONOTONIC,&time2);//end time
    usetime500M += (time2.tv_sec-time1.tv_sec)*1000.0+(time2.tv_nsec-time1.tv_nsec)/1000000.0;//ms
}
qDebug()<<"memcpy 1k data usetime:"<<usetime1k/100<<"ms,speed = "<<usetime1k*10<<"um/1kbit";
qDebug()<<"memcpy 4k data usetime:"<<usetime4k/100<<"ms,speed = "<<usetime4k/4*10<<"um/1kbit";
qDebug()<<"memcpy 16k data usetime:"<<usetime16k/100<<"ms,speed = "<<usetime16k/16*10<<"um/1kbit";
qDebug()<<"memcpy 512k data usetime:"<<usetime512k/100<<"ms,speed = "<<usetime512k/512*10<<"um/1kbit";
qDebug()<<"memcpy 2M data usetime:"<<usetime2M/100<<"ms,speed = "<<usetime2M/1024/4*10<<"um/1kbit";
qDebug()<<"memcpy 4M data usetime:"<<usetime4M/100<<"ms,speed = "<<usetime4M/1024/4*10<<"um/1kbit";
qDebug()<<"memcpy 8M data usetime:"<<usetime8M/100<<"ms,speed = "<<usetime8M/1024/8*10<<"um/1kbit";
qDebug()<<"memcpy 16M data usetime:"<<usetime16M/100<<"ms,speed = "<<usetime16M/1024/16*10<<"um/1kbit";
qDebug()<<"memcpy 128M data usetime:"<<usetime128M/100<<"ms,speed = "<<usetime128M/1024/128*10<<"um/1kbit";
qDebug()<<"memcpy 500M data usetime:"<<usetime500M/100<<"ms,speed = "<<usetime500M/1024/500*10<<"um/1kbit";
#else
    //在windows下测试memcpy()的耗时
    chronograph calTime;
    double usetime1k,usetime4k,usetime16k,usetime512k,usetime2M,usetime4M,usetime8M,usetime16M,usetime128M,usetime500M;
    usetime1k = 0;
    usetime4k = 0;
    usetime16k = 0;
    usetime512k = 0;
    usetime2M = 0;
    usetime4M = 0;
    usetime8M = 0;
    usetime16M = 0;
    usetime128M = 0;
    usetime500M = 0;
char *cData1k = new char[1024];//1k
char *cData4k = new char[1024*4];//4k
char *cData16k = new char[1024*16];//16k
char *cData512k = new char[1024*512];//512k
char *cData2M = new char[1024*1024*2];//2M  //(char*)malloc(1024*1024*1024*2); //
char *cData4M = new char[1024*1024*4];//4M
char *cData8M = new char[1024*1024*8];//8M
char *cData16M = new char[1024*1024*16];//64M
char *cData128M = new char[1024*1024*128];//128M
char *cData500M = new char[1024*1024*500];//128M

char *cData1kCP = new char[1024];//1k
char *cData4kCP = new char[1024*4];//4k
char *cData16kCP = new char[1024*16];//16k
char *cData512kCP = new char[1024*512];//512k
char *cData2MCP = new char[1024*1024*2];//2M  //(char*)malloc(1024*1024*1024*2);//
char *cData4MCP = new char[1024*1024*4];//4M
char *cData8MCP = new char[1024*1024*8];//8M
char *cData16MCP = new char[1024*1024*16];//64M
char *cData128MCP = new char[1024*1024*128];//128M
char *cData500MCP = new char[1024*1024*500];//128M

memset(cData1kCP,1,1024);
memset(cData4kCP,1,1024*4);
memset(cData16kCP,1,1024*16);
memset(cData512kCP,1,1024*512);
memset(cData2MCP,1,1024*1024*2);
memset(cData4MCP,1,1024*1024*4);
memset(cData8MCP,1,1024*1024*8);
memset(cData16MCP,1,1024*1024*16);
memset(cData128MCP,1,1024*1024*128);
memset(cData500MCP,1,1024*1024*500);

for(int i = 0;i<100;i++)
{
    calTime.start();//start time
    memcpy(cData1k,cData1kCP,1024*sizeof(char));
    usetime1k += calTime.duration();

    calTime.start();//start time
    memcpy(cData4k,cData4kCP,1024*4*sizeof(char));
    usetime4k += calTime.duration();

    calTime.start();//start time
    memcpy(cData16k,cData16kCP,1024*16*sizeof(char));
    usetime16k += calTime.duration();

    calTime.start();//start time
    memcpy(cData512k,cData512kCP,1024*512*sizeof(char));
    usetime512k += calTime.duration();

    calTime.start();//start time
    memcpy(cData2M,cData2MCP,1024*1024*2*sizeof(char));
    usetime2M += calTime.duration();

    calTime.start();//start time
    memcpy(cData4M,cData4MCP,1024*1024*4*sizeof(char));
    usetime4M += calTime.duration();

    calTime.start();//start time
    memcpy(cData8M,cData8MCP,1024*1024*8*sizeof(char));
    usetime8M += calTime.duration();

    calTime.start();//start time
    memcpy(cData16M,cData16MCP,1024*1024*16*sizeof(char));
    usetime16M += calTime.duration();

    calTime.start();//start time
    memcpy(cData128M,cData128MCP,1024*1024*128*sizeof(char));
    usetime128M += calTime.duration();

    calTime.start();//start time
    memcpy(cData500M,cData500MCP,1024*1024*500*sizeof(char));
    usetime500M += calTime.duration();
}
qDebug()<<"memcpy 1k data usetime:"<<usetime1k/100<<"ms,speed = "<<usetime1k*10<<"um/1kbit";
qDebug()<<"memcpy 4k data usetime:"<<usetime4k/100<<"ms,speed = "<<usetime4k/4*10<<"um/1kbit";
qDebug()<<"memcpy 16k data usetime:"<<usetime16k/100<<"ms,speed = "<<usetime16k/16*10<<"um/1kbit";
qDebug()<<"memcpy 512k data usetime:"<<usetime512k/100<<"ms,speed = "<<usetime512k/512*10<<"um/1kbit";
qDebug()<<"memcpy 2M data usetime:"<<usetime2M/100<<"ms,speed = "<<usetime2M/1024/4*10<<"um/1kbit";
qDebug()<<"memcpy 4M data usetime:"<<usetime4M/100<<"ms,speed = "<<usetime4M/1024/4*10<<"um/1kbit";
qDebug()<<"memcpy 8M data usetime:"<<usetime8M/100<<"ms,speed = "<<usetime8M/1024/8*10<<"um/1kbit";
qDebug()<<"memcpy 16M data usetime:"<<usetime16M/100<<"ms,speed = "<<usetime16M/1024/16*10<<"um/1kbit";
qDebug()<<"memcpy 128M data usetime:"<<usetime128M/100<<"ms,speed = "<<usetime128M/1024/128*10<<"um/1kbit";
qDebug()<<"memcpy 500M data usetime:"<<usetime500M/100<<"ms,speed = "<<usetime500M/1024/500*10<<"um/1kbit";
#endif
}

/// Destroys the window and frees the UI object created in the constructor.
MainWindow::~MainWindow()
{
    delete this->ui;
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 206
  • 207
  • 208
  • 209
  • 210
  • 211
  • 212
  • 213
  • 214
  • 215
  • 216
  • 217
  • 218
  • 219
  • 220
  • 221
  • 222
  • 223
  • 224
  • 225
  • 226
  • 227
  • 228
  • 229
  • 230
  • 231
  • 232
  • 233
  • 234
  • 235
  • 236
  • 237
  • 238
  • 239
  • 240
  • 241
  • 242
  • 243
  • 244
  • 245
  • 246
  • 247
  • 248
  • 249
  • 250
  • 251
  • 252
  • 253
  • 254
  • 255
  • 256
  • 257
  • 258
  • 259
  • 260
  • 261
  • 262
  • 263
  • 264

在Tx2设备上运行结果如下:在这里插入图片描述

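可以看到,拷贝2M数据时速度达到峰值。为什么拷贝2M数据速度最快呢?作者给出的解释是:Tx2设备处理器的二级缓存正好是2M,参数如下图。需要注意的是,得出上述结果的程序在计算2M的单位耗时时,除以的是4M对应的KB数(1024*4),而不是2M对应的KB数(1024*2),因此2M的单位耗时被低估了一半;这一“峰值”应在修正该计算后重新测量确认。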
在这里插入图片描述

在Thinkpad T570上运行的结果如下:
在这里插入图片描述
可以看到,同样是在拷贝2M数据的时候速度最快。该机CPU为i7-7500U,共有三级缓存,其中三级缓存大小为4M。

声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号