赞
踩
我发布在matlab中央,但没有得到任何反应,所以我想我会转发在这里。
我最近在Matlab中写了一个简单的例程,在for循环中使用FFT; FFT主导计算。我在mex中写了相同的例程只是为了实验目的,它调用FFTW 3.3库。结果是,对于非常大的数组,matlab例程比mex例程运行得快(约两倍快)。 mex例程使用智慧并执行相同的FFT计算。我也知道matlab使用FFTW,但是它们的版本是可能稍微更优化吗?我甚至使用FFTW_EXHAUSTIVE标志,它仍然大约两倍的大阵列比MATLAB对手慢。此外,我确保我使用的matlab是单线程与“-singleCompThread”标志和我使用的mex文件不是在调试模式。只是好奇,如果这是case – 或者如果有一些优化matlab是使用under the hood,我不知道。谢谢。
这里是mex部分:
void class_cg_toeplitz::analysis() {
// This method computes CG iterations using FFTs
// Check for wisdom
if(fftw_import_wisdom_from_filename("cd.wis") == 0) {
mexPrintf("wisdom not loaded.\n");
} else {
mexPrintf("wisdom loaded.\n");
}
// Set FFTW Plan - use interleaved FFTW
fftw_plan plan_forward_d_buffer;
fftw_plan plan_forward_A_vec;
fftw_plan plan_backward_Ad_buffer;
fftw_complex *A_vec_fft;
fftw_complex *d_buffer_fft;
A_vec_fft = fftw_alloc_complex(n);
d_buffer_fft = fftw_alloc_complex(n);
// CREATE MASTER PLAN - Do this on an empty vector as creating a plane
// with FFTW_MEASURE will erase the contents;
// Use d_buffer
// This is somewhat dangerous because Ad_buffer is a vector; but it does not
// get resized so &Ad_buffer[0] should work
plan_forward_d_buffer = fftw_plan_dft_r2c_1d(d_buffer.size(),&d_buffer[0],d_buffer_fft,FFTW_EXHAUSTIVE);
plan_forward_A_vec = fftw_plan_dft_r2c_1d(A_vec.height,A_vec.value,A_vec_fft,FFTW_WISDOM_ONLY);
// A_vec_fft.*d_buffer_fft will overwrite d_buffer_fft
plan_backward_Ad_buffer = fftw_plan_dft_c2r_1d(Ad_buffer.size(),d_buffer_fft,&Ad_buffer[0],FFTW_EXHAUSTIVE);
// Get A_vec_fft
fftw_execute(plan_forward_A_vec);
// Find initial direction - this is the initial residual
for (int i=0;i
d_buffer[i] = b.value[i];
r_buffer[i] = b.value[i];
}
// Start CG iterations
norm_ro = norm(r_buffer);
double fft_reduction = (double)Ad_buffer.size(); // Must divide by size of vector because inverse FFT does not do this
while (norm(r_buffer)/norm_ro > relativeresidual_cutoff) {
// Find Ad - use fft
fftw_execute(plan_forward_d_buffer);
// Get A_vec_fft.*fft(d) - A_vec_fft is only real, but d_buffer_fft
// has complex elements; Overwrite d_buffer_fft
for (int i=0;i
d_buffer_fft[i][0] = d_buffer_fft[i][0]*A_vec_fft[i][0]/fft_reduction;
d_buffer_fft[i][1] = d_buffer_fft[i][1]*A_vec_fft[i][0]/fft_reduction;
}
fftw_execute(plan_backward_Ad_buffer);
// Calculate r'*r
rtr_buffer = 0;
for (int i=0;i
rtr_buffer = rtr_buffer + r_buffer[i]*r_buffer[i];
}
// Calculate alpha
alpha = 0;
for (int i=0;i
alpha = alpha + d_buffer[i]*Ad_buffer[i];
}
alpha = rtr_buffer/alpha;
// Calculate new x
for (int i=0;i
x[i] = x[i] + alpha*d_buffer[i];
}
// Calculate new residual
for (int i=0;i
r_buffer[i] = r_buffer[i] - alpha*Ad_buffer[i];
}
// Calculate beta
beta = 0;
for (int i=0;i
beta = beta + r_buffer[i]*r_buffer[i];
}
beta = beta/rtr_buffer;
// Calculate new direction vector
for (int i=0;i
d_buffer[i] = r_buffer[i] + beta*d_buffer[i];
}
*total_counter = *total_counter+1;
if(*total_counter >= iteration_cutoff) {
// Set total_counter to -1, this indicates failure
*total_counter = -1;
break;
}
}
// Store Wisdom
fftw_export_wisdom_to_filename("cd.wis");
// Free fft alloc'd memory and plans
fftw_destroy_plan(plan_forward_d_buffer);
fftw_destroy_plan(plan_forward_A_vec);
fftw_destroy_plan(plan_backward_Ad_buffer);
fftw_free(A_vec_fft);
fftw_free(d_buffer_fft);
};
这里是matlab部分:
% Take FFT of A_vec.
A_vec_fft = fft(A_vec); % Take fft once
% Find initial direction - this is the initial residual
x = zeros(n,1); % search direction
r = zeros(n,1); % residual
d = zeros(n+(n-2),1); % search direction; pad to allow FFT
for i = 1:n
d(i) = b(i);
r(i) = b(i);
end
% Enter CG iterations
total_counter = 0;
rtr_buffer = 0;
alpha = 0;
beta = 0;
Ad_buffer = zeros(n+(n-2),1); % This holds the product of A*d - calculate this once per iteration and using FFT; only 1:n is used
norm_ro = norm(r);
while(norm(r)/norm_ro > 10^-6)
% Find Ad - use fft
Ad_buffer = ifft(A_vec_fft.*fft(d));
% Calculate rtr_buffer
rtr_buffer = r'*r;
% Calculate alpha
alpha = rtr_buffer/(d(1:n)'*Ad_buffer(1:n));
% Calculate new x
x = x + alpha*d(1:n);
% Calculate new residual
r = r - alpha*Ad_buffer(1:n);
% Calculate beta
beta = r'*r/(rtr_buffer);
% Calculate new direction vector
d(1:n) = r + beta*d(1:n);
% Update counter
total_counter = total_counter+1;
end
在时间方面,对于N = 50000和b = 1:n,mex需要大约10.5秒,使用matlab为4.4秒。我使用R2011b。谢谢
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。