- 论坛徽章:
- 24
|
类似下面这种情况，如果不想在一个函数里有这么多的条件编译，应该如何优化？
- inline void SyncedMemory::to_cpu() {  // Prepares cpu_ptr_ according to the current head_ state.
- switch (head_) {
- case UNINITIALIZED: {  // No buffer yet: allocate a host buffer and initialise it.
- CaffeMallocHost(&cpu_ptr_, size_, device_);
- caffe_memset(size_, 0, cpu_ptr_);  // presumably zero-fills size_ bytes -- helper is defined elsewhere
- head_ = HEAD_AT_CPU;
- own_cpu_data_ = true;  // Buffer was allocated here, so this object is flagged as its owner.
- break;
- }
- case HEAD_AT_GPU: {  // Device copy is the current one: transfer it into cpu_ptr_.
- #ifndef CPU_ONLY
- if (cpu_ptr_ == nullptr) {  // Allocate the host buffer on first use.
- CaffeMallocHost(&cpu_ptr_, size_, device_);
- own_cpu_data_ = true;
- #ifdef USE_GREENTEA
- CHECK_EQ(own_zero_copy_data_, false)  // A zero-copy buffer must not reach this allocation path.
- << "Allocate host memory for a zero copy buffer.";
- #endif  // USE_GREENTEA
- }
- if (device_->backend() == Backend::BACKEND_CUDA) {
- #ifdef USE_CUDA
- caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);  // Copy from gpu_ptr_ to cpu_ptr_.
- #endif // USE_CUDA
- } else {  // Any non-CUDA backend takes the OpenCL (GreenTea) path below.
- #ifdef USE_GREENTEA
- viennacl::ocl::context &ctx = viennacl::ocl::get_context(
- device_->id());
- if (!own_zero_copy_data_) {  // Regular buffer: explicit copy from the cl_mem into cpu_ptr_.
- greentea_gpu_memcpy(size_, (cl_mem) gpu_ptr_, 0, cpu_ptr_, &ctx);
- } else {  // Zero-copy buffer: map it, verify the mapping, then unmap again.
- void *mapped_ptr = clEnqueueMapBuffer(ctx.get_queue().handle().get(),
- (cl_mem) gpu_ptr_,
- true,  // blocking_map argument of clEnqueueMapBuffer
- CL_MAP_READ | CL_MAP_WRITE,
- 0, size_, 0, NULL, NULL, NULL);  // offset 0, size_ bytes, no wait list, no event, no errcode_ret
- CHECK_EQ(mapped_ptr, cpu_ptr_)  // The mapping is expected to return the existing host pointer.
- << "Device claims it support zero copy"
- << " but failed to create correct user ptr buffer";
- clEnqueueUnmapMemObject(ctx.get_queue().handle().get(),
- (cl_mem) gpu_ptr_,
- mapped_ptr, 0, NULL, NULL);
- }
- ctx.get_queue().finish();  // Wait for the queued commands to complete before continuing.
- #endif  // USE_GREENTEA
- }
- head_ = SYNCED;  // NOTE(review): also reached when the selected backend's copy code was compiled out (e.g. CUDA backend without USE_CUDA) -- confirm this is intended
- #else
- NO_GPU;  // presumably reports that GPU support is not compiled in -- macro is defined elsewhere
- #endif // !CPU_ONLY
- break;
- }
- case HEAD_AT_CPU:  // These two states need no work here.
- case SYNCED:
- break;
- }
- }
复制代码
|
|