88 lines
2.4 KiB
C++
88 lines
2.4 KiB
C++
|
|
/*
|
||
|
|
* Copyright 2011-2021 Blender Foundation
|
||
|
|
*
|
||
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
* you may not use this file except in compliance with the License.
|
||
|
|
* You may obtain a copy of the License at
|
||
|
|
*
|
||
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
*
|
||
|
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
* See the License for the specific language governing permissions and
|
||
|
|
* limitations under the License.
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include "device/device_queue.h"
|
||
|
|
|
||
|
|
#include "util/util_algorithm.h"
|
||
|
|
#include "util/util_logging.h"
|
||
|
|
#include "util/util_time.h"
|
||
|
|
|
||
|
|
#include <iomanip>
|
||
|
|
|
||
|
|
CCL_NAMESPACE_BEGIN
|
||
|
|
|
||
|
|
DeviceQueue::DeviceQueue(Device *device)
|
||
|
|
: device(device), last_kernels_enqueued_(0), last_sync_time_(0.0)
|
||
|
|
{
|
||
|
|
DCHECK_NE(device, nullptr);
|
||
|
|
}
|
||
|
|
|
||
|
|
DeviceQueue::~DeviceQueue()
|
||
|
|
{
|
||
|
|
if (VLOG_IS_ON(3)) {
|
||
|
|
/* Print kernel execution times sorted by time. */
|
||
|
|
vector<pair<DeviceKernelMask, double>> stats_sorted;
|
||
|
|
for (const auto &stat : stats_kernel_time_) {
|
||
|
|
stats_sorted.push_back(stat);
|
||
|
|
}
|
||
|
|
|
||
|
|
sort(stats_sorted.begin(),
|
||
|
|
stats_sorted.end(),
|
||
|
|
[](const pair<DeviceKernelMask, double> &a, const pair<DeviceKernelMask, double> &b) {
|
||
|
|
return a.second > b.second;
|
||
|
|
});
|
||
|
|
|
||
|
|
VLOG(3) << "GPU queue stats:";
|
||
|
|
for (const auto &[mask, time] : stats_sorted) {
|
||
|
|
VLOG(3) << " " << std::setfill(' ') << std::setw(10) << std::fixed << std::setprecision(5)
|
||
|
|
<< std::right << time << "s: " << device_kernel_mask_as_string(mask);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void DeviceQueue::debug_init_execution()
|
||
|
|
{
|
||
|
|
if (VLOG_IS_ON(3)) {
|
||
|
|
last_sync_time_ = time_dt();
|
||
|
|
last_kernels_enqueued_ = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
|
||
|
|
{
|
||
|
|
if (VLOG_IS_ON(3)) {
|
||
|
|
VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
|
||
|
|
<< work_size;
|
||
|
|
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void DeviceQueue::debug_synchronize()
|
||
|
|
{
|
||
|
|
if (VLOG_IS_ON(3)) {
|
||
|
|
const double new_time = time_dt();
|
||
|
|
const double elapsed_time = new_time - last_sync_time_;
|
||
|
|
VLOG(4) << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s";
|
||
|
|
|
||
|
|
stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
|
||
|
|
|
||
|
|
last_sync_time_ = new_time;
|
||
|
|
last_kernels_enqueued_ = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
CCL_NAMESPACE_END
|