deps_builder: fix deadlock in idiff
idiff sometimes locks up while shutting down when the CPU is oversubscribed. While Blender does not rely on the idiff tool, the tests that run in the CI environment do, which causes tests to occasionally fail due to a timeout. The root cause is a bit complex, but it is described on the OIIO tracker at https://github.com/OpenImageIO/oiio/issues/3851. This change fixes idiff by: 1. Shutting down the thread pool before the main function exits. 2. Having the shutdown wait for the pool threads to actually join, to prevent the OS from forcefully terminating them while they could still be holding a lock.
This commit is contained in:
@@ -106,7 +106,8 @@ ExternalProject_Add(external_openimageio
|
||||
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
|
||||
PREFIX ${BUILD_DIR}/openimageio
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/openimageio.diff &&
|
||||
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_3832.diff
|
||||
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_3832.diff &&
|
||||
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_deadlock.diff
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openimageio ${DEFAULT_CMAKE_FLAGS} ${OPENIMAGEIO_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/openimageio
|
||||
)
|
||||
|
||||
62
build_files/build_environment/patches/oiio_deadlock.diff
Normal file
62
build_files/build_environment/patches/oiio_deadlock.diff
Normal file
@@ -0,0 +1,62 @@
|
||||
diff -Naur orig/src/idiff/idiff.cpp external_openimageio/src/idiff/idiff.cpp
|
||||
--- orig/src/idiff/idiff.cpp 2023-06-07 07:47:42 -0600
|
||||
+++ external_openimageio/src/idiff/idiff.cpp 2023-06-07 09:46:47 -0600
|
||||
@@ -399,5 +399,6 @@
|
||||
|
||||
imagecache->invalidate_all(true);
|
||||
ImageCache::destroy(imagecache);
|
||||
+ default_thread_pool()->resize(0);
|
||||
return ret;
|
||||
}
|
||||
diff -Naur orig/src/libutil/thread.cpp external_openimageio/src/libutil/thread.cpp
|
||||
--- orig/src/libutil/thread.cpp 2023-06-07 07:47:42 -0600
|
||||
+++ external_openimageio/src/libutil/thread.cpp 2023-06-07 09:45:39 -0600
|
||||
@@ -151,9 +151,10 @@
|
||||
this->set_thread(i);
|
||||
}
|
||||
} else { // the number of threads is decreased
|
||||
+ std::vector<std::unique_ptr<std::thread>> terminating_threads;
|
||||
for (int i = oldNThreads - 1; i >= nThreads; --i) {
|
||||
*this->flags[i] = true; // this thread will finish
|
||||
- this->terminating_threads.push_back(
|
||||
+ terminating_threads.push_back(
|
||||
std::move(this->threads[i]));
|
||||
this->threads.erase(this->threads.begin() + i);
|
||||
}
|
||||
@@ -162,6 +163,11 @@
|
||||
std::unique_lock<std::mutex> lock(this->mutex);
|
||||
this->cv.notify_all();
|
||||
}
|
||||
+ // wait for the terminated threads to finish
|
||||
+ for (auto& thread : terminating_threads) {
|
||||
+ if (thread->joinable())
|
||||
+ thread->join();
|
||||
+ }
|
||||
this->threads.resize(
|
||||
nThreads); // safe to delete because the threads are detached
|
||||
this->flags.resize(
|
||||
@@ -238,16 +244,10 @@
|
||||
if (thread->joinable())
|
||||
thread->join();
|
||||
}
|
||||
- // wait for the terminated threads to finish
|
||||
- for (auto& thread : this->terminating_threads) {
|
||||
- if (thread->joinable())
|
||||
- thread->join();
|
||||
- }
|
||||
// if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads
|
||||
// therefore delete them here
|
||||
this->clear_queue();
|
||||
this->threads.clear();
|
||||
- this->terminating_threads.clear();
|
||||
this->flags.clear();
|
||||
}
|
||||
|
||||
@@ -349,7 +349,6 @@
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<std::thread>> threads;
|
||||
- std::vector<std::unique_ptr<std::thread>> terminating_threads;
|
||||
std::vector<std::shared_ptr<std::atomic<bool>>> flags;
|
||||
mutable pvt::ThreadsafeQueue<std::function<void(int id)>*> q;
|
||||
std::atomic<bool> isDone;
|
||||
Reference in New Issue
Block a user