deps_builder: fix deadlock in idiff

idiff sometimes locks up while shutting down when the CPU is
oversubscribed. While blender does not rely on the idiff tool
the tests that run on the CI environment do, which causes tests
to occasionally fail due to a timeout.

The root cause is a bit complex but can be found on the oiio tracker
at https://github.com/OpenImageIO/oiio/issues/3851

This change fixes idiff by :

1- Shutting down the thread pool before the main function exits
2- Have the shutdown wait for the pool threads to actually join, to
prevent the OS from forcefully terminating them while they could
potentially still be holding a lock.
This commit is contained in:
Ray Molenkamp
2023-06-12 08:51:51 -06:00
parent f09d465a6d
commit 7f9021a92f
2 changed files with 64 additions and 1 deletions

View File

@@ -106,7 +106,8 @@ ExternalProject_Add(external_openimageio
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/openimageio
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/openimageio.diff &&
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_3832.diff
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_3832.diff &&
${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/openimageio/src/external_openimageio/ < ${PATCH_DIR}/oiio_deadlock.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openimageio ${DEFAULT_CMAKE_FLAGS} ${OPENIMAGEIO_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/openimageio
)

View File

@@ -0,0 +1,62 @@
diff -Naur orig/src/idiff/idiff.cpp external_openimageio/src/idiff/idiff.cpp
--- orig/src/idiff/idiff.cpp 2023-06-07 07:47:42 -0600
+++ external_openimageio/src/idiff/idiff.cpp 2023-06-07 09:46:47 -0600
@@ -399,5 +399,6 @@
imagecache->invalidate_all(true);
ImageCache::destroy(imagecache);
+ default_thread_pool()->resize(0);
return ret;
}
diff -Naur orig/src/libutil/thread.cpp external_openimageio/src/libutil/thread.cpp
--- orig/src/libutil/thread.cpp 2023-06-07 07:47:42 -0600
+++ external_openimageio/src/libutil/thread.cpp 2023-06-07 09:45:39 -0600
@@ -151,9 +151,10 @@
this->set_thread(i);
}
} else { // the number of threads is decreased
+ std::vector<std::unique_ptr<std::thread>> terminating_threads;
for (int i = oldNThreads - 1; i >= nThreads; --i) {
*this->flags[i] = true; // this thread will finish
- this->terminating_threads.push_back(
+ terminating_threads.push_back(
std::move(this->threads[i]));
this->threads.erase(this->threads.begin() + i);
}
@@ -162,6 +163,11 @@
std::unique_lock<std::mutex> lock(this->mutex);
this->cv.notify_all();
}
+ // wait for the terminated threads to finish
+ for (auto& thread : terminating_threads) {
+ if (thread->joinable())
+ thread->join();
+ }
this->threads.resize(
nThreads); // safe to delete because the threads are detached
this->flags.resize(
@@ -238,16 +244,10 @@
if (thread->joinable())
thread->join();
}
- // wait for the terminated threads to finish
- for (auto& thread : this->terminating_threads) {
- if (thread->joinable())
- thread->join();
- }
// if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads
// therefore delete them here
this->clear_queue();
this->threads.clear();
- this->terminating_threads.clear();
this->flags.clear();
}
@@ -349,7 +349,6 @@
}
std::vector<std::unique_ptr<std::thread>> threads;
- std::vector<std::unique_ptr<std::thread>> terminating_threads;
std::vector<std::shared_ptr<std::atomic<bool>>> flags;
mutable pvt::ThreadsafeQueue<std::function<void(int id)>*> q;
std::atomic<bool> isDone;