2023-06-14 16:52:36 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/system.h"
|
|
|
|
|
#include "util/string.h"
|
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Patch by lukasstockner97, jbakker, brecht
job | scene_name | compilation_time
----------+-----------------+------------------
Baseline | empty | 22.73
D2264 | empty | 13.94
Baseline | bmw | 56.44
D2264 | bmw | 41.32
Baseline | fishycat | 59.50
D2264 | fishycat | 45.19
Baseline | barbershop | 212.28
D2264 | barbershop | 169.81
Baseline | victor | 67.51
D2264 | victor | 53.60
Baseline | classroom | 51.46
D2264 | classroom | 39.02
Baseline | koro | 62.48
D2264 | koro | 49.03
Baseline | pavillion | 54.37
D2264 | pavillion | 38.82
Baseline | splash279 | 47.43
D2264 | splash279 | 37.94
Baseline | volume_emission | 145.22
D2264 | volume_emission | 121.10
This patch reduces compilation time because the split kernels and base
kernels are compiled in parallel. In Cycles debug mode (256) you can
unmark the OpenCL single program file, which reduces the compilation time
even further (bmw 17 seconds, barbershop 53 seconds).
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
2019-02-15 08:18:38 +01:00
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
#ifdef _WIN32
|
2016-02-05 09:09:39 +01:00
|
|
|
# if (!defined(FREE_WINDOWS))
|
|
|
|
|
# include <intrin.h>
|
|
|
|
|
# endif
|
2021-10-24 14:19:19 +02:00
|
|
|
# include "util/windows.h"
|
2011-04-27 11:58:34 +00:00
|
|
|
#elif defined(__APPLE__)
|
2019-02-11 15:03:28 +01:00
|
|
|
# include <sys/ioctl.h>
|
2016-02-05 09:09:39 +01:00
|
|
|
# include <sys/sysctl.h>
|
|
|
|
|
# include <sys/types.h>
|
2022-04-06 22:25:40 +02:00
|
|
|
# include <unistd.h>
|
2011-04-27 11:58:34 +00:00
|
|
|
#else
|
2019-02-06 12:57:10 +01:00
|
|
|
# include <sys/ioctl.h>
|
2020-03-19 09:33:03 +01:00
|
|
|
# include <unistd.h>
|
2011-04-27 11:58:34 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
2019-02-06 12:57:10 +01:00
|
|
|
int system_console_width()
{
  /* Width reported when the console size cannot be queried or is not positive. */
  const int fallback_width = 80;
  int width = 0;

#ifdef _WIN32
  /* Ask the console attached to standard output for its screen buffer size. */
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
  if (GetConsoleScreenBufferInfo(stdout_handle, &buffer_info)) {
    width = buffer_info.dwSize.X;
  }
#else
  /* Ask the terminal attached to standard output for its window size. */
  struct winsize window;
  const int status = ioctl(STDOUT_FILENO, TIOCGWINSZ, &window);
  if (status == 0) {
    width = window.ws_col;
  }
#endif

  if (width <= 0) {
    return fallback_width;
  }
  return width;
}
|
|
|
|
|
|
2021-04-29 16:21:30 +02:00
|
|
|
/* Equivalent of Windows __cpuid for x86 processors on other platforms.
 *
 * Executes the CPUID instruction for leaf `selector` and stores the resulting
 * EAX, EBX, ECX and EDX values in data[0], data[1], data[2] and data[3].
 *
 * ECX is explicitly zeroed before the instruction, so that leaves which take a
 * sub-leaf index in ECX (such as leaf 7) always query sub-leaf 0 rather than
 * whatever value happened to be in the register. */
#if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__))
static void __cpuid(int data[4], int selector)
{
#  if defined(__x86_64__)
  __asm__("cpuid"
          : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0));
#  elif defined(__i386__)
  /* EBX is saved and restored by hand and its value is handed back through a
   * scratch register (presumably because EBX can be reserved on 32-bit builds,
   * e.g. as the PIC register -- confirm before changing the constraints). */
  __asm__("pushl %%ebx    \n\t"
          "cpuid          \n\t"
          "movl %%ebx, %1 \n\t"
          "popl %%ebx     \n\t"
          : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0)
          : "ebx");
#  else
  data[0] = data[1] = data[2] = data[3] = 0;
#  endif
}
#endif
|
|
|
|
|
|
|
|
|
|
/* Return a human readable name for the CPU.
 *
 * The lookup is platform specific: `sysctlbyname` on macOS, the CPUID brand
 * string leaves on Windows and x86, the registry on Windows ARM64, and the
 * "model name" entry of /proc/cpuinfo elsewhere. "Unknown CPU" is returned
 * when the platform specific lookup fails. */
string system_cpu_brand_string()
{
#if defined(__APPLE__)
  /* Get from system on macOS. */
  char modelname[512] = "";
  size_t bufferlen = 512;
  if (sysctlbyname("machdep.cpu.brand_string", &modelname, &bufferlen, nullptr, 0) == 0) {
    return modelname;
  }
#elif (defined(_WIN32) || defined(__x86_64__) || defined(__i386__)) && !defined(_M_ARM64)
  /* Get from intrinsics on Windows and x86. */
  int result[4] = {0};

  __cpuid(result, 0x80000000);

  /* The highest supported extended leaf is an unsigned value with the top bit
   * set. Compare as unsigned: a signed compare against 0x80000004 (negative as
   * an int) would accept any positive value. */
  if ((uint32_t)result[0] >= 0x80000004u) {
    /* Three leaves of four registers each hold the 48 byte brand string. The
     * registers are stored in a properly aligned int array; the extra zeroed
     * element guarantees the text is null terminated. */
    int brand_regs[13] = {0};
    __cpuid(brand_regs + 0, 0x80000002);
    __cpuid(brand_regs + 4, 0x80000003);
    __cpuid(brand_regs + 8, 0x80000004);

    string brand = (const char *)brand_regs;

    /* Make it a bit more presentable. */
    brand = string_remove_trademark(brand);

    return brand;
  }
#elif defined(_M_ARM64)
  /* Get from the registry on Windows ARM64. */
  DWORD processorNameStringLength = 255;
  char processorNameString[255];
  if (RegGetValueA(HKEY_LOCAL_MACHINE,
                   "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
                   "ProcessorNameString",
                   RRF_RT_REG_SZ,
                   nullptr,
                   &processorNameString,
                   &processorNameStringLength) == ERROR_SUCCESS)
  {
    return processorNameString;
  }
#else
  /* Get from /proc/cpuinfo on Unix systems. */
  FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
  if (cpuinfo != nullptr) {
    /* Only the beginning of the file is inspected. */
    char cpuinfo_buf[513] = "";
    const size_t num_read = fread(cpuinfo_buf, 1, sizeof(cpuinfo_buf) - 1, cpuinfo);
    fclose(cpuinfo);
    cpuinfo_buf[num_read] = '\0';

    char *modelname = strstr(cpuinfo_buf, "model name");
    if (modelname != nullptr) {
      modelname = strchr(modelname, ':');
      if (modelname != nullptr) {
        /* Skip the colon and the blanks after it, one character at a time so
         * the pointer can never step past the terminating null. */
        modelname++;
        while (*modelname == ' ' || *modelname == '\t') {
          modelname++;
        }
        char *modelname_end = strchr(modelname, '\n');
        if (modelname_end != nullptr) {
          *modelname_end = '\0';
          return modelname;
        }
      }
    }
  }
#endif
  return "Unknown CPU";
}
|
|
|
|
|
|
2011-09-08 18:58:07 +00:00
|
|
|
/* Width of a pointer in bits for the current build, counting 8 bits per byte. */
int system_cpu_bits()
{
  const size_t pointer_bytes = sizeof(void *);
  return (int)(pointer_bytes * 8);
}
|
|
|
|
|
|
2021-04-29 16:21:30 +02:00
|
|
|
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
|
2011-11-15 15:13:38 +00:00
|
|
|
|
|
|
|
|
/* Combined instruction set capabilities of the CPU, as filled in by
 * system_cpu_capabilities(). Each flag stands for a whole group of features
 * rather than a single CPUID bit. */
struct CPUCapabilities {
  /* True when SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 are all reported. */
  bool sse42;
  /* True when everything required for `sse42` is reported together with AVX,
   * F16C, AVX2, FMA3, BMI1 and BMI2. */
  bool avx2;
};
|
|
|
|
|
|
2013-02-04 16:12:37 +00:00
|
|
|
/* Query CPUID once and cache which combined instruction sets are usable.
 *
 * Returns a reference to a function-local static that is filled in on the
 * first call. Both flags remain false when CPUID reports no leaf 1. The AVX2
 * group additionally requires XSAVE/XRSTOR to be in use, the XCR0 register to
 * have bits 1 and 2 set, and CPUID leaf 7 to be available. */
static CPUCapabilities &system_cpu_capabilities()
{
  static CPUCapabilities caps = {};
  static bool caps_init = false;

  if (!caps_init) {
    int result[4], num;

    /* Leaf 0: EAX holds the highest supported basic leaf. */
    __cpuid(result, 0);
    num = result[0];

    if (num >= 1) {
      __cpuid(result, 0x00000001);
      const bool sse = (result[3] & ((int)1 << 25)) != 0;
      const bool sse2 = (result[3] & ((int)1 << 26)) != 0;
      const bool sse3 = (result[2] & ((int)1 << 0)) != 0;

      const bool ssse3 = (result[2] & ((int)1 << 9)) != 0;
      const bool sse41 = (result[2] & ((int)1 << 19)) != 0;
      const bool sse42 = (result[2] & ((int)1 << 20)) != 0;

      const bool fma3 = (result[2] & ((int)1 << 12)) != 0;
      const bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
      const bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;

      /* Simplify to combined capabilities for which we specialize kernels. */
      caps.sse42 = sse && sse2 && sse3 && ssse3 && sse41 && sse42;

      if (os_uses_xsave_xrestore && cpu_avx_support) {
        /* Check if the OS will save the YMM registers. */
        uint32_t xcr_feature_mask;
#  if defined(__GNUC__)
        int edx; /* not used */
        /* actual opcode for xgetbv */
        __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0));
#  elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
        /* Minimum VS2010 SP1 compiler is required. */
        xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#  else
        xcr_feature_mask = 0;
#  endif
        const bool avx = (xcr_feature_mask & 0x6) == 0x6;
        /* Still reading the leaf 1 result here, before it is overwritten. */
        const bool f16c = (result[2] & ((int)1 << 29)) != 0;

        /* Only query leaf 7 when the CPU reports it. Asking for a leaf above
         * the supported maximum returns data that must not be interpreted as
         * feature bits. */
        bool bmi1 = false;
        bool bmi2 = false;
        bool avx2 = false;
        if (num >= 7) {
          __cpuid(result, 0x00000007);
          bmi1 = (result[1] & ((int)1 << 3)) != 0;
          bmi2 = (result[1] & ((int)1 << 8)) != 0;
          avx2 = (result[1] & ((int)1 << 5)) != 0;
        }

        caps.avx2 = sse && sse2 && sse3 && ssse3 && sse41 && sse42 && avx && f16c && avx2 &&
                    fma3 && bmi1 && bmi2;
      }
    }

    caps_init = true;
  }

  return caps;
}
|
|
|
|
|
|
2024-02-09 17:25:58 +01:00
|
|
|
bool system_cpu_support_sse42()
|
2013-11-22 14:16:47 +01:00
|
|
|
{
|
|
|
|
|
CPUCapabilities &caps = system_cpu_capabilities();
|
2024-02-09 17:25:58 +01:00
|
|
|
return caps.sse42;
|
2013-11-22 14:16:47 +01:00
|
|
|
}
|
2014-01-16 17:04:11 +01:00
|
|
|
|
2014-06-13 22:23:58 +02:00
|
|
|
bool system_cpu_support_avx2()
|
|
|
|
|
{
|
|
|
|
|
CPUCapabilities &caps = system_cpu_capabilities();
|
2022-08-08 17:45:37 +02:00
|
|
|
return caps.avx2;
|
2014-06-13 22:23:58 +02:00
|
|
|
}
|
2011-11-15 15:13:38 +00:00
|
|
|
#else
|
|
|
|
|
|
2024-02-09 17:25:58 +01:00
|
|
|
/* Non-x86 build: no CPUID query is made, SSE4.2 is always reported as absent. */
bool system_cpu_support_sse42()
{
  const bool supported = false;
  return supported;
}
|
|
|
|
|
|
2014-06-13 22:23:58 +02:00
|
|
|
/* Non-x86 build: no CPUID query is made, AVX2 is always reported as absent. */
bool system_cpu_support_avx2()
{
  const bool supported = false;
  return supported;
}
|
2014-01-16 17:04:11 +01:00
|
|
|
|
2011-11-15 15:13:38 +00:00
|
|
|
#endif
|
|
|
|
|
|
2017-11-04 00:33:38 +01:00
|
|
|
/* Return the amount of physical memory in bytes, or 0 when the platform query
 * fails. */
size_t system_physical_ram()
{
#ifdef _WIN32
  MEMORYSTATUSEX ram;
  ram.dwLength = sizeof(ram);
  /* On failure the structure is not filled in, so do not read from it. */
  if (!GlobalMemoryStatusEx(&ram)) {
    return 0;
  }
  return ram.ullTotalPhys;
#elif defined(__APPLE__)
  uint64_t ram = 0;
  size_t len = sizeof(ram);
  if (sysctlbyname("hw.memsize", &ram, &len, nullptr, 0) == 0) {
    return ram;
  }
  return 0;
#else
  /* sysconf() returns a signed long and uses -1 to signal an error. Check
   * before converting, otherwise the error value turns into a huge size. */
  const long page_size = sysconf(_SC_PAGESIZE);
  const long num_pages = sysconf(_SC_PHYS_PAGES);
  if (page_size <= 0 || num_pages <= 0) {
    return 0;
  }
  return (size_t)page_size * (size_t)num_pages;
#endif
}
|
|
|
|
|
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
/* Identifier of the calling process, as reported by the operating system and
 * widened to 64 bits. */
uint64_t system_self_process_id()
{
#ifdef _WIN32
  const DWORD process_id = GetCurrentProcessId();
#else
  const pid_t process_id = getpid();
#endif
  return (uint64_t)process_id;
}
|
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_END
|