Merge branch 'master' into blender2.8

2018-01-11 13:24:41 +11:00
parent 6112cde3f9 9d62e6e782
commit 9c91c75ea6
14 changed files with 99 additions and 78 deletions
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -21,8 +21,11 @@ if(WITH_CYCLES_NATIVE_ONLY)
 	add_definitions(
 		-DWITH_KERNEL_NATIVE
 	)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
-	set(CYCLES_KERNEL_FLAGS "-march=native")
+
+	if(NOT MSVC)
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+		set(CYCLES_KERNEL_FLAGS "-march=native")
+	endif()
 elseif(NOT WITH_CPU_SSE)
 	set(CXX_HAS_SSE FALSE)
 	set(CXX_HAS_AVX FALSE)
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -829,18 +829,26 @@ public:
 				status = " in host memory";

 				/* Replace host pointer with our host allocation. Only works if
-				 * CUDA memory layout is the same and has no pitch padding. */
-				if(pitch_padding == 0 && mem.host_pointer && mem.host_pointer != mem.shared_pointer) {
+				 * CUDA memory layout is the same and has no pitch padding. Also
+				 * does not work if we move textures to host during a render,
+				 * since other devices might be using the memory. */
+				if(!move_texture_to_host && pitch_padding == 0 &&
+				   mem.host_pointer && mem.host_pointer != mem.shared_pointer) {
 					memcpy(mem.shared_pointer, mem.host_pointer, size);
 					mem.host_free();
 					mem.host_pointer = mem.shared_pointer;
 				}
 			}
+			else {
+				status = " failed, out of host memory";
+			}
+		}
+		else if(mem_alloc_result != CUDA_SUCCESS) {
+			status = " failed, out of device and host memory";
 		}

 		if(mem_alloc_result != CUDA_SUCCESS) {
 			cuda_assert(mem_alloc_result);
-			status = " failed, out of memory";
 		}

 		if(mem.name) {
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -22,6 +22,7 @@

 #include "util/util_foreach.h"
 #include "util/util_logging.h"
+#include "util/util_set.h"

 CCL_NAMESPACE_BEGIN

@@ -105,7 +106,9 @@ void device_opencl_info(vector<DeviceInfo>& devices)
 	OpenCLInfo::get_usable_devices(&usable_devices);
 	/* Devices are numbered consecutively across platforms. */
 	int num_devices = 0;
+	set<string> unique_ids;
 	foreach(OpenCLPlatformDevice& platform_device, usable_devices) {
+		/* Compute unique ID for persistent user preferences. */
 		const string& platform_name = platform_device.platform_name;
 		const cl_device_type device_type = platform_device.device_type;
 		const string& device_name = platform_device.device_name;
@@ -113,7 +116,15 @@ void device_opencl_info(vector<DeviceInfo>& devices)
 		if(hardware_id == "") {
 			hardware_id = string_printf("ID_%d", num_devices);
 		}
+		string id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;

+		/* Hardware ID might not be unique, add device number in that case. */
+		if(unique_ids.find(id) != unique_ids.end()) {
+			id += string_printf("_ID_%d", num_devices);
+		}
+		unique_ids.insert(id);
+
+		/* Create DeviceInfo. */
 		DeviceInfo info;
 		info.type = DEVICE_OPENCL;
 		info.description = string_remove_trademark(string(device_name));
@@ -125,7 +136,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
 		                                                     device_type);
 		info.has_volume_decoupled = false;
 		info.has_qbvh = false;
-		info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
+		info.id = id;
 		devices.push_back(info);
 		num_devices++;
 	}
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -53,7 +53,7 @@ ccl_device_inline AttributeDescriptor attribute_not_found()

 ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object)
 {
-	int offset = object*OBJECT_SIZE + 11;
+	int offset = object*OBJECT_SIZE + 15;
 	float4 f = kernel_tex_fetch(__objects, offset);
 	return __float_as_uint(f.y);
 }
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -28,11 +28,12 @@ CCL_NAMESPACE_BEGIN

 enum ObjectTransform {
 	OBJECT_TRANSFORM = 0,
-	OBJECT_TRANSFORM_MOTION_PRE = 0,
 	OBJECT_INVERSE_TRANSFORM = 4,
-	OBJECT_TRANSFORM_MOTION_POST = 4,
-	OBJECT_PROPERTIES = 8,
-	OBJECT_DUPLI = 9
+	OBJECT_TRANSFORM_MOTION_PRE = 0,
+	OBJECT_TRANSFORM_MOTION_MID = 4,
+	OBJECT_TRANSFORM_MOTION_POST = 8,
+	OBJECT_PROPERTIES = 12,
+	OBJECT_DUPLI = 13
 };

 enum ObjectVectorTransform {
@@ -90,19 +91,24 @@ ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int
 #ifdef __OBJECT_MOTION__
 ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
 {
-	DecompMotionTransform motion;
+	MotionTransform motion;

 	int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;

-	motion.mid.x = kernel_tex_fetch(__objects, offset + 0);
-	motion.mid.y = kernel_tex_fetch(__objects, offset + 1);
-	motion.mid.z = kernel_tex_fetch(__objects, offset + 2);
-	motion.mid.w = kernel_tex_fetch(__objects, offset + 3);
+	motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
+	motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
+	motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
+	motion.pre.w = kernel_tex_fetch(__objects, offset + 3);

-	motion.pre_x = kernel_tex_fetch(__objects, offset + 4);
-	motion.pre_y = kernel_tex_fetch(__objects, offset + 5);
-	motion.post_x = kernel_tex_fetch(__objects, offset + 6);
-	motion.post_y = kernel_tex_fetch(__objects, offset + 7);
+	motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
+	motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
+	motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
+	motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
+
+	motion.post.x = kernel_tex_fetch(__objects, offset + 8);
+	motion.post.y = kernel_tex_fetch(__objects, offset + 9);
+	motion.post.z = kernel_tex_fetch(__objects, offset + 10);
+	motion.post.w = kernel_tex_fetch(__objects, offset + 11);

 	Transform tfm;
 	transform_motion_interpolate(&tfm, &motion, time);
@@ -328,7 +334,7 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return 0;

-	int offset = object*OBJECT_SIZE + 11;
+	int offset = object*OBJECT_SIZE + 15;
 	float4 f = kernel_tex_fetch(__objects, offset);
 	return __float_as_uint(f.x);
 }
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -95,11 +95,11 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
 #  ifdef __KERNEL_OPENCL__
 		const MotionTransform tfm = kernel_data.cam.motion;
 		transform_motion_interpolate(&cameratoworld,
-		                             ((const DecompMotionTransform*)&tfm),
+		                             &tfm,
 		                             ray->time);
 #  else
 		transform_motion_interpolate(&cameratoworld,
-		                             ((const DecompMotionTransform*)&kernel_data.cam.motion),
+		                             &kernel_data.cam.motion,
 		                             ray->time);
 #  endif
 	}
@@ -207,11 +207,11 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
 #  ifdef __KERNEL_OPENCL__
 		const MotionTransform tfm = kernel_data.cam.motion;
 		transform_motion_interpolate(&cameratoworld,
-		                             (const DecompMotionTransform*)&tfm,
+		                             &tfm,
 		                             ray->time);
 #  else
 		transform_motion_interpolate(&cameratoworld,
-		                             (const DecompMotionTransform*)&kernel_data.cam.motion,
+		                             &kernel_data.cam.motion,
 		                             ray->time);
 #  endif
 	}
@@ -285,11 +285,11 @@ ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
 #  ifdef __KERNEL_OPENCL__
 		const MotionTransform tfm = kernel_data.cam.motion;
 		transform_motion_interpolate(&cameratoworld,
-		                             (const DecompMotionTransform*)&tfm,
+		                             &tfm,
 		                             ray->time);
 #  else
 		transform_motion_interpolate(&cameratoworld,
-		                             (const DecompMotionTransform*)&kernel_data.cam.motion,
+		                             &kernel_data.cam.motion,
 		                             ray->time);
 #  endif
 	}
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -35,7 +35,7 @@
 CCL_NAMESPACE_BEGIN

 /* Constants */
-#define OBJECT_SIZE 		12
+#define OBJECT_SIZE 		16
 #define OBJECT_VECTOR_SIZE	6
 #define LIGHT_SIZE		11
 #define FILTER_TABLE_SIZE	1024
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -353,7 +353,7 @@ void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene)
 #ifdef __CAMERA_MOTION__
 	else if(need_motion == Scene::MOTION_BLUR) {
 		if(use_motion) {
-			transform_motion_decompose((DecompMotionTransform*)&kcam->motion, &motion, &matrix);
+			transform_motion_decompose(&kcam->motion, &motion, &matrix);
 			kcam->have_motion = 1;
 		}
 		if(use_perspective_motion) {
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -97,7 +97,7 @@ void Object::compute_bounds(bool motion_blur)
 			mtfm.post = tfm;
 		}

-		DecompMotionTransform decomp;
+		MotionTransform decomp;
 		transform_motion_decompose(&decomp, &mtfm, &tfm);

 		bounds = BoundBox::empty;
@@ -365,7 +365,7 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s
 	/* OBJECT_INVERSE_TRANSFORM */
 	memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
 	/* OBJECT_PROPERTIES */
-	objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));
+	objects[offset+12] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));

 	if(mesh->use_motion_blur) {
 		state->have_motion = true;
@@ -402,10 +402,10 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s
 	else if(state->need_motion == Scene::MOTION_BLUR) {
 		if(ob->use_motion) {
 			/* decompose transformations for interpolation. */
-			DecompMotionTransform decomp;
+			MotionTransform decomp;

 			transform_motion_decompose(&decomp, &ob->motion, &ob->tfm);
-			memcpy(&objects[offset], &decomp, sizeof(float4)*8);
+			memcpy(&objects[offset], &decomp, sizeof(float4)*12);
 			flag |= SD_OBJECT_MOTION;
 			state->have_motion = true;
 		}
@@ -418,9 +418,9 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s
 	int numverts = mesh->verts.size();
 	int numkeys = mesh->curve_keys.size();

-	objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
-	objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
-	objects[offset+11] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	objects[offset+13] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
+	objects[offset+14] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
+	objects[offset+15] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);

 	/* Object flag. */
 	if(ob->use_holdout) {
@@ -655,7 +655,7 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc

 	foreach(Object *object, scene->objects) {
 		Mesh* mesh = object->mesh;
-		int offset = object_index*OBJECT_SIZE + 11;
+		int offset = object_index*OBJECT_SIZE + 15;

 		if(mesh->patch_table) {
 			uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() -
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -247,30 +247,18 @@ static void transform_decompose(Transform *decomp, const Transform *tfm)
 	decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
 }

-void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTransform *motion, const Transform *mid)
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion, const Transform *mid)
 {
-	Transform pre, post;
-
-	transform_decompose(&pre, &motion->pre);
+	transform_decompose(&decomp->pre, &motion->pre);
 	transform_decompose(&decomp->mid, mid);
-	transform_decompose(&post, &motion->post);
+	transform_decompose(&decomp->post, &motion->post);

 	/* ensure rotation around shortest angle, negated quaternions are the same
 	 * but this means we don't have to do the check in quat_interpolate */
-	if(dot(decomp->mid.x, post.x) < 0.0f)
+	if(dot(decomp->mid.x, decomp->post.x) < 0.0f)
 		decomp->mid.x = -decomp->mid.x;
-	if(dot(pre.x, decomp->mid.x) < 0.0f)
-		pre.x = -pre.x;
-	
-	/* drop scale of pre/post */
-	pre.y.w = decomp->mid.y.w;
-	post.y.w = decomp->mid.y.w;
-
-	/* store translation/rotation part of pre/post */
-	decomp->pre_x = pre.x;
-	decomp->pre_y = pre.y;
-	decomp->post_x = post.x;
-	decomp->post_y = post.y;
+	if(dot(decomp->pre.x, decomp->mid.x) < 0.0f)
+		decomp->pre.x = -decomp->pre.x;
 }

 Transform transform_from_viewplane(BoundBox2D& viewplane)
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -39,9 +39,7 @@ typedef struct Transform {

 /* transform decomposed in rotation/translation/scale. we use the same data
 * structure as Transform, and tightly pack decomposition into it. first the
- * rotation (4), then translation (3), then 3x3 scale matrix (9).
- *
- * For the DecompMotionTransform we drop scale from pre/post. */
+ * rotation (4), then translation (3), then 3x3 scale matrix (9). */

 typedef struct ccl_may_alias MotionTransform {
 	Transform pre;
@@ -49,12 +47,6 @@ typedef struct ccl_may_alias MotionTransform {
 	Transform post;
 } MotionTransform;

-typedef struct DecompMotionTransform {
-	Transform mid;
-	float4 pre_x, pre_y;
-	float4 post_x, post_y;
-} DecompMotionTransform;
-
 typedef struct PerspectiveMotionTransform {
 	Transform pre;
 	Transform post;
@@ -466,7 +458,7 @@ ccl_device_inline void transform_compose(Transform *tfm, const Transform *decomp
 /* Disabled for now, need arc-length parametrization for constant speed motion.
 * #define CURVED_MOTION_INTERPOLATE */

-ccl_device void transform_motion_interpolate(Transform *tfm, const DecompMotionTransform *motion, float t)
+ccl_device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t)
 {
 	/* possible optimization: is it worth it adding a check to skip scaling?
 	 * it's probably quite uncommon to have scaling objects. or can we skip
@@ -475,9 +467,9 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const DecompMotionT

 #ifdef CURVED_MOTION_INTERPOLATE
 	/* 3 point bezier curve interpolation for position */
-	float3 Ppre = float4_to_float3(motion->pre_y);
+	float3 Ppre = float4_to_float3(motion->pre.y);
 	float3 Pmid = float4_to_float3(motion->mid.y);
-	float3 Ppost = float4_to_float3(motion->post_y);
+	float3 Ppost = float4_to_float3(motion->post.y);

 	float3 Pcontrol = 2.0f*Pmid - 0.5f*(Ppre + Ppost);
 	float3 P = Ppre*t*t + Pcontrol*2.0f*t*(1.0f - t) + Ppost*(1.0f - t)*(1.0f - t);
@@ -491,27 +483,28 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const DecompMotionT
 	if(t < 0.5f) {
 		t *= 2.0f;

-		decomp.x = quat_interpolate(motion->pre_x, motion->mid.x, t);
+		decomp.x = quat_interpolate(motion->pre.x, motion->mid.x, t);
 #ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->pre_y.w + t*motion->mid.y.w;
+		decomp.y.w = (1.0f - t)*motion->pre.y.w + t*motion->mid.y.w;
 #else
-		decomp.y = (1.0f - t)*motion->pre_y + t*motion->mid.y;
+		decomp.y = (1.0f - t)*motion->pre.y + t*motion->mid.y;
 #endif
+		decomp.z = (1.0f - t)*motion->pre.z + t*motion->mid.z;
+		decomp.w = (1.0f - t)*motion->pre.w + t*motion->mid.w;
 	}
 	else {
 		t = (t - 0.5f)*2.0f;

-		decomp.x = quat_interpolate(motion->mid.x, motion->post_x, t);
+		decomp.x = quat_interpolate(motion->mid.x, motion->post.x, t);
 #ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->mid.y.w + t*motion->post_y.w;
+		decomp.y.w = (1.0f - t)*motion->mid.y.w + t*motion->post.y.w;
 #else
-		decomp.y = (1.0f - t)*motion->mid.y + t*motion->post_y;
+		decomp.y = (1.0f - t)*motion->mid.y + t*motion->post.y;
 #endif
+		decomp.z = (1.0f - t)*motion->mid.z + t*motion->post.z;
+		decomp.w = (1.0f - t)*motion->mid.w + t*motion->post.w;
 	}

-	decomp.z = motion->mid.z;
-	decomp.w = motion->mid.w;
-
 	/* compose rotation, translation, scale into matrix */
 	transform_compose(tfm, &decomp);
 }
@@ -526,7 +519,7 @@ ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransfor
 }

 float4 transform_to_quat(const Transform& tfm);
-void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTransform *motion, const Transform *mid);
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion, const Transform *mid);
 Transform transform_from_viewplane(BoundBox2D& viewplane);

 #endif
--- a/source/blender/blenkernel/intern/constraint.c
+++ b/source/blender/blenkernel/intern/constraint.c
@@ -1013,8 +1013,11 @@ static void vectomat(const float vec[3], const float target_up[3], short axis, s
 		u[2] = 1;
 	}

+	/* note: even though 'n' is normalized, don't use 'project_v3_v3v3_normalized' below
+	 * because precision issues cause a problem in near degenerate states, see: T53455. */
+
 	/* project the up vector onto the plane specified by n */
-	project_v3_v3v3_normalized(proj, u, n); /* first u onto n... */
+	project_v3_v3v3(proj, u, n); /* first u onto n... */
 	sub_v3_v3v3(proj, u, proj); /* then onto the plane */
 	/* proj specifies the transformation of the up axis */

--- a/source/blender/editors/curve/curve_ops.c
+++ b/source/blender/editors/curve/curve_ops.c
@@ -241,6 +241,9 @@ void ED_keymap_curve(wmKeyConfig *keyconf)
 	kmi = WM_keymap_add_item(keymap, "CURVE_OT_draw", ACTIONMOUSE, KM_PRESS, KM_SHIFT, 0);
 	RNA_boolean_set(kmi->ptr, "wait_for_input", false);

+	kmi = WM_keymap_add_item(keymap, "CURVE_OT_draw", TABLET_STYLUS, KM_PRESS, KM_SHIFT, 0);
+	RNA_boolean_set(kmi->ptr, "wait_for_input", false);
+
 	kmi = WM_keymap_add_item(keymap, "CURVE_OT_select_all", AKEY, KM_PRESS, 0, 0);
 	RNA_enum_set(kmi->ptr, "action", SEL_TOGGLE);
 	kmi = WM_keymap_add_item(keymap, "CURVE_OT_select_all", IKEY, KM_PRESS, KM_CTRL, 0);
--- a/source/blender/editors/interface/interface_widgets.c
+++ b/source/blender/editors/interface/interface_widgets.c
@@ -707,6 +707,7 @@ static void widgetbase_draw(uiWidgetBase *wtb, uiWidgetColors *wcol)

 	/* backdrop non AA */
 	if (wtb->draw_inner) {
+		BLI_assert(wtb->totvert != 0);
 		if (wcol->shaded == 0) {
 			if (wcol->alpha_check) {
 				float inner_v_half[WIDGET_SIZE_MAX][2];
@@ -784,6 +785,7 @@ static void widgetbase_draw(uiWidgetBase *wtb, uiWidgetColors *wcol)

 	/* for each AA step */
 	if (wtb->draw_outline) {
+		BLI_assert(wtb->totvert != 0);
 		float triangle_strip[WIDGET_SIZE_MAX * 2 + 2][2]; /* + 2 because the last pair is wrapped */
 		float triangle_strip_emboss[WIDGET_SIZE_MAX * 2][2]; /* only for emboss */

@@ -2784,6 +2786,10 @@ static void widget_numbut_draw(uiWidgetColors *wcol, rcti *rect, int state, int
 	if (!emboss) {
 		round_box_edges(&wtb, roundboxalign, rect, rad);
 	}
+	else {
+		wtb.draw_inner = false;
+		wtb.draw_outline = false;
+	}

 	/* decoration */
 	if (!(state & UI_STATE_TEXT_INPUT)) {