2023-08-16 00:20:26 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2022 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2022-01-17 14:45:22 +01:00
|
|
|
|
|
|
|
|
/** \file
|
|
|
|
|
* \ingroup gpu
|
|
|
|
|
*/
|
|
|
|
|
|
2025-08-27 14:52:19 +02:00
|
|
|
#pragma once
|
|
|
|
|
|
2022-01-17 14:45:22 +01:00
|
|
|
#ifndef USE_GPU_SHADER_CREATE_INFO
|
2023-09-26 15:19:33 +02:00
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
# include "GPU_shader_shared_utils.hh"
|
2023-01-06 22:22:11 +01:00
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
struct TestOutputRawData;
|
2022-01-17 14:45:22 +01:00
|
|
|
#endif
|
|
|
|
|
|
2023-03-27 12:08:14 +11:00
|
|
|
/* NOTE: float3 has differing stride and alignment rules across different GPU back-ends. If 12 byte
 * stride and alignment are essential, use `packed_float3` to avoid data read issues. This is
 * required in the common use-case where a float3 and an int/float are paired together for optimal
 * data transfer. */
|
|
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
/* Keyframe shape bit-flags. Bits 0-4 and 8-10 are assigned, bits 5-7 are not used by any value. */
enum eGPUKeyframeShapes : uint32_t {
  /* Bits 0-4. */
  GPU_KEYFRAME_SHAPE_DIAMOND = (1u << 0u),
  GPU_KEYFRAME_SHAPE_CIRCLE = (1u << 1u),
  GPU_KEYFRAME_SHAPE_CLIPPED_VERTICAL = (1u << 2u),
  GPU_KEYFRAME_SHAPE_CLIPPED_HORIZONTAL = (1u << 3u),
  GPU_KEYFRAME_SHAPE_INNER_DOT = (1u << 4u),
  /* Bits 8-10. */
  GPU_KEYFRAME_SHAPE_ARROW_END_MAX = (1u << 8u),
  GPU_KEYFRAME_SHAPE_ARROW_END_MIN = (1u << 9u),
  GPU_KEYFRAME_SHAPE_ARROW_END_MIXED = (1u << 10u),
  /* Combination of both `CLIPPED` bits. */
  GPU_KEYFRAME_SHAPE_SQUARE = (GPU_KEYFRAME_SHAPE_CLIPPED_VERTICAL |
                               GPU_KEYFRAME_SHAPE_CLIPPED_HORIZONTAL),
};
|
|
|
|
|
|
2024-11-23 16:42:38 +01:00
|
|
|
/* Node socket limits.
 * NOTE(review): the users of these macros are not visible in this header, confirm their exact
 * meaning against the code that references them. */
#define MAX_SOCKET_PARAMETERS 4
#define MAX_SOCKET_INSTANCE 32
|
|
|
|
|
|
|
|
|
|
/* Node Socket shader parameters. Must match the shader layout of "gpu_shader_2D_node_socket". */
|
|
|
|
|
struct NodeSocketShaderParameters {
|
|
|
|
|
float4 rect;
|
|
|
|
|
float4 color_inner;
|
|
|
|
|
float4 color_outline;
|
|
|
|
|
float outline_thickness;
|
|
|
|
|
float outline_offset;
|
|
|
|
|
float shape;
|
|
|
|
|
float aspect;
|
|
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(NodeSocketShaderParameters, 16)
|
|
|
|
|
|
2025-08-27 15:24:22 +02:00
|
|
|
/* Per link data. */
|
2022-01-17 14:45:22 +01:00
|
|
|
struct NodeLinkData {
|
2025-08-27 15:24:22 +02:00
|
|
|
float4 start_color;
|
|
|
|
|
float4 end_color;
|
|
|
|
|
float2 bezier_P0;
|
|
|
|
|
float2 bezier_P1;
|
|
|
|
|
float2 bezier_P2;
|
|
|
|
|
float2 bezier_P3;
|
|
|
|
|
uint color_ids;
|
|
|
|
|
float dash_length;
|
|
|
|
|
float dash_factor;
|
|
|
|
|
float dash_alpha;
|
2022-01-17 14:45:22 +01:00
|
|
|
float dim_factor;
|
|
|
|
|
float thickness;
|
2023-08-22 19:24:07 +02:00
|
|
|
float aspect;
|
2025-08-27 15:24:22 +02:00
|
|
|
bool32_t do_arrow;
|
|
|
|
|
bool32_t do_muted;
|
|
|
|
|
bool32_t has_back_link;
|
|
|
|
|
float _pad0;
|
|
|
|
|
float _pad1;
|
2022-01-17 14:45:22 +01:00
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(NodeLinkData, 16)
|
2022-01-17 14:45:22 +01:00
|
|
|
|
2025-08-27 15:24:22 +02:00
|
|
|
/* Data common to all links. */
|
|
|
|
|
struct NodeLinkUniformData {
|
2022-01-17 14:45:22 +01:00
|
|
|
float4 colors[6];
|
2023-08-22 19:24:07 +02:00
|
|
|
float aspect;
|
2025-08-27 15:24:22 +02:00
|
|
|
float arrow_size;
|
2022-01-17 14:45:22 +01:00
|
|
|
float2 _pad;
|
|
|
|
|
};
|
2025-08-27 15:24:22 +02:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(NodeLinkUniformData, 16)
|
2022-01-17 14:45:22 +01:00
|
|
|
|
|
|
|
|
struct GPencilStrokeData {
|
|
|
|
|
float2 viewport;
|
|
|
|
|
float pixsize;
|
|
|
|
|
float objscale;
|
|
|
|
|
float pixfactor;
|
|
|
|
|
int xraymode;
|
|
|
|
|
int caps_start;
|
|
|
|
|
int caps_end;
|
2024-03-08 19:09:10 +01:00
|
|
|
bool32_t keep_size;
|
|
|
|
|
bool32_t fill_stroke;
|
2022-01-17 14:45:22 +01:00
|
|
|
float2 _pad;
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(GPencilStrokeData, 16)
|
2022-01-17 14:45:22 +01:00
|
|
|
|
|
|
|
|
struct GPUClipPlanes {
|
2022-12-08 21:07:28 +01:00
|
|
|
float4x4 ClipModelMatrix;
|
2022-01-17 14:45:22 +01:00
|
|
|
float4 world[6];
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(GPUClipPlanes, 16)
|
2022-01-17 14:45:22 +01:00
|
|
|
|
|
|
|
|
struct SimpleLightingData {
|
2022-12-09 00:10:14 +01:00
|
|
|
float4 l_color;
|
2023-02-13 18:16:38 +01:00
|
|
|
packed_float3 light;
|
2022-01-17 14:45:22 +01:00
|
|
|
float _pad;
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(SimpleLightingData, 16)
|
2022-01-18 13:13:23 +01:00
|
|
|
|
|
|
|
|
#define MAX_CALLS 16
|
|
|
|
|
|
2023-02-26 13:23:40 +01:00
|
|
|
struct MultiIconCallData {
|
2022-01-18 13:13:23 +01:00
|
|
|
float4 calls_data[MAX_CALLS * 3];
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(MultiIconCallData, 16)
|
2023-01-06 22:22:11 +01:00
|
|
|
|
2024-06-04 20:05:35 +02:00
|
|
|
/* Element count used by the UBO size checks of #SeqStripDrawData and #SeqStripThumbData. */
#define GPU_SEQ_STRIP_DRAW_DATA_LEN 256
|
|
|
|
|
|
|
|
|
|
/* Bit-flags stored in `SeqStripDrawData::flags`. Bits 13 and 14 are not used by any value. */
enum eGPUSeqFlags : uint32_t {
  /* Bits 0-12. */
  GPU_SEQ_FLAG_BACKGROUND = (1u << 0u),
  GPU_SEQ_FLAG_SINGLE_IMAGE = (1u << 1u),
  GPU_SEQ_FLAG_COLOR_BAND = (1u << 2u),
  GPU_SEQ_FLAG_TRANSITION = (1u << 3u),
  GPU_SEQ_FLAG_LOCKED = (1u << 4u),
  GPU_SEQ_FLAG_MISSING_TITLE = (1u << 5u),
  GPU_SEQ_FLAG_MISSING_CONTENT = (1u << 6u),
  GPU_SEQ_FLAG_SELECTED = (1u << 7u),
  GPU_SEQ_FLAG_ACTIVE = (1u << 8u),
  GPU_SEQ_FLAG_HIGHLIGHT = (1u << 9u),
  GPU_SEQ_FLAG_BORDER = (1u << 10u),
  GPU_SEQ_FLAG_SELECTED_LH = (1u << 11u),
  GPU_SEQ_FLAG_SELECTED_RH = (1u << 12u),
  /* Bits 15-16. */
  GPU_SEQ_FLAG_OVERLAP = (1u << 15u),
  GPU_SEQ_FLAG_CLAMPED = (1u << 16u),

  /* Mask matching either of the `SELECTED_LH` / `SELECTED_RH` bits. */
  GPU_SEQ_FLAG_ANY_HANDLE = GPU_SEQ_FLAG_SELECTED_LH | GPU_SEQ_FLAG_SELECTED_RH
};
|
|
|
|
|
|
2025-08-27 14:52:19 +02:00
|
|
|
/* Glyph for text rendering. */
|
|
|
|
|
struct GlyphQuad {
|
|
|
|
|
int4 position;
|
|
|
|
|
float4 glyph_color; /* Cannot be name `color` because of metal macros. */
|
|
|
|
|
int2 glyph_size;
|
|
|
|
|
int offset;
|
|
|
|
|
uint flags;
|
|
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(GlyphQuad, 16)
|
|
|
|
|
|
2024-06-04 20:05:35 +02:00
|
|
|
/* VSE per-strip data for timeline rendering. */
|
|
|
|
|
struct SeqStripDrawData {
|
|
|
|
|
/* Horizontal strip positions (1.0 is one frame). */
|
2024-06-04 22:08:42 +03:00
|
|
|
float left_handle, right_handle; /* Left and right strip sides. */
|
|
|
|
|
float content_start, content_end; /* Start and end of actual content (only relevant for strips
|
2024-07-25 10:17:42 +10:00
|
|
|
* that have holdout regions). */
|
2024-06-04 20:05:35 +02:00
|
|
|
float handle_width;
|
|
|
|
|
/* Vertical strip positions (1.0 is one channel). */
|
|
|
|
|
float bottom;
|
|
|
|
|
float top;
|
|
|
|
|
float strip_content_top; /* Content coordinate, i.e. below title bar if there is one. */
|
|
|
|
|
uint flags; /* eGPUSeqFlags bitmask. */
|
|
|
|
|
/* Strip colors, each is uchar4 packed with equivalent of packUnorm4x8. */
|
|
|
|
|
uint col_background;
|
|
|
|
|
uint col_outline;
|
|
|
|
|
uint col_color_band;
|
|
|
|
|
uint col_transition_in, col_transition_out;
|
2024-06-19 11:49:20 +02:00
|
|
|
float _pad0, _pad1;
|
2024-06-04 20:05:35 +02:00
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(SeqStripDrawData, 16)
|
|
|
|
|
BLI_STATIC_ASSERT(sizeof(SeqStripDrawData) * GPU_SEQ_STRIP_DRAW_DATA_LEN <= 16384,
|
2024-06-11 14:07:57 +02:00
|
|
|
"SeqStripDrawData UBO must not exceed minspec UBO size (16384)")
|
2024-06-04 20:05:35 +02:00
|
|
|
|
VSE: Faster timeline thumbnail drawing
VSE timeline, when many (hundreds/thousands) of thumbnails were visible, was
very slow to redraw. This PR makes them 3-10x faster to redraw, by stopping
doing things that are slow :) Part of #126087 thumbnail improvements task.
- No longer do mute semitransparency or corner rounding on the CPU, do it in
shader instead.
- Stop creating a separate GPU texture for each thumbnail, on every repaint,
and drawing each thumbnail as a separate draw call. Instead, put thumbnails
into a single texture atlas (using a simple shelf packing algorithm), and
draw them in batch, passing data via UBO. The atlas is still re-created every
frame, but that does not seem to be a performance issue. Thumbnails are
cropped horizontally based on how much of their parts are visible (e.g. a
narrow strip on screen), so realistically the atlas size is kinda
proportional to screen size, and ends up being just several megabytes of data
transfer between CPU -> GPU each frame.
On this Sprite Fright edit timeline view (612 visible thumbnails), time taken
to repaint the timeline window:
- Mac (M1 Max, Metal): 68.1ms -> 4.7ms
- Windows (Ryzen 5950X, RTX 3080Ti, OpenGL): 23.7ms -> 6.8ms
This also fixes a visual issue with thumbnails, where when strips are very
tall, the "rounded corners" that were poked right into the thumbnail bitmap
on the CPU were showing up due to actual bitmap being scaled up a lot.
Pull Request: https://projects.blender.org/blender/blender/pulls/126972
2024-09-03 08:25:15 +02:00
|
|
|
/* VSE per-thumbnail data for timeline rendering. */
|
|
|
|
|
struct SeqStripThumbData {
|
|
|
|
|
float left, right, bottom, top; /* Strip rectangle positions. */
|
|
|
|
|
float x1, y1, x2, y2; /* Thumbnail rectangle positions. */
|
|
|
|
|
float u1, v1, u2, v2; /* Thumbnail UVs. */
|
|
|
|
|
float4 tint_color;
|
|
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(SeqStripThumbData, 16)
|
|
|
|
|
BLI_STATIC_ASSERT(sizeof(SeqStripThumbData) * GPU_SEQ_STRIP_DRAW_DATA_LEN <= 16384,
|
|
|
|
|
"SeqStripThumbData UBO must not exceed minspec UBO size (16384)")
|
|
|
|
|
|
2024-06-04 20:05:35 +02:00
|
|
|
/* VSE global data for timeline rendering. */
|
|
|
|
|
struct SeqContextDrawData {
|
|
|
|
|
float round_radius;
|
2024-06-19 11:19:15 +02:00
|
|
|
float pixelsize;
|
2024-06-04 20:05:35 +02:00
|
|
|
uint col_back;
|
2024-06-19 11:19:15 +02:00
|
|
|
float _pad0;
|
2024-06-04 20:05:35 +02:00
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(SeqContextDrawData, 16)
|
|
|
|
|
|
VSE: Do Scopes on the GPU, improve their look, HDR for waveform/parade
Faster and better looking VSE scopes & "show overexposed". Waveform &
RGB Parade now can also show HDR color intensities. (Note: this is
only about VSE scopes; Image Space scopes are to be improved separately)
- Waveform, RGB Parade, Vectorscope scopes are done on the GPU now, by
drawing points for each input pixel, and placing them according to
scope logic. The point drawing is implemented in a compute shader,
with a fragment shader resolve pass; this is because drawing lots of
points in the same location is very slow on some GPUs (e.g. Apple).
The compute shader rasterizer is several times faster on regular
desktop GPU as well.
- If a non-default color management is needed (e.g. VSE colorspace is
not the same as display colorspace, or a custom look transform is used
etc. etc.), then transform the VSE preview texture into display space
RGBA 16F texture using OCIO GPU machinery, and calculate scopes
from that.
- The "show overexposed" (zebra) preview option is also done on the
GPU now.
- Waveform/Parade scopes unlock zoom X/Y aspect for viewing HDR scope,
similar to how it was done for HDR histograms recently.
- Added SEQ_preview_cache.hh that holds GPU textures of VSE preview,
this is so that when you have a preview and several scopes, each of
them does not have to create/upload their own GPU texture (that would
both waste memory, and be slow).
Screenshots and performance details in the PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/144867
2025-08-26 12:25:43 +02:00
|
|
|
/* VSE scope point rasterizer data. */
|
|
|
|
|
struct SeqScopeRasterData {
|
|
|
|
|
uint col_r;
|
|
|
|
|
uint col_g;
|
|
|
|
|
uint col_b;
|
|
|
|
|
uint col_a;
|
|
|
|
|
};
|
|
|
|
|
|
2024-10-21 16:25:24 +02:00
|
|
|
struct GreasePencilStrokeData {
|
|
|
|
|
packed_float3 position;
|
|
|
|
|
float stroke_thickness;
|
|
|
|
|
float4 stroke_color;
|
|
|
|
|
};
|
|
|
|
|
BLI_STATIC_ASSERT_ALIGN(GreasePencilStrokeData, 16)
|
|
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
/* Values stored in `TestOutput::status`. */
enum TestStatus : uint32_t {
  TEST_STATUS_NONE = 0u,
  TEST_STATUS_PASSED = 1u,
  TEST_STATUS_FAILED = 2u,
};
|
2024-03-23 01:24:18 +01:00
|
|
|
/* Values stored in `TestOutput::type`, numbered consecutively from 0 to 21. */
enum TestType : uint32_t {
  /* Scalars. */
  TEST_TYPE_BOOL = 0u,
  TEST_TYPE_UINT = 1u,
  TEST_TYPE_INT = 2u,
  TEST_TYPE_FLOAT = 3u,
  /* Vectors. */
  TEST_TYPE_IVEC2 = 4u,
  TEST_TYPE_IVEC3 = 5u,
  TEST_TYPE_IVEC4 = 6u,
  TEST_TYPE_UVEC2 = 7u,
  TEST_TYPE_UVEC3 = 8u,
  TEST_TYPE_UVEC4 = 9u,
  TEST_TYPE_VEC2 = 10u,
  TEST_TYPE_VEC3 = 11u,
  TEST_TYPE_VEC4 = 12u,
  /* Matrices. */
  TEST_TYPE_MAT2X2 = 13u,
  TEST_TYPE_MAT2X3 = 14u,
  TEST_TYPE_MAT2X4 = 15u,
  TEST_TYPE_MAT3X2 = 16u,
  TEST_TYPE_MAT3X3 = 17u,
  TEST_TYPE_MAT3X4 = 18u,
  TEST_TYPE_MAT4X2 = 19u,
  TEST_TYPE_MAT4X3 = 20u,
  TEST_TYPE_MAT4X4 = 21u,
};
|
|
|
|
|
|
|
|
|
|
/** \note Contains arrays of scalar. To be use only with SSBOs to avoid padding issues. */
|
|
|
|
|
struct TestOutputRawData {
|
|
|
|
|
uint data[16];
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(TestOutputRawData, 16)
|
2023-01-06 22:22:11 +01:00
|
|
|
|
|
|
|
|
struct TestOutput {
|
|
|
|
|
TestOutputRawData expect;
|
|
|
|
|
TestOutputRawData result;
|
|
|
|
|
/** TestStatus. */
|
|
|
|
|
uint status;
|
2023-08-21 10:05:45 +10:00
|
|
|
/** Line error in the GLSL file. */
|
2023-01-06 22:22:11 +01:00
|
|
|
int line;
|
|
|
|
|
/** TestType of expect and result. */
|
|
|
|
|
uint type;
|
|
|
|
|
int _pad0;
|
|
|
|
|
};
|
2024-03-23 01:24:18 +01:00
|
|
|
BLI_STATIC_ASSERT_ALIGN(TestOutput, 16)
|
2023-01-06 22:22:11 +01:00
|
|
|
|
|
|
|
|
#ifdef GPU_SHADER
|
|
|
|
|
/**
 * Build a `TestOutput` record from its individual parts.
 *
 * \param expect: Copied to `TestOutput::expect`.
 * \param result: Copied to `TestOutput::result`.
 * \param status: True stores `TEST_STATUS_PASSED`, false stores `TEST_STATUS_FAILED`.
 * \param line: Copied to `TestOutput::line`.
 * \param type: Copied to `TestOutput::type`.
 * \return The filled record.
 */
TestOutput test_output(
    TestOutputRawData expect, TestOutputRawData result, bool status, int line, uint type)
{
  TestOutput test;
  test.expect = expect;
  test.result = result;
  test.status = status ? TEST_STATUS_PASSED : TEST_STATUS_FAILED;
  test.line = line;
  test.type = type;
  /* `test` is declared without an initializer: give the padding member a defined value too, so
   * that no member of the returned record is left undefined. */
  test._pad0 = 0;
  return test;
}
|
|
|
|
|
#endif
|