Files
test/source/blender/blenlib/intern/math_bits_inline.c
Jacques Lucke 66adedbd78 BLI: optimize constructing IndexMask from bits and bools
This patch optimizes `IndexMask::from_bits` by making use of the fact that many
bits can be processed at once, so in many cases one does not have to look at
every bit individually. Bits are stored as an array of `BitInt` (aka `uint64_t`),
so we can process at least 64 bits at a time. On some platforms we can also make
use of SIMD and process up to 128 bits at once. This can significantly improve
performance if all bits are set/unset.

As a byproduct, this patch also optimizes `IndexMask::from_bools` which is now
implemented in terms of `IndexMask::from_bits`. The conversion from bools to
bits has been optimized significantly too by using SIMD intrinsics.

Pull Request: https://projects.blender.org/blender/blender/pulls/126888
2024-08-29 12:15:33 +02:00

186 lines
3.5 KiB
C

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup bli
*/
#ifndef __MATH_BITS_INLINE_C__
#define __MATH_BITS_INLINE_C__
#ifdef _MSC_VER
# include <intrin.h>
#endif
#include "BLI_math_bits.h"
/**
 * Locate the least significant set bit of \a a.
 *
 * \param a: Value to scan. Must be non-zero (checked with #BLI_assert), the
 * underlying intrinsics give no meaningful result for zero.
 * \return Zero-based index of the lowest set bit, which equals the number of
 * trailing zero bits.
 */
MINLINE unsigned int bitscan_forward_uint(unsigned int a)
{
  BLI_assert(a != 0);
#ifndef _MSC_VER
  const int trailing_zeros = __builtin_ctz(a);
  return (unsigned int)trailing_zeros;
#else
  unsigned long lowest_bit_index;
  _BitScanForward(&lowest_bit_index, a);
  return lowest_bit_index;
#endif
}
/**
 * 64-bit variant of the forward bit scan: locate the least significant set bit
 * of \a a.
 *
 * \param a: Value to scan. Must be non-zero (checked with #BLI_assert).
 * \return Zero-based index of the lowest set bit, which equals the number of
 * trailing zero bits.
 */
MINLINE unsigned int bitscan_forward_uint64(unsigned long long a)
{
  BLI_assert(a != 0);
#ifndef _MSC_VER
  const int trailing_zeros = __builtin_ctzll(a);
  return (unsigned int)trailing_zeros;
#else
  unsigned long lowest_bit_index;
  _BitScanForward64(&lowest_bit_index, a);
  return lowest_bit_index;
#endif
}
/**
 * Signed convenience wrapper around #bitscan_forward_uint.
 *
 * The bit pattern of \a a is reinterpreted as unsigned before scanning, and
 * the resulting index is cast back to `int`.
 */
MINLINE int bitscan_forward_i(int a)
{
  const unsigned int bits = (unsigned int)a;
  const unsigned int lowest_bit_index = bitscan_forward_uint(bits);
  return (int)lowest_bit_index;
}
/**
 * Find the lowest set bit of `*a`, clear it in place and report where it was.
 *
 * \param a: In/out value. The pointed-to value is passed to
 * #bitscan_forward_uint, so it is expected to be non-zero.
 * \return Index of the bit that was cleared, as returned by
 * #bitscan_forward_uint.
 */
MINLINE unsigned int bitscan_forward_clear_uint(unsigned int *a)
{
  const unsigned int value = *a;
  const unsigned int lowest_bit_index = bitscan_forward_uint(value);
  /* `x & (x - 1)` drops exactly the lowest set bit of `x`. */
  *a = value & (value - 1);
  return lowest_bit_index;
}
/**
 * Signed convenience wrapper around #bitscan_forward_clear_uint.
 *
 * The storage behind \a a is accessed as `unsigned int`, the lowest set bit is
 * cleared in place and its index is returned as `int`.
 */
MINLINE int bitscan_forward_clear_i(int *a)
{
  unsigned int *bits = (unsigned int *)a;
  const unsigned int cleared_bit_index = bitscan_forward_clear_uint(bits);
  return (int)cleared_bit_index;
}
/**
 * Scan \a a from the most significant end.
 *
 * \note The return value is the number of leading zero bits, not the index of
 * the highest set bit: the GCC/Clang path returns `__builtin_clz` directly and
 * the MSVC path converts the bit index with `31 - index`.
 *
 * \param a: Value to scan. Must be non-zero (checked with #BLI_assert).
 * \return Count of zero bits above the highest set bit.
 */
MINLINE unsigned int bitscan_reverse_uint(unsigned int a)
{
  BLI_assert(a != 0);
#ifndef _MSC_VER
  const int leading_zeros = __builtin_clz(a);
  return (unsigned int)leading_zeros;
#else
  unsigned long top_bit_index;
  _BitScanReverse(&top_bit_index, a);
  /* The intrinsic yields a bit index, turn it into a leading-zero count. */
  return 31 - top_bit_index;
#endif
}
/**
 * 64-bit variant of the reverse bit scan.
 *
 * \note Like #bitscan_reverse_uint, the return value is the number of leading
 * zero bits, not the index of the highest set bit.
 *
 * \param a: Value to scan. Must be non-zero (checked with #BLI_assert).
 * \return Count of zero bits above the highest set bit, in the range [0, 63].
 */
MINLINE unsigned int bitscan_reverse_uint64(unsigned long long a)
{
  BLI_assert(a != 0);
#ifdef _MSC_VER
  unsigned long top_bit_index;
  _BitScanReverse64(&top_bit_index, a);
  /* The intrinsic yields the index (0..63) of the highest set bit. Subtract
   * from 63, not 31, so the result matches `__builtin_clzll` below. */
  return 63 - top_bit_index;
#else
  return (unsigned int)__builtin_clzll(a);
#endif
}
/**
 * Signed convenience wrapper around #bitscan_reverse_uint.
 *
 * The bit pattern of \a a is reinterpreted as unsigned before scanning, and
 * the result of #bitscan_reverse_uint is cast back to `int`.
 */
MINLINE int bitscan_reverse_i(int a)
{
  const unsigned int bits = (unsigned int)a;
  const unsigned int scan_result = bitscan_reverse_uint(bits);
  return (int)scan_result;
}
/**
 * Find the highest set bit of `*a`, clear it in place and report the scan
 * result.
 *
 * \param a: In/out value. The pointed-to value is passed to
 * #bitscan_reverse_uint, so it is expected to be non-zero.
 * \return The value returned by #bitscan_reverse_uint for the original `*a`.
 */
MINLINE unsigned int bitscan_reverse_clear_uint(unsigned int *a)
{
  const unsigned int scan_result = bitscan_reverse_uint(*a);
  /* Shift a mask down from bit 31 by the scan result to select the bit to drop. */
  const unsigned int top_bit_mask = 0x80000000 >> scan_result;
  *a &= ~top_bit_mask;
  return scan_result;
}
/**
 * Signed convenience wrapper around #bitscan_reverse_clear_uint.
 *
 * The storage behind \a a is accessed as `unsigned int`, the highest set bit
 * is cleared in place and the scan result is returned as `int`.
 */
MINLINE int bitscan_reverse_clear_i(int *a)
{
  unsigned int *bits = (unsigned int *)a;
  const unsigned int scan_result = bitscan_reverse_clear_uint(bits);
  return (int)scan_result;
}
/**
 * Isolate the most significant set bit of \a n.
 *
 * This is the `unsigned int` counterpart of #highest_order_bit_s: the result
 * has exactly one bit set, at the position of the highest set bit of \a n.
 *
 * \param n: Input value.
 * \return \a n with everything but its highest set bit cleared, or 0 when
 * \a n is 0.
 */
MINLINE unsigned int highest_order_bit_uint(unsigned int n)
{
  if (n == 0) {
    return 0;
  }
  /* #bitscan_reverse_uint returns the number of leading zeros, so the index of
   * the top bit is `(bit width - 1) - leading zeros`. Subtracting from the full
   * bit width would return twice the top bit and shift by the full width (which
   * is undefined) when the top-most bit is set. */
  const unsigned int last_bit_index = (unsigned int)(sizeof(unsigned int) * 8 - 1);
  const unsigned int top_bit_index = last_bit_index - bitscan_reverse_uint(n);
  /* Unsigned literal: shifting a signed 1 into the sign bit is undefined. */
  return 1u << top_bit_index;
}
/**
 * Isolate the most significant set bit of \a n.
 *
 * \param n: Input value.
 * \return \a n with everything but its highest set bit cleared, or 0 when
 * \a n is 0.
 *
 * \note The shift sequence (1, 2, 4, 8) covers 16 bits, so this presumably
 * relies on `unsigned short` being 16 bits wide.
 */
MINLINE unsigned short highest_order_bit_s(unsigned short n)
{
  /* Smear the top set bit into every position below it. */
  for (int shift = 1; shift <= 8; shift *= 2) {
    n |= (unsigned short)(n >> shift);
  }
  /* All bits up to the top one are now set, subtracting the lower half of
   * that run leaves only the top bit. */
  const int lower_run = n >> 1;
  return (unsigned short)(n - lower_run);
}
#if !(COMPILER_GCC || COMPILER_CLANG || COMPILER_MSVC)
/**
 * Count the set bits in \a i (population count) without compiler intrinsics.
 *
 * Uses the variable-precision SWAR approach: bit counts are accumulated in
 * progressively wider fields of the same word.
 *
 * \note The masks and the final shift by 24 are written for a 32-bit value.
 */
MINLINE int count_bits_i(unsigned int i)
{
  /* Per 2-bit field: number of set bits in that field. */
  const unsigned int pair_counts = i - ((i >> 1) & 0x55555555);
  /* Per 4-bit field: sum of the two neighboring 2-bit counts. */
  const unsigned int nibble_counts = (pair_counts & 0x33333333) +
                                     ((pair_counts >> 2) & 0x33333333);
  /* Per byte: sum of the two neighboring 4-bit counts. */
  const unsigned int byte_counts = (nibble_counts + (nibble_counts >> 4)) & 0x0F0F0F0F;
  /* The multiply adds all four byte counts into the top byte. */
  return (byte_counts * 0x01010101) >> 24;
}
/**
 * Count the set bits in the 64-bit value \a a.
 *
 * The value is split into its low and high 32-bit halves, each counted with
 * #count_bits_i, and the two counts are added.
 */
MINLINE int count_bits_uint64(const uint64_t a)
{
  const uint32_t low_half = (uint32_t)a;
  const uint32_t high_half = (uint32_t)(a >> 32);
  return count_bits_i(low_half) + count_bits_i(high_half);
}
#endif
/**
 * Reinterpret the storage of a `float` as an `int`.
 *
 * No numeric conversion takes place: the value is written to the `float`
 * member of a union and read back through the `int` member.
 */
MINLINE int float_as_int(float f)
{
  union {
    float as_float;
    int as_int;
  } pun;
  pun.as_float = f;
  return pun.as_int;
}
/**
 * Reinterpret the storage of a `float` as an `unsigned int`.
 *
 * No numeric conversion takes place: the value is written to the `float`
 * member of a union and read back through the `unsigned int` member.
 */
MINLINE unsigned int float_as_uint(float f)
{
  union {
    float as_float;
    unsigned int as_uint;
  } pun;
  pun.as_float = f;
  return pun.as_uint;
}
/**
 * Reinterpret the storage of an `int` as a `float`.
 *
 * No numeric conversion takes place: the value is written to the `int` member
 * of a union and read back through the `float` member.
 */
MINLINE float int_as_float(int i)
{
  union {
    float as_float;
    int as_int;
  } pun;
  pun.as_int = i;
  return pun.as_float;
}
/**
 * Reinterpret the storage of an `unsigned int` as a `float`.
 *
 * No numeric conversion takes place: the value is written to the
 * `unsigned int` member of a union and read back through the `float` member.
 */
MINLINE float uint_as_float(unsigned int i)
{
  union {
    float as_float;
    unsigned int as_uint;
  } pun;
  pun.as_uint = i;
  return pun.as_float;
}
/**
 * XOR the storage of the float \a x with the integer mask \a y.
 *
 * \param x: Float whose bits are modified.
 * \param y: Mask applied with bitwise XOR to the bits of \a x, as obtained
 * from #float_as_int.
 * \return The masked bits turned back into a float with #int_as_float.
 */
MINLINE float xor_fl(float x, int y)
{
  const int flipped_bits = float_as_int(x) ^ y;
  return int_as_float(flipped_bits);
}
/**
 * Convert a 16-bit half-precision float (given as its raw bits) to `float`.
 *
 * The input is decoded as 1 sign bit (0x8000), 5 exponent bits (0x7C00) and
 * 10 mantissa bits (0x03FF). Besides normal numbers, signed zero, subnormals,
 * infinity and NaN are converted to their `float` equivalents.
 *
 * \param h: Raw half-float bits.
 * \return The same value as a single-precision float.
 */
MINLINE float half_to_float(ushort h)
{
  const uint sign = ((uint)h & 0x8000) << 16;
  const uint exponent = ((uint)h & 0x7c00);
  uint mantissa = ((uint)h & 0x03FF);

  if (exponent == 0x7c00) {
    /* Infinity (mantissa == 0) or NaN: all float exponent bits set, the
     * mantissa payload is kept in the top bits of the float mantissa. */
    return uint_as_float(sign | 0x7F800000 | (mantissa << 13));
  }

  if (exponent == 0) {
    if (mantissa == 0) {
      /* Signed zero, re-biasing the exponent here would produce 2^-15. */
      return uint_as_float(sign);
    }
    /* Subnormal half (value = mantissa * 2^-24): every subnormal half is a
     * normal float, so shift the mantissa up until its implicit leading bit
     * (0x0400) appears and lower the exponent accordingly. 113 is the float
     * exponent of the smallest normal half (127 - 15 + 1). */
    uint float_exponent = 113;
    while ((mantissa & 0x0400) == 0) {
      mantissa <<= 1;
      float_exponent--;
    }
    mantissa &= 0x03FF;
    return uint_as_float(sign | (float_exponent << 23) | (mantissa << 13));
  }

  /* Normal number: re-bias the exponent from 15 to 127 (112 << 10 == 0x1C000)
   * and move exponent and mantissa to their float positions. */
  return uint_as_float(sign | ((exponent + 0x1C000) << 13) | (mantissa << 13));
}
#endif /* __MATH_BITS_INLINE_C__ */