emscripten-core · sbc100 · Jan 23, 2026 · Jan 21, 2026 · Jan 22, 2026 · Jan 23, 2026
diff --git a/system/lib/llvm-libc/src/__support/CPP/atomic.h b/system/lib/llvm-libc/src/__support/CPP/atomic.h
@@ -28,7 +28,8 @@ enum class MemoryOrder : int {
   SEQ_CST = __ATOMIC_SEQ_CST
 };
 
-// These are a clang extension, see the clang documenation for more information:
+// These are a clang extension, see the clang documentation for more
+// information:
 // https://clang.llvm.org/docs/LanguageExtensions.html#scoped-atomic-builtins.
 enum class MemoryScope : int {
 #if defined(__MEMORY_SCOPE_SYSTEM) && defined(__MEMORY_SCOPE_DEVICE)

diff --git a/system/lib/llvm-libc/src/__support/CPP/new.h b/system/lib/llvm-libc/src/__support/CPP/new.h
@@ -29,6 +29,17 @@ enum class align_val_t : size_t {};
 
 namespace LIBC_NAMESPACE_DECL {
 
+namespace cpp {
+// Forwards `p` to the compiler's __builtin_launder and returns the result.
+// Compilation fails via static_assert when the builtin is unavailable.
+// NOTE(review): presumably the in-tree counterpart of std::launder - confirm.
+template <class T> [[nodiscard]] constexpr T *launder(T *p) {
+  static_assert(__has_builtin(__builtin_launder),
+                "cpp::launder requires __builtin_launder");
+  return __builtin_launder(p);
+}
+} // namespace cpp
+
 class AllocChecker {
   bool success = false;
 

diff --git a/system/lib/llvm-libc/src/__support/CPP/type_traits/is_floating_point.h b/system/lib/llvm-libc/src/__support/CPP/type_traits/is_floating_point.h
@@ -36,7 +36,8 @@ template <typename T> struct is_floating_point {
                               ,
                               float128
 #endif
-                              >();
+                              ,
+                              bfloat16>();
 };
 template <typename T>
 LIBC_INLINE_VAR constexpr bool is_floating_point_v =

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/FEnvImpl.h b/system/lib/llvm-libc/src/__support/FPUtil/FEnvImpl.h
@@ -15,6 +15,7 @@
 #include "src/__support/libc_errno.h"
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
 #include "src/__support/macros/properties/architectures.h"
 
 #if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
@@ -71,27 +72,52 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }
 namespace LIBC_NAMESPACE_DECL {
 namespace fputil {
 
-LIBC_INLINE int clear_except_if_required(int excepts) {
+// Returns clear_except(excepts) when math_errhandling has MATH_ERREXCEPT set
+// and 0 otherwise. With LIBC_MATH_HAS_NO_EXCEPT defined the check is compiled
+// out and the function always returns 0.
+LIBC_INLINE static int clear_except_if_required([[maybe_unused]] int excepts) {
+#ifndef LIBC_MATH_HAS_NO_EXCEPT
   if (math_errhandling & MATH_ERREXCEPT)
     return clear_except(excepts);
+#endif // LIBC_MATH_HAS_NO_EXCEPT
   return 0;
 }
 
-LIBC_INLINE int set_except_if_required(int excepts) {
+// Returns set_except(excepts) when math_errhandling has MATH_ERREXCEPT set and
+// 0 otherwise. With LIBC_MATH_HAS_NO_EXCEPT defined the check is compiled out
+// and the function always returns 0.
+LIBC_INLINE static int set_except_if_required([[maybe_unused]] int excepts) {
+#ifndef LIBC_MATH_HAS_NO_EXCEPT
   if (math_errhandling & MATH_ERREXCEPT)
     return set_except(excepts);
+#endif // LIBC_MATH_HAS_NO_EXCEPT
   return 0;
 }
 
-LIBC_INLINE int raise_except_if_required(int excepts) {
+// Returns raise_except(excepts) when math_errhandling has MATH_ERREXCEPT set
+// and 0 otherwise. On x86-64 the raise_except<true> instantiation
+// (SKIP_X87_FPU) is used. With LIBC_MATH_HAS_NO_EXCEPT defined the check is
+// compiled out and the function always returns 0.
+LIBC_INLINE static int raise_except_if_required([[maybe_unused]] int excepts) {
+#ifndef LIBC_MATH_HAS_NO_EXCEPT
   if (math_errhandling & MATH_ERREXCEPT)
+#ifdef LIBC_TARGET_ARCH_IS_X86_64
+    return raise_except</*SKIP_X87_FPU*/ true>(excepts);
+#else  // !LIBC_TARGET_ARCH_IS_X86_64
     return raise_except(excepts);
+#endif // LIBC_TARGET_ARCH_IS_X86_64
+
+#endif // LIBC_MATH_HAS_NO_EXCEPT
   return 0;
 }
 
-LIBC_INLINE void set_errno_if_required(int err) {
+// Stores err in libc_errno when math_errhandling has MATH_ERRNO set. With
+// LIBC_MATH_HAS_NO_ERRNO defined the function does nothing.
+LIBC_INLINE static void set_errno_if_required([[maybe_unused]] int err) {
+#ifndef LIBC_MATH_HAS_NO_ERRNO
   if (math_errhandling & MATH_ERRNO)
     libc_errno = err;
+#endif // LIBC_MATH_HAS_NO_ERRNO
 }
 
 } // namespace fputil

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/FPBits.h b/system/lib/llvm-libc/src/__support/FPUtil/FPBits.h
@@ -38,6 +38,7 @@ enum class FPType {
   IEEE754_Binary64,
   IEEE754_Binary128,
   X86_Binary80,
+  BFloat16
 };
 
 // The classes hierarchy is as follows:
@@ -138,6 +139,17 @@ template <> struct FPLayout<FPType::X86_Binary80> {
   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1;
 };
 
+// bfloat16 layout: 1 sign bit, 8 exponent bits and 7 significand bits in a
+// 16-bit storage word. Every significand bit is a fraction bit, unlike
+// X86_Binary80 above whose FRACTION_LEN is SIG_LEN - 1.
+template <> struct FPLayout<FPType::BFloat16> {
+  using StorageType = uint16_t;
+  LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
+  LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
+  LIBC_INLINE_VAR static constexpr int SIG_LEN = 7;
+  LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
+};
+
 // FPStorage derives useful constants from the FPLayout above.
 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
   using UP = FPLayout<fp_type>;
@@ -801,6 +813,8 @@ template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
   else if constexpr (cpp::is_same_v<UnqualT, float128>)
     return FPType::IEEE754_Binary128;
 #endif
+  else if constexpr (cpp::is_same_v<UnqualT, bfloat16>)
+    return FPType::BFloat16;
   else
     static_assert(cpp::always_false<UnqualT>, "Unsupported type");
 }

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/ManipulationFunctions.h b/system/lib/llvm-libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -29,7 +29,7 @@ namespace LIBC_NAMESPACE_DECL {
 namespace fputil {
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE T frexp(T x, int &exp) {
+LIBC_INLINE constexpr T frexp(T x, int &exp) {
   FPBits<T> bits(x);
   if (bits.is_inf_or_nan()) {
 #ifdef LIBC_FREXP_INF_NAN_EXPONENT

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/bfloat16.h b/system/lib/llvm-libc/src/__support/FPUtil/bfloat16.h
@@ -0,0 +1,79 @@
+//===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace fputil {
+
+// Storage type for bfloat16 values: `bits` holds the raw 16-bit encoding.
+struct BFloat16 {
+  uint16_t bits;
+
+  // Defaulted; `bits` is not given an initial value here.
+  LIBC_INLINE BFloat16() = default;
+
+  // Wraps an already-encoded bit pattern; no conversion is performed.
+  LIBC_INLINE constexpr explicit BFloat16(uint16_t bits) : bits(bits) {}
+
+  // Converts `value` to bfloat16:
+  //  - floating-point types go through fputil::cast<bfloat16>;
+  //  - integral types are split into sign and magnitude, loaded into a
+  //    DyadicFloat and converted with DyadicFloat::as<bfloat16>;
+  //  - any other type is converted to float first and then cast.
+  template <typename T> LIBC_INLINE constexpr explicit BFloat16(T value) {
+    if constexpr (cpp::is_floating_point_v<T>) {
+      bits = fputil::cast<bfloat16>(value).bits;
+    } else if constexpr (cpp::is_integral_v<T>) {
+      using UnsignedT = cpp::make_unsigned_t<T>;
+      Sign sign = Sign::POS;
+      UnsignedT magnitude = static_cast<UnsignedT>(value);
+
+      if constexpr (cpp::is_signed_v<T>) {
+        if (value < 0) {
+          sign = Sign::NEG;
+          // Negate in the unsigned domain: `-value` would overflow (undefined
+          // behavior) when value is the most negative value of T.
+          magnitude = static_cast<UnsignedT>(UnsignedT(0) - magnitude);
+        }
+      }
+
+      fputil::DyadicFloat<cpp::numeric_limits<UnsignedT>::digits> xd(
+          sign, 0, magnitude);
+      bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;
+
+    } else {
+      bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
+    }
+  }
+
+  // Converts to float by placing `bits` in the upper 16 bits of a 32-bit word
+  // and bit-casting it. Requires float to be IEEE754_Binary32 (enable_if).
+  template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
+                                 fputil::FPType::IEEE754_Binary32,
+                             int> = 0>
+  LIBC_INLINE constexpr operator float() const {
+    uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
+    return cpp::bit_cast<float>(x_bits);
+  }
+}; // struct BFloat16
+
+} // namespace fputil
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
diff --git a/system/lib/llvm-libc/src/__support/FPUtil/cast.h b/system/lib/llvm-libc/src/__support/FPUtil/cast.h
@@ -26,9 +26,18 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                            cpp::is_floating_point_v<InType>,
                                        OutType>
 cast(InType x) {
+  // Casting to the same type is a no-op.
+  if constexpr (cpp::is_same_v<InType, OutType>)
+    return x;
+
+  // bfloat16 is always defined (for now)
+  if constexpr (cpp::is_same_v<OutType, bfloat16> ||
+                cpp::is_same_v<InType, bfloat16>
 #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
-  if constexpr (cpp::is_same_v<OutType, float16> ||
-                cpp::is_same_v<InType, float16>) {
+                || cpp::is_same_v<OutType, float16> ||
+                cpp::is_same_v<InType, float16>
+#endif
+  ) {
     using InFPBits = FPBits<InType>;
     using InStorageType = typename InFPBits::StorageType;
     using OutFPBits = FPBits<OutType>;
@@ -58,7 +67,6 @@ cast(InType x) {
     DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x);
     return xd.template as<OutType, /*ShouldSignalExceptions=*/true>();
   }
-#endif
 
   return static_cast<OutType>(x);
 }

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/dyadic_float.h b/system/lib/llvm-libc/src/__support/FPUtil/dyadic_float.h
@@ -170,7 +170,6 @@ template <size_t Bits> struct DyadicFloat {
     return DyadicFloat(result_sign, result_exponent, result_mantissa);
   }
 
-#ifdef LIBC_TYPES_HAS_FLOAT16
   template <typename T, bool ShouldSignalExceptions>
   LIBC_INLINE constexpr cpp::enable_if_t<
       cpp::is_floating_point_v<T> && (FPBits<T>::FRACTION_LEN < Bits), T>
@@ -277,7 +276,6 @@ template <size_t Bits> struct DyadicFloat {
 
     return FPBits(result).get_val();
   }
-#endif // LIBC_TYPES_HAS_FLOAT16
 
   template <typename T, bool ShouldSignalExceptions,
             typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
@@ -411,11 +409,14 @@ template <size_t Bits> struct DyadicFloat {
                                             (FPBits<T>::FRACTION_LEN < Bits),
                                         void>>
   LIBC_INLINE constexpr T as() const {
+    if constexpr (cpp::is_same_v<T, bfloat16>
 #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
-    if constexpr (cpp::is_same_v<T, float16>)
-      return generic_as<T, ShouldSignalExceptions>();
+                  || cpp::is_same_v<T, float16>
 #endif
-    return fast_as<T, ShouldSignalExceptions>();
+    )
+      return generic_as<T, ShouldSignalExceptions>();
+    else
+      return fast_as<T, ShouldSignalExceptions>();
   }
 
   template <typename T,
@@ -465,7 +466,10 @@ template <size_t Bits> struct DyadicFloat {
         // exponents coming in to this function _shouldn't_ be that large). The
         // result should always end up as a positive size_t.
         size_t shift = -static_cast<size_t>(exponent);
-        new_mant >>= shift;
+        if (shift >= Bits)
+          new_mant = 0;
+        else
+          new_mant >>= shift;
         round_dir = rounding_direction(mantissa, shift, sign);
         if (round_dir > 0)
           ++new_mant;

diff --git a/system/lib/llvm-libc/src/__support/FPUtil/x86_64/FEnvImpl.h b/system/lib/llvm-libc/src/__support/FPUtil/x86_64/FEnvImpl.h
@@ -239,7 +239,7 @@ LIBC_INLINE int set_except(int excepts) {
   return 0;
 }
 
-LIBC_INLINE int raise_except(int excepts) {
+template <bool SKIP_X87_FPU = false> LIBC_INLINE int raise_except(int excepts) {
   uint16_t status_value = internal::get_status_value_for_except(excepts);
 
   // We set the status flag for exception one at a time and call the
@@ -256,13 +256,16 @@ LIBC_INLINE int raise_except(int excepts) {
   // when raising the next exception.
 
   auto raise_helper = [](uint16_t singleExceptFlag) {
-    internal::X87StateDescriptor state;
+    if constexpr (!SKIP_X87_FPU) {
+      internal::X87StateDescriptor state;
+      internal::get_x87_state_descriptor(state);
+      state.status_word |= singleExceptFlag;
+      internal::write_x87_state_descriptor(state);
+    }
+
     uint32_t mxcsr = 0;
-    internal::get_x87_state_descriptor(state);
     mxcsr = internal::get_mxcsr();
-    state.status_word |= singleExceptFlag;
     mxcsr |= singleExceptFlag;
-    internal::write_x87_state_descriptor(state);
     internal::write_mxcsr(mxcsr);
     internal::fwait();
   };

diff --git a/system/lib/llvm-libc/src/__support/GPU/allocator.cpp b/system/lib/llvm-libc/src/__support/GPU/allocator.cpp
@@ -22,6 +22,7 @@
 #include "src/__support/GPU/utils.h"
 #include "src/__support/RPC/rpc_client.h"
 #include "src/__support/threads/sleep.h"
+#include "src/string/memory_utils/inline_memcpy.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -137,6 +138,11 @@ void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
     s[i] = c;
 }
 
+// Returns true iff x is a non-zero power of two (exactly one bit set).
+static inline constexpr bool is_pow2(uint64_t x) {
+  return x && (x & (x - 1)) == 0;
+}
+
 } // namespace impl
 
 /// A slab allocator used to hand out identically sized slabs of memory.
@@ -183,7 +189,12 @@ struct Slab {
 
   // Get the number of bytes needed to contain the bitfield bits.
   constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
-    return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
+    // Round the byte count up to a multiple of MIN_ALIGNMENT + 1.
+    // NOTE(review): presumably keeps the data after the bitfield aligned -
+    // confirm against the slab layout.
+    return __builtin_align_up(
+        ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8,
+        MIN_ALIGNMENT + 1);
   }
 
   // The actual amount of memory available excluding the bitfield and metadata.
@@ -540,15 +551,74 @@ void deallocate(void *ptr) {
     return;
 
   // All non-slab allocations will be aligned on a 2MiB boundary.
-  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
+  if (__builtin_is_aligned(ptr, SLAB_ALIGNMENT + 1))
     return impl::rpc_free(ptr);
 
   // The original slab pointer is the 2MiB boundary using the given pointer.
-  Slab *slab = reinterpret_cast<Slab *>(
-      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
+  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
   slab->deallocate(ptr);
   release_slab(slab);
 }
 
+// Resizes the allocation at `ptr` so that it can hold at least `size` bytes.
+//  - A null `ptr` behaves like gpu::allocate(size).
+//  - Non-slab pointers (see deallocate above) are rejected with nullptr.
+//  - If the chunk already holds `size` bytes, `ptr` is returned unchanged.
+//  - Otherwise the contents move to a fresh allocation and `ptr` is freed.
+//    If that allocation fails, nullptr is returned and `ptr` stays valid.
+void *reallocate(void *ptr, uint64_t size) {
+  if (ptr == nullptr)
+    return gpu::allocate(size);
+
+  // Non-slab allocations are considered foreign pointers so we fail.
+  if (__builtin_is_aligned(ptr, SLAB_ALIGNMENT + 1))
+    return nullptr;
+
+  // The original slab pointer is the 2MiB boundary using the given pointer.
+  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
+  if (slab->get_chunk_size() >= size)
+    return ptr;
+
+  // If we need a new chunk we reallocate and copy it over. A failed
+  // allocation must leave the old chunk untouched so the caller can keep it.
+  void *new_ptr = gpu::allocate(size);
+  if (new_ptr == nullptr)
+    return nullptr;
+  inline_memcpy(new_ptr, ptr, slab->get_chunk_size());
+  gpu::deallocate(ptr);
+  return new_ptr;
+}
+
+// Allocates `size` bytes aligned to `alignment`. Returns nullptr when
+// `alignment` is not a power of two, exceeds SLAB_ALIGNMENT + 1, or when the
+// padded request size would overflow. Alignments of at most MIN_ALIGNMENT + 1
+// are served by a plain gpu::allocate(size).
+void *aligned_allocate(uint32_t alignment, uint64_t size) {
+  // All alignment values must be a non-zero power of two.
+  if (!impl::is_pow2(alignment))
+    return nullptr;
+
+  // If the requested alignment is less than what we already provide this is
+  // just a normal allocation.
+  if (alignment <= MIN_ALIGNMENT + 1)
+    return gpu::allocate(size);
+
+  // We can't handle alignments greater than 2MiB so we simply fail.
+  if (alignment > SLAB_ALIGNMENT + 1)
+    return nullptr;
+
+  // Trying to handle allocation internally would break the assumption that each
+  // chunk is identical to each other. Allocate enough memory with worst-case
+  // alignment and then round up. The index logic will round down properly.
+  uint64_t rounded = size + alignment - MIN_ALIGNMENT;
+  // The padded size wrapped around: the request cannot be satisfied.
+  if (rounded < size)
+    return nullptr;
+  void *ptr = gpu::allocate(rounded);
+  return __builtin_align_up(ptr, alignment);
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL