Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion system/lib/llvm-libc/src/__support/CPP/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ enum class MemoryOrder : int {
SEQ_CST = __ATOMIC_SEQ_CST
};

// These are a clang extension, see the clang documenation for more information:
// These are a clang extension, see the clang documentation for more
// information:
// https://clang.llvm.org/docs/LanguageExtensions.html#scoped-atomic-builtins.
enum class MemoryScope : int {
#if defined(__MEMORY_SCOPE_SYSTEM) && defined(__MEMORY_SCOPE_DEVICE)
Expand Down
8 changes: 8 additions & 0 deletions system/lib/llvm-libc/src/__support/CPP/new.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ enum class align_val_t : size_t {};

namespace LIBC_NAMESPACE_DECL {

namespace cpp {
// Forwards `ptr` to the compiler's __builtin_launder and returns the result.
// Compilation fails with a clear message when the builtin is not available.
// The result is [[nodiscard]]: callers are expected to use the returned
// pointer rather than the one they passed in.
template <typename T> [[nodiscard]] constexpr T *launder(T *ptr) {
  static_assert(__has_builtin(__builtin_launder),
                "cpp::launder requires __builtin_launder");
  T *const laundered = __builtin_launder(ptr);
  return laundered;
}
} // namespace cpp

class AllocChecker {
bool success = false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ template <typename T> struct is_floating_point {
,
float128
#endif
>();
,
bfloat16>();
};
template <typename T>
LIBC_INLINE_VAR constexpr bool is_floating_point_v =
Expand Down
22 changes: 18 additions & 4 deletions system/lib/llvm-libc/src/__support/FPUtil/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "src/__support/libc_errno.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/architectures.h"

#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
Expand Down Expand Up @@ -71,27 +72,40 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }
namespace LIBC_NAMESPACE_DECL {
namespace fputil {

LIBC_INLINE int clear_except_if_required(int excepts) {
// Clears the floating-point exceptions in `excepts` through clear_except(),
// but only when math_errhandling has MATH_ERREXCEPT set. When
// LIBC_MATH_HAS_NO_EXCEPT is defined the argument is unused and the function
// always returns 0.
LIBC_INLINE static int clear_except_if_required([[maybe_unused]] int excepts) {
#ifdef LIBC_MATH_HAS_NO_EXCEPT
  return 0;
#else
  const bool uses_fp_exceptions = (math_errhandling & MATH_ERREXCEPT) != 0;
  return uses_fp_exceptions ? clear_except(excepts) : 0;
#endif // LIBC_MATH_HAS_NO_EXCEPT
}

LIBC_INLINE int set_except_if_required(int excepts) {
// Sets the floating-point exceptions in `excepts` through set_except(), but
// only when math_errhandling has MATH_ERREXCEPT set. When
// LIBC_MATH_HAS_NO_EXCEPT is defined the argument is unused and the function
// always returns 0.
LIBC_INLINE static int set_except_if_required([[maybe_unused]] int excepts) {
#ifdef LIBC_MATH_HAS_NO_EXCEPT
  return 0;
#else
  const bool uses_fp_exceptions = (math_errhandling & MATH_ERREXCEPT) != 0;
  return uses_fp_exceptions ? set_except(excepts) : 0;
#endif // LIBC_MATH_HAS_NO_EXCEPT
}

LIBC_INLINE int raise_except_if_required(int excepts) {
// Raises the floating-point exceptions in `excepts` when math_errhandling has
// MATH_ERREXCEPT set; otherwise (or when LIBC_MATH_HAS_NO_EXCEPT is defined)
// does nothing and returns 0.
//
// When LIBC_TARGET_ARCH_IS_X86_64 is defined, the raise_except overload is
// instantiated with SKIP_X87_FPU = true; all other targets call the plain
// raise_except(excepts).
LIBC_INLINE static int raise_except_if_required([[maybe_unused]] int excepts) {
#ifndef LIBC_MATH_HAS_NO_EXCEPT
  if (math_errhandling & MATH_ERREXCEPT) {
#ifdef LIBC_TARGET_ARCH_IS_X86_64
    return raise_except</*SKIP_X87_FPU*/ true>(excepts);
#else  // !LIBC_TARGET_ARCH_IS_X86_64
    return raise_except(excepts);
#endif // LIBC_TARGET_ARCH_IS_X86_64
  }
#endif // LIBC_MATH_HAS_NO_EXCEPT
  return 0;
}

LIBC_INLINE void set_errno_if_required(int err) {
// Stores `err` into libc_errno when math_errhandling has MATH_ERRNO set.
// When LIBC_MATH_HAS_NO_ERRNO is defined the argument is unused and the
// function is a no-op.
LIBC_INLINE static void set_errno_if_required([[maybe_unused]] int err) {
#ifndef LIBC_MATH_HAS_NO_ERRNO
  const bool reports_via_errno = (math_errhandling & MATH_ERRNO) != 0;
  if (reports_via_errno) {
    libc_errno = err;
  }
#endif // LIBC_MATH_HAS_NO_ERRNO
}

} // namespace fputil
Expand Down
11 changes: 11 additions & 0 deletions system/lib/llvm-libc/src/__support/FPUtil/FPBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ enum class FPType {
IEEE754_Binary64,
IEEE754_Binary128,
X86_Binary80,
BFloat16
};

// The classes hierarchy is as follows:
Expand Down Expand Up @@ -138,6 +139,14 @@ template <> struct FPLayout<FPType::X86_Binary80> {
LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1;
};

// Bit layout for FPType::BFloat16: 1 sign bit, 8 exponent bits and 7
// significand bits, i.e. 16 bits in total, stored in a uint16_t.
template <> struct FPLayout<FPType::BFloat16> {
// Unsigned integer type wide enough to hold the full 16-bit encoding.
using StorageType = uint16_t;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 7;
// Every significand bit is a fraction bit here (FRACTION_LEN == SIG_LEN).
LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
};

// FPStorage derives useful constants from the FPLayout above.
template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
using UP = FPLayout<fp_type>;
Expand Down Expand Up @@ -801,6 +810,8 @@ template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
else if constexpr (cpp::is_same_v<UnqualT, float128>)
return FPType::IEEE754_Binary128;
#endif
else if constexpr (cpp::is_same_v<UnqualT, bfloat16>)
return FPType::BFloat16;
else
static_assert(cpp::always_false<UnqualT>, "Unsupported type");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace fputil {

template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
LIBC_INLINE T frexp(T x, int &exp) {
LIBC_INLINE constexpr T frexp(T x, int &exp) {
FPBits<T> bits(x);
if (bits.is_inf_or_nan()) {
#ifdef LIBC_FREXP_INF_NAN_EXPONENT
Expand Down
65 changes: 65 additions & 0 deletions system/lib/llvm-libc/src/__support/FPUtil/bfloat16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/types.h"

#include <stdint.h>

namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Software bfloat16 value, represented by its raw 16-bit pattern.
struct BFloat16 {
  // Raw encoding of the value.
  uint16_t bits;

  // Defaulted: `bits` is left uninitialized.
  LIBC_INLINE BFloat16() = default;

  // Wraps an existing bit pattern; no numeric conversion takes place.
  LIBC_INLINE constexpr explicit BFloat16(uint16_t bits) : bits(bits) {}

  // Converts a numeric `value` to bfloat16.
  //  - Floating-point inputs go through fputil::cast<bfloat16>.
  //  - Integral inputs are split into a sign and an unsigned magnitude, loaded
  //    into a DyadicFloat with exponent 0 and converted with
  //    ShouldSignalExceptions = true.
  //  - Any other type is first converted to float with static_cast.
  template <typename T> LIBC_INLINE constexpr explicit BFloat16(T value) {
    if constexpr (cpp::is_floating_point_v<T>) {
      bits = fputil::cast<bfloat16>(value).bits;
    } else if constexpr (cpp::is_integral_v<T>) {
      using UnsignedT = cpp::make_unsigned_t<T>;

      Sign sign = Sign::POS;
      // Work on the magnitude in the unsigned domain. Negating a signed
      // `value` directly overflows (undefined behavior, and an error during
      // constant evaluation) when `value` is the most negative value of T;
      // unsigned negation is well defined and yields the correct magnitude.
      UnsignedT magnitude = static_cast<UnsignedT>(value);

      if constexpr (cpp::is_signed_v<T>) {
        if (value < 0) {
          sign = Sign::NEG;
          // The cast undoes integer promotion for types narrower than int.
          magnitude = static_cast<UnsignedT>(UnsignedT(0) - magnitude);
        }
      }

      fputil::DyadicFloat<cpp::numeric_limits<UnsignedT>::digits> xd(
          sign, 0, magnitude);
      bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;

    } else {
      bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
    }
  }

  // Converts to float by placing the 16 stored bits in the upper half of a
  // 32-bit word and bit-casting the result. Only available when float is
  // IEEE754_Binary32.
  template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
                                 fputil::FPType::IEEE754_Binary32,
                             int> = 0>
  LIBC_INLINE constexpr operator float() const {
    uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
    return cpp::bit_cast<float>(x_bits);
  }
}; // struct BFloat16

} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
14 changes: 11 additions & 3 deletions system/lib/llvm-libc/src/__support/FPUtil/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,18 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
cpp::is_floating_point_v<InType>,
OutType>
cast(InType x) {
// Casting to the same type is a no-op.
if constexpr (cpp::is_same_v<InType, OutType>)
return x;

// bfloat16 is always defined (for now)
if constexpr (cpp::is_same_v<OutType, bfloat16> ||
cpp::is_same_v<InType, bfloat16>
#if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
if constexpr (cpp::is_same_v<OutType, float16> ||
cpp::is_same_v<InType, float16>) {
|| cpp::is_same_v<OutType, float16> ||
cpp::is_same_v<InType, float16>
#endif
) {
using InFPBits = FPBits<InType>;
using InStorageType = typename InFPBits::StorageType;
using OutFPBits = FPBits<OutType>;
Expand Down Expand Up @@ -58,7 +67,6 @@ cast(InType x) {
DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x);
return xd.template as<OutType, /*ShouldSignalExceptions=*/true>();
}
#endif

return static_cast<OutType>(x);
}
Expand Down
16 changes: 10 additions & 6 deletions system/lib/llvm-libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ template <size_t Bits> struct DyadicFloat {
return DyadicFloat(result_sign, result_exponent, result_mantissa);
}

#ifdef LIBC_TYPES_HAS_FLOAT16
template <typename T, bool ShouldSignalExceptions>
LIBC_INLINE constexpr cpp::enable_if_t<
cpp::is_floating_point_v<T> && (FPBits<T>::FRACTION_LEN < Bits), T>
Expand Down Expand Up @@ -277,7 +276,6 @@ template <size_t Bits> struct DyadicFloat {

return FPBits(result).get_val();
}
#endif // LIBC_TYPES_HAS_FLOAT16

template <typename T, bool ShouldSignalExceptions,
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
Expand Down Expand Up @@ -411,11 +409,14 @@ template <size_t Bits> struct DyadicFloat {
(FPBits<T>::FRACTION_LEN < Bits),
void>>
LIBC_INLINE constexpr T as() const {
if constexpr (cpp::is_same_v<T, bfloat16>
#if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
if constexpr (cpp::is_same_v<T, float16>)
return generic_as<T, ShouldSignalExceptions>();
|| cpp::is_same_v<T, float16>
#endif
return fast_as<T, ShouldSignalExceptions>();
)
return generic_as<T, ShouldSignalExceptions>();
else
return fast_as<T, ShouldSignalExceptions>();
}

template <typename T,
Expand Down Expand Up @@ -465,7 +466,10 @@ template <size_t Bits> struct DyadicFloat {
// exponents coming in to this function _shouldn't_ be that large). The
// result should always end up as a positive size_t.
size_t shift = -static_cast<size_t>(exponent);
new_mant >>= shift;
if (shift >= Bits)
new_mant = 0;
else
new_mant >>= shift;
round_dir = rounding_direction(mantissa, shift, sign);
if (round_dir > 0)
++new_mant;
Expand Down
13 changes: 8 additions & 5 deletions system/lib/llvm-libc/src/__support/FPUtil/x86_64/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ LIBC_INLINE int set_except(int excepts) {
return 0;
}

LIBC_INLINE int raise_except(int excepts) {
template <bool SKIP_X87_FPU = false> LIBC_INLINE int raise_except(int excepts) {
uint16_t status_value = internal::get_status_value_for_except(excepts);

// We set the status flag for exception one at a time and call the
Expand All @@ -256,13 +256,16 @@ LIBC_INLINE int raise_except(int excepts) {
// when raising the next exception.

auto raise_helper = [](uint16_t singleExceptFlag) {
internal::X87StateDescriptor state;
if constexpr (!SKIP_X87_FPU) {
internal::X87StateDescriptor state;
internal::get_x87_state_descriptor(state);
state.status_word |= singleExceptFlag;
internal::write_x87_state_descriptor(state);
}

uint32_t mxcsr = 0;
internal::get_x87_state_descriptor(state);
mxcsr = internal::get_mxcsr();
state.status_word |= singleExceptFlag;
mxcsr |= singleExceptFlag;
internal::write_x87_state_descriptor(state);
internal::write_mxcsr(mxcsr);
internal::fwait();
};
Expand Down
59 changes: 55 additions & 4 deletions system/lib/llvm-libc/src/__support/GPU/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/threads/sleep.h"
#include "src/string/memory_utils/inline_memcpy.h"

namespace LIBC_NAMESPACE_DECL {

Expand Down Expand Up @@ -137,6 +138,11 @@ void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
s[i] = c;
}

// Returns true when exactly one bit of `x` is set, i.e. when `x` is a non-zero
// power of two. Zero is explicitly rejected.
static inline constexpr bool is_pow2(uint64_t x) {
  if (x == 0)
    return false;
  // Clearing the lowest set bit leaves nothing behind only for powers of two.
  const uint64_t without_lowest_bit = x & (x - 1);
  return without_lowest_bit == 0;
}

} // namespace impl

/// A slab allocator used to hand out identically sized slabs of memory.
Expand Down Expand Up @@ -183,7 +189,9 @@ struct Slab {

// Get the number of bytes needed to contain the bitfield bits.
constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
return __builtin_align_up(
((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8,
MIN_ALIGNMENT + 1);
}

// The actual amount of memory available excluding the bitfield and metadata.
Expand Down Expand Up @@ -540,15 +548,58 @@ void deallocate(void *ptr) {
return;

// All non-slab allocations will be aligned on a 2MiB boundary.
if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
if (__builtin_is_aligned(ptr, SLAB_ALIGNMENT + 1))
return impl::rpc_free(ptr);

// The original slab pointer is the 2MiB boundary using the given pointer.
Slab *slab = reinterpret_cast<Slab *>(
(reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
(reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
slab->deallocate(ptr);
release_slab(slab);
}

// Resizes the allocation at `ptr` to hold at least `size` bytes.
//  - A null `ptr` behaves like gpu::allocate(size).
//  - Pointers aligned to SLAB_ALIGNMENT + 1 are treated as non-slab (foreign)
//    allocations and are rejected with nullptr.
//  - If the owning slab's chunk size already covers `size`, `ptr` is returned
//    unchanged.
//  - Otherwise a new allocation is made, the old chunk's contents are copied
//    over and the old chunk is released.
// Returns nullptr on failure; in that case the original allocation is left
// untouched so the caller still owns valid memory.
void *reallocate(void *ptr, uint64_t size) {
  if (ptr == nullptr)
    return gpu::allocate(size);

  // Non-slab allocations are considered foreign pointers so we fail.
  if (__builtin_is_aligned(ptr, SLAB_ALIGNMENT + 1))
    return nullptr;

  // The original slab pointer is the 2MiB boundary using the given pointer.
  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
  if (slab->get_chunk_size() >= size)
    return ptr;

  // If we need a new chunk we reallocate and copy it over.
  void *new_ptr = gpu::allocate(size);
  // Bail out before touching anything if the new allocation failed: copying
  // into a null destination is invalid, and freeing `ptr` would lose the
  // caller's data.
  if (new_ptr == nullptr)
    return nullptr;

  // The new chunk is strictly larger than the old one (checked above), so
  // copying the whole old chunk stays in bounds.
  inline_memcpy(new_ptr, ptr, slab->get_chunk_size());
  gpu::deallocate(ptr);
  return new_ptr;
}

// Allocates `size` bytes aligned to `alignment`.
//  - `alignment` must be a non-zero power of two, otherwise nullptr is
//    returned.
//  - Alignments up to MIN_ALIGNMENT + 1 are already satisfied by
//    gpu::allocate and are forwarded to it directly.
//  - Alignments above SLAB_ALIGNMENT + 1 are not supported and yield nullptr.
//  - Otherwise the request is over-allocated and the returned pointer is
//    rounded up to the requested alignment.
void *aligned_allocate(uint32_t alignment, uint64_t size) {
  // All alignment values must be a non-zero power of two.
  if (!impl::is_pow2(alignment))
    return nullptr;

  // If the requested alignment is less than what we already provide this is
  // just a normal allocation.
  if (alignment <= MIN_ALIGNMENT + 1)
    return gpu::allocate(size);

  // We can't handle alignments greater than 2MiB so we simply fail.
  if (alignment > SLAB_ALIGNMENT + 1)
    return nullptr;

  // Refuse sizes for which the padded request below would wrap around
  // uint64_t; a wrapped value would allocate far less memory than requested.
  if (size > ~static_cast<uint64_t>(0) - alignment)
    return nullptr;

  // Trying to handle allocation internally would break the assumption that each
  // chunk is identical to each other. Allocate enough memory with worst-case
  // alignment and then round up. The index logic will round down properly.
  uint64_t rounded = size + alignment - MIN_ALIGNMENT;
  void *ptr = gpu::allocate(rounded);
  return __builtin_align_up(ptr, alignment);
}

} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL
Loading