Skip to content

Commit bbeb9f7

Browse files
committed
PIX: split vector alloca writes to enable debug instrumentation (microsoft#6990)
Bit of background: PIX's debug passes add new allocas, stores to which tie debug info to DXIL Values. In the case of a preexisting alloca, PIX will still add its debug writes, but the codegen may have emitted vector-valued stores. Concretely, this happens for the ray payload and RayDesc structs in DXR. Those vector-values stores were being treated incorrectly, resulting in missing values in the PIX shader debugger. This change splits vector-valued stores to such allocas into extractelement/scalar-stores, which enables the rest of the PIX instrumentation to function as expected. (Worth noting that this change only applies to the PIX shader debugging pass.) (cherry picked from commit a03a77f)
1 parent 8df0a9d commit bbeb9f7

File tree

3 files changed

+258
-11
lines changed

3 files changed

+258
-11
lines changed

lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass {
7777
private:
7878
void AnnotateValues(llvm::Instruction *pI);
7979
void AnnotateStore(llvm::Instruction *pI);
80+
void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI);
8081
bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI,
8182
llvm::Value **pIdx);
8283
void AnnotateAlloca(llvm::AllocaInst *pAlloca);
@@ -133,6 +134,15 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
133134
auto instrumentableFunctions =
134135
PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM);
135136

137+
for (auto *F : instrumentableFunctions) {
138+
for (auto &block : F->getBasicBlockList()) {
139+
for (auto it = block.begin(); it != block.end();) {
140+
llvm::Instruction *I = &*(it++);
141+
SplitVectorStores(m_DM->GetOP(), I);
142+
}
143+
}
144+
}
145+
136146
for (auto *F : instrumentableFunctions) {
137147
for (auto &block : F->getBasicBlockList()) {
138148
for (llvm::Instruction &I : block.getInstList()) {
@@ -297,15 +307,37 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
297307
return false;
298308
}
299309
// And of course the member we're after might not be at the beginning of
300-
// the struct:
301-
auto *pStructType = llvm::dyn_cast<llvm::StructType>(
302-
pPointerGEP->getPointerOperandType()->getPointerElementType());
303-
auto *pStructMember =
304-
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
305-
uint64_t memberIndex = pStructMember->getLimitedValue();
306-
for (uint64_t i = 0; i < memberIndex; ++i) {
307-
precedingMemberCount +=
308-
CountStructMembers(pStructType->getStructElementType(i));
310+
// any containing struct:
311+
if (auto *pStructType = llvm::dyn_cast<llvm::StructType>(
312+
pPointerGEP->getPointerOperandType()
313+
->getPointerElementType())) {
314+
auto *pStructMember =
315+
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
316+
uint64_t memberIndex = pStructMember->getLimitedValue();
317+
for (uint64_t i = 0; i < memberIndex; ++i) {
318+
precedingMemberCount +=
319+
CountStructMembers(pStructType->getStructElementType(i));
320+
}
321+
}
322+
323+
// And the source pointer may be a vector (floatn) type,
324+
// and if so, that's another offset to consider.
325+
llvm::Type *DestType = pGEP->getPointerOperand()->getType();
326+
// We expect this to be a pointer type (it's a GEP after all):
327+
if (DestType->isPointerTy()) {
328+
llvm::Type *PointedType = DestType->getPointerElementType();
329+
// Being careful to check num operands too in order to avoid false
330+
// positives:
331+
if (PointedType->isVectorTy() && pGEP->getNumOperands() == 3) {
332+
// Fetch the second deref (in operand 2).
333+
// (the first derefs the pointer to the "floatn",
334+
// and the second denotes the index into the floatn.)
335+
llvm::Value *vectorIndex = pGEP->getOperand(2);
336+
if (auto *constIntIIndex =
337+
llvm::cast<llvm::ConstantInt>(vectorIndex)) {
338+
precedingMemberCount += constIntIIndex->getLimitedValue();
339+
}
340+
}
309341
}
310342
} else {
311343
return false;
@@ -365,6 +397,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateAlloca(
365397
AssignNewAllocaRegister(pAlloca, 1);
366398
} else if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pAllocaTy)) {
367399
AssignNewAllocaRegister(pAlloca, AT->getNumElements());
400+
} else if (auto *VT = llvm::dyn_cast<llvm::VectorType>(pAllocaTy)) {
401+
AssignNewAllocaRegister(pAlloca, VT->getNumElements());
368402
} else if (auto *ST = llvm::dyn_cast<llvm::StructType>(pAllocaTy)) {
369403
AssignNewAllocaRegister(pAlloca, CountStructMembers(ST));
370404
} else {
@@ -433,6 +467,36 @@ void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister(
433467
m_uVReg += C;
434468
}
435469

470+
void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP,
471+
llvm::Instruction *pI) {
472+
auto *pSt = llvm::dyn_cast<llvm::StoreInst>(pI);
473+
if (pSt == nullptr) {
474+
return;
475+
}
476+
477+
llvm::AllocaInst *Alloca;
478+
llvm::Value *Index;
479+
if (!IsAllocaRegisterWrite(pSt->getPointerOperand(), &Alloca, &Index)) {
480+
return;
481+
}
482+
483+
llvm::Type *SourceType = pSt->getValueOperand()->getType();
484+
if (SourceType->isVectorTy()) {
485+
if (auto *constIntIIndex = llvm::cast<llvm::ConstantInt>(Index)) {
486+
// break vector alloca stores up into individual stores
487+
llvm::IRBuilder<> B(pSt);
488+
for (uint64_t el = 0; el < SourceType->getVectorNumElements(); ++el) {
489+
llvm::Value *destPointer = B.CreateGEP(pSt->getPointerOperand(),
490+
{B.getInt32(0), B.getInt32(el)});
491+
llvm::Value *source =
492+
B.CreateExtractElement(pSt->getValueOperand(), el);
493+
B.CreateStore(source, destPointer);
494+
}
495+
pI->eraseFromParent();
496+
}
497+
}
498+
}
499+
436500
} // namespace
437501

438502
using namespace llvm;

tools/clang/unittests/HLSL/PixTest.cpp

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#endif
1515

1616
#include <algorithm>
17+
#include <array>
1718
#include <cassert>
1819
#include <cfloat>
1920
#include <map>
@@ -144,6 +145,8 @@ class PixTest : public ::testing::Test {
144145
TEST_METHOD(DebugInstrumentation_TextOutput)
145146
TEST_METHOD(DebugInstrumentation_BlockReport)
146147

148+
TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs)
149+
147150
dxc::DxcDllSupport m_dllSupport;
148151
VersionSupportInfo m_ver;
149152

@@ -435,6 +438,7 @@ class PixTest : public ::testing::Test {
435438
CComPtr<IDxcBlob> RunDxilPIXDXRInvocationsLog(IDxcBlob *blob);
436439
void TestPixUAVCase(char const *hlsl, wchar_t const *model,
437440
wchar_t const *entry);
441+
std::string Disassemble(IDxcBlob *pProgram);
438442
};
439443

440444
bool PixTest::InitSupport() {
@@ -1083,6 +1087,16 @@ static bool FindStructMemberFromStore(llvm::StoreInst *S,
10831087
return false;
10841088
}
10851089

1090+
std::string PixTest::Disassemble(IDxcBlob *pProgram) {
1091+
CComPtr<IDxcCompiler> pCompiler;
1092+
CComPtr<IDxcOperationResult> pResult;
1093+
CComPtr<IDxcBlobEncoding> pSource;
1094+
VERIFY_SUCCEEDED(CreateCompiler(m_dllSupport, &pCompiler));
1095+
CComPtr<IDxcBlobEncoding> pDisassembly;
1096+
VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgram, &pDisassembly));
1097+
return BlobToUtf8(pDisassembly);
1098+
}
1099+
10861100
// This function lives in lib\DxilPIXPasses\DxilAnnotateWithVirtualRegister.cpp
10871101
// Declared here so we can test it.
10881102
uint32_t CountStructMembers(llvm::Type const *pType);
@@ -2955,3 +2969,172 @@ float4 main() : SV_Target {
29552969
VERIFY_IS_TRUE(foundDoubleAssignment);
29562970
VERIFY_IS_TRUE(found32BitAllocaStore);
29572971
}
2972+
2973+
std::string ExtractBracedSubstring(std::string const &line) {
2974+
auto open = line.find('{');
2975+
auto close = line.find('}');
2976+
if (open != std::string::npos && close != std::string::npos &&
2977+
open + 1 < close) {
2978+
return line.substr(open + 1, close - open - 1);
2979+
}
2980+
return {};
2981+
}
2982+
2983+
int ExtractMetaInt32Value(std::string const &token) {
2984+
auto findi32 = token.find("i32 ");
2985+
if (findi32 != std::string_view::npos) {
2986+
return atoi(
2987+
std::string(token.data() + findi32 + 4, token.length() - (findi32 + 4))
2988+
.c_str());
2989+
}
2990+
return -1;
2991+
}
2992+
2993+
std::vector<std::string> Split(std::string str, char delimeter) {
2994+
std::vector<std::string> lines;
2995+
2996+
auto const *p = str.data();
2997+
auto const *justPastPreviousDelimiter = p;
2998+
while (p < str.data() + str.length()) {
2999+
if (*p == delimeter) {
3000+
lines.emplace_back(std::string(justPastPreviousDelimiter,
3001+
p - justPastPreviousDelimiter));
3002+
justPastPreviousDelimiter = p + 1;
3003+
p = justPastPreviousDelimiter;
3004+
} else {
3005+
p++;
3006+
}
3007+
}
3008+
3009+
lines.emplace_back(
3010+
std::string(justPastPreviousDelimiter, p - justPastPreviousDelimiter));
3011+
3012+
return lines;
3013+
}
3014+
3015+
struct MetadataAllocaDefinition {
3016+
int base;
3017+
int count;
3018+
};
3019+
using AllocaDefinitions = std::map<int, MetadataAllocaDefinition>;
3020+
struct MetadataAllocaWrite {
3021+
int allocaDefMetadataKey;
3022+
int offset;
3023+
int size;
3024+
};
3025+
using AllocaWrites = std::map<int, MetadataAllocaWrite>;
3026+
3027+
struct AllocaMetadata {
3028+
AllocaDefinitions allocaDefinitions;
3029+
AllocaWrites allocaWrites;
3030+
std::vector<int> allocaWritesMetaKeys;
3031+
};
3032+
3033+
AllocaMetadata
3034+
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {
3035+
3036+
const char *allocaMetaDataAssignment = "= !{i32 1, ";
3037+
const char *allocaRegWRiteAssignment = "= !{i32 2, !";
3038+
const char *allocaRegWriteTag = "!pix-alloca-reg-write !";
3039+
3040+
AllocaMetadata ret;
3041+
for (auto const &line : lines) {
3042+
if (line[0] == '!') {
3043+
auto key = atoi(std::string(line.data() + 1, line.length() - 1).c_str());
3044+
if (key != -1) {
3045+
if (line.find(allocaMetaDataAssignment) != std::string::npos) {
3046+
std::string bitInBraces = ExtractBracedSubstring(line);
3047+
if (bitInBraces != "") {
3048+
auto tokens = Split(bitInBraces, ',');
3049+
if (tokens.size() == 3) {
3050+
auto value0 = ExtractMetaInt32Value(tokens[1]);
3051+
auto value1 = ExtractMetaInt32Value(tokens[2]);
3052+
if (value0 != -1 && value1 != -1) {
3053+
MetadataAllocaDefinition def;
3054+
def.base = value0;
3055+
def.count = value1;
3056+
ret.allocaDefinitions[key] = def;
3057+
}
3058+
}
3059+
}
3060+
} else if (line.find(allocaRegWRiteAssignment) != std::string::npos) {
3061+
std::string bitInBraces = ExtractBracedSubstring(line);
3062+
if (bitInBraces != "") {
3063+
auto tokens = Split(bitInBraces, ',');
3064+
if (tokens.size() == 4 && tokens[1][1] == '!') {
3065+
auto allocaKey = atoi(tokens[1].c_str() + 2);
3066+
auto value0 = ExtractMetaInt32Value(tokens[2]);
3067+
auto value1 = ExtractMetaInt32Value(tokens[3]);
3068+
if (value0 != -1 && value1 != -1) {
3069+
MetadataAllocaWrite aw;
3070+
aw.allocaDefMetadataKey = allocaKey;
3071+
aw.size = value0;
3072+
aw.offset = value1;
3073+
ret.allocaWrites[key] = aw;
3074+
}
3075+
}
3076+
}
3077+
}
3078+
}
3079+
} else {
3080+
auto findAw = line.find(allocaRegWriteTag);
3081+
if (findAw != std::string::npos) {
3082+
ret.allocaWritesMetaKeys.push_back(
3083+
atoi(line.c_str() + findAw + strlen(allocaRegWriteTag)));
3084+
}
3085+
}
3086+
}
3087+
return ret;
3088+
}
3089+
3090+
TEST_F(PixTest, DebugInstrumentation_VectorAllocaWrite_Structs) {
3091+
const char *source = R"x(
3092+
RaytracingAccelerationStructure Scene : register(t0, space0);
3093+
struct RayPayload
3094+
{
3095+
float4 color;
3096+
};
3097+
RWStructuredBuffer<float> UAV: register(u0);
3098+
[shader("raygeneration")]
3099+
void RaygenInternalName()
3100+
{
3101+
RayDesc ray;
3102+
ray.Origin = float3(UAV[0], UAV[1],UAV[3]);
3103+
ray.Direction = float3(4.4,5.5,6.6);
3104+
ray.TMin = 0.001;
3105+
ray.TMax = 10000.0;
3106+
RayPayload payload = { float4(0, 1, 0, 1) };
3107+
TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload);
3108+
})x";
3109+
3110+
auto compiled = Compile(m_dllSupport, source, L"lib_6_6", {L"-Od"});
3111+
auto output = RunDebugPass(compiled);
3112+
auto disassembly = Disassemble(output.blob);
3113+
auto lines = Split(disassembly, '\n');
3114+
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);
3115+
// To validate that the RayDesc and RayPayload instances were fully covered,
3116+
// check that there are alloca writes that cover all of them. RayPayload
3117+
// has four elements, and RayDesc has eight.
3118+
std::array<bool, 4> RayPayloadElementCoverage;
3119+
std::array<bool, 8> RayDescElementCoverage;
3120+
3121+
for (auto const &write : metaDataKeyToValue.allocaWrites) {
3122+
// the whole point of the changes with this test is to separate vector
3123+
// writes into individual elements:
3124+
VERIFY_ARE_EQUAL(1, write.second.size);
3125+
auto findAlloca = metaDataKeyToValue.allocaDefinitions.find(
3126+
write.second.allocaDefMetadataKey);
3127+
if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) {
3128+
if (findAlloca->second.count == 4) {
3129+
RayPayloadElementCoverage[write.second.offset] = true;
3130+
} else if (findAlloca->second.count == 8) {
3131+
RayDescElementCoverage[write.second.offset] = true;
3132+
}
3133+
}
3134+
}
3135+
// Check that coverage for every element was emitted:
3136+
for (auto const &b : RayPayloadElementCoverage)
3137+
VERIFY_IS_TRUE(b);
3138+
for (auto const &b : RayDescElementCoverage)
3139+
VERIFY_IS_TRUE(b);
3140+
}

tools/clang/unittests/HLSL/PixTestUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ int ExtractMetaInt32Value(std::string const &token) {
5454
return -1;
5555
}
5656
std::map<int, std::pair<int, int>>
57-
MetaDataKeyToRegisterNumber(std::vector<std::string> const &lines) {
57+
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {
5858
// Find lines of the exemplary form
5959
// "!249 = !{i32 0, i32 20}" (in the case of a DXIL value)
6060
// "!196 = !{i32 1, i32 5, i32 1}" (in the case of a DXIL alloca reg)
@@ -148,7 +148,7 @@ DxilRegisterToNameMap BuildDxilRegisterToNameMap(char const *disassembly) {
148148

149149
auto lines = Tokenize(disassembly, "\n");
150150

151-
auto metaDataKeyToValue = MetaDataKeyToRegisterNumber(lines);
151+
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);
152152

153153
for (auto const &line : lines) {
154154
if (line[0] == '!') {

0 commit comments

Comments
 (0)