Skip to content

Commit c305441

Browse files
authored
Merge pull request #2 from JSGette/revision_3
Use output.proto to produce MergedSpawnExec
2 parents a6ce62e + b943195 commit c305441

File tree

5 files changed

+180
-105
lines changed

5 files changed

+180
-105
lines changed

README.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,33 @@
1-
# remote-cache-debugger
1+
# Bazel Remote Cache Debugger
2+
3+
This is a simple tool to compare 2 execution logs and scrape some cache hit metrics.
4+
5+
## Features
6+
7+
* Compare two execution logs and collect the list of environment variables and inputs
8+
that are different across executions. Right now the tool doesn't compare command_args and other
9+
attributes of [SpawnExec](https://github.com/JSGette/remote-cache-debugger/blame/main/src/main/proto/spawn.proto#L67).
10+
* Consumes execution logs in the binary format produced directly by Bazel, so there is no need to use any
11+
other tools to transform them beforehand.
12+
* Produces output logs in both text and binary format so that the result can be consumed by other
13+
applications/tools based on [output.proto](src/main/proto/output.proto)
14+
15+
## How to use
16+
To see all supported options, use the `--help`/`-h` flag:
17+
<br/>`java -jar remote-cache-debugger.jar -h`
18+
19+
To compare 2 execution logs:
20+
<br/>`java -jar remote-cache-debugger.jar -first <path_to_exec1.log> -second <path_to_exec2.log>`
21+
22+
To compare execution logs and generate a text report:
23+
<br/>`java -jar remote-cache-debugger.jar -first <path_to_exec1.log> -second <path_to_exec2.log> -o <path_to_text_output>`
24+
25+
To compare execution logs and generate a binary report:
26+
<br/>`java -jar remote-cache-debugger.jar -first <path_to_exec1.log> -second <path_to_exec2.log> -ob <path_to_binary_output>`
27+
28+
## Limitations
29+
*Hopefully, these limitations will be solved soon enough*
30+
* The tool doesn't compare all attributes of SpawnExec as mentioned above
31+
* If you built a different target or changed the flags/options/features of the build, the results
32+
will most probably be hard to interpret
33+
* If the subsequent (second) execution log contains additional inputs/environment variables, the tool won't report them

build.gradle.kts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ plugins {
66
}
77

88
group = "com.gettej"
9-
version = "1.0-SNAPSHOT"
9+
version = "0.0.1-SNAPSHOT"
1010

1111
repositories {
1212
mavenCentral()
@@ -16,6 +16,7 @@ dependencies {
1616
implementation("com.google.protobuf:protobuf-kotlin:3.22.2")
1717
implementation("io.grpc:grpc-stub:1.53.0")
1818
implementation("io.grpc:grpc-protobuf:1.53.0")
19+
implementation("org.jetbrains.kotlinx:kotlinx-cli:0.3.5")
1920
runtimeOnly("io.github.microutils:kotlin-logging-jvm:3.0.5")
2021

2122
testImplementation(kotlin("test"))
@@ -25,6 +26,16 @@ tasks.test {
2526
useJUnitPlatform()
2627
}
2728

29+
// Packages the application as a self-contained ("fat") jar that can be started with `java -jar`.
tasks.jar {
    manifest {
        // Main.kt has no package declaration, so the generated entry-point class is `MainKt`.
        attributes["Main-Class"] = "MainKt"
    }
    // Bundle the runtime classpath rather than the compile classpath so that `runtimeOnly`
    // dependencies (e.g. kotlin-logging-jvm) are part of the jar as well.
    dependsOn(configurations["runtimeClasspath"])
    // The closure defers dependency resolution until the task actually runs.
    from({
        // Directories are copied as-is; only archives are unpacked.
        configurations["runtimeClasspath"].map { if (it.isDirectory) it else zipTree(it) }
    })
    // Signature files taken over from dependencies would not match the merged jar.
    exclude("META-INF/*.SF", "META-INF/*.DSA", "META-INF/*.RSA")
    // Keep the first occurrence of a path instead of writing duplicate entries into the archive.
    duplicatesStrategy = DuplicatesStrategy.EXCLUDE
}
38+
2839
kotlin {
2940
jvmToolchain(8)
3041
}
@@ -43,4 +54,4 @@ java {
4354
toolchain {
4455
languageVersion.set(JavaLanguageVersion.of(11))
4556
}
46-
}
57+
}

src/main/kotlin/Main.kt

Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,107 @@
1+
import com.gette.debugger.Protos
2+
import com.gette.debugger.Protos.ExecutionEnvironmentVariables
3+
import com.gette.debugger.Protos.ExecutionInputs
14
import com.google.devtools.build.lib.exec.Protos.SpawnExec
5+
import kotlinx.cli.ArgParser
6+
import kotlinx.cli.ArgType
7+
import kotlinx.cli.required
28
import java.io.File
9+
import java.io.FileOutputStream
310
import java.io.InputStream
411
import java.security.MessageDigest
512

613
val sha256 = MessageDigest.getInstance("SHA-256")
714

815
/**
 * Command-line entry point.
 *
 * Declares the two required execution-log options and the two optional report-path options,
 * parses [args] and hands the resulting values to [mergeSpawnExecs].
 */
fun main(args: Array<String>) {
    val cli = ArgParser("debugger")

    // The long option names are spelled out explicitly so they do not depend on the
    // names of the local delegated properties.
    val firstExecLog by cli.option(
        type = ArgType.String,
        fullName = "first_exec_log",
        shortName = "first",
        description = "Path to first execution log"
    ).required()
    val secondExecLog by cli.option(
        type = ArgType.String,
        fullName = "second_exec_log",
        shortName = "second",
        description = "Path to second execution log"
    ).required()
    val outputBinaryLog by cli.option(
        type = ArgType.String,
        fullName = "output_binary_log",
        shortName = "ob",
        description = "Path to save output log in binary format"
    )
    val outputTextLog by cli.option(
        type = ArgType.String,
        fullName = "output_text_log",
        shortName = "o",
        description = "Path to save output log in text format"
    )

    cli.parse(args)
    mergeSpawnExecs(firstExecLog, secondExecLog, outputBinaryLog, outputTextLog)
}
1140

12-
fun mergeSpawnExecs(pathA: String, pathB: String) {
13-
val mergedSpawnExecs: HashMap<String, MergedSpawnExec> = HashMap()
41+
/**
 * Compares two execution logs and reports what differs for executions that missed the remote cache.
 *
 * Every execution of the first log is indexed by its execution hash. For each execution of the
 * second log that is not a remote cache hit, the environment variables and inputs that exist on
 * both sides but carry different values are collected into a [Protos.MergedSpawnExec]. That message
 * is printed to stdout and appended to the text and/or binary report when the respective path is set.
 * A summary with the cache-hit statistics is printed (and appended to the text report) at the end.
 *
 * @param pathA path to the first execution log (binary format)
 * @param pathB path to the second execution log (binary format)
 * @param outputBinaryLogPath file the delimited binary messages are appended to; `null` or empty disables it
 * @param outputTextLogPath file the text report is appended to; `null` or empty disables it
 */
fun mergeSpawnExecs(pathA: String, pathB: String, outputBinaryLogPath: String?, outputTextLogPath: String?) {
    val aSpawnExecs: HashMap<String, SpawnExec> = HashMap()
    // `use` closes the stream even when reading an entry fails.
    File(pathA).inputStream().use { ins ->
        while (ins.available() > 0) {
            val (execHash, spawnExec) = getNextSpawnExec(ins)
            aSpawnExecs[execHash] = spawnExec
        }
    }
    val aExecCounter = aSpawnExecs.size
    var bExecCounter = 0
    var cacheHits = 0
    val textLogFile = if (!outputTextLogPath.isNullOrEmpty()) File(outputTextLogPath) else null
    val binaryLogFile = if (!outputBinaryLogPath.isNullOrEmpty()) File(outputBinaryLogPath) else null

    File(pathB).inputStream().use { ins ->
        while (ins.available() > 0) {
            val (execHash, bSpawnExec) = getNextSpawnExec(ins)
            bExecCounter++
            if (bSpawnExec.remoteCacheHit) {
                cacheHits++
                continue
            }

            // The second log may contain executions the first one does not have (e.g. another target
            // was built). Such an execution is still reported, just without any diff entries.
            val aSpawnExec = aSpawnExecs[execHash]

            val aEnvVars: Map<String, String> =
                aSpawnExec?.environmentVariablesList?.associate { it.name to it.value }.orEmpty()
            val bEnvVars: Map<String, String> =
                bSpawnExec.environmentVariablesList.associate { it.name to it.value }
            val mergedEnvVars = calculateDiff(aEnvVars, bEnvVars).map { (name, values) ->
                ExecutionEnvironmentVariables.newBuilder()
                    .setName(name)
                    .setAValue(values.first)
                    .setBValue(values.second)
                    .build()
            }

            val aInputs = aSpawnExec?.inputsList?.associate { it.path to it.digest }.orEmpty()
            val bInputs = bSpawnExec.inputsList.associate { it.path to it.digest }
            val mergedInputs = calculateDiff(aInputs, bInputs).map { (path, digests) ->
                ExecutionInputs.newBuilder()
                    .setPath(path)
                    .setAHash(digests.first.hash)
                    .setBHash(digests.second.hash)
                    .build()
            }

            val mergedSpawnExec = Protos.MergedSpawnExec.newBuilder()
                .setExecutionHash(execHash)
                .addAllListedOutputs(bSpawnExec.listedOutputsList)
                .addAllEnvVars(mergedEnvVars)
                .addAllInputs(mergedInputs)
                .build()

            println(mergedSpawnExec.toString())
            textLogFile?.appendText(mergedSpawnExec.toString())
            if (binaryLogFile != null) {
                // Opened in append mode so that consecutive messages end up in the same file.
                FileOutputStream(binaryLogFile, true).use { out -> mergedSpawnExec.writeDelimitedTo(out) }
            }
        }
    }

    if (aExecCounter != bExecCounter) {
        val warning = "WARNING! Number of executions isn't the same across builds so results may be not correct!"
        println(warning)
        // Terminate the line so the report header below does not get glued onto the warning.
        textLogFile?.appendText("$warning\n")
    }

    // NOTE(review): cache hits are counted over the second log but the rate is computed against the
    // number of distinct executions of the first log - confirm this is the intended denominator.
    val reportText = """
        ====================REPORT====================
        Spawned Executions: $aExecCounter
        Cache Hits: $cacheHits
        Cache Hit Rate: ${"%.2f".format(cacheHits.toFloat() / aExecCounter.toFloat() * 100)}%
        ==============================================
    """.trimIndent()
    println(reportText)
    textLogFile?.appendText(reportText)
}
65106

66107
fun getNextSpawnExec(ins: InputStream): Pair<String, SpawnExec> {
@@ -73,3 +114,9 @@ fun calculateExecHash(input: String): String {
73114
return sha256.digest(input.toByteArray())
74115
.fold("") { str, it -> str + "%02x".format(it) }
75116
}
117+
118+
/**
 * Returns the entries whose key is present in both maps but whose values differ.
 *
 * Keys that exist in only one of the maps are ignored. The result keeps the iteration order of
 * [aMap]; each value is a pair of (value in [aMap], value in [bMap]).
 *
 * @param aMap values of the first execution
 * @param bMap values of the second execution
 * @return differing values per shared key; empty when the shared keys all agree
 */
fun <T> calculateDiff(aMap: Map<String, T>, bMap: Map<String, T>): Map<String, Pair<T, T>> {
    val diff = LinkedHashMap<String, Pair<T, T>>()
    for ((key, aValue) in aMap) {
        if (!bMap.containsKey(key)) continue
        // getValue (unlike `bMap[key]!!`) also works when T is nullable and the stored value is null.
        val bValue = bMap.getValue(key)
        if (aValue != bValue) diff[key] = aValue to bValue
    }
    return diff
}

src/main/kotlin/MergedSpawnExec.kt

Lines changed: 0 additions & 62 deletions
This file was deleted.

src/main/proto/output.proto

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
syntax = "proto3";
2+
3+
package com.gette.debugger;
4+
5+
option java_package = "com.gette.debugger";
6+
option java_outer_classname = "Protos";
7+
8+
// An input that is present under the same path in both executions
// but whose hash differs between them.
// Based on [Digest][com.google.devtools.build.lib.exec.Digest]
message ExecutionInputs {
  // Path of the input file.
  string path = 1;

  // Hash of the input during the previous execution.
  string a_hash = 2;

  // Hash of the input during the current execution.
  string b_hash = 3;
}
19+
20+
// An environment variable together with the values it had
// in the previous and in the current execution.
// Based on [EnvironmentVariable][com.google.devtools.build.lib.exec.EnvironmentVariable]
message ExecutionEnvironmentVariables {
  // Name of the environment variable passed along to an execution.
  string name = 1;

  // Value passed during the previous execution.
  string a_value = 2;

  // Value passed during the current execution.
  string b_value = 3;
}
31+
32+
// Difference between two executions that produce the same listed outputs:
// when their inputs or environment variables differ, the differing
// entries are stored in this message.
// See [SpawnExec][com.google.devtools.build.lib.exec.SpawnExec]
message MergedSpawnExec {
  // SHA-256 hash calculated from the list of listed_outputs
  // transformed into a string.
  string execution_hash = 1;

  // Outputs that should be produced by the execution.
  repeated string listed_outputs = 2;

  // Environment variables whose values differ between the two executions.
  repeated ExecutionEnvironmentVariables env_vars = 3;

  // Inputs whose hashes differ between the two executions.
  repeated ExecutionInputs inputs = 4;
}

0 commit comments

Comments
 (0)