Skip to content

Commit 1e691d5

Browse files
authored
[clickhouse] Convert OTel traces model to native format (#6935)
## Which problem is this PR solving? - Part of #5058 ## Description of the changes - Based on the `ch-go` wire protocol, convert the OTel traces model to the ClickHouse native format for batch insertion. ## How was this change tested? - unit tests ## Checklist - [x] I have read https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md - [x] I have signed all commits - [x] I have added unit tests for the new functionality - [x] I have run lint and test steps successfully - for `jaeger`: `make lint test` - for `jaeger-ui`: `npm run lint` and `npm run test` --------- Signed-off-by: zhengkezhou1 <[email protected]>
1 parent c3f8f21 commit 1e691d5

File tree

12 files changed

+2689
-4
lines changed

12 files changed

+2689
-4
lines changed

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.23.7
55
toolchain go1.24.2
66

77
require (
8+
github.com/ClickHouse/ch-go v0.65.1
89
github.com/HdrHistogram/hdrhistogram-go v1.1.2
910
github.com/Shopify/sarama v1.37.2
1011
github.com/apache/thrift v0.21.0
@@ -107,6 +108,9 @@ require (
107108
require (
108109
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect
109110
github.com/cenkalti/backoff/v5 v5.0.2 // indirect
111+
github.com/go-faster/city v1.0.1 // indirect
112+
github.com/go-faster/errors v0.7.1 // indirect
113+
github.com/segmentio/asm v1.2.0 // indirect
110114
github.com/twmb/murmur3 v1.1.8 // indirect
111115
go.opentelemetry.io/collector/extension/extensionauth v1.30.0 // indirect
112116
go.opentelemetry.io/collector/extension/extensiontest v0.124.0 // indirect

go.sum

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mx
2020
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=
2121
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
2222
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
23+
github.com/ClickHouse/ch-go v0.65.1 h1:SLuxmLl5Mjj44/XbINsK2HFvzqup0s6rwKLFH347ZhU=
24+
github.com/ClickHouse/ch-go v0.65.1/go.mod h1:bsodgURwmrkvkBe5jw1qnGDgyITsYErfONKAHn05nv4=
2325
github.com/Code-Hex/go-generics-cache v1.5.1 h1:6vhZGc5M7Y/YD8cIUcY8kcuQLB4cHR7U+0KMqAA0KcU=
2426
github.com/Code-Hex/go-generics-cache v1.5.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1vxyillCVzX13KZG8dl4=
2527
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
@@ -179,6 +181,10 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S
179181
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
180182
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
181183
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
184+
github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw=
185+
github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw=
186+
github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg=
187+
github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo=
182188
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
183189
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
184190
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
@@ -568,6 +574,8 @@ github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsF
568574
github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k=
569575
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 h1:yoKAVkEVwAqbGbR8n87rHQ1dulL25rKloGadb3vm770=
570576
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30/go.mod h1:sH0u6fq6x4R5M7WxkoQFY/o7UaiItec0o1LinLCJNq8=
577+
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
578+
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
571579
github.com/shirou/gopsutil/v4 v4.25.3 h1:SeA68lsu8gLggyMbmCn8cmp97V1TI9ld9sVzAUcKcKE=
572580
github.com/shirou/gopsutil/v4 v4.25.3/go.mod h1:xbuxyoZj+UsgnZrENu3lQivsngRR5BdjbJwf2fv4szA=
573581
github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs=
@@ -889,8 +897,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
889897
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
890898
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
891899
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
892-
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
893-
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
900+
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
901+
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
894902
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
895903
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
896904
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@@ -991,8 +999,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
991999
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
9921000
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
9931001
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
994-
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
995-
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
1002+
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
1003+
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
9961004
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
9971005
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
9981006
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## ClickHouse
2+
3+
### Differences from the implementation in [otel collector contrib](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/clickhouseexporter)
4+
5+
#### Trace Storage Format
6+
7+
The most significant difference lies in the handling of **Attributes**. In the OTel-contrib implementation, everything within the Attributes is converted to [strings](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/80b3df26b7028a4bbe1eb606a6142cd4df9c3c74/exporter/clickhouseexporter/internal/metrics_model.go#L171-L177):
8+
9+
```golang
10+
func AttributesToMap(attributes pcommon.Map) column.IterableOrderedMap {
11+
return orderedmap.CollectN(func(yield func(string, string) bool) {
12+
for k, v := range attributes.All() {
13+
yield(k, v.AsString())
14+
}
15+
}, attributes.Len())
16+
}
17+
```
18+
19+
The primary drawback of that approach is that it loses the original data types and \~\~cannot be used directly as query parameters\~\~ (ClickHouse provides casting functions). For example, if an attribute has an `int64` value, we might want to perform the following operation:
20+
21+
```sql
22+
SELECT * FROM test WHERE resource.attributes['container.restart.count'] > 10
23+
```
24+
25+
To address the above issues, the following improvements have been implemented:
26+
27+
* Instead of directly using a Map for storage, the key and value are split into two separate arrays.
28+
* More Columns are used to store values of different types:
29+
* Basic types like bool, double, int, and string are stored in array columns of the corresponding type: `Array(Int64)`, `Array(Bool)`, etc.
30+
* Complex types like slice and map are serialized into JSON strings before storage: `Array(String)`.
31+
32+
The `Value` type here actually refers to the `pdata` data types from the `otel-collector` pipeline. In our architecture, the `value_wrapper` is responsible for wrapping the Protobuf-generated Go structures (which are the concrete implementation of `pdata`) into the `Value` type. Although `pdata` itself is based on the OTLP specification, encapsulating it into `Value` via the `value_wrapper` creates a higher-level abstraction, which presents some challenges for directly storing `Value` in ClickHouse. Specifically, when deserializing `Slice` and `Map` data contained within the `Value`, the fact that JSON cannot natively distinguish whether a `Number` is an integer (`int`) or a floating-point number (`double`) leads to a loss of type information. Furthermore, directly handling the potentially dynamically nested `pdata` structures within the `Value` can also be quite complex. Therefore, to ensure the accuracy and completeness of data types in ClickHouse, and to effectively handle these nested telemetry data, we need to convert the `pdata` data inside `Value` into the standard `OTLP/JSON` format for storage.
33+
34+
#### Data Read and Write Methods
35+
36+
The OTel-contrib implementation uses `database/sql` for writing data. Going through that generic interface is unnecessary; using the native client provided by ClickHouse is a better choice.
37+
For write operations, `ch-go`'s `chpool` is used in batch mode. For read operations, `clickhouse-go` is used.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2025 The Jaeger Authors.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package dbmodel
5+
6+
import "time"
7+
8+
// Trace is the domain model for a trace record in ClickHouse.
9+
// It represents the schema for storing OTel pipeline traces in ClickHouse:
// a single Span together with its Resource, Scope, Links and Events.
10+
type Trace struct {
11+
Resource Resource // attributes of the resource associated with the span
12+
Scope Scope // name, version and attributes of the scope associated with the span
13+
Span Span // the span itself
14+
Links []Link // links attached to the span
15+
Events []Event // events attached to the span
16+
}
17+
18+
// Resource holds the resource-level attributes of a Trace.
type Resource struct {
19+
Attributes AttributesGroup // resource attributes; AttributesGroup is declared elsewhere in the package
20+
}
21+
22+
// Scope holds the scope-level fields of a Trace: a name, a version and attributes.
// NOTE(review): presumably this mirrors the OTel instrumentation scope — confirm.
type Scope struct {
23+
Name string // scope name
24+
Version string // scope version
25+
Attributes AttributesGroup // scope attributes
26+
}
27+
28+
// Span holds the span-level fields of a Trace.
// Identifiers, kind and status code are stored as strings rather than as
// binary IDs or enums.
type Span struct {
29+
Timestamp time.Time // NOTE(review): presumably the span start time — confirm against the converter
30+
TraceId string // trace identifier, stored as a string
31+
SpanId string // span identifier, stored as a string
32+
ParentSpanId string // identifier of the parent span, stored as a string
33+
TraceState string // trace state, stored as a string
34+
Name string // span name
35+
Kind string // span kind, stored as a string
36+
// NOTE(review): Duration is declared as time.Time, i.e. an instant, not a
// time.Duration. Confirm whether this is meant to hold the span end time or
// whether the field type should be time.Duration.
Duration time.Time
37+
StatusCode string // status code, stored as a string
38+
StatusMessage string // status message accompanying StatusCode
39+
Attributes AttributesGroup // span attributes
40+
}
41+
42+
// Link is one entry of Trace.Links.
// NOTE(review): TraceId/SpanId presumably identify the linked (target) span — confirm.
type Link struct {
43+
TraceId string // trace identifier, stored as a string
44+
SpanId string // span identifier, stored as a string
45+
TraceState string // trace state, stored as a string
46+
Attributes AttributesGroup // link attributes
47+
}
48+
49+
// Event is one entry of Trace.Events: a named, timestamped record with attributes.
type Event struct {
50+
Name string // event name
51+
Timestamp time.Time // time associated with the event
52+
Attributes AttributesGroup // event attributes
53+
}
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
{
2+
"Resource": {
3+
"Attributes": {
4+
"BoolKeys": [
5+
"browser.mobile"
6+
],
7+
"BoolValues": [
8+
true
9+
],
10+
"DoubleKeys": [
11+
"host.memory.swap"
12+
],
13+
"DoubleValues": [
14+
2048
15+
],
16+
"IntKeys": [
17+
"process.parent.pid",
18+
"process.pid",
19+
"faas.max_memory"
20+
],
21+
"IntValues": [
22+
111,
23+
1234,
24+
134217728
25+
],
26+
"StrKeys": [
27+
"service.names",
28+
"service.instance.id"
29+
],
30+
"StrValues": [
31+
"clickhouse",
32+
"627cc493-f310-47de-96bd-71410b7dec09"
33+
],
34+
"BytesKeys": [
35+
"oci.manifest.digest"
36+
],
37+
"BytesValues": [
38+
"c2hhMjU2OmU0Y2E2MmMwZDYyZjNlODg2ZTY4NDgwNmRmZTlkNGUwY2RhNjBkNTQ5ODY4OTgxNzNjMTA4Mzg1NmNmZGEwZjQ="
39+
]
40+
}
41+
},
42+
"Scope": {
43+
"Name": "io.opentelemetry.contrib.clickhouse",
44+
"Version": "1.0.0",
45+
"Attributes": {
46+
"BoolKeys": [
47+
"library.feature.async_processing_enabled",
48+
"library.security.data_masking_active"
49+
],
50+
"BoolValues": [
51+
true,
52+
false
53+
],
54+
"DoubleKeys": [
55+
"component.config.sampling.ratio"
56+
],
57+
"DoubleValues": [
58+
0.75
59+
],
60+
"IntKeys": [
61+
"component.max_workers",
62+
"component.min_workers"
63+
],
64+
"IntValues": [
65+
10,
66+
2
67+
],
68+
"StrKeys": [
69+
"library.language",
70+
"library.version"
71+
],
72+
"StrValues": [
73+
"go",
74+
"v2.2.2"
75+
],
76+
"BytesKeys": [
77+
"scope.test.bytes.value"
78+
],
79+
"BytesValues": [
80+
"AQIDBA=="
81+
]
82+
}
83+
},
84+
"Span": {
85+
"Timestamp": "2023-12-25T09:53:49Z",
86+
"TraceId": "01020300000000000000000000000000",
87+
"SpanId": "0102030000000000",
88+
"ParentSpanId": "0102040000000000",
89+
"TraceState": "trace state",
90+
"Name": "call db",
91+
"Kind": "Internal",
92+
"Duration": "2023-12-25T09:54:49Z",
93+
"StatusCode": "Error",
94+
"StatusMessage": "error",
95+
"Attributes": {
96+
"BoolKeys": [
97+
"app.payment.card_valid",
98+
"app.payment.charged"
99+
],
100+
"BoolValues": [
101+
true,
102+
true
103+
],
104+
"DoubleKeys": [
105+
"app.payment.amount"
106+
],
107+
"DoubleValues": [
108+
99.99
109+
],
110+
"IntKeys": [
111+
"app.payment.count"
112+
],
113+
"IntValues": [
114+
5
115+
],
116+
"StrKeys": [
117+
"app.payment.id"
118+
],
119+
"StrValues": [
120+
"123456789"
121+
],
122+
"BytesKeys": [
123+
"span.test.bytes.value"
124+
],
125+
"BytesValues": [
126+
"AQIDBAUG"
127+
]
128+
}
129+
},
130+
"Links": [
131+
{
132+
"TraceId": "01020500000000000000000000000000",
133+
"SpanId": "0102050000000000",
134+
"TraceState": "test",
135+
"Attributes": {
136+
"BoolKeys": [
137+
"is.retry"
138+
],
139+
"BoolValues": [
140+
true
141+
],
142+
"DoubleKeys": [
143+
"similarity.score"
144+
],
145+
"DoubleValues": [
146+
0.85
147+
],
148+
"IntKeys": [
149+
"correlation.id"
150+
],
151+
"IntValues": [
152+
1324141
153+
],
154+
"StrKeys": [
155+
"related.resource.id"
156+
],
157+
"StrValues": [
158+
"resource-123"
159+
],
160+
"BytesKeys": [
161+
"link.test.bytes.value"
162+
],
163+
"BytesValues": [
164+
"AQIDBAUG"
165+
]
166+
}
167+
}
168+
],
169+
"Events": [
170+
{
171+
"Name": "event1",
172+
"Timestamp": "2023-12-25T09:53:49Z",
173+
"Attributes": {
174+
"BoolKeys": [
175+
"inventory.available",
176+
"payment.successful"
177+
],
178+
"BoolValues": [
179+
true,
180+
true
181+
],
182+
"DoubleKeys": [
183+
"product.price",
184+
"order.discount.rate"
185+
],
186+
"DoubleValues": [
187+
6.04,
188+
0.04
189+
],
190+
"IntKeys": [
191+
"order.quantity"
192+
],
193+
"IntValues": [
194+
2
195+
],
196+
"StrKeys": [
197+
"order.id",
198+
"product.id"
199+
],
200+
"StrValues": [
201+
"123456789",
202+
"987654321"
203+
],
204+
"BytesKeys": [
205+
"event.test.bytes.value"
206+
],
207+
"BytesValues": [
208+
"AQIDBAUG"
209+
]
210+
}
211+
}
212+
]
213+
}

0 commit comments

Comments
 (0)