@@ -21,7 +21,6 @@ import (
2121 "crypto/x509"
2222 "fmt"
2323 "io/ioutil"
24- "math/rand"
2524 "net"
2625 "net/http"
2726 "net/url"
@@ -32,14 +31,15 @@ import (
3231 kingpin "gopkg.in/alecthomas/kingpin.v2"
3332
3433 "github.com/ShowMax/go-fqdn"
34+ "github.com/cenkalti/backoff/v4"
3535 "github.com/go-kit/kit/log"
3636 "github.com/go-kit/kit/log/level"
3737 "github.com/pkg/errors"
38+ "github.com/prometheus-community/pushprox/util"
3839 "github.com/prometheus/client_golang/prometheus"
3940 "github.com/prometheus/client_golang/prometheus/promhttp"
4041 "github.com/prometheus/common/promlog"
4142 "github.com/prometheus/common/promlog/flag"
42- "github.com/prometheus-community/pushprox/util"
4343)
4444
4545var (
4949 tlsCert = kingpin .Flag ("tls.cert" , "<cert> Client certificate file" ).String ()
5050 tlsKey = kingpin .Flag ("tls.key" , "<key> Private key file" ).String ()
5151 metricsAddr = kingpin .Flag ("metrics-addr" , "Serve Prometheus metrics at this address" ).Default (":9369" ).String ()
52+
53+ retryInitialWait = kingpin .Flag ("proxy.retry.initial-wait" , "Amount of time to wait after proxy failure" ).Default ("1s" ).Duration ()
54+ retryMaxWait = kingpin .Flag ("proxy.retry.max-wait" , "Maximum amount of time to wait between proxy poll retries" ).Default ("5s" ).Duration ()
5255)
5356
5457var (
@@ -76,6 +79,15 @@ func init() {
7679 prometheus .MustRegister (pushErrorCounter , pollErrorCounter , scrapeErrorCounter )
7780}
7881
82+ func newBackOffFromFlags () backoff.BackOff {
83+ b := backoff .NewExponentialBackOff ()
84+ b .InitialInterval = * retryInitialWait
85+ b .Multiplier = 1.5
86+ b .MaxInterval = * retryMaxWait
87+ b .MaxElapsedTime = time .Duration (0 )
88+ return b
89+ }
90+
7991// Coordinator for scrape requests and responses
8092type Coordinator struct {
8193 logger log.Logger
@@ -168,7 +180,7 @@ func (c *Coordinator) doPush(resp *http.Response, origRequest *http.Request, cli
168180 return nil
169181}
170182
171- func loop (c Coordinator , client * http.Client ) error {
183+ func (c * Coordinator ) doPoll ( client * http.Client ) error {
172184 base , err := url .Parse (* proxyURL )
173185 if err != nil {
174186 level .Error (c .logger ).Log ("msg" , "Error parsing url:" , "err" , err )
@@ -201,35 +213,18 @@ func loop(c Coordinator, client *http.Client) error {
201213 return nil
202214}
203215
204- // decorrelated Jitter increases the maximum jitter based on the last random value.
205- type decorrelatedJitter struct {
206- duration time.Duration // sleep time
207- min time.Duration // min sleep time
208- cap time.Duration // max sleep time
209- }
210-
211- func newJitter () decorrelatedJitter {
212- rand .Seed (time .Now ().UnixNano ())
213- return decorrelatedJitter {
214- min : 50 * time .Millisecond ,
215- cap : 5 * time .Second ,
216+ func (c * Coordinator ) loop (bo backoff.BackOff , client * http.Client ) {
217+ op := func () error {
218+ return c .doPoll (client )
216219 }
217- }
218220
219- func (d * decorrelatedJitter ) calc () time.Duration {
220- change := rand .Float64 () * float64 (d .duration * time .Duration (3 )- d .min )
221- d .duration = d .min + time .Duration (change )
222- if d .duration > d .cap {
223- d .duration = d .cap
224- }
225- if d .duration < d .min {
226- d .duration = d .min
221+ for {
222+ if err := backoff .RetryNotify (op , bo , func (err error , _ time.Duration ) {
223+ pollErrorCounter .Inc ()
224+ }); err != nil {
225+ level .Error (c .logger ).Log ("err" , err )
226+ }
227227 }
228- return d .duration
229- }
230-
231- func (d * decorrelatedJitter ) sleep () {
232- time .Sleep (d .calc ())
233228}
234229
235230func main () {
@@ -299,14 +294,7 @@ func main() {
299294 TLSClientConfig : tlsConfig ,
300295 }
301296
302- jitter := newJitter ()
303297 client := & http.Client {Transport : transport }
304- for {
305- err := loop (coordinator , client )
306- if err != nil {
307- pollErrorCounter .Inc ()
308- jitter .sleep ()
309- continue
310- }
311- }
298+
299+ coordinator .loop (newBackOffFromFlags (), client )
312300}
0 commit comments