diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 14673a58712..91f814e5f85 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -7,6 +7,7 @@ import ( "flag" "fmt" "log/slog" + "net/http" "os" "slices" "sync" @@ -94,7 +95,7 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache CVEID: cve.ID, CNA: "nvd", } - repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics) + repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics, http.DefaultClient) metrics.Repos = repos var err error diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 5f7f7fa8e85..c136c8a7944 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -229,7 +229,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach } // FindRepos attempts to find the source code repositories for a given CVE. -func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) []string { +func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) []string { // Find repos refs := cve.References CPEs := cves.CPEs(cve) @@ -244,9 +244,8 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return nil } - // Edge case: No CPEs, but perhaps usable references. if len(refs) > 0 && len(CPEs) == 0 { - repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics) + repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics, httpClient) if len(repos) == 0 { metrics.AddNote("Failed to derive any repos and there were no CPEs") return nil @@ -254,7 +253,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Derived repos for CVE with no CPEs: %v", repos) reposForCVE = repos } - appCPECount := 0 + vendorProductCombinations := make(map[cves.VendorProduct]bool) for _, CPEstr := range CPEs { CPE, err := cves.ParseCPE(CPEstr) @@ -262,29 +261,17 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } - if CPE.Part != "a" { - continue - } - appCPECount += 1 vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true } - if len(CPEs) > 0 && appCPECount == 0 { - // This CVE is not for software (based on there being CPEs but not any application ones), skip. - metrics.Outcome = models.NoSoftware - return nil - } - // If there wasn't a repo from the CPE Dictionary, try and derive one from the CVE references. for vendorProductKey := range vendorProductCombinations { - // Does it have any application CPEs? Look for pre-computed repos based on VendorProduct. if repos, ok := vpRepoCache.Get(vendorProductKey); ok { metrics.AddNote("Pre-references, derived repos using cache: %v", repos) if len(reposForCVE) == 0 { reposForCVE = repos continue } - // Don't append duplicates. for _, repo := range repos { if !slices.Contains(reposForCVE, repo) { reposForCVE = append(reposForCVE, repo) @@ -295,7 +282,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * if slices.Contains(cves.VendorProductDenyList, vendorProductKey) { continue } - repos := cves.ReposFromReferences(vpRepoCache, &vendorProductKey, refs, cves.RefTagDenyList, repoTagsCache, metrics) + repos := cves.ReposFromReferences(vpRepoCache, &vendorProductKey, refs, cves.RefTagDenyList, repoTagsCache, metrics, httpClient) if len(repos) == 0 { metrics.AddNote("Failed to derive any repos for %s/%s", vendorProductKey.Vendor, vendorProductKey.Product) continue @@ -305,22 +292,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * } } - logger.Info("Finished processing "+string(CVEID), - slog.String("cve", string(CVEID)), - slog.Int("cpes", len(CPEs)), - slog.Int("app_cpes", appCPECount), - slog.Int("derived_repos", len(reposForCVE))) - - // If we've made it to here, we may have a CVE: - // * that has Application-related CPEs (so applies to software) - // * has a reference that is a known repository URL - // OR - // * a derived repository for the software package - // - // We do not yet have: - // * any knowledge of the language used - // * definitive version information - if len(reposForCVE) == 0 { // We have nothing useful to work with, so we'll assume it's out of scope metrics.AddNote("Passing due to lack of viable repository") diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index b75b2d382f2..0ef739bd510 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -1083,7 +1083,7 @@ func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *gi // Examines the CVE references for a CVE and derives repos for it, optionally caching it. // TODO (jesslowe): refactor with below -func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) (repos []string) { +func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) (repos []string) { for _, ref := range refs { // If any of the denylist tags are in the ref's tag set, it's out of consideration. if !RefAcceptable(ref, tagDenyList) { @@ -1097,6 +1097,13 @@ func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Re // Failed to parse as a valid repo. continue } + + // Check if the repo URL has changed (e.g. via redirect) + canonicalRepo, err := ValidateAndCanonicalizeLink(repo, httpClient) + if err == nil { + repo = canonicalRepo + } + if slices.Contains(repos, repo) { continue } diff --git a/vulnfeeds/cves/versions_test.go b/vulnfeeds/cves/versions_test.go index 5c738f94f76..9e93dc3ed41 100644 --- a/vulnfeeds/cves/versions_test.go +++ b/vulnfeeds/cves/versions_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "log" + "net/http" "os" "reflect" "slices" @@ -1355,6 +1356,21 @@ func TestReposFromReferences(t *testing.T) { }, wantRepos: []string{"https://github.com/dwyl/hapi-auth-jwt2"}, }, + { + name: "A CVE with a repo that redirects (docker/docker -> moby/moby)", + args: args{ + CVE: "CVE-2017-12345", // Dummy CVE + refs: []models.Reference{ + { + Source: "cna@docker.com", + Tags: []string{"Third Party Advisory"}, + URL: "https://github.com/docker/docker", + }, + }, + tagDenyList: RefTagDenyList, + }, + wantRepos: []string{"https://github.com/moby/moby"}, + }, } for _, tt := range tests { @@ -1362,7 +1378,7 @@ func TestReposFromReferences(t *testing.T) { testutils.SetupGitVCR(t) metrics := &models.ConversionMetrics{} repoTagsCache := &git.RepoTagsCache{} - if gotRepos := ReposFromReferences(tt.args.cache, tt.args.vp, tt.args.refs, tt.args.tagDenyList, repoTagsCache, metrics); !reflect.DeepEqual(gotRepos, tt.wantRepos) { + if gotRepos := ReposFromReferences(tt.args.cache, tt.args.vp, tt.args.refs, tt.args.tagDenyList, repoTagsCache, metrics, http.DefaultClient); !reflect.DeepEqual(gotRepos, tt.wantRepos) { t.Errorf("ReposFromReferences() = %#v, want %#v", gotRepos, tt.wantRepos) } })