Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions vulnerabilities/importer_yielder.py
Original file line numberDiff line numberDiff line change
Expand Up@@ -172,7 +172,6 @@
'db_url': 'https://usn.ubuntu.com/usn-db/database-all.json.bz2'
},
},

{
'name': 'github',
'license': '',
Expand All@@ -182,7 +181,16 @@
'endpoint': 'https://api.github.com/graphql',
'ecosystems': ['MAVEN', 'NUGET', 'COMPOSER']
}
}
},
{
'name': 'msr2019',
'license': '',
'last_run': None,
'data_source': 'ProjectKBMSRDataSource',
'data_source_cfg':{
'etag':{}
},
},

]

Expand Down
1 change: 1 addition & 0 deletions vulnerabilities/importers/__init__.py
Original file line numberDiff line numberDiff line change
Expand Up@@ -38,3 +38,4 @@
from vulnerabilities.importers.ubuntu_usn import UbuntuUSNDataSource
from vulnerabilities.importers.github import GitHubAPIDataSource
from vulnerabilities.importers.nvd import NVDDataSource
from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
96 changes: 96 additions & 0 deletions vulnerabilities/importers/project_kb_msr2019.py
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import csv
import dataclasses
import urllib.request

# Reading a CSV file from a URL using `requests` is a bit too complicated,
# so `urllib.request` is used for that purpose. `requests` is still needed
# because making a HEAD request using `urllib.request` is too complicated.
import requests
from packageurl import PackageURL


from vulnerabilities.data_source import Advisory
from vulnerabilities.data_source import DataSource
from vulnerabilities.data_source import Reference
from vulnerabilities.data_source import DataSourceConfiguration


@dataclasses.dataclass
class ProjectKBDataSourceConfiguration(DataSourceConfiguration):
    """Configuration for the Project KB MSR data source."""

    # Mapping used by the data source as ``etag[url] -> ETag header value``
    # to remember which version of a remote file was last seen.
    etag: dict


class ProjectKBMSRDataSource(DataSource):
    """Build advisories from the Project KB MSR2019 CSV dataset.

    The dataset is downloaded from ``url``. Every CSV row becomes one
    ``Advisory`` whose only reference is a link to the fix commit. The
    ETag of the remote file is kept in ``self.config.etag`` so that an
    unchanged file is not processed a second time.
    """

    CONFIG_CLASS = ProjectKBDataSourceConfiguration

    url = "https://raw.githubusercontent.com/SAP/project-kb/master/MSR2019/dataset/vulas_db_msr2019_release.csv"  # nopep8

    def updated_advisories(self):
        """Return the batched advisories, or ``[]`` if the dataset is unchanged.

        The result of ``self.batch_advisories`` is returned as-is; that
        method is not defined in this class (presumably it is inherited
        from ``DataSource`` -- confirm there for the exact return type).
        """
        # ETags are like hashes of web responses. We maintain
        # (url, etag) mappings in the DB. `create_etag` creates
        # a (url, etag) pair. If that exact pair already exists, the
        # response is not processed again to avoid duplicate work.
        if not self.create_etag(self.url):
            return []

        raw_data = self.fetch()
        advisories = self.to_advisories(raw_data)
        return self.batch_advisories(advisories)

    def create_etag(self, url):
        """Store the current ETag of ``url`` and tell whether it is new.

        A HEAD request is made to ``url`` and its ``ETag`` response
        header is compared with the value stored in ``self.config.etag``.

        Returns:
            ``False`` if the stored ETag for ``url`` equals the current
            one. ``True`` otherwise, including when the response carries
            no ETag header at all (nothing is stored in that case).
        """
        etag = requests.head(url).headers.get("ETag")
        if not etag:
            # No ETag means changes cannot be detected, so always process.
            return True

        # `etag` is truthy here, so a missing key (None) never compares equal.
        if self.config.etag.get(url) == etag:
            return False

        self.config.etag[url] = etag
        return True

    def fetch(self):
        """Download the CSV file at ``self.url`` and return a ``csv.reader``.

        The whole response is read and decoded as UTF-8 before the
        connection is closed, so the returned reader iterates over
        in-memory lines only.
        """
        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(self.url) as response:
            lines = [line.decode("utf-8") for line in response.readlines()]
        return csv.reader(lines)

    @staticmethod
    def to_advisories(csv_reader):
        """Convert rows of the Project KB MSR CSV file into advisories.

        Args:
            csv_reader: iterable of rows. Each non-empty row must have
                exactly four fields, unpacked as vulnerability id,
                project home URL, fix commit and a fourth field that is
                ignored.

        Returns:
            A list with one ``Advisory`` per non-empty row.

        Raises:
            ValueError: if a non-empty row does not have four fields.
        """
        # Project KB MSR csv file has no header row
        advisories = []
        for row in csv_reader:
            if not row:
                # `csv.reader` yields an empty list for a blank line.
                continue

            vuln_id, proj_home, fix_commit, _ = row
            commit_link = f"{proj_home}/commit/{fix_commit}"
            advisories.append(
                Advisory(
                    summary="",
                    impacted_package_urls=[],
                    vuln_references=[Reference(url=commit_link)],
                    cve_id=vuln_id,
                )
            )

        return advisories