Skip to content

Commit 7de9dea

Browse files
authored
Merge pull request package-url#19 from package-url/url2purl
Add support for nuget, pypi, and sourceforge in url2purl
2 parents d0e7c9f + 56e817b commit 7de9dea

File tree

2 files changed

+147
-9
lines changed

2 files changed

+147
-9
lines changed

src/packageurl/contrib/url2purl.py

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from __future__ import unicode_literals
3030

3131
import os
32+
import re
3233

3334
try:
3435
from urlparse import urlparse # Python 2
@@ -65,9 +66,22 @@ def get_purl(uri):
6566
return
6667

6768

69+
def purl_from_pattern(type_, pattern, uri):
    """
    Return a PackageURL of type ``type_`` built from the named groups that
    ``pattern`` captures in ``uri``, or None when ``pattern`` does not match.

    ``pattern`` is applied in verbose mode and is matched from the start of
    the ``uri`` once that ``uri`` has been decoded with ``unquote_plus``
    (which also turns "+" into a space). Named groups whose name is not a
    PackageURL field are ignored.
    """
    decoded_uri = unquote_plus(uri)
    found = re.match(pattern, decoded_uri, re.VERBOSE)
    if not found:
        return None

    # Keep only the captured groups that PackageURL accepts as fields.
    purl_kwargs = dict(
        (key, captured) for key, captured in found.groupdict().items()
        if key in PackageURL._fields
    )
    return PackageURL(type_, **purl_kwargs)
80+
81+
6882
@purl_router.route('https?://registry.npmjs.*/.*',
6983
'https?://registry.yarnpkg.com/.*')
70-
def build_npm_url(uri):
84+
def build_npm_purl(uri):
7185
# npm URLs are difficult to disambiguate with regex
7286
if '/-/' in uri:
7387
return build_npm_download_purl(uri)
@@ -157,14 +171,95 @@ def build_maven_purl(uri):
157171
return PackageURL('maven', namespace, name, version, qualifiers)
158172

159173

160-
@purl_router.route('https?://rubygems.org/downloads/.*')
161-
def build_rubygems_url(uri):
162-
if uri.endswith('/') or not uri.endswith('.gem'):
163-
return
174+
# https://rubygems.org/downloads/jwt-0.1.8.gem
# Matches a RubyGems ".gem" download URL and captures the gem "name" and
# "version". The "name" group is greedy, so hyphens inside a gem name (such
# as "yajl-ruby") stay in the name and the version is what follows the last
# hyphen.
rubygems_pattern = (
    r"^https?://rubygems\.org/downloads/"  # the host dot is a literal dot
    r"(?P<name>.+)-(?P<version>.+)"
    r"(\.gem)$"
)
180+
181+
182+
@purl_router.route(rubygems_pattern)
def build_rubygems_purl(uri):
    """
    Return a "rubygems" PackageURL built from the RubyGems download ``uri``
    by applying ``rubygems_pattern`` through ``purl_from_pattern``.
    """
    return purl_from_pattern(
        type_='rubygems',
        pattern=rubygems_pattern,
        uri=uri,
    )
164185

186+
187+
# https://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.3.tar.gz
# Matches a source archive file name of the form "{name}-{version}.{ext}"
# and captures "name" and "version". The extension alternatives are listed
# without their leading dot because that dot is already matched by the
# preceding "\."; inner dots are escaped so that they only match a literal
# dot.
pypi_pattern = (
    r"(?P<name>.+)-(?P<version>.+)"
    r"\.(zip|tar\.gz|tar\.bz2|tgz)$"
)
192+
193+
# Compiled regex for wheel file names. It captures the distribution "name"
# and "version" (together as "namever"), an optional "build" tag that must
# start with a digit, and the "pyver", "abi" and "plat" tags before ".whl".
#
# The same pattern can be found in the following locations:
# - wheel.wheelfile.WHEEL_INFO_RE
# - distlib.wheel.FILENAME_RE
# - setuptools.wheel.WHEEL_NAME
# - pip._internal.wheel.Wheel.wheel_file_re
wheel_file_re = re.compile(
    r"^(?P<namever>(?P<name>.+?)-(?P<version>.*?))"
    r"((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)"
    r"\.whl)$",
    flags=re.VERBOSE,
)
204+
205+
206+
@purl_router.route('https?://.+python.+org/packages/.*')
def build_pypi_purl(uri):
    """
    Return a "pypi" PackageURL built from the file name found in the last
    path segment of ``uri``, or None when that file name is not recognized.

    File names ending in ".whl" are parsed with ``wheel_file_re`` first; any
    other file name, and any ".whl" name that ``wheel_file_re`` rejects, is
    handed to ``purl_from_pattern`` with ``pypi_pattern``.
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    filename = decoded_path.rsplit('/', 1)[-1]

    # /wheel-0.29.0-py2.py3-none-any.whl
    wheel_match = filename.endswith('.whl') and wheel_file_re.match(filename)
    if wheel_match:
        wheel_name = wheel_match.group('name')
        wheel_version = wheel_match.group('version')
        return PackageURL('pypi', name=wheel_name, version=wheel_version)

    return purl_from_pattern('pypi', pypi_pattern, filename)
222+
223+
224+
# http://nuget.org/packages/EntityFramework/4.2.0.0
# https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1
# Matches NuGet gallery and "api/v2" package URLs and captures the package
# "name" and "version" from the two segments that follow "package/" or
# "packages/".
nuget_pattern1 = (
    r"^https?://.*nuget\.org/(api/v2/)?packages?/"  # literal dot in the host
    r"(?P<name>.+)/"
    r"(?P<version>.+)$"
)
231+
232+
233+
@purl_router.route(nuget_pattern1)
def build_nuget_purl(uri):
    """
    Return a "nuget" PackageURL built from a NuGet package ``uri`` by
    applying ``nuget_pattern1`` through ``purl_from_pattern``.
    """
    return purl_from_pattern(
        type_='nuget',
        pattern=nuget_pattern1,
        uri=uri,
    )
236+
237+
238+
# https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg
# Matches NuGet "v3-flatcontainer" download URLs and captures the package
# "name" and "version" from the two path segments that precede the file
# name.
nuget_pattern2 = (
    r"^https?://api\.nuget\.org/v3-flatcontainer/"  # literal dots in the host
    r"(?P<name>.+)/"
    r"(?P<version>.+)/"
    r".*(nupkg)$"  # ends with "nupkg"
)
245+
246+
247+
@purl_router.route(nuget_pattern2)
def build_nuget_purl(uri):
    """
    Return a "nuget" PackageURL built from a NuGet "v3-flatcontainer"
    ``uri`` by applying ``nuget_pattern2`` through ``purl_from_pattern``.
    """
    # NOTE(review): this reuses the name of the function routed for
    # nuget_pattern1 earlier in the module, so the module-level name is
    # rebound here; presumably the router keeps its own reference to the
    # earlier function -- confirm.
    return purl_from_pattern(
        type_='nuget',
        pattern=nuget_pattern2,
        uri=uri,
    )
250+
251+
252+
# http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2
# Matches SourceForge project download URLs shaped like
# ".../project/{namespace}/{name}/{version}/{name}-{version}..." and
# captures "namespace", "name" and "version". The back-references require
# the file name to start with the same name and version as the path.
sourceforge_pattern = (
    r"^https?://.*sourceforge\.net/project/"  # literal dot in the host
    r"(?P<namespace>([^/]+))/"  # do not allow more "/" segments
    r"(?P<name>.+)/"
    r"(?P<version>[0-9\.]+)/"  # version restricted to digits and dots
    r"(?P=name)-(?P=version).*"  # {name}-{version} repeated in the filename
    r"[^/]$"  # not ending with "/"
)
261+
262+
263+
@purl_router.route(sourceforge_pattern)
def build_sourceforge_purl(uri):
    """
    Return a "sourceforge" PackageURL built from a SourceForge download
    ``uri`` by applying ``sourceforge_pattern`` through
    ``purl_from_pattern``.
    """
    return purl_from_pattern(
        type_='sourceforge',
        pattern=sourceforge_pattern,
        uri=uri,
    )

tests/contrib/data/url2purl.json

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@
8181
"https://registry.npmjs.org/xdg-basedir/-/xdg-basedir-3.0.0.tgz": "pkg:npm/xdg-basedir@3.0.0",
8282
"https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz": "pkg:npm/yallist@2.1.2",
8383

84+
"http://rubygems.org/downloads/": null,
85+
"http://rubygems.org/downloads/macaddr-1.6.1": null,
8486
"http://rubygems.org/downloads/macaddr-1.6.1.gem": "pkg:rubygems/macaddr@1.6.1",
8587
"http://rubygems.org/downloads/open4-1.3.0.gem": "pkg:rubygems/open4@1.3.0",
8688
"https://rubygems.org/downloads/actionmailer-4.0.3.gem": "pkg:rubygems/actionmailer@4.0.3",
@@ -92,5 +94,46 @@
9294
"https://rubygems.org/downloads/ref-1.0.5.gem": "pkg:rubygems/ref@1.0.5",
9395
"https://rubygems.org/downloads/talentbox-delayed_job_sequel-4.0.0.gem": "pkg:rubygems/talentbox-delayed_job_sequel@4.0.0",
9496
"https://rubygems.org/downloads/unf-0.1.3.gem": "pkg:rubygems/unf@0.1.3",
95-
"https://rubygems.org/downloads/yajl-ruby-1.2.0.gem": "pkg:rubygems/yajl-ruby@1.2.0"
97+
"https://rubygems.org/downloads/yajl-ruby-1.2.0.gem": "pkg:rubygems/yajl-ruby@1.2.0",
98+
99+
"https://pypi.python.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz": "pkg:pypi/zc.recipe.egg@2.0.0",
100+
"https://pypi.python.org/packages/source/p/python-openid/python-openid-2.2.5.zip": "pkg:pypi/python-openid@2.2.5",
101+
"https://pypi.python.org/packages/38/e2/b23434f4030bbb1af3bcdbb2ecff6b11cf2e467622446ce66a08e99f2ea9/pluggy-0.4.0.zip#md5=447a92368175965d2fbacaef9f3df842": "pkg:pypi/pluggy@0.4.0",
102+
"https://pypi.python.org/packages/py2.py3/w/wheel/bad-wheel-name-any.whl": null,
103+
"https://pypi.python.org/packages/py2.py3/w/wheel/wheel-0.29.0-py2.py3-none-any.whl": "pkg:pypi/wheel@0.29.0",
104+
"https://pypi.python.org/packages/py2.py3/w/wheel/wheel-0.29.0-py2.py3-none-any.whl#md5=d7db45db5c131af262b8ffccde46a88a": "pkg:pypi/wheel@0.29.0",
105+
"https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl": "pkg:pypi/aboutcode-toolkit@3.4.0rc1",
106+
"https://files.pythonhosted.org/packages/7f/cf/12d4611fc67babd4ae250c9e8249c5650ae1933395488e9e7e3562b4ff24/amqp-2.3.2-py2.py3-none-any.whl#sha256=eed41946890cd43e8dee44a316b85cf6fee5a1a34bb4a562b660a358eb529e1b": "pkg:pypi/amqp@2.3.2",
107+
"https://pypi.python.org/packages/34/c1/8806f99713ddb993c5366c362b2f908f18269f8d792aff1abfd700775a77/click-6.7-py2.py3-none-any.whl#md5=5e7a4e296b3212da2ff11017675d7a4d": "pkg:pypi/click@6.7",
108+
"https://pypi.python.org/packages/f6/ae/bbc6a204f33d9d57c798fb3857a072cd14b836792244eea4b446fdb674c6/pycryptodome-3.4.7-cp27-cp27m-win32.whl#md5=78b341de1cd686077745cd9e3a93d8d3": "pkg:pypi/pycryptodome@3.4.7",
109+
"https://pypi.python.org/packages/bd/e8/ea44ba5357a0b4fd16e5fb60c355fc8722eae31b93d7597eec50f7c35a52/pycryptodome-3.4.7-cp27-cp27m-win_amd64.whl#md5=f20bb847322baf7ae24700e5cbb15e07": "pkg:pypi/pycryptodome@3.4.7",
110+
"https://pypi.python.org/packages/1e/75/8005d086cac4cc41d3b320d338972c5e5c6a21f88472f21ac9d0e031d300/pyahocorasick-1.1.4.tar.bz2#md5=ad445b6648dc06e9040705ce1ccb4384": "pkg:pypi/pyahocorasick@1.1.4",
111+
112+
"http://nuget.org/packages/EntityFramework/4.2.0.0": "pkg:nuget/EntityFramework@4.2.0.0",
113+
"http://www.nuget.org/packages/SharpGIS.GZipWebClient/1.2.0": "pkg:nuget/SharpGIS.GZipWebClient@1.2.0",
114+
"https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1": "pkg:nuget/Newtonsoft.Json@11.0.1",
115+
"http://www.nuget.org/api/v2/package/EntityFramework/6.1.0": "pkg:nuget/EntityFramework@6.1.0",
116+
"https://www.nuget.org/api/v2/package/MvvmLightLibs/4.1.23": "pkg:nuget/MvvmLightLibs@4.1.23",
117+
"https://www.nuget.org/api/v2/package/Twilio/3.4.1": "pkg:nuget/Twilio@3.4.1",
118+
"https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg": "pkg:nuget/newtonsoft.json@10.0.1",
119+
120+
"http://master.dl.sourceforge.net/project/zznotes/zznotes/1.1.2/zznotes-1.1.2.tar.gz": "pkg:sourceforge/zznotes/zznotes@1.1.2",
121+
"http://master.dl.sourceforge.net/project/zapping/zvbi/0.2.35/zvbi-0.2.35.tar.bz2": "pkg:sourceforge/zapping/zvbi@0.2.35",
122+
"http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2": "pkg:sourceforge/libpng/zlib@1.2.3",
123+
"http://master.dl.sourceforge.net/project/xmlstar/xmlstarlet/1.0.0/xmlstarlet-1.0.0-1.src.rpm": "pkg:sourceforge/xmlstar/xmlstarlet@1.0.0",
124+
"http://master.dl.sourceforge.net/project/wxmozilla/wxMozilla/0.5.5/wxMozilla-0.5.5.exe": "pkg:sourceforge/wxmozilla/wxMozilla@0.5.5",
125+
"http://iweb.dl.sourceforge.net/project/sblim/sblim-cim-client2/2.2.5/sblim-cim-client2-2.2.5-src.zip": "pkg:sourceforge/sblim/sblim-cim-client2@2.2.5",
126+
"http://master.dl.sourceforge.net/project/zinnia/zinnia-win32/0.06/zinnia-win32-0.06.zip": "pkg:sourceforge/zinnia/zinnia-win32@0.06",
127+
"http://iweb.dl.sourceforge.net/project/findbugs/findbugs/1.3.4/findbugs-1.3.4.tar.gz/": null,
128+
"http://master.dl.sourceforge.net/project/arestc/net/sf/arestc/arestc/0.1.4/arestc-0.1.4-javadoc.jar": null,
129+
"http://master.dl.sourceforge.net/project/intraperson/OldFiles/intraperson/0.28/intraperson-0.28.tar.gz": null,
130+
"http://master.dl.sourceforge.net/project/pwiki/pwiki/0.1.2/0.1.2.zip": null,
131+
"http://master.dl.sourceforge.net/project/iswraid/iswraid/0.1.4.3/2.4.28-pre3-iswraid.patch.gz": null,
132+
"http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%20stable/0.1a1_stable_AloysCore.zip": null,
133+
"http://master.dl.sourceforge.net/project/myenterprise/OldFiles/1.0.0.2.MyEnterprise.Source.zip": null,
134+
"http://master.dl.sourceforge.net/project/wxhaskell/wxhaskell/wxhaskell-0.9/wxhaskell-src-0.9.zip": null,
135+
"http://master.dl.sourceforge.net/project/a2freedom/A2/1.2/a2freedom-1.2.zip": null,
136+
"http://master.dl.sourceforge.net/project/tinyos/OldFiles/tinyos/1.1.0/tinyos-1.1.0.tar.gz": null,
137+
"http://master.dl.sourceforge.net/project/urlchecker/lu/ng/urlchecker/urlchecker/1.7/urlchecker-1.7-javadoc.jar": null,
138+
"http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar": null
96139
}

0 commit comments

Comments
 (0)