Skip to content

Commit cf114f7

Browse files
committed
Refine and simplify all regex patterns
Signed-off-by: Thomas Druez <tdruez@nexb.com>
1 parent d7d3a7e commit cf114f7

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

src/packageurl/contrib/url2purl.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def build_maven_purl(uri):
173173
# https://rubygems.org/downloads/jwt-0.1.8.gem
174174
rubygems_pattern = (
175175
r"^https?://rubygems.org/downloads/"
176-
r"(?P<name>.+-?)-(?P<version>.*?)"
176+
r"(?P<name>.+)-(?P<version>.+)"
177177
r"(\.gem)$"
178178
)
179179

@@ -185,7 +185,7 @@ def build_rubygems_purl(uri):
185185

186186
# https://pypi.python.org/packages/source/p/python-openid/python-openid-2.2.5.zip
187187
pypi_pattern = (
188-
r"(?P<name>.+-?)-(?P<version>.*?)"
188+
r"(?P<name>.+)-(?P<version>.+)"
189189
r"\.(zip|tar.gz|tar.bz2)$"
190190
)
191191

@@ -210,7 +210,8 @@ def build_pypi_purl(uri):
210210
# https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1
211211
nuget_pattern1 = (
212212
r"^https?://.*nuget.org/(api/v2/)?packages?/"
213-
r"(?P<name>.+-?)/(?P<version>.*?)$"
213+
r"(?P<name>.+)/"
214+
r"(?P<version>.+)$"
214215
)
215216

216217

@@ -222,8 +223,9 @@ def build_nuget_purl(uri):
222223
# https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg
223224
nuget_pattern2 = (
224225
r"^https?://api.nuget.org/v3-flatcontainer/"
225-
r"(?P<name>.+-?)/(?P<version>.*?)/"
226-
r".*nupkg$"
226+
r"(?P<name>.+)/"
227+
r"(?P<version>.+)/"
228+
r".*(nupkg)$" # ends with "nupkg"
227229
)
228230

229231

@@ -235,8 +237,11 @@ def build_nuget_purl(uri):
235237
# http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2
236238
sourceforge_pattern = (
237239
r"^https?://.*sourceforge.net/project/"
238-
r"(?P<namespace>.+-?)/(?P<name>.+-?)/(?P<version>[\.0-9]*?)/"
239-
r"(?P=name)-(?P=version).*$"
240+
r"(?P<namespace>([^/]+))/" # do not allow more "/" segments
241+
r"(?P<name>.+)/"
242+
r"(?P<version>[0-9\.]+)/" # version restricted to digits and dots
243+
r"(?P=name)-(?P=version).*" # {name}-{version} repeated in the filename
244+
r"[^/]$" # not ending with "/"
240245
)
241246

242247

tests/contrib/data/url2purl.json

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,10 +121,12 @@
121121
"http://master.dl.sourceforge.net/project/wxmozilla/wxMozilla/0.5.5/wxMozilla-0.5.5.exe": "pkg:sourceforge/wxmozilla/wxMozilla@0.5.5",
122122
"http://iweb.dl.sourceforge.net/project/sblim/sblim-cim-client2/2.2.5/sblim-cim-client2-2.2.5-src.zip": "pkg:sourceforge/sblim/sblim-cim-client2@2.2.5",
123123
"http://master.dl.sourceforge.net/project/zinnia/zinnia-win32/0.06/zinnia-win32-0.06.zip": "pkg:sourceforge/zinnia/zinnia-win32@0.06",
124+
"http://iweb.dl.sourceforge.net/project/findbugs/findbugs/1.3.4/findbugs-1.3.4.tar.gz/": null,
124125
"http://master.dl.sourceforge.net/project/pwiki/pwiki/0.1.2/0.1.2.zip": null,
125126
"http://master.dl.sourceforge.net/project/iswraid/iswraid/0.1.4.3/2.4.28-pre3-iswraid.patch.gz": null,
126127
"http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%20stable/0.1a1_stable_AloysCore.zip": null,
127128
"http://master.dl.sourceforge.net/project/myenterprise/OldFiles/1.0.0.2.MyEnterprise.Source.zip": null,
128129
"http://master.dl.sourceforge.net/project/wxhaskell/wxhaskell/wxhaskell-0.9/wxhaskell-src-0.9.zip": null,
129-
"http://master.dl.sourceforge.net/project/a2freedom/A2/1.2/a2freedom-1.2.zip": null
130+
"http://master.dl.sourceforge.net/project/a2freedom/A2/1.2/a2freedom-1.2.zip": null,
131+
"http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar": null
130132
}

0 commit comments

Comments
 (0)