Skip to content

Commit 09fd622

Browse files
Reject deprecated SPDX licenses, but be lenient with committed LICENSE files (#754)
1 parent 24dc9b3 commit 09fd622

15 files changed

Lines changed: 661 additions & 179 deletions

File tree

SPEC.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ Alternately, this can be written without an id:
361361
**[Source](./lib/src/Registry/License.purs)**
362362
**[Spec](./types/v1/License.dhall)**
363363

364-
All packages in the registry must have a license that grants permission for redistribution of the source code. Concretely, the registry requires that all packages use an SPDX license and specify an [SPDX license identifier](https://spdx.dev/ids/). `AND` and `OR` conjunctions are allowed, and licenses can contain exceptions using the `WITH` preposition. The SPDX specification describes [how licenses can be combined and exceptions applied](https://spdx.dev/ids#how).
364+
All packages in the registry must have a license that grants permission for redistribution of the source code. Concretely, the registry requires that all packages use an SPDX license and specify an [SPDX license identifier](https://spdx.dev/ids/). `AND` and `OR` conjunctions are allowed, and licenses can contain exceptions using the `WITH` preposition. The SPDX specification describes [how licenses can be combined and exceptions applied](https://spdx.dev/ids#how). Newly submitted manifests must use current canonical SPDX identifiers, but registry readers should remain backward-compatible with historical stored manifests that still use deprecated SPDX spellings.
365365

366366
A `License` is represented as a string, which must be a valid SPDX identifier. For example:
367367

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"name": "ambiguous-gfdl-fixture",
3+
"version": "1.0.0",
4+
"license": "GFDL-1.3"
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"name": "deprecated-agpl-fixture",
3+
"version": "1.0.0",
4+
"license": "AGPL-3.0"
5+
}

app/src/App/API.purs

Lines changed: 57 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,20 @@ derive instance Eq ManifestOrigin
129129
-- | A parsed manifest along with which format it originated from.
130130
type ParsedManifest = { manifest :: Manifest, origin :: ManifestOrigin }
131131

132+
type ManifestLicenseField = { license :: String }
133+
134+
manifestLicenseFieldCodec :: CJ.Codec ManifestLicenseField
135+
manifestLicenseFieldCodec = CJ.named "ManifestLicenseField" $ CJ.Record.object
136+
{ license: CJ.string
137+
}
138+
139+
validateCanonicalManifestLicense :: String -> Either String Unit
140+
validateCanonicalManifestLicense input = do
141+
{ license } <- case parseJson manifestLicenseFieldCodec input of
142+
Left err -> Left $ CJ.DecodeError.print err
143+
Right decoded -> Right decoded
144+
void $ License.parseCanonical license
145+
132146
-- | Effect row for package set updates. Authentication is done at the API
133147
-- | boundary, so we don't need GITHUB_EVENT_ENV effects here.
134148
type PackageSetUpdateEffects r = (REGISTRY + PACKAGE_SETS + LOG + EXCEPT String + r)
@@ -436,13 +450,17 @@ parseSourceManifest { packageDir, name, version, ref, location } = do
436450
Left error -> do
437451
Log.error $ "Manifest does not typecheck: " <> error
438452
Except.throw $ "Found a valid purs.json file in the package source, but it does not typecheck."
439-
Right _ -> case parseJson Manifest.codec string of
453+
Right _ -> case validateCanonicalManifestLicense string of
440454
Left err -> do
441-
Log.error $ "Failed to parse manifest: " <> CJ.DecodeError.print err
442-
Except.throw $ "Found a purs.json file in the package source, but it could not be decoded."
443-
Right m -> do
444-
Log.debug $ "Read a valid purs.json manifest from the package source:\n" <> stringifyJson Manifest.codec m
445-
pure m
455+
Log.error $ "Manifest license is not canonical: " <> err
456+
Except.throw $ "Found a purs.json file in the package source, but its license field is not canonical."
457+
Right _ -> case parseJson Manifest.codec string of
458+
Left err -> do
459+
Log.error $ "Failed to parse manifest: " <> CJ.DecodeError.print err
460+
Except.throw $ "Found a purs.json file in the package source, but it could not be decoded."
461+
Right m -> do
462+
Log.debug $ "Read a valid purs.json manifest from the package source:\n" <> stringifyJson Manifest.codec m
463+
pure m
446464

447465
FromSpagoYaml -> do
448466
let spagoYamlPath = Path.concat [ packageDir, "spago.yaml" ]
@@ -1308,20 +1326,17 @@ instance FsEncodable PursGraphCache where
13081326
Exists.mkExists $ Cache.AsJson cacheKey codec next
13091327

13101328
-- | Errors that can occur when validating license consistency
1311-
data LicenseValidationError = LicenseMismatch
1312-
{ manifestLicense :: License
1313-
, detectedLicenses :: Array License
1314-
}
1329+
data LicenseValidationError = LicenseMismatch { manifest :: License, detected :: Array License }
13151330

13161331
derive instance Eq LicenseValidationError
13171332

13181333
printLicenseValidationError :: LicenseValidationError -> String
13191334
printLicenseValidationError = case _ of
1320-
LicenseMismatch { manifestLicense, detectedLicenses } -> Array.fold
1335+
LicenseMismatch licenses -> Array.fold
13211336
[ "License mismatch: The manifest specifies license '"
1322-
, License.print manifestLicense
1337+
, License.print licenses.manifest
13231338
, "' but the following license(s) were detected in your repository: "
1324-
, String.joinWith ", " (map License.print detectedLicenses)
1339+
, String.joinWith ", " (map License.print licenses.detected)
13251340
, ". Please ensure your manifest license accurately represents all licenses "
13261341
, "in your repository. If multiple licenses apply, join them using SPDX "
13271342
, "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')."
@@ -1344,44 +1359,40 @@ validateLicense packageDir manifestLicense = do
13441359
pure Nothing
13451360
Right detectedStrings -> do
13461361
let
1362+
-- Best effort: keep detected licenses that parse, which canonicalizes
1363+
-- deprecated IDs when possible and preserves recognized ambiguous
1364+
-- deprecated IDs for validation.
13471365
parsedLicenses :: Array License
1348-
parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings
1366+
parsedLicenses =
1367+
detectedStrings # Array.mapMaybe (hush <<< License.parse)
13491368

13501369
Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings
13511370

13521371
if Array.null parsedLicenses then do
13531372
Log.debug "No licenses detected from repository files, nothing to validate."
13541373
pure Nothing
1355-
else case License.extractIds manifestLicense of
1356-
Left err -> do
1357-
-- This shouldn't be possible (we have already validated the license)
1358-
-- as part of constructing the manifest
1359-
Log.warn $ "Could not extract license IDs from manifest: " <> err
1374+
else do
1375+
let
1376+
manifestIds = License.extractIds manifestLicense
1377+
manifestIdSet = Set.fromFoldable manifestIds
1378+
1379+
-- A detected license is covered if all its IDs are in the manifest IDs
1380+
isCovered :: License -> Boolean
1381+
isCovered license =
1382+
License.extractIds license # Array.all \id ->
1383+
Set.member id manifestIdSet
1384+
1385+
uncoveredLicenses :: Array License
1386+
uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses
1387+
1388+
if Array.null uncoveredLicenses then do
1389+
Log.debug "All detected licenses are covered by the manifest license."
13601390
pure Nothing
1361-
Right manifestIds -> do
1362-
let
1363-
manifestIdSet = Set.fromFoldable manifestIds
1364-
1365-
-- A detected license is covered if all its IDs are in the manifest IDs
1366-
isCovered :: License -> Boolean
1367-
isCovered license = case License.extractIds license of
1368-
Left _ -> false
1369-
Right ids -> Array.all (\id -> Set.member id manifestIdSet) ids
1370-
1371-
uncoveredLicenses :: Array License
1372-
uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses
1373-
1374-
if Array.null uncoveredLicenses then do
1375-
Log.debug "All detected licenses are covered by the manifest license."
1376-
pure Nothing
1377-
else do
1378-
Log.warn $ Array.fold
1379-
[ "License mismatch detected: manifest has '"
1380-
, License.print manifestLicense
1381-
, "' but detected "
1382-
, String.joinWith ", " (map License.print parsedLicenses)
1383-
]
1384-
pure $ Just $ LicenseMismatch
1385-
{ manifestLicense
1386-
, detectedLicenses: uncoveredLicenses
1387-
}
1391+
else do
1392+
Log.warn $ Array.fold
1393+
[ "License mismatch detected: manifest has '"
1394+
, License.print manifestLicense
1395+
, "' but detected "
1396+
, String.joinWith ", " (map License.print parsedLicenses)
1397+
]
1398+
pure $ Just $ LicenseMismatch { manifest: manifestLicense, detected: uncoveredLicenses }

app/src/App/Legacy/Manifest.purs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import Registry.App.Prelude
1515

1616
import Codec.JSON.DecodeError as CJ.DecodeError
1717
import Data.Array as Array
18+
import Data.Array.NonEmpty as NonEmptyArray
1819
import Data.Codec as Codec
1920
import Data.Codec.JSON as CJ
2021
import Data.Codec.JSON.Common as CJ.Common
@@ -74,9 +75,14 @@ bowerfileToPursJson
7475
:: Bowerfile
7576
-> Either String { license :: License, description :: Maybe String, dependencies :: Map PackageName Range }
7677
bowerfileToPursJson (Bowerfile { description, dependencies, license }) = do
77-
parsedLicense <- case Array.mapMaybe (hush <<< License.parse) license of
78-
[] -> Left "No valid SPDX license found in bower.json"
79-
multiple -> Right $ License.joinWith License.And multiple
78+
let
79+
-- Best effort: keep any licenses that parse cleanly and drop the rest.
80+
validLicenses = Array.mapMaybe (hush <<< License.parseCanonical) license
81+
82+
parsedLicense <-
83+
case NonEmptyArray.fromArray validLicenses of
84+
Nothing -> Left "No valid SPDX license found in bower.json"
85+
Just multiple -> Right $ License.joinWith License.And multiple
8086

8187
parsedDeps <- parseDependencies dependencies
8288

@@ -134,7 +140,7 @@ spagoDhallToPursJson
134140
spagoDhallToPursJson (SpagoDhallJson { license, dependencies, packages }) = do
135141
parsedLicense <- case license of
136142
Nothing -> Left "No license found in spago.dhall"
137-
Just lic -> case License.parse (NonEmptyString.toString lic) of
143+
Just lic -> case License.parseCanonical (NonEmptyString.toString lic) of
138144
Left _ -> Left $ "Invalid SPDX license in spago.dhall: " <> NonEmptyString.toString lic
139145
Right l -> Right l
140146

app/src/App/Manifest/SpagoYaml.purs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,20 @@ type PublishConfig =
9090
publishConfigCodec :: CJ.Codec PublishConfig
9191
publishConfigCodec = CJ.named "PublishConfig" $ CJ.Record.object
9292
{ version: Version.codec
93-
, license: License.codec
93+
-- Publish metadata is authored input so it must use canonical SPDX identifiers
94+
, license: canonicalLicenseCodec
9495
, location: CJ.Record.optional Location.codec
9596
, include: CJ.Record.optional (CJ.array CJ.string)
9697
, exclude: CJ.Record.optional (CJ.array CJ.string)
9798
, owners: CJ.Record.optional (CJ.Common.nonEmptyArray Owner.codec)
9899
}
99100

101+
canonicalLicenseCodec :: CJ.Codec License
102+
canonicalLicenseCodec = CJ.named "CanonicalLicense" $ Codec.codec' decode encode
103+
where
104+
encode = CJ.encode CJ.string <<< License.print
105+
decode = Codec.decode CJ.string >=> (License.parseCanonical >>> lmap CJ.DecodeError.basic >>> except)
106+
100107
dependenciesCodec :: CJ.Codec (Map PackageName (Maybe SpagoRange))
101108
dependenciesCodec = Profunctor.dimap toJsonRep fromJsonRep $ CJ.array dependencyCodec
102109
where

app/test/App/API.purs

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import Data.Map as Map
88
import Data.Set as Set
99
import Data.String as String
1010
import Data.String.NonEmpty as NonEmptyString
11+
import Data.String.Pattern (Pattern(..))
1112
import Effect.Aff as Aff
1213
import Effect.Class.Console as Console
1314
import Effect.Ref as Ref
@@ -27,6 +28,7 @@ import Registry.Foreign.FSExtra as FS.Extra
2728
import Registry.Foreign.FastGlob as FastGlob
2829
import Registry.Foreign.Tmp as Tmp
2930
import Registry.License as License
31+
import Registry.Location (Location(..))
3032
import Registry.PackageName as PackageName
3133
import Registry.Test.Assert as Assert
3234
import Registry.Test.Assert.Run as Assert.Run
@@ -52,6 +54,9 @@ spec = do
5254
Spec.describe "Verifies build plans" do
5355
checkBuildPlanToResolutions
5456

57+
Spec.describe "Parses source manifests" do
58+
parseSourceManifestSpec
59+
5560
Spec.describe "Validates licenses match" do
5661
licenseValidation
5762

@@ -222,6 +227,38 @@ checkBuildPlanToResolutions = do
222227
path = Path.concat [ installedResolutions, PackageName.print packageName <> "-" <> Version.print version ]
223228
pure $ Tuple bowerName { path, version }
224229

230+
parseSourceManifestSpec :: Spec.Spec Unit
231+
parseSourceManifestSpec = do
232+
Spec.it "Rejects deprecated SPDX identifiers in purs.json" do
233+
resourceEnv <- liftEffect Env.lookupResourceEnv
234+
Aff.bracket Tmp.mkTmpDir FS.Extra.remove \packageDir -> do
235+
let
236+
manifestPath = Path.concat [ packageDir, "purs.json" ]
237+
args =
238+
{ packageDir
239+
, name: Utils.unsafePackageName "registry-lib"
240+
, version: Utils.unsafeVersion "0.0.1"
241+
, ref: "v0.0.1"
242+
, location: GitHub { owner: "purescript", repo: "registry-dev", subdir: Nothing }
243+
}
244+
245+
FS.Aff.writeTextFile UTF8 manifestPath
246+
"""{"name":"registry-lib","version":"0.0.1","license":"AGPL-3.0","location":{"githubOwner":"purescript","githubRepo":"registry-dev"},"ref":"v0.0.1","dependencies":{"prelude":">=6.0.0 <7.0.0"}}"""
247+
248+
result <-
249+
API.parseSourceManifest args
250+
# Env.runResourceEnv resourceEnv
251+
# Log.interpret (\(Log.Log _ _ next) -> pure next)
252+
# Except.runExcept
253+
# Run.runBaseAff'
254+
255+
case result of
256+
Left err ->
257+
unless (String.contains (Pattern "license field is not canonical") err) do
258+
Assert.fail $ "Expected a canonical license error, but got: " <> err
259+
Right _ ->
260+
Assert.fail "Expected parseSourceManifest to reject deprecated SPDX identifiers"
261+
225262
removeIgnoredTarballFiles :: Spec.Spec Unit
226263
removeIgnoredTarballFiles = Spec.before runBefore do
227264
Spec.it "Picks correct files when packaging a tarball" \{ tmp, writeDirectories, writeFiles } -> do
@@ -361,7 +398,10 @@ copySourceFiles = Spec.hoistSpec identity (\_ -> Assert.Run.runBaseEffects) $ Sp
361398

362399
licenseValidation :: Spec.Spec Unit
363400
licenseValidation = do
364-
let fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ]
401+
let
402+
fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ]
403+
deprecatedFixture = Path.concat [ "app", "fixtures", "licenses", "deprecated-agpl" ]
404+
ambiguousFixture = Path.concat [ "app", "fixtures", "licenses", "ambiguous-gfdl" ]
365405

366406
Spec.describe "validateLicense" do
367407
Spec.it "Passes when manifest license covers all detected licenses" do
@@ -375,9 +415,9 @@ licenseValidation = do
375415
let manifestLicense = unsafeLicense "MIT"
376416
result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense
377417
case result of
378-
Just (LicenseMismatch { detectedLicenses }) ->
418+
Just (LicenseMismatch { detected }) ->
379419
-- Should detect that Apache-2.0 is not covered
380-
Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0"
420+
Assert.shouldContain (map License.print detected) "Apache-2.0"
381421
_ ->
382422
Assert.fail "Expected LicenseMismatch error"
383423

@@ -386,11 +426,11 @@ licenseValidation = do
386426
let manifestLicense = unsafeLicense "BSD-3-Clause"
387427
result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense
388428
case result of
389-
Just (LicenseMismatch { manifestLicense: ml, detectedLicenses }) -> do
429+
Just (LicenseMismatch { manifest: ml, detected }) -> do
390430
Assert.shouldEqual "BSD-3-Clause" (License.print ml)
391431
-- Both MIT and Apache-2.0 should be in the detected licenses
392-
Assert.shouldContain (map License.print detectedLicenses) "MIT"
393-
Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0"
432+
Assert.shouldContain (map License.print detected) "MIT"
433+
Assert.shouldContain (map License.print detected) "Apache-2.0"
394434
_ ->
395435
Assert.fail "Expected LicenseMismatch error"
396436

@@ -400,5 +440,23 @@ licenseValidation = do
400440
result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense
401441
Assert.shouldEqual Nothing result
402442

443+
Spec.it "Canonicalizes deterministic deprecated detected licenses during validation" do
444+
let manifestLicense = unsafeLicense "MIT"
445+
result <- Assert.Run.runBaseEffects $ validateLicense deprecatedFixture manifestLicense
446+
case result of
447+
Just (LicenseMismatch { detected }) ->
448+
Assert.shouldContain (map License.print detected) "AGPL-3.0-only"
449+
_ ->
450+
Assert.fail "Expected LicenseMismatch error"
451+
452+
Spec.it "Preserves ambiguous deprecated detected licenses during validation" do
453+
let manifestLicense = unsafeLicense "MIT"
454+
result <- Assert.Run.runBaseEffects $ validateLicense ambiguousFixture manifestLicense
455+
case result of
456+
Just (LicenseMismatch { detected }) ->
457+
Assert.shouldContain (map License.print detected) "GFDL-1.3"
458+
_ ->
459+
Assert.fail "Expected LicenseMismatch error"
460+
403461
unsafeLicense :: String -> License
404462
unsafeLicense str = unsafeFromRight $ License.parse str

app/test/App/Legacy/Manifest.purs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import Codec.JSON.DecodeError as CJ.DecodeError
66
import Data.Array as Array
77
import Data.Codec.JSON as CJ
88
import Registry.App.Legacy.Manifest as Legacy.Manifest
9+
import Registry.License as License
910
import Registry.Manifest (Manifest(..))
1011
import Registry.Test.Assert as Assert
1112
import Test.Spec (Spec)
@@ -134,6 +135,21 @@ bowerfileToPursJsonSpec = do
134135
Left err -> Assert.fail $ "Failed to convert bowerfile:\n" <> err
135136
Right _ -> pure unit
136137

138+
Spec.it "Drops invalid SPDX identifiers when valid licenses remain" do
139+
let
140+
input =
141+
"""
142+
{ "license": [ "MIT", "AGPL-3.0" ]
143+
, "dependencies": { "purescript-prelude": "^6.0.0" }
144+
}
145+
"""
146+
case parseJson Legacy.Manifest.bowerfileCodec input of
147+
Left err -> Assert.fail $ "Failed to parse bowerfile:\n" <> CJ.DecodeError.print err
148+
Right bowerfile -> case Legacy.Manifest.bowerfileToPursJson bowerfile of
149+
Left err -> Assert.fail $ "Failed to convert bowerfile:\n" <> err
150+
Right result ->
151+
Assert.shouldEqual "MIT" (License.print result.license)
152+
137153
Spec.describe "Rejects invalid Bowerfiles" do
138154
Spec.it "Fails on missing license" do
139155
let

0 commit comments

Comments
 (0)