Skip to content

Commit d5bd78d

Browse files
authored
Merge pull request #66 from JuliaAI/dev
For a 0.3.4 release
2 parents 411860f + b59b9d3 commit d5bd78d

File tree

12 files changed

+208
-21
lines changed

12 files changed

+208
-21
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
arch:
2525
- x64
2626
steps:
27-
- uses: actions/checkout@v4
27+
- uses: actions/checkout@v6
2828
- uses: julia-actions/setup-julia@v2
2929
with:
3030
version: ${{ matrix.version }}
@@ -51,7 +51,7 @@ jobs:
5151
name: Documentation
5252
runs-on: ubuntu-latest
5353
steps:
54-
- uses: actions/checkout@v4
54+
- uses: actions/checkout@v6
5555
- uses: julia-actions/setup-julia@v2
5656
with:
5757
version: '1'

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "StatisticalMeasures"
22
uuid = "a19d573c-0a75-4610-95b3-7071388c7541"
33
authors = ["Anthony D. Blaom <[email protected]>"]
4-
version = "0.3.3"
4+
version = "0.3.4"
55

66
[deps]
77
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

src/confusion_matrices.jl

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,12 @@ module ConfusionMatrices
99
using CategoricalArrays
1010
using OrderedCollections
1111
import ..Functions
12+
import ..warn_unordered
1213

1314
const CM = "ConfusionMatrices"
1415
const CatArrOrSub{T, N} =
1516
Union{CategoricalArray{T, N}, SubArray{T, N, <:CategoricalArray}}
1617

17-
function WARN_UNORDERED(levels)
18-
raw_levels = CategoricalArrays.unwrap.(levels)
19-
ret = "Levels not explicitly ordered. "*
20-
"Using the order $raw_levels. "
21-
if length(levels) == 2
22-
ret *= "The \"positive\" level is $(raw_levels[2]). "
23-
end
24-
ret
25-
end
26-
2718
const ERR_INDEX_ACCESS_DENIED = ErrorException(
2819
"Direct access by index of unordered confusion matrices dissallowed. "*
2920
"Access by level, as in `some_confusion_matrix(\"male\", \"female\")` or first "*
@@ -343,7 +334,7 @@ Return the regular `Matrix` associated with confusion matrix `m`.
343334
"""
344335
matrix(cm::ConfusionMatrix{N,true}; kwargs...) where N = cm.mat
345336
@inline function matrix(cm::ConfusionMatrix{N,false}; warn=true) where N
346-
warn && @warn WARN_UNORDERED(levels(cm))
337+
warn && warn_unordered(levels(cm))
347338
cm.mat
348339
end
349340

src/functions.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,59 @@ function matthews_correlation(m)
249249
return mcc
250250
end
251251

"""
    Functions.cbi(
        scores, y, positive_class;
        verbosity, nbins, binwidth,
        max=maximum(scores), min=minimum(scores), cor=StatsBase.corspearman,
    )

Return the Continuous Boyce Index (CBI) for a vector `scores` of predicted probabilities
of `positive_class` and the corresponding ground truth observations `y`.

The interval `[min, max]` is covered by `nbins` bins of width `binwidth`, whose left
edges are evenly spaced between `min` and `max - binwidth` (so bins may overlap). For
every bin containing at least one score, the proportion of observations equal to
`positive_class` is computed. The return value is `cor` applied to these proportions
and the left edges of the corresponding bins.

# Keywords

- `verbosity`: if greater than `1`, log the number of empty bins that are dropped.
- `nbins`: number of bins.
- `binwidth`: width of each bin.
- `max`, `min`: upper and lower limits of the binned score range.
- `cor`: correlation function, called as `cor(proportions, bin_starts)`.

# Throws

- `DimensionMismatch` if `scores` and `y` differ in length.

"""
function cbi(
    scores, y, positive_class;
    verbosity, nbins, binwidth,
    max=maximum(scores), min=minimum(scores), cor=StatsBase.corspearman
)
    # Without this check a longer `y` would be silently truncated by the views below.
    length(scores) == length(y) || throw(DimensionMismatch(
        "`scores` and `y` must have the same number of observations, " *
        "got $(length(scores)) and $(length(y)). "
    ))

    binstarts = range(min, stop=max-binwidth, length=nbins)
    binends = binstarts .+ binwidth

    # Sort once so that the members of each bin form a contiguous index range that
    # can be located by binary search.
    order = sortperm(scores)
    sorted_scores = view(scores, order)
    sorted_y = view(y, order)

    n_positive = zeros(Int, nbins)
    n_total = zeros(Int, nbins)

    for i in 1:nbins
        lo = searchsortedfirst(sorted_scores, binstarts[i])
        hi = searchsortedlast(sorted_scores, binends[i])
        # `lo:hi` is empty (and `hi - lo + 1 == 0`) when no score falls in the bin.
        n_total[i] = hi - lo + 1
        n_positive[i] = count(==(positive_class), view(sorted_y, lo:hi))
    end

    empty_bins = iszero.(n_total)
    if any(empty_bins)
        verbosity > 1 && @info "removing $(sum(empty_bins)) bins without any observations"
        deleteat!(n_positive, empty_bins)
        deleteat!(n_total, empty_bins)
        binstarts = binstarts[.!empty_bins]
    end

    # calculate "PE-ratios" - a bunch of things cancel out but that does not matter for
    # any correlation calculation
    PE_ratios = n_positive ./ n_total
    return cor(PE_ratios, binstarts)
end
303+
304+
252305

253306
# ## binary, but NOT invariant under class relabellings
254307

src/probabilistic.jl

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,89 @@ $DOC_DISTRIBUTIONS
544544
SphericalScore
545545
"$SphericalScoreDoc"
546546
const spherical_score = SphericalScore()
547+
548+
549+
# ---------------------------------------------------------------------
550+
# Continuous Boyce Index
551+
# Unwrapped measure type holding the hyper-parameters that the call method forwards to
# `Functions.cbi`:
#
# - `verbosity`: logging level
# - `nbins`: number of bins (stored as a concrete `Int`; any `Integer` is accepted)
# - `binwidth`: width of each bin
# - `min`, `max`: limits of the binned score range
# - `cor`: correlation function
#
# The keyword constructor throws an `ArgumentError` unless `nbins >= 1`, `binwidth > 0`
# and `min < max`.
struct _ContinuousBoyceIndex
    verbosity::Int
    nbins::Int
    binwidth::Float64
    min::Float64
    max::Float64
    cor::Function
    function _ContinuousBoyceIndex(;
        verbosity = 1, nbins = 101, binwidth = 0.1,
        min = 0, max = 1, cor = StatsBase.corspearman
    )
        # Fail at construction time rather than deep inside the binning code.
        nbins >= 1 ||
            throw(ArgumentError("`nbins` must be at least 1, got $nbins. "))
        binwidth > 0 ||
            throw(ArgumentError("`binwidth` must be positive, got $binwidth. "))
        min < max ||
            throw(ArgumentError("Need `min < max`, got min=$min and max=$max. "))
        return new(verbosity, nbins, binwidth, min, max, cor)
    end
end
565+
566+
# User-facing constructor: build the raw measure from the keyword arguments, then pass
# it through `robust_measure` and `fussy_measure` (both defined elsewhere).
function ContinuousBoyceIndex(; kw...)
    return fussy_measure(robust_measure(_ContinuousBoyceIndex(; kw...)))
end

# Evaluate the raw measure on probabilistic predictions `ŷ` and ground truth `y`. The
# positive class is the last level of the first prediction; the score of an observation
# is the probability its prediction assigns to that class.
function (m::_ContinuousBoyceIndex)(
    ŷ::AbstractArray{<:UnivariateFinite},
    y::NonMissingCatArrOrSub,
)
    if m.verbosity > 0
        warn_unordered(levels(y))
    end
    positive_class = last(levels(first(ŷ)))
    scores = pdf.(ŷ, positive_class)

    return Functions.cbi(
        scores, y, positive_class;
        verbosity = m.verbosity,
        nbins = m.nbins,
        binwidth = m.binwidth,
        max = m.max,
        min = m.min,
        cor = m.cor,
    )
end
576+
577+
# Alias for a `_ContinuousBoyceIndex` wrapped in `API.RobustMeasure` and then
# `API.FussyMeasure`. NOTE(review): presumably this is exactly the type returned by
# `ContinuousBoyceIndex(...)` — confirm against `robust_measure` / `fussy_measure`.
const ContinuousBoyceIndexType = API.FussyMeasure{<:API.RobustMeasure{<:_ContinuousBoyceIndex}}
578+
579+
# NOTE(review): `@fix_show` is defined elsewhere; presumably it customizes how wrapped
# instances are displayed — confirm.
@fix_show ContinuousBoyceIndex::ContinuousBoyceIndexType
580+
581+
# Trait declarations for the unwrapped measure: binary (`Finite{2}`) observations,
# distribution-valued predictions, and a score orientation.
StatisticalMeasures.@trait(
582+
_ContinuousBoyceIndex,
583+
consumes_multiple_observations=true,
584+
observation_scitype = Finite{2},
585+
kind_of_proxy=StatisticalMeasures.LearnAPI.Distribution(),
586+
orientation=Score(),
587+
external_aggregation_mode=Mean(),
588+
human_name = "continuous Boyce index",
589+
)
590+
591+
# Register the constructor together with the names "continuous_boyce_index" and "cbi"
# (presumably the aliases under which instances are exposed — confirm against `register`).
register(ContinuousBoyceIndex, "continuous_boyce_index", "cbi")
592+
593+
# Shared docstring for `ContinuousBoyceIndex` and its instances `cbi` and
# `continuous_boyce_index`. The signature and keyword list mirror the keyword
# constructor of `_ContinuousBoyceIndex`.
const ContinuousBoyceIndexDoc = docstring(
    "ContinuousBoyceIndex(; verbosity=1, nbins=101, binwidth=0.1, min=0, max=1, "*
    "cor=StatsBase.corspearman)",
    body=
"""
The Continuous Boyce Index is a measure for evaluating the performance of probabilistic
predictions for binary classification, especially for presence-background data in
ecological modeling. The predicted probabilities of the positive class are grouped into
(possibly overlapping) bins, and the measure is higher if the proportion of positive
samples in each bin is strongly correlated with the position of that bin.

## Keywords
- `verbosity`: Verbosity level. If positive, a warning is issued when the levels of `y`
  are not explicitly ordered; if greater than 1, the number of dropped empty bins is
  also reported.
- `nbins`: Number of bins to use for score partitioning.
- `binwidth`: The width of each bin, which defaults to 0.1.
- `min`, `max`: Minimum and maximum score values for binning. Default to 0 and 1,
  respectively.
- `cor`: Correlation function (defaults to StatsBase.corspearman, i.e. Spearman
  correlation).

## Arguments

The predictions `ŷ` should be a vector of `UnivariateFinite` distributions from
CategoricalDistributions.jl, and `y` a CategoricalVector of ground truth labels. The
"positive" class is the last level of the predictions.

Returns the correlation between the proportion of positive samples in each non-empty
bin and the position of that bin.

Core implementation: [`Functions.cbi`](@ref).

Reference:
Alexandre H. Hirzel, Gwenaëlle Le Lay, Véronique Helfer, Christophe Randin, Antoine Guisan,
Evaluating the ability of habitat suitability models to predict species presences,
Ecological Modelling,
Volume 199, Issue 2, 2006
""",
    scitype="",
)

"$ContinuousBoyceIndexDoc"
ContinuousBoyceIndex
"$ContinuousBoyceIndexDoc"
const cbi = ContinuousBoyceIndex()
"$ContinuousBoyceIndexDoc"
const continuous_boyce_index = ContinuousBoyceIndex()

src/roc.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,7 @@ function binary_levels(
2323
length(classes) == 2 || throw(ERR_ROC2)
2424
API.check_numobs(yhat, y)
2525
API.check_pools(yhat, y)
26-
if !(yhat isa AbstractArray{<:UnivariateFinite{<:OrderedFactor}}) ||
27-
!CategoricalArrays.isordered(y)
28-
@warn ConfusionMatrices.WARN_UNORDERED(classes)
29-
end
26+
warn_unordered(classes)
3027
classes
3128
end
3229
binary_levels(

src/tools.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,15 @@ function API.check_pools(
5959
return nothing
6060
end
6161

62+
# Log a warning, unless `levels` is an ordered `CategoricalArray`, saying that the
# levels are not explicitly ordered and which order is being used. For exactly two
# levels the message also names the second one as the "positive" level. Returns the
# message that was logged, or `nothing` if no warning was issued.
function warn_unordered(levels)
    if levels isa CategoricalArray && CategoricalArrays.isordered(levels)
        return
    end

    raw_levels = CategoricalArrays.unwrap.(levels)
    message = string("Levels not explicitly ordered. ", "Using the order $raw_levels. ")
    if length(levels) == 2
        message = string(message, "The \"positive\" level is $(raw_levels[2]). ")
    end

    @warn message
    return message
end

test/confusion_matrices.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const CM = StatisticalMeasures.ConfusionMatrices
3030
rev_index_given_level = Dict("B" => 1, "A" => 2)
3131
@test cm == CM.ConfusionMatrix(n, rev_index_given_level)
3232
mat = @test_logs(
33-
(:warn, CM.WARN_UNORDERED(levels)),
33+
(:warn, StatisticalMeasures.warn_unordered(levels)),
3434
CM.matrix(cm),
3535
)
3636
@test mat == m

test/finite.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ end
114114
1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2,
115115
1, 2, 2, missing]
116116

117-
@test_logs (:warn, CM.WARN_UNORDERED([1, 2])) f1score(ŷ, y)
117+
@test_logs (:warn, StatisticalMeasures.warn_unordered([1, 2])) f1score(ŷ, y)
118118
f05 = @test_logs FScore(0.5, levels=[1, 2])(ŷ, y)
119119
sk_f05 = 0.625
120120
@test f05 ≈ sk_f05 # m.fbeta_score(y, yhat, 0.5, pos_label=2)

test/probabilistic.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,53 @@ end
180180
@test_throws StatisticalMeasures.ERR_UNSUPPORTED_ALPHA s(yhat, [1.0, 1.0])
181181
end
182182

183+
@testset "ContinuousBoyceIndex" begin
    rng = srng(1234)
    # Synthetic test: an observation is positive with (roughly) its predicted
    # probability, so the share of positives grows with the score
    probs = repeat(0.0:0.1:0.9, inner = 10) .+ rand(rng, 100) .* 0.1
    y = categorical(probs .> rand(rng, 100))
    ŷ = UnivariateFinite(levels(y), probs, augment=true)
    # Should be pretty high
    @test cbi(ŷ, y) ≈ 0.87 atol=0.01

    # Passing different correlation methods works
    @test ContinuousBoyceIndex(cor=cor)(ŷ, y) ≈ 0.90 atol = 0.01
    @test ContinuousBoyceIndex(nbins = 11, binwidth = 0.03)(ŷ, y) ≈ 0.77 atol = 0.01

    # Randomized test: shuffled labels, should be near 0
    y_shuf = copy(y)
    MLUtils.shuffle!(rng, y_shuf)
    @test (cbi(ŷ, y_shuf)) ≈ 0.0 atol=0.1

    # Test invariance to order (seeded, so that the test is reproducible)
    idx = randperm(rng, length(y))
    @test isapprox(cbi(ŷ[idx], y[idx]), cbi(ŷ, y), atol=1e-8)

    # Test with all positives or all negatives return NaN
    y_allpos = categorical(trues(100), levels = levels(y))
    y_allneg = categorical(falses(100), levels = levels(y))
    @test isnan(cbi(ŷ, y_allpos))
    @test isnan(cbi(ŷ, y_allneg))

    unordered_warning = StatisticalMeasures.warn_unordered([false, true])
    @test_logs(
        (:warn, unordered_warning),
        cbi(ŷ, y),
    )

    cbi_dropped_bins = @test_logs(
        (:warn, unordered_warning), (:info, "removing 91 bins without any observations",),
        ContinuousBoyceIndex(; verbosity = 2, min =0.0, max = 2.0, nbins = 191)(ŷ, y),
    )
    # These two are identical because bins are dropped
    @test cbi_dropped_bins ==
        ContinuousBoyceIndex(; min = 0.0, max = 1.2, nbins = 111)(ŷ, y)

    # cbi is silent for verbosity 0
    @test_logs ContinuousBoyceIndex(; verbosity = 0)(ŷ, y)
end
229+
183230
@testset "l2_check" begin
184231
d = Distributions.Normal()
185232
yhat = Union{Distributions.Sampleable,Missing}[d, d, missing]

0 commit comments

Comments
 (0)