Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev-v0.9' into dev-v0.9.1
Browse files Browse the repository at this point in the history
  • Loading branch information
giopaglia committed Nov 13, 2023
2 parents 54db24e + 67dadbe commit 1d15302
Show file tree
Hide file tree
Showing 14 changed files with 395 additions and 11 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/Documentation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Build the Documenter.jl site and deploy it from GitHub Actions.
name: Documentation
on:
  push:
    branches:
      - main
      - dev
    tags: '*'
  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # checkout@v2 targets a retired Node runtime; use a maintained release.
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@latest
        with:
          version: '1.9'
      - name: Install dependencies
        # Develop the package from the checkout into the docs environment.
        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
      - name: Build and deploy
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
        run: julia --project=docs/ docs/make.jl
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ makedocs(;
)

deploydocs(;
repo="github.com/aclai-lab/SoleFeatures.jl",
repo = "github.com/aclai-lab/SoleFeatures.jl",
target = "build",
branch = "gh-pages",
versions = ["main" => "main", "stable" => "v^", "v#.#", "dev" => "dev"],
Expand Down
62 changes: 60 additions & 2 deletions src/SoleFeatures.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
__precompile__()
module SoleFeatures

using Tables
Expand All @@ -19,25 +20,82 @@ export AbstractEmbeddedFS
export AbstractLimiter
# structs

export VarianceThreshold
export VarianceRanking
export RandomRanking
export StatisticalAtLeastOnce
export StatisticalMajority
export PearsonCorRanking
export Chi2Ranking
export Chi2Threshold
export MutualInformationClassifRanking
export CompoundStatisticalAtLeastOnce
export CompoundStatisticalMajority
export CorrelationFilter

# main functions
export select, apply, buildbitmask, transform, transform!
# utils
export bm2var

@reexport using DataFrames

# Python packages that `__init__` expects to find in PyCall's Conda root environment.
const req_py_pkgs = ["scipy", "scikit-learn", "skfeature"]

# Placeholders for Python modules. They are created as null handles here and filled in
# place (via `copy!`) by `__init__`.
const fs = PyNULL()
const construct_w = PyNULL()
const lap_score = PyNULL()
const fisher_score = PyNULL()

"""
    __init__()

Module initializer. Lists the packages in PyCall's Conda root environment and, when any
entry of `req_py_pkgs` is missing, enables pip interop, adds `scipy` and `scikit-learn`
through Conda and installs `skfeature` from its Git repository through pip. It then
imports the required Python modules into the module-level handles `fs`, `construct_w`,
`lap_score` and `fisher_score`.
"""
function __init__()
    rootenv = PyCall.Conda.ROOTENV
    installed = getindex.(PyCall.Conda.parseconda(`list`, rootenv), "name")

    # Install everything as soon as at least one required package is absent.
    if !issubset(req_py_pkgs, installed)
        PyCall.Conda.pip_interop(true, rootenv)
        foreach(PyCall.Conda.add, ("scipy", "scikit-learn"))
        PyCall.Conda.pip(
            "install",
            "git+https://github.com/jundongl/scikit-feature.git#egg=skfeature",
            rootenv,
        )
    end

    # (handle, Python module, package handed to `pyimport_conda`), imported in order.
    bindings = (
        (fs, "sklearn.feature_selection", "scikit-learn"),
        (construct_w, "skfeature.utility.construct_W", "skfeature"),
        (lap_score, "skfeature.function.similarity_based.lap_score", "skfeature"),
        (fisher_score, "skfeature.function.similarity_based.fisher_score", "skfeature"),
    )
    for (handle, pymodule, pkg) in bindings
        copy!(handle, pyimport_conda(pymodule, pkg))
    end
end

include("interface.jl")
include("core.jl")
# Utils
include("utils/utils.jl")
# Filters
include("filters/interface.jl")

include("filters/univariate/randomcriterion.jl")
include("filters/univariate/statisticalcriterion.jl")
include("filters/univariate/variancecriterion.jl")
include("filters/univariate/chi2criterion.jl")
# include("filters/univariate/utils.jl")
# include("filters/multivariate/correlationfilter.jl")

include("filters/univariate/randomfilter.jl")
include("filters/univariate/statisticalfilter.jl")
include("filters/univariate/variancefilter.jl")
include("filters/univariate/chi2filter.jl")
include("filters/univariate/pearsoncorfilter.jl")
include("filters/univariate/mutualinformationclassif.jl")
include("filters/univariate/suplapscorefiler.jl")
include("filters/univariate/fisherscorefilter.jl")
include("filters/univariate/utils.jl")
include("filters/multivariate/correlationfilter.jl")

# Experimental
include("experimental/Experimental.jl")
import .Experimental
Expand Down
12 changes: 10 additions & 2 deletions src/experimental/extraction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,20 @@ function _extract(v::AbstractVector, e::Extractor)
return res
end

# function extract(df::AbstractDataFrame, es::Array{<:Extractor})
# return DataFrame(string.(es) .=> _extract.(getindex.([df], :, getindex.(es, 1)), es))
# end

"""
    extract(df::AbstractDataFrame, es::Array{<:Extractor})

Apply every extractor in `es` to the column of `df` selected by its first component
(`e[1]`) and return a new `DataFrame` with one column per extractor, named
`string(e)`.

The extractors are evaluated with `Threads.@threads`; each iteration writes only to its
own column of a shared buffer. The result of `_extract` is broadcast into that column,
so it is presumably expected to hold one value per row of `df` (TODO confirm).
"""
function extract(df::AbstractDataFrame, es::Array{<:Extractor})
    # Untyped buffer: the value type produced by `_extract` is not known up front.
    m = Matrix{Any}(undef, size(df, 1), length(es))
    Threads.@threads for (i, e) in collect(enumerate(es))
        m[:, i] .= _extract(df[:, e[1]], e)
    end
    # Rebuild every column through a comprehension so that its element type is
    # narrowed from `Any` to what the column actually contains.
    return DataFrame([[v for v in m[:, i]] for i in axes(m, 2)], string.(es))
end

function groupby(es::Array{<:Extractor}, idxes::Union{Int, NTuple{N, Int}}) where {N}
res = Dict{Extractor, Vector{Extractor}}()
res = Dict{Any, Vector{Extractor}}()
for e in es
push!(get!(res, keepat(e, idxes), Vector{Extractor}()), e)
end
Expand Down
122 changes: 118 additions & 4 deletions src/experimental/windows/windows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct MovingWindowsIndex{T <: AbstractMovingWindows} <: AbstractMovingWindowsIn
end
end

# Functor form: applying an index to an array extracts the window it designates.
# A single `AbstractArray` method also covers vectors, so no separate vector method
# is needed.
(i::MovingWindowsIndex)(m::AbstractArray) = getwindow(m, i)

# Value of the `index` field.
index(mwi::MovingWindowsIndex) = mwi.index
# Moving-windows object stored in the `movingwindows` field, dereferenced with `[]`.
movingwindows(mwi::MovingWindowsIndex) = mwi.movingwindows[]
Expand All @@ -33,8 +33,8 @@ function Base.isequal(mwi1::AbstractMovingWindowsIndex, mwi2::AbstractMovingWind
return index(mwi1) == index(mwi2) && movingwindows(mwi1) == movingwindows(mwi2)
end

"""
    getwindow(a::AbstractArray, mwi::AbstractMovingWindowsIndex)

Return the window of `a` designated by `mwi`, by forwarding to
`getwindow(a, movingwindows(mwi), index(mwi))`.
"""
function getwindow(a::AbstractArray, mwi::AbstractMovingWindowsIndex)
    return getwindow(a, movingwindows(mwi), index(mwi))
end

# ======================== AbstractMovingWindow functions
Expand All @@ -60,9 +60,20 @@ function Base.iterate(mw::AbstractMovingWindows, i::Integer = 1)
return (getindex(mw, i), i+1)
end

"""
    getwindow(v::AbstractArray, mw::AbstractMovingWindows, i)

Return one window of `v`: compute every window with `getwindows(v, mw)` and index the
result with `i`. `i` may be a single integer, several integers, a vector of integers,
or a tuple holding one integer per dimension of `v`.
"""
function getwindow(v::AbstractArray, mw::AbstractMovingWindows, i::Integer)
    return getwindows(v, mw)[i]
end
function getwindow(v::AbstractArray, mw::AbstractMovingWindows, i::Integer...)
    # TODO: need to check length(i) == ndims(v)
    return getwindows(v, mw)[i...]
end
function getwindow(
    v::AbstractArray, mw::AbstractMovingWindows, i::AbstractVector{<:Integer}
)
    # TODO: need to check length(i) == ndims(v)
    return getwindows(v, mw)[i...]
end
# `AbstractArray{T,N}`: the first parameter is the element type and the second the
# number of dimensions, so `N` ties the tuple length to `ndims(v)`.
function getwindow(
    v::AbstractArray{T,N}, mw::AbstractMovingWindows, i::NTuple{N,<:Integer}
) where {T,N}
    return getwindows(v, mw)[i...]
end

# Fixed number moving windows

Expand Down Expand Up @@ -92,6 +103,10 @@ end
"""
    getwindows(v, mw::FixedNumMovingWindows)

Split `v` into windows using the number of windows and the relative overlap stored in
`mw`. A vector is handed to `_moving_window` directly. For any other array,
`_moving_window` is applied to the index range `1:size(v, d)` of each dimension, and
one slice of `v` is returned for every combination of per-dimension windows.
"""
function getwindows(v::AbstractVector, mw::FixedNumMovingWindows)
    n, overlap = nwindows(mw), reloverlap(mw)
    return _moving_window(v; nwindows=n, relative_overlap=overlap)
end

function getwindows(v::AbstractArray, mw::FixedNumMovingWindows)
    n, overlap = nwindows(mw), reloverlap(mw)
    per_dim = map(size(v)) do len
        _moving_window(range(1, len); nwindows=n, relative_overlap=overlap)
    end
    # Cartesian product: every combination of one window per dimension.
    return collect(v[idxs...] for idxs in Base.product(per_dim...))
end

# Fixed size moving windows

Expand Down Expand Up @@ -126,3 +141,102 @@ function getwindows(v::AbstractVector, mw::FixedSizeMovingWindows)
npoints!(mw, v)
return _moving_window(v; window_size=wsize(mw), window_step=wstep(mw))
end
"""
    getwindows(v::AbstractArray, mw::FixedSizeMovingWindows)

Split `v` into windows using the window size and step stored in `mw`.
`_moving_window` is applied to the index range `1:size(v, d)` of each dimension, and
one slice of `v` is returned for every combination of per-dimension windows.
"""
function getwindows(v::AbstractArray, mw::FixedSizeMovingWindows)
    sz, step = wsize(mw), wstep(mw)
    per_dim = map(size(v)) do len
        _moving_window(range(1, len); window_size=sz, window_step=step)
    end
    # Cartesian product: every combination of one window per dimension.
    return collect(v[idxs...] for idxs in Base.product(per_dim...))
end


### Centered Window ###

"""
    CenteredMovingWindow(nwindows::Integer)

Moving-windows specification that stores only the number of windows to produce.

Throws a `DomainError` when `nwindows` is not greater than 0.
"""
struct CenteredMovingWindow <: AbstractMovingWindows
    nwindows::Int

    function CenteredMovingWindow(nwindows::Integer)
        nwindows > 0 || throw(DomainError(nwindows, "Must be greater than 0"))
        return new(nwindows)
    end
end

# Number of windows stored in `mw`.
nwindows(mw::CenteredMovingWindow) = mw.nwindows

# The length of the specification is its number of windows.
Base.length(mw::CenteredMovingWindow) = nwindows(mw)

# Two specifications are equal when they hold the same number of windows.
Base.isequal(mw1::CenteredMovingWindow, mw2::CenteredMovingWindow) =
    nwindows(mw1) == nwindows(mw2)

# TODO: move this in SoleBase!!!
"""
    _centered_moving_window(l::Integer, nw::Integer)

Return `nw` index ranges over `1:l` that shrink symmetrically: for `i = 0, …, nw - 1`,
range `i` drops `round(Int, i * l / (2nw))` positions from both ends, clamped to
`1:l`. The first range is therefore the whole `1:l`.
"""
function _centered_moving_window(l::Integer, nw::Integer)
    bound_dist = l / (2 * nw)
    return map(0:(nw - 1)) do i
        # Same offset on both sides keeps the window centered.
        trim = round(Int, i * bound_dist)
        max(1, 1 + trim):min(l, l - trim)
    end
end

"""
    getwindows(v::AbstractArray, mw::CenteredMovingWindow)

Return `nwindows(mw)` slices of `v`. Index ranges are computed per dimension with
`_centered_moving_window` and combined position by position, so the k-th slice uses
the k-th range along every dimension.
"""
function getwindows(v::AbstractArray, mw::CenteredMovingWindow)
    nw = nwindows(mw)
    per_dim = map(len -> _centered_moving_window(len, nw), size(v))
    return collect(v[idxs...] for idxs in zip(per_dim...))
end
# TODO: tests for CenteredMovingWindow

### Growing Window ###

"""
    GrowingdMovingWindow(nwindows::Integer)

Moving-windows specification that stores only the number of windows to produce.

Throws a `DomainError` when `nwindows` is not greater than 0.
"""
struct GrowingdMovingWindow <: AbstractMovingWindows
    nwindows::Int

    function GrowingdMovingWindow(nwindows::Integer)
        nwindows > 0 || throw(DomainError(nwindows, "Must be greater than 0"))
        return new(nwindows)
    end
end

# Number of windows stored in `mw`.
nwindows(mw::GrowingdMovingWindow) = mw.nwindows

# The length of the specification is its number of windows.
Base.length(mw::GrowingdMovingWindow) = nwindows(mw)

# Two specifications are equal when they hold the same number of windows.
Base.isequal(mw1::GrowingdMovingWindow, mw2::GrowingdMovingWindow) =
    nwindows(mw1) == nwindows(mw2)

"""
    _growing_moving_window(l::Integer, nw::Integer)

Return `nw` index ranges that all start at 1. The `i`-th range ends at
`round(Int, l * i / nw)`, so the last one is `1:l`.
"""
function _growing_moving_window(l::Integer, nw::Integer)
    return map(1:nw) do i
        stop = round(Int, l * i / nw)
        1:stop
    end
end

"""
    getwindows(v::AbstractArray, mw::GrowingdMovingWindow)

Return `nwindows(mw)` slices of `v`. Index ranges are computed per dimension with
`_growing_moving_window` and combined position by position, so the k-th slice uses
the k-th range along every dimension.
"""
function getwindows(v::AbstractArray, mw::GrowingdMovingWindow)
    nw = nwindows(mw)
    per_dim = map(len -> _growing_moving_window(len, nw), size(v))
    return collect(v[idxs...] for idxs in zip(per_dim...))
end

### Reverse Growing Window ###

"""
    ReverseGrowingdMovingWindow(nwindows::Integer)

Moving-windows specification that stores only the number of windows to produce.

Throws a `DomainError` when `nwindows` is not greater than 0.
"""
struct ReverseGrowingdMovingWindow <: AbstractMovingWindows
    nwindows::Int

    function ReverseGrowingdMovingWindow(nwindows::Integer)
        nwindows > 0 || throw(DomainError(nwindows, "Must be greater than 0"))
        return new(nwindows)
    end
end

# Number of windows stored in `mw`.
nwindows(mw::ReverseGrowingdMovingWindow) = mw.nwindows

# The length of the specification is its number of windows.
Base.length(mw::ReverseGrowingdMovingWindow) = nwindows(mw)

# Two specifications are equal when they hold the same number of windows.
Base.isequal(mw1::ReverseGrowingdMovingWindow, mw2::ReverseGrowingdMovingWindow) =
    nwindows(mw1) == nwindows(mw2)

"""
    _revgrowing_moving_window(l::Integer, nw::Integer)

Return `nw` index ranges that all end at `l`. They are produced for `i = nw, …, 1`,
each starting at `round(Int, l * (i - 1) / nw) + 1`, so the list goes from the
shortest range to the full `1:l`.
"""
function _revgrowing_moving_window(l::Integer, nw::Integer)
    return map(nw:-1:1) do i
        start = round(Int, l * (i - 1) / nw) + 1
        start:l
    end
end

"""
    getwindows(v::AbstractArray, mw::ReverseGrowingdMovingWindow)

Return `nwindows(mw)` slices of `v`. Index ranges are computed per dimension with
`_revgrowing_moving_window` and combined position by position, so the k-th slice uses
the k-th range along every dimension.
"""
function getwindows(v::AbstractArray, mw::ReverseGrowingdMovingWindow)
    nw = nwindows(mw)
    per_dim = map(len -> _revgrowing_moving_window(len, nw), size(v))
    return collect(v[idxs...] for idxs in zip(per_dim...))
end
10 changes: 10 additions & 0 deletions src/filters/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ isunivariate(sc::AbstractScoringCriterion) =
# Derived traits: each one is the negation of its counterpart.
ismultivariate(sc::AbstractScoringCriterion) = !isunivariate(sc)
isunsupervised(sc::AbstractScoringCriterion) = !issupervised(sc)

# Abstract hierarchy of univariate filters. Every type is parameterized by the limiter
# type `T<:AbstractLimiter`; the concrete filters subtype the matching `Abstract*` type.
abstract type UnivariateFilterBased{T<:AbstractLimiter} <: AbstractFilterBasedFS end
abstract type AbstractVarianceFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractRandomFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractStatisticalFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractChi2Filter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractPearsonCorFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractMutualInformationClassif{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractSupLaplacianScore{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractFisherScore{T<:AbstractLimiter} <: UnivariateFilterBased{T} end

# Index type associated with a criterion: `Integer` when it is multivariate, a vector of
# integers otherwise.
# NOTE(review): the two branches look swapped with respect to the trait names (one would
# expect a single index for a univariate criterion) -- confirm against the callers.
idxtype(sc::AbstractScoringCriterion) = ismultivariate(sc) ? Integer : AbstractVector{<:Integer}
# Fallback: raises an error asking for a `scoretype` method for the concrete type.
scoretype(sc::AbstractScoringCriterion) =
error("Please, provide method scoretype($(typeof(sc))).")
Expand Down
4 changes: 2 additions & 2 deletions src/filters/univariate/chi2criterion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ end
# ========================================================================================
# CUSTOM CONSTRUCTORS

"""
    Chi2Threshold(; alpha = 0.05)

Build a `Chi2Filter` whose limiter is `ThresholdLimiter(alpha, <=)`.
"""
function Chi2Threshold(; alpha = 0.05)
    limiter = ThresholdLimiter(alpha, <=)
    return Chi2Filter(limiter)
end

"""
    Chi2Ranking(nbest)

Build a `Chi2Filter` whose limiter is `RankingLimiter(nbest, false)`.
"""
function Chi2Ranking(nbest)
    limiter = RankingLimiter(nbest, false)
    return Chi2Filter(limiter)
end

# Previous spellings of the two constructors above; they build the same filters.
ThresholdChi2(; alpha = 0.05) = Chi2Threshold(; alpha = alpha)
RankingChi2(nbest) = Chi2Ranking(nbest)
23 changes: 23 additions & 0 deletions src/filters/univariate/fisherscorefilter.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Filter scored with the Python `fisher_score` module (see the `score` method for this
# type). The only stored state is the limiter, whose type is the parameter `T`.
struct FisherScoreFilter{T <: AbstractLimiter} <: AbstractFisherScore{T}
limiter::T
# parameters
end

# ========================================================================================
# TRAITS

# Declares every Fisher-score filter as supervised.
# NOTE(review): the scoring-criterion interface spells this trait `issupervised` (no
# underscore) -- confirm that `is_supervised` is the function the callers query.
is_supervised(::AbstractFisherScore) = true

# ========================================================================================
# SCORE

"""
    score(X::AbstractDataFrame, y::AbstractVector{<:Class}, selector::FisherScoreFilter)

Return one score per column of `X`, computed by the Python function
`fisher_score.fisher_score` on `Matrix(X)` and on the labels `y` encoded through
`labelmap`/`labelencode`. The result is converted to `Vector{Float64}`.

`selector` is used for dispatch only.
"""
function score(
    X::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::FisherScoreFilter
)::Vector{Float64}
    # Encode the class labels before handing them to Python.
    encoded = labelencode(labelmap(y), y)
    return fisher_score.fisher_score(Matrix(X), encoded)
end
27 changes: 27 additions & 0 deletions src/filters/univariate/mutualinformationclassif.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Filter scored with scikit-learn's `mutual_info_classif` (see the `score` method for
# this type). The only stored state is the limiter, whose type is the parameter `T`.
struct MutualInformationClassif{T <: AbstractLimiter} <: AbstractMutualInformationClassif{T}
limiter::T
# parameters
end

# ========================================================================================
# TRAITS

# Declares every mutual-information filter as supervised.
# NOTE(review): the scoring-criterion interface spells this trait `issupervised` (no
# underscore) -- confirm that `is_supervised` is the function the callers query.
is_supervised(::AbstractMutualInformationClassif) = true

# ========================================================================================
# SCORE

"""
    score(X::AbstractDataFrame, y::AbstractVector{<:Class}, selector::MutualInformationClassif)

Return one score per column of `X`, computed by the Python function
`fs.mutual_info_classif` on `Matrix(X)` and the labels `y` (passed unchanged). The
result is converted to `Vector{Float64}`.

`selector` is used for dispatch only.
"""
function score(
    X::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::MutualInformationClassif
)::Vector{Float64}
    return fs.mutual_info_classif(Matrix(X), y)
end

# ========================================================================================
# CUSTOM CONSTRUCTORS

"""
    MutualInformationClassifRanking(nbest)

Build a `MutualInformationClassif` filter whose limiter is
`RankingLimiter(nbest, true)`.
"""
function MutualInformationClassifRanking(nbest)
    limiter = RankingLimiter(nbest, true)
    return MutualInformationClassif(limiter)
end
Loading

0 comments on commit 1d15302

Please sign in to comment.