Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions R/findOverlaps-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,118 @@ setMethod("findOverlaps", c("GenomicRanges", "GRangesList"),
}
)

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### "findOverlaps" methods for GRangesFactor objects
###

.findOverlaps_Factor_other <- function(query, subject,
maxgap=-1L, minoverlap=0L, type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"), ignore.strand=FALSE)
{
select <- match.arg(select)
type <- match.arg(type)
FUN <- function(Query, Select) {
findOverlaps(Query, subject, maxgap=maxgap, minoverlap=minoverlap,
type=type, select=Select, ignore.strand=ignore.strand)
}

if (length(query) < length(levels(query))) {
FUN(unfactor(query), Select=select)
} else {
if (select=="all") {
lev.hits <- FUN(levels(query), "all")
idx.hits <- findMatches(as.integer(query), queryHits(lev.hits))
Hits(from=queryHits(idx.hits), to=subjectHits(lev.hits)[subjectHits(idx.hits)],
nLnode=length(query), nRnode=length(subject), sort.by.query=TRUE)
} else {
lev.hits <- FUN(levels(query), select)
lev.hits[as.integer(query)]
}
}
}

setMethod("findOverlaps", c("GRangesFactor", "GenomicRanges"), .findOverlaps_Factor_other)

setMethod("findOverlaps", c("GRangesFactor", "GRangesList"), .findOverlaps_Factor_other)

.findOverlaps_other_Factor <- function(query, subject,
maxgap=-1L, minoverlap=0L, type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"), ignore.strand=FALSE)
{
select <- match.arg(select)
type <- match.arg(type)
FUN <- function(Subject, Select) {
findOverlaps(query, Subject, maxgap=maxgap, minoverlap=minoverlap,
type=type, select=Select, ignore.strand=ignore.strand)
}

if (length(subject) < length(levels(subject))) {
FUN(unfactor(subject), select)
} else {
if (select=="all") {
lev.hits <- FUN(levels(subject), "all")
idx.hits <- findMatches(subjectHits(lev.hits), as.integer(subject))
Hits(from=queryHits(lev.hits)[queryHits(idx.hits)], to=subjectHits(idx.hits),
nLnode=length(query), nRnode=length(subject), sort.by.query=TRUE)
} else {
s.idx <- as.integer(subject)
if (select=="first") {
# Get the index of the first range for each level.
u <- which(!duplicated(s.idx))
} else {
# Get the index of the last range for each level.
u <- which(!duplicated(s.idx, fromLast=TRUE))
}
lev.hits <- FUN(levels(subject)[s.idx[u]], select)
u[lev.hits]
}
}
}

setMethod("findOverlaps", c("GenomicRanges", "GRangesFactor"), .findOverlaps_other_Factor)

setMethod("findOverlaps", c("GRangesList", "GRangesFactor"), .findOverlaps_other_Factor)

setMethod("findOverlaps", c("GRangesFactor", "GRangesFactor"), function(query, subject,
maxgap=-1L, minoverlap=0L, type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"), ignore.strand=FALSE)
{
if (length(query) < length(levels(query))) {
query <- unfactor(query)
callGeneric()
} else if (length(subject) < length(levels(subject))) {
subject <- unfactor(subject)
callGeneric()
} else {
FUN <- function(Query, Subject, Select) {
findOverlaps(Query, Subject, maxgap=maxgap, minoverlap=minoverlap,
type=type, select=Select, ignore.strand=ignore.strand)
}

select <- match.arg(select)
if (select=="all") {
lev.hits <- FUN(levels(query), levels(subject), "all")
q.idx.hits <- findMatches(as.integer(query), queryHits(lev.hits))
s.idx.hits <- findMatches(subjectHits(lev.hits), as.integer(subject))
reconciler <- findMatches(subjectHits(q.idx.hits), queryHits(s.idx.hits))

Hits(from=queryHits(q.idx.hits)[queryHits(reconciler)],
to=subjectHits(s.idx.hits)[subjectHits(reconciler)],
nLnode=length(query), nRnode=length(subject), sort.by.query=TRUE)
} else {
s.idx <- as.integer(subject)
if (select=="first") {
# Get the index of the first range for each level.
u <- which(!duplicated(s.idx))
} else {
# Get the index of the last range for each level.
u <- which(!duplicated(s.idx, fromLast=TRUE))
}
lev.hits <- FUN(levels(query), levels(subject)[s.idx[u]], select)
u[lev.hits][as.integer(query)]
}
}
})

### =========================================================================
### findOverlaps-based methods
Expand Down
91 changes: 91 additions & 0 deletions inst/unitTests/test_findOverlaps-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,97 @@ test_findOverlaps_with_circular_sequences <- function()
.checkHits(1:4, 1:4, 4, 4, current5, select="all")
}

test_findOverlaps_with_GRangesFactors <- function() {
ir0 <- IRanges(c(5, 25, 20, 30, 45, 35, 10, 15), width=10)
gr0 <- GRanges("chrA", ir0)
F0 <- Factor(gr0[rep(seq_along(gr0), seq_along(gr0))])

ir1 <- IRanges(c(18, 8, 28, 38), width=5)
gr1 <- GRanges("chrA", ir1)
F1 <- Factor(gr1[rep(seq_along(gr1), rev(seq_along(gr1)))])

######################
# findOverlaps works with a Factor as the query.
out <- findOverlaps(F0, gr1)
ref <- findOverlaps(unfactor(F0), gr1)
checkIdentical(out, ref)

out <- findOverlaps(F0, gr1, minoverlap=4)
ref <- findOverlaps(unfactor(F0), gr1, minoverlap=4)
checkIdentical(out, ref)

# findOverlaps works with a Factor as the subject.
out <- findOverlaps(gr0, F1)
ref <- findOverlaps(gr0, unfactor(F1))
checkIdentical(sort(out), sort(ref)) # hack to overcome lack of subject sorting guarantees.

out <- findOverlaps(gr0, F1, maxgap=4)
ref <- findOverlaps(gr0, unfactor(F1), maxgap=4)
checkIdentical(sort(out), sort(ref)) # hack to overcome lack of subject sorting guarantees.

# findOverlaps works with two Factors.
out <- findOverlaps(F0, F1)
ref <- findOverlaps(unfactor(F0), unfactor(F1))
checkIdentical(out, ref)

out <- findOverlaps(F0, F1, maxgap=2)
ref <- findOverlaps(unfactor(F0), unfactor(F1), maxgap=2)
checkIdentical(out, ref)

######################
# All methods work with different settings for 'select'.

out <- findOverlaps(F0, gr1, select="first")
ref <- findOverlaps(unfactor(F0), gr1, select="first")
checkIdentical(out, ref)

out <- findOverlaps(F0, gr1, select="last")
ref <- findOverlaps(unfactor(F0), gr1, select="last")
checkIdentical(out, ref)

out <- findOverlaps(gr0, F1, select="first")
ref <- findOverlaps(gr0, unfactor(F1), select="first")
checkIdentical(out, ref)

out <- findOverlaps(gr0, F1, select="last")
ref <- findOverlaps(gr0, unfactor(F1), select="last")
checkIdentical(out, ref)

out <- findOverlaps(F0, F1, select="first")
ref <- findOverlaps(unfactor(F0), unfactor(F1), select="first")
checkIdentical(out, ref)

out <- findOverlaps(F0, F1, select="last")
ref <- findOverlaps(unfactor(F0), unfactor(F1), select="last")
checkIdentical(out, ref)

######################
# All methods work correctly with small Factors that cause unfactor()ing.
out <- findOverlaps(F0[1:2], gr1)
ref <- findOverlaps(unfactor(F0[1:2]), gr1)
checkIdentical(out, ref)

out <- findOverlaps(F0[3], gr1)
ref <- findOverlaps(unfactor(F0[3]), gr1)
checkIdentical(out, ref)

out <- findOverlaps(gr0, F1[4:3])
ref <- findOverlaps(gr0, unfactor(F1[4:3]))
checkIdentical(sort(out), sort(ref))

out <- findOverlaps(gr0, F1[1])
ref <- findOverlaps(gr0, unfactor(F1[1]))
checkIdentical(sort(out), sort(ref))

out <- findOverlaps(F0[10:13], F1)
ref <- findOverlaps(unfactor(F0[10:13]), unfactor(F1))
checkIdentical(out, ref)

out <- findOverlaps(F0, F1[2])
ref <- findOverlaps(unfactor(F0), unfactor(F1[2]))
checkIdentical(out, ref)
}

test_poverlaps <- function() {
ans <- poverlaps(GRanges(), GRanges())
checkIdentical(ans, Rle())
Expand Down
7 changes: 7 additions & 0 deletions man/findOverlaps-methods.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
\alias{findOverlaps,GRangesList,GenomicRanges-method}
\alias{findOverlaps,GenomicRanges,GRangesList-method}
\alias{findOverlaps,GRangesList,GRangesList-method}
\alias{findOverlaps,GRangesFactor,GenomicRanges-method}
\alias{findOverlaps,GenomicRanges,GRangesFactor-method}
\alias{findOverlaps,GRangesFactor,GRangesFactor-method}

\alias{countOverlaps}
\alias{countOverlaps,GenomicRanges,GenomicRanges-method}
Expand Down Expand Up @@ -97,6 +100,10 @@
For \code{type="equal"} with GRangesList objects, \code{query[[i]]}
matches \code{subject[[j]]} iff for each range in \code{query[[i]]}
there is an identical range in \code{subject[[j]]}, and vice versa.

If either or both \code{query} or \code{subject} are \link{GRangesFactor}
objects, overlaps are identified based on the unique levels. This improves
the efficiency of this function for large GRangesFactors with few levels.
}

\value{
Expand Down