### =========================================================================
### GFF (General Feature Format) support (all three versions, plus GTF)
### -------------------------------------------------------------------------

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Classes
###

setClass("GFFFile", contains = "RTLFile")

## private
GFFFile <- function(resource, version = c("", "1", "2", "3")) {
  version <- match.arg(version)
  new(gffFileClass(version), resource = resource)
}

setClass("GFF1File", contains = "GFFFile")
GFF1File <- function(resource) {
  GFFFile(resource, "1")
}

setClass("GFF2File", contains = "GFFFile")
GFF2File <- function(resource) {
  GFFFile(resource, "2")
}

setClass("GFF3File", contains = "GFFFile")
GFF3File <- function(resource) {
  GFFFile(resource, "3")
}

setClass("GTFFile", contains = "GFF2File")
GTFFile <- function(resource) {
  new("GTFFile", GFF2File(resource))
}

setClass("GVFFile", contains = "GFF3File")
GVFFile <- function(resource) {
  new("GVFFile", GFF3File(resource))
}

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Export
###

setGeneric("export.gff",
           function(object, con, ...) standardGeneric("export.gff"))

setMethod("export.gff", "ANY",
          function(object, con, ...)
          {
            export(object, con, ...)
          })

setMethod("export", c("ANY", "GFFFile"),
          function(object, con, format, ...)
          {
            if (hasMethod("asGFF", class(object)))
              object <- asGFF(object)
            res <- try(as(object, "GRanges"), silent = TRUE)
            if (is(res, "try-error")) {
              res <- try(as(object, "GenomicRangedDataList"), silent = TRUE)
              if (is(res, "try-error"))
                stop("cannot export object of class '", class(object), "': ",
                     res)
            }
            object <- res
            if (!missing(format))
              checkArgFormat(con, format)
            export(object, con, ...)
          })

setMethod("export", c("GRangesList", "GFFFile"),
          function(object, con, format, ...)
          {
            object <- asGFF(object)
            callGeneric()
          }
          )

setMethod("export", c("GenomicRanges", "GFFFile"),
          function(object, con, format, version = c("1", "2", "3"),
                   source = "rtracklayer", append = FALSE, index = FALSE)
          {
            if (!missing(format))
              checkArgFormat(con, format)
            if (!missing(version) || !length(gffFileVersion(con)))
              con <- asGFFVersion(con, match.arg(version))
            version <- gffFileVersion(con)
            
            file <- con
            con <- resource(con)
            
            if (!append) {
              cat("", file = con) # clear any existing file
              gffComment(con, "gff-version", version)
              sourceVersion <- try(package.version(source), TRUE)
              if (!inherits(sourceVersion, "try-error"))
                gffComment(con, "source-version", source, sourceVersion)
              gffComment(con, "date", base::format(Sys.time(), "%Y-%m-%d"))
              genome <- singleGenome(genome(object))
              if (!is.na(genome))
                gffComment(con, "genome-build", paste(".", genome, sep = "\t"))
            }
            
            if (index)
              object <- sortBySeqnameAndStart(object)

            seqname <- seqnames(object)
            if (is.null(object$ID))
              object$ID <- names(object)
            if (version == "3")
              seqname <- urlEncode(seqname, "a-zA-Z0-9.:^*$@!+_?|-")
            if (!is.null(object$source) && missing(source))
              source <- object$source
            if (version == "3")
              source <- urlEncode(source, "\t\n\r;=%&,", FALSE)
            feature <- object$type
            if (is.null(feature))
              feature <- "sequence_feature"
            score <- score(object)
            if (is.null(score)) {
              score <- NA
            } else {
              if (!("score" %in% colnames(mcols(object))))
                ## avoid outputting as attribute
                colnames(mcols(object))[1] <- "score" 
            }
            strand <- strand(object)
            if (is.null(strand))
              strand <- NA
            frame <- object$phase
            if (is.null(frame))
              frame <- NA
            
            table <- data.frame(seqname, source, feature, start(object),
                                end(object), score, strand, frame)

            attrs <- NULL
            if (version == "1") {
              attrs <- object$group
              if (is.null(attrs))
                attrs <- as.vector(seqname)
            } else {
              builtin <- c("type", "score", "phase", "source")
              custom <- setdiff(colnames(mcols(object)), builtin)
              if (length(custom)) {
                if (version == "3") tvsep <- "=" else tvsep <- " "
                attrs <- mcols(object)
                attrs <- as.data.frame(sapply(custom, function(name) {
                  x <- attrs[[name]]
                  x_flat <- if (is(x, "List")) unlist(x, use.names=FALSE) else x
                  x_char <- as.character(x_flat)
                  x_char <- sub(" *$", "", sub("^ *", "", as.character(x_char)))
                  if (version == "3")
                    x_char <- urlEncode(x_char, "%\t\n\r;=&,", FALSE)
                  if (is(x, "List")) {
                    x_char[is.na(x_char)] <- "."
                    x_char <- pasteCollapse(relist(x_char, x))
                    x_char[elementLengths(x) == 0] <- NA
                  }
                  ## FIXME: add option so these become "." instead of removing
                  x_char[is.na(x_char)] <- "\r"
                  if (!is.numeric(x_flat) && version != "3")
                    x_char <- paste0("\"", x_char, "\"")
                  paste(name, x_char, sep = tvsep)
                }, simplify = FALSE))
                if (version == "3") sep <- ";" else sep <- "; "
                attrs <- do.call(paste, c(attrs, sep = sep))
                attrs <- gsub("[^;]*?\r\"?(;|$)", "", attrs)
                attrs[nchar(attrs) == 0] <- NA
              }
            }
            
            scipen <- getOption("scipen")
            options(scipen = 100) # prevent use of scientific notation
            on.exit(options(scipen = scipen))
            
            if (!is.null(attrs)) { # write out the rows with attributes first
              write.table(cbind(table, attrs)[!is.na(attrs),], con, sep = "\t",
                          na = ".", quote = FALSE, col.names = FALSE,
                          row.names = FALSE, append = TRUE)
              table <- table[is.na(attrs),]
            }
            
            write.table(table, con, sep = "\t", na = ".", quote = FALSE,
                        col.names = FALSE, row.names = FALSE, append = TRUE)
            if (index)
              indexTrack(file)
            invisible(NULL)
          })

setMethod("export", c("GenomicRangesList", "GFFFile"),
          .export_GenomicRangesList_RTLFile)

setGeneric("export.gff1",
           function(object, con, ...) standardGeneric("export.gff1"))
setMethod("export.gff1", "ANY",
          function(object, con, ...) export(object, con, "gff1", ...))

setGeneric("export.gff2",
           function(object, con, ...) standardGeneric("export.gff2"))
setMethod("export.gff2", "ANY",
          function(object, con, ...) export(object, con, "gff2", ...))

setGeneric("export.gff3",
           function(object, con, ...) standardGeneric("export.gff3"))
setMethod("export.gff3", "ANY",
          function(object, con, ...) export(object, con, "gff3", ...))

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Import
###

setGeneric("import.gff", function(con, ...) standardGeneric("import.gff"))

setMethod("import.gff", "ANY",
          function(con, ...)
          {
            import(con, "gff", ...)
          })

setMethod("import", "GFFFile",
          function(con, format, text, version = c("", "1", "2", "3"),
                   genome = NA, colnames = NULL,
                   which = NULL, feature.type = NULL,
                   sequenceRegionsAsSeqinfo = FALSE)
          {
            if (!missing(format))
              checkArgFormat(con, format)
            if (!missing(version))
              con <- asGFFVersion(con, match.arg(version))
            stopifnot(isTRUEorFALSE(sequenceRegionsAsSeqinfo))
            
            ## download the file first if it's remote
            if (is.character(resource(con))) {
                uri <- .parseURI(resource(con))
                if (uri$scheme %in% c("ftp", "http")) {
                    destfile <- tempfile()
                    download.file(resource(con), destfile)
                    con@resource <- destfile
                }
            }

            sniffed <- .sniffGFFVersion(resource(con))
            version <- gffFileVersion(con)
            if (!length(version)) {
              if (is.null(sniffed))
                sniffed <- "1"
              con <- asGFFVersion(con, sniffed)
            }
            
            if (length(version) && !is.null(sniffed) &&
                !identical(sniffed, version))
              warning("gff-version directive indicates version is ", sniffed,
                      ", not ", version)

            if (is.na(genome)) {
              genome <- genome(con)
              if (is.null(genome))
                  genome <- NA
            }
            
### FIXME: a queryForLines() function would be more efficient

            ## Temporarily disable use of Tabix Index.
            ## TODO: Restore use of Tabix Index!
            #con <- queryForResource(con, which)
            con <- queryForResource(con)

            gr <- readGFFAsGRanges(con,
                                   version=version,
                                   colnames=colnames,
                                   filter=list(type=feature.type),
                                   genome=genome,
                                   sequenceRegionsAsSeqinfo=
                                       sequenceRegionsAsSeqinfo,
                                   speciesAsMetadata=TRUE)
            if (is.na(genome)) {
                ans_seqinfo <- seqinfo(gr)
            } else {
                ans_seqinfo <- NULL
            }
            GenomicData(ranges(gr), mcols(gr),
                        strand = strand(gr), chrom = seqnames(gr),
                        genome = genome, seqinfo = ans_seqinfo,
                        which = if (attr(con, "usedWhich")) NULL else which,
                        metadata = metadata(gr))
          })

setGeneric("import.gff1",
           function(con, ...) standardGeneric("import.gff1"))
setMethod("import.gff1", "ANY",
          function(con, ...) import(con, "gff1", ...))

setGeneric("import.gff2",
           function(con, ...) standardGeneric("import.gff2"))
setMethod("import.gff2", "ANY",
          function(con, ...) import(con, "gff2", ...))

setGeneric("import.gff3",
           function(con, ...) standardGeneric("import.gff3"))
setMethod("import.gff3", "ANY",
          function(con, ...) import(con, "gff3", ...))

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### DNAStringSet from fasta data
###



### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Coercion
###

setGeneric("asGFF", function(x, ...) standardGeneric("asGFF"))

setMethod("asGFF", "GRangesList",
          function(x, parentType = "mRNA", childType = "exon") {
            parent_range <- range(x)
            if (!all(elementLengths(parent_range) == 1))
              stop("Elements in a group must be on same sequence and strand")
            parents <- unlist(parent_range, use.names = FALSE)
            children <- unlist(x, use.names = FALSE)
            makeId <- function(x, prefix) {
                paste(prefix, seq_len(length(x)), sep = "")
            }
            parentIds <- makeId(parents, parentType)
            values(parents)$type <- parentType
            values(parents)$ID <- parentIds
            values(parents)$Name <- names(x)
            values(children)$type <- childType
            values(children)$ID <- makeId(children, childType)
            values(children)$Name <- names(children)
            values(children)$Parent <- rep.int(parentIds, elementLengths(x))
            allColumns <- union(colnames(values(parents)),
                                colnames(values(children)))
            rectifyDataFrame <- function(x) {
              x[setdiff(allColumns, colnames(x))] <- DataFrame(NA)
              x[allColumns]
            }
            values(children) <- rectifyDataFrame(values(children))
            values(parents) <- rectifyDataFrame(values(parents))
            c(parents, children)
          })

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Utilities
###

scanGFFDirectives <- function(con, tag = NULL) {
  con <- connection(con, "r")
  directives <- character()
  lines <- line <- readLines(con, n = 1)
  while(grepl("^#", line)) {
    if (grepl("^##", line)) {
        directives <- c(directives, line)
    }
    line <- readLines(con, n = 1)
    if (length(line) == 0L)
        break
    lines <- c(lines, line)
  }
  pushBack(lines, con)
  sub("^[^[:space:]]* ", "", grep(paste0("^##", tag), directives, value = TRUE))
}

gffGenomeBuild <- function(x) {
  genome_build <- scanGFFDirectives(x, "genome-build")
  unlist(strsplit(genome_build, "\t", fixed = TRUE))
}

setMethod("provider", "GFFFile", function(x) {
  gffGenomeBuild(x)[1]
})

setMethod("providerVersion", "GFFFile", function(x) {
  gffGenomeBuild(x)[2]
})
setMethod("genome", "GFFFile", function(x) providerVersion(x))

gffComment <- function(con, ...) 
  cat("##", paste(...), "\n", sep = "", file = con, append = TRUE)

.sniffGFFVersion <- function(con) {
  con <- connectionForResource(con, "r")
  version <- NULL
  lines <- line <- readLines(con, n = 1)
  while(grepl("^#", line)) {
    if (grepl("^##gff-version", line)) {
      version <- sub("^##gff-version ", "", line)
      break
    }
    line <- readLines(con, n = 1)
    lines <- c(lines, line)
  }
  pushBack(lines, con)
  version
}

gffFileClass <- function(version) {
  paste("GFF", version, "File", sep = "")
}

gffFileVersion <- function(file) {
  versions <- c("1", "2", "3")
  unlist(Filter(function(v) is(file, gffFileClass(v)), versions))
}

asGFFVersion <- function(con, version) {
  if (!is(con, gffFileClass(version))) {
    if (class(con) != "GFFFile")
      warning("Treating a '", class(con), "' as GFF version '", version, "'")
    con <- GFFFile(resource(con), version)
  }
  con
}
