# auxiliary functions


CheckValidity <- function(dframe, col.required, no.na=col.required){
  # Check the validity of the data frame
  #
  # Args:
  #   dframe: a data frame 
  #   col.required: a vector of required column names
  #   no.na: a vector of the names of columns that must not contain NAs
  #
  # Return:
  #   Just check and throw errors
  #
  
  if (!all(col.required %in% colnames(dframe))){
    stop(paste("Data must contain these columns:", 
               paste(col.required, collapse=" & ")))
  }
  
  if (any(is.na(dframe[, no.na]))){
    stop(paste("Columns", paste(no.na, collapse=" & "), "should contain no NA values!"))
  }
  
}

MaxOccurrence <- function(v){
  # Return the element that appears the most in a vector, NA if not unique
  #
  # Args:
  #   v: a vector
  #
  # Return:
  #   The element in v that appears most often. If there are multiple such 
  #   elements, return NA
  #
  
  # Error handling
  if (any(is.na(v))){
    stop("No NA values are allowed!")
  }
  
  vtab <- table(v)
  
  max.occurrence <- names(vtab)[which(vtab == max(vtab))]
  
  if (length(max.occurrence) > 1) {
    return(NA)
  }
  return(max.occurrence)
}


WeightedWinner <- function(dframe, wt, bias=function(cat)0){
  # Compute the result of the weighted plurality rule (NAs for ties) for a 
  # single item
  #
  # Args: 
  #   dframe: a data frame containing columns Item, Annotator, and Category
  #           Item is supposed to be unique
  #   wt: a weighting function that takes in Item, Annotator and Category and 
  #     return the corresponding weight
  #   bias: a function that assigns the initial bias for each category
  #
  # Result:
  #   The category with the greatest weight for the corresponding item. 
  #   If there is a tie, it will be an NA
  
  #
  weights <- sapply(split(dframe, dframe$Category), 
                    function(d)sum(mapply(wt, d$Item, d$Annotator, d$Category)))
  
  for (cat in names(weights)){
    weights[cat] <- weights[cat] + bias(cat)
  }
  
  winner <- names(weights)[which(weights == max(weights))]
  
  if (length(winner) > 1) {
    return(NA)
  }
  return(winner)
}

IndFreq <- function(dframe){
  # Compute the frequency table for each individual
  #
  # Args: 
  #   dframe: a data frame containing columns Item, Annotator, and Category
  #           Item is supposed to be unique
  #
  # Result:
  #   An array of the relative frequency tables, one for each annotator
  #
  
  return(tapply(dframe$Category, dframe$Annotator, 
                function(v)table(v, dnn=NULL)/length(v)))
}

AgreementVector <- function(ann1, ann2){
  # Compute the agreement vector between two collective annotations
  #
  # Args:
  #   ann1, ann2: named vectors representing the collective annotations.
  #
  # Return:
  #   The agreement vector that has the same length as ann1 and specifies 
  #   whether each item in ann1 has the same category in ann2.
  #   
  # Note:
  #   (1) NAs will remain in the vector
  #   (2) all items in ann1 must be in ann2, but the reverse need not be true
  #
  
  # TODO: Error handling: check uniqueness of the vector names
  
  result <- rep(FALSE, length(ann1))
  names(result) <- names(ann1)
  
  for (item in names(result)){
    if (is.null(ann2[item])) {
      stop(paste("Second annotation does not include item", 
                 item))
    }
    
    result[item] <- ann1[item] == ann2[item]
    
  }
  
  return(result)
}