# auxiliary functions CheckValidity <- function(dframe, col.required, no.na=col.required){ # Check the validity of the data frame # # Args: # dframe: a data frame # col.required: a vector of required column names # no.na: a vector of the names of columns that must not contain NAs # # Return: # Just check and throw errors # if (!all(col.required %in% colnames(dframe))){ stop(paste("Data must contain these columns:", paste(col.required, collapse=" & "))) } if (any(is.na(dframe[, no.na]))){ stop(paste("Columns", paste(no.na, collapse=" & "), "should contain no NA values!")) } } MaxOccurrence <- function(v){ # Return the element that appears the most in a vector, NA if not unique # # Args: # v: a vector # # Return: # The element in v that appears most often. If there are multiple such # elements, return NA # # Error handling if (any(is.na(v))){ stop("No NA values are allowed!") } vtab <- table(v) max.occurrence <- names(vtab)[which(vtab == max(vtab))] if (length(max.occurrence) > 1) { return(NA) } return(max.occurrence) } WeightedWinner <- function(dframe, wt, bias=function(cat)0){ # Compute the result of the weighted plurality rule (NAs for ties) for a # single item # # Args: # dframe: a data frame containing columns Item, Annotator, and Category # Item is supposed to be unique # wt: a weighting function that takes in Item, Annotator and Category and # return the corresponding weight # bias: a function that assigns the initial bias for each category # # Result: # The category with the greatest weight for the corresponding item. # If there is a tie, it will be an NA # weights <- sapply(split(dframe, dframe$Category), function(d)sum(mapply(wt, d$Item, d$Annotator, d$Category))) for (cat in names(weights)){ weights[cat] <- weights[cat] + bias(cat) } winner <- names(weights)[which(weights == max(weights))] if (length(winner) > 1) { return(NA) } return(winner) } IndFreq <- function(dframe){ # Compute the frequency table for each individual # # Args: # dframe: a data frame containing columns Item, Annotator, and Category # Item is supposed to be unique # # Result: # An array of the relative frequency tables, one for each annotator # return(tapply(dframe$Category, dframe$Annotator, function(v)table(v, dnn=NULL)/length(v))) } AgreementVector <- function(ann1, ann2){ # Compute the agreement vector between two collective annotations # # Args: # ann1, ann2: named vectors representing the collective annotations. # # Return: # The agreement vector that has the same length as ann1 and specifies # whether each item in ann1 has the same category in ann2. # # Note: # (1) NAs will remain in the vector # (2) all items in ann1 must be in ann2, but the reverse need not be true # # TODO: Error handling: check uniqueness of the vector names result <- rep(FALSE, length(ann1)) names(result) <- names(ann1) for (item in names(result)){ if (is.null(ann2[item])) { stop(paste("Second annotation does not include item", item)) } result[item] <- ann1[item] == ann2[item] } return(result) }