Count unique values from a column in matrix or data-frame
# Find number of unique first names.
# unique() works for character and factor columns alike. levels() returns
# NULL for a character column (so the count would be 0) and still lists
# factor levels that no longer occur in the data. na.omit() keeps NA from
# being counted as a name.
length(unique(na.omit(data$fname)))
Count specific values in vector or column
# Find number of occurrences of "ac" in vector nlist.
# The frequency table is indexed by the value being counted; the result is NA
# when "ac" does not occur in nlist.
table(nlist)["ac"]
Transpose matrix A into At
# t() returns the transpose of A; the result is stored in At.
At <- t(A)
Select unique rows based on a column of matrix
# Select records with unique employee id.
# duplicated() flags every repeat of an EmpID after its first occurrence, so
# negating it keeps only the first row seen for each id.
Udata <- data[!duplicated(data$EmpID),]
Get column number in R given column name
# Compare every column name with "fname" and return the matching position(s);
# the result is integer(0) when no column has that name.
which( colnames(data)=="fname" )
Remove all NAs from data frame
# Keep only the rows that have no NA in any column.
Data <- data[complete.cases(data),]
# Or
# na.omit() drops the same rows and also records which rows were removed in
# an "na.action" attribute on the result.
Data <- na.omit(data)
For every X how many Y are there?
# Group the values of y by x rounded to the nearest integer; the result is a
# list with one element per distinct rounded x.
split(y, round(x)) # If x is real we should use round with desired precision
# We can also do a box plot to get range of each split.
boxplot(split(y, round(x)))
Random sampling from vector x?
# Select 20 elements from x at random, without replacement.
# Note: if x is a single number >= 1, sample() draws from 1:x instead.
sample(x,20)
How to stratify the data?
# Stratify data into three strata.
# Exact fractions give the tercile cut points; TRUE is spelled out because T
# is an ordinary variable that can be reassigned.
quantile(data$column, c(1/3, 2/3), na.rm = TRUE)
# This gives the split points, which should then be used to actually split
# the data, e.g. by passing them as breaks to cut().
How to get index with maximum value?
# Find the index in temperature vector with maximum value.
# which.max() returns the position of the first maximum; NA values are
# ignored.
which.max(data$temp)
How to filter data based on specific value?
# Find where state is AZ.
# String literals need straight ASCII quotes; typographic quotes are not
# string delimiters in R and make the line a syntax error.
# match() gives the index of the first row whose state is "AZ" (NA if none).
match("AZ", data$state)
# which() gives the indices of every row whose state is "AZ".
which(data$state == "AZ")
Replace text with number
# factor() assigns the labels 1, 3, 5, in order, to the levels it derives from
# color (the sorted unique values for a character column), so the number of
# distinct colors must be three. The result is still a factor with labels
# "1", "3", "5", not a numeric vector.
data$color <- factor(data$color, labels = c(1, 3, 5))
# Find number of unique first names.
# unique() works for character and factor columns alike. levels() returns
# NULL for a character column (so the count would be 0) and still lists
# factor levels that no longer occur in the data. na.omit() keeps NA from
# being counted as a name.
length(unique(na.omit(data$fname)))
Count specific values in vector or column
# Find number of occurrences of "ac" in vector nlist.
# The frequency table is indexed by the value being counted; the result is NA
# when "ac" does not occur in nlist.
table(nlist)["ac"]
Transpose matrix A into At
# t() returns the transpose of A; the result is stored in At.
At <- t(A)
Select unique rows based on a column of matrix
# Select records with unique employee id.
# duplicated() flags every repeat of an EmpID after its first occurrence, so
# negating it keeps only the first row seen for each id.
Udata <- data[!duplicated(data$EmpID),]
Get column number in R given column name
# Compare every column name with "fname" and return the matching position(s);
# the result is integer(0) when no column has that name.
which( colnames(data)=="fname" )
Remove all NAs from data frame
# Keep only the rows that have no NA in any column.
Data <- data[complete.cases(data),]
# Or
# na.omit() drops the same rows and also records which rows were removed in
# an "na.action" attribute on the result.
Data <- na.omit(data)
For every X how many Y are there?
# Group the values of y by x rounded to the nearest integer; the result is a
# list with one element per distinct rounded x.
split(y, round(x)) # If x is real we should use round with desired precision
# We can also do a box plot to get range of each split.
boxplot(split(y, round(x)))
Random sampling from vector x?
# Select 20 elements from x at random, without replacement.
# Note: if x is a single number >= 1, sample() draws from 1:x instead.
sample(x,20)
How to stratify the data?
# Stratify data into three strata.
# Exact fractions give the tercile cut points; TRUE is spelled out because T
# is an ordinary variable that can be reassigned.
quantile(data$column, c(1/3, 2/3), na.rm = TRUE)
# This gives the split points, which should then be used to actually split
# the data, e.g. by passing them as breaks to cut().
How to get index with maximum value?
# Find the index in temperature vector with maximum value.
# which.max() returns the position of the first maximum; NA values are
# ignored.
which.max(data$temp)
How to filter data based on specific value?
# Find where state is AZ.
# String literals need straight ASCII quotes; typographic quotes are not
# string delimiters in R and make the line a syntax error.
# match() gives the index of the first row whose state is "AZ" (NA if none).
match("AZ", data$state)
# which() gives the indices of every row whose state is "AZ".
which(data$state == "AZ")
Replace text with number
# factor() assigns the labels 1, 3, 5, in order, to the levels it derives from
# color (the sorted unique values for a character column), so the number of
# distinct colors must be three. The result is still a factor with labels
# "1", "3", "5", not a numeric vector.
data$color <- factor(data$color, labels = c(1, 3, 5))
No comments:
Post a Comment