### FACTOR ANALYSIS
# Data - behaviour of 54 male sticklebacks:
# LUNGES - The number of lunges towards the model male.
# BITES - The number of times that the male model was bitten.
# ZIGZAGS - The 'Zig-Zag' display is part of display behaviour, designed to attract females.
# NEST - The number of nest building behaviours.
# SPINES - The number of times the fish raised the 'spines' on its back.
# DNEST - The duration of nest building activities.
# BOUT - The number of 'bout-facing' behaviours (male-male interaction).
# Load the stickleback behaviour table. The file is read as tab-separated
# text with a header row; as.is = TRUE keeps character columns as character.
data_path <- "/home/suvar/DataAnalysis/Boltengagen_Course/DATA/stickleback.csv"
df <- read.table(data_path, header = TRUE, as.is = TRUE, sep = "\t")

# Primary data analysis ----
df
summary(df) # data is variable
# Per-column standard deviation (variation). vapply() works on the data frame
# column by column instead of coercing it to a matrix as apply() would, and
# guarantees one numeric value per column.
round(vapply(df, sd, numeric(1)), 2)
plot(df) # obvious correlation is visible for LUNGES~BITES
# Outliers should ideally be deleted.

# Correlation ----
# Pairwise correlation matrix of all variables, rounded for readability.
round(cor(df), 2)
# Check for correlation between two variables.
# Pearson correlation should be used with caution: normality is not proven.
# (`exact` is omitted: it is NULL by default and is not used by the Pearson
# test.)
cor.test(
  df$LUNGES, df$SPINES,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
# Conclusions of the primary analysis
# 1. The fish fight more than they court.
# 2. Averages and variances differ between variables.
# 3. For NEST/DNEST more than half of the samples are 0.
# Factor analysis ----
# Asking for more than 3 factors fails with a "too many factors" error.
# Between 2 and 3 factors, 2 gives the more logical result and splits the
# fish into two groups: aggressive and peaceful.
# NOTE: factanal() has no `method` argument and always fits by maximum
# likelihood, so the former `method = "mls"` was silently ignored; it is
# dropped here to avoid suggesting a different estimator is in use.
fa <- factanal(df, factors = 2, scores = "regression")
# In the printed loadings, values close to 0 are omitted.
fa
# BOUT and SPINES are unique and not linked with other data, can be excluded.

# Components of the fitted object ----
fa$loadings     # loadings of each variable on each factor
fa$uniquenesses # uniqueness of each variable
fa$correlation  # correlation matrix that was analysed
fa$criteria     # results of the optimisation
fa$factors      # how many factors
fa$dof          # degrees of freedom
fa$method       # fitting method (always "mle" for factanal)
fa$scores       # factor scores: strength of each factor for each sample
fa$n.obs        # number of observations
fa$call         # the call that produced the fit

# Scatter of the two factor scores: no correlation between factors.
plot(
  fa$scores[, 1], fa$scores[, 2],
  xlab = "Factor 1 score", ylab = "Factor 2 score"
)
# Rebuild the correlation matrix from the factors ----
# The observed correlation matrix is compared with what the factor solution
# reproduces (common part + unique part); the difference is the model error.

# Common part: matrix product of the loadings with their transpose
# (loadings(fa) is the same object as fa$loadings).
common_part <- loadings(fa) %*% t(loadings(fa))
common_part

# Unique part: the uniquenesses placed on the diagonal of a square matrix.
unique_part <- diag(fa$uniquenesses)
unique_part

# Correlation matrix of the initial data, as stored in the fit.
fa$correlation

# Residual: observed correlations minus the two model parts.
zzz <- fa$correlation - common_part - unique_part
round(zzz, 2)
# It's a miracle: we can almost recover (predict) the correlations using the factors.