# Chapter 36: Factor analysis

### FACTOR ANALYSIS

# Data - behavior of 54 male sticklebacks:
# LUNGES - The number of lunges towards the model male.
# BITES - The number of times that the male model was bitten.
# ZIGZAGS - The 'Zig-Zag' display is part of display behaviour, designed to attract females.
# NEST - The number of nest building behaviours.
# SPINES - The number of times the fish raised the 'spines' on its back.
# DNEST - The duration of nest building activities.
# BOUT - The number of 'bout-facing' behaviours (male-male interaction).

# Load the stickleback behaviour data: tab-separated text with a header row.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned; as.is = TRUE keeps any text columns as character.
data_path <- "/home/suvar/DataAnalysis/Boltengagen_Course/DATA/stickleback.csv"
df <- read.table(data_path, header = TRUE, as.is = TRUE, sep = "\t")

# The analysis below (sd, cor, factanal) needs numeric columns. Fail early with
# a clear message, e.g. when the separator does not match the file contents.
if (!all(vapply(df, is.numeric, logical(1)))) {
  stop("stickleback data must contain only numeric columns", call. = FALSE)
}

# Primary data analysis
df
summary(df)     # the variables differ a lot in level and spread
# Standard deviation of every column. vapply() visits the columns one by one
# and guarantees a single number per column; apply() would first coerce the
# whole data frame to a matrix.
round(vapply(df, sd, numeric(1)), 2)

plot(df)      # an obvious correlation is visible for LUNGES~BITES
# outliers should ideally be removed

# Correlation matrix of all variables
round(cor(df), 2)

# Check the correlation between two variables.
# Pearson correlation should be used with caution: normality has not been
# verified.
cor.test(df$LUNGES, df$SPINES, alternative = "two.sided", method = "pearson",
         exact = NULL, conf.level = 0.95)

# Conclusions of primary analysis
# 1. The fish fight more than they court.
# 2. Means and variances differ between the variables.
# 3. For NEST/DNEST, more than half of the samples are 0.

# Factor Analysis
# Requesting more than 3 factors fails with a "too many factors" error.
# Of the 2- and 3-factor models, 2 factors give the more interpretable result
# and split the fish into two groups: aggressive and peaceful.
# factanal() fits by maximum likelihood only and has no `method` argument; an
# extra `method = "mls"` would be silently ignored, so it is not passed.
fa <- factanal(df, factors = 2, scores = "regression")
fa
# BOUT and SPINES have high uniqueness (they are not linked to the other
# variables) and could be excluded.
# Loadings close to 0 are left blank in the printed output.

# Components of the fitted model
fa$loadings     # factor loadings
fa$uniquenesses # variance of each variable not explained by the factors
fa$correlation  # correlation matrix that was analysed
fa$criteria     # results of the optimisation
fa$factors      # how many factors
fa$dof          # degrees of freedom
fa$method       # fitting method
fa$scores       # factor scores: strength of each factor for each sample
fa$n.obs        # number of observations
fa$call         # the call that produced the fit

plot(fa$scores[, 1], fa$scores[, 2])   # no correlation between factors

# Rebuild the correlation matrix of the variables from the factor model and
# subtract it from the observed one. What remains is the model error.

# Contribution of the common factors: loadings multiplied by their transpose
with(fa, loadings %*% t(loadings))

# Contribution of the unique factors: uniquenesses placed on the diagonal
with(fa, diag(uniquenesses))

# Observed correlation matrix of the initial data
fa[["correlation"]]

# Compare with the initial data: observed minus common part minus unique part
zzz <- with(fa, correlation - loadings %*% t(loadings) - diag(uniquenesses))
round(zzz, 2)

# Remarkably, the factors almost recover (predict) the observed correlations