Fst
Load package & Data Load
library(hierfstat);library(data.table);library(knitr);library(DT)
fst.file="https://raw.githubusercontent.com/jinseob2kim/jinseob2kim.github.io/master/fstexample.txt"
a=fread(fst.file)
Read example file. 7 pop & 289 SNPs (PER3 gene)
datatable(a)
Basic stat: original Fst
gg=basic.stats(a)
perloc1=gg$perloc # per locus statistics
datatable(perloc1) %>% formatRound(1:ncol(perloc1),3)
fstloc1=perloc1$Fst # per locus Fst
all1=gg$overall # overall locus statistics
kable(t(all1),caption = "overall locus statistics",digits = 3)
overall locus statistics
0.282 |
0.307 |
0.332 |
0.024 |
0.336 |
0.029 |
0.074 |
0.085 |
0.081 |
0.041 |
fst1=all1[7] # overall locus Fst
fst1
## Fst
## 0.074
Weir & Cockerham’s theta
gg2=wc(a)
fstloc2=gg2$per.loc$FST # per locus fst
kable(t(head(fstloc2)),digits=3,caption = "First 6 Fst")
First 6 Fst
0.052 |
0.052 |
0.064 |
0.045 |
0.061 |
0.011 |
fst2=gg2$FST # overall locus: mean
fst2
## [1] 0.07931026
Compare
library(ggplot2)
f=data.table(fstloc1,fstloc2)
ggplot(f,aes(fstloc1,fstloc2))+geom_point()+xlab("Wright's Fst")+ylab("Cockerham's theta")+annotate(geom="text", x=0.02, y=0.4,label= paste("r =",round(cor(fstloc1,fstloc2),3)),size=5)
Selection Index: EHH, iHS, xpEHH
Read example file, chr=12: 1424 SNPs & 280 haplotype
library(rehh)
make.example.files()
datatable(fread("bta12_cgu.hap")[1:100],rownames = F)
datatable(fread("map.inp")[1:100],rownames=F)
b<-data2haplohh(hap_file="bta12_cgu.hap",map_file="map.inp",
recode.allele=TRUE,chr.name=12)
## Map file seems OK: 1424 SNPs declared for chromosome 12
## Standard rehh input file assumed
## Alleles are being recoded according to map file as:
## 0 (missing data), 1 (ancestral allele) or 2 (derived allele)
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 280 haplotypes and 1424 SNPs
Bifurcation plot
bifurcation.diagram(b,mrk_foc=456,all_foc=1,nmrk_l=20,nmrk_r=20,
main="Ancestral Allele")
bifurcation.diagram(b,mrk_foc=456,all_foc=2,nmrk_l=20,nmrk_r=20,
main="Derived Allele")
EHH, Site specific EHH(EHHs)
# computing EHH statistics for the focal SNP at position 456
#par(mfrow=c(1,2))
res1=calc_ehh(b,mrk=456,limhaplo=2,limehh=0.05,plotehh=T)
include_graphics("ehh.png")
res11=calc_ehhs(b,mrk=456,limhaplo=2,limehh=0.05,plotehh=TRUE) # site EHH
include_graphics("ehhs.png")
Integrated EHH
res2=scan_hh(b) # a: ancestral, d: derived, IES: average of IHHa & IHHd (squared allele freq weighted)
datatable(res2) %>% formatRound(3,2) %>% formatRound(4:7,0)
iHS
ihs_res2=ihh2ihs(res2)
datatable(ihs_res2$iHS) %>% formatRound(3:4,3) ## iHS & pvalue
datatable(ihs_res2$frequency.class) %>% formatRound(2:3,3) ## summary per alle freq bin.
ihsplot(ihs_res2) ## iHS plot: y- pvalue, x-bp
include_graphics("iHS.png")
Population Comparison: rSB & xpEHH
data(wgscan.cgu) ; data(wgscan.eut) ## Load example result
res.rsb=ies2rsb(wgscan.cgu,wgscan.eut,"CGU","EUT") ## Rsb: Compare 2 population
kable(head(res.rsb),digits=3)
F0100190 |
1 |
113642 |
-0.340 |
0.134 |
F0100220 |
1 |
244699 |
-1.057 |
0.537 |
F0100250 |
1 |
369419 |
-0.147 |
0.054 |
F0100270 |
1 |
447278 |
-1.819 |
1.162 |
F0100280 |
1 |
487654 |
-0.219 |
0.083 |
F0100290 |
1 |
524507 |
-0.794 |
0.369 |
rsbplot(res.rsb) ## Rsb plot
include_graphics("rsb.png")
res.xpehh<-ies2xpehh(wgscan.cgu,wgscan.eut,"CGU","EUT")
kable(head(res.xpehh),digits=3)
F0100190 |
1 |
113642 |
-0.556 |
0.238 |
F0100220 |
1 |
244699 |
-0.752 |
0.345 |
F0100250 |
1 |
369419 |
-0.889 |
0.427 |
F0100270 |
1 |
447278 |
-0.347 |
0.138 |
F0100280 |
1 |
487654 |
-0.918 |
0.446 |
F0100290 |
1 |
524507 |
-0.752 |
0.345 |
xpehhplot(res.xpehh) ## xpEHH plot
include_graphics("xpehh.png")
plot(res.rsb[,3],res.xpehh[,3],xlab="Rsb",ylab="XPEHH",pch=16,cex=0.5,cex.lab=0.75,cex.axis=0.75)
abline(a=0,b=1,lty=2,main="Rsb vs xpEHH")