Sumstats from Generation Scotland need to be aligned with the PGC imputation panel (1KG).
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the Generation Scotland summary statistics (tab-separated, gzipped);
# column types are guessed by readr.
daner <- read_tsv(file = "daner_mdd_genscot_1119a_rmUKBB.gz")
## Parsed with column specification:
## cols(
## CHR = col_integer(),
## SNP = col_character(),
## BP = col_integer(),
## A1 = col_character(),
## A2 = col_character(),
## FRQ_A_951 = col_double(),
## FRQ_U_6114 = col_double(),
## INFO = col_double(),
## OR = col_double(),
## SE = col_double(),
## P = col_double(),
## ngt = col_integer()
## )
List and load the alignment files. The document suggests pulling from files with the suffix *.EUR.frq2.gz,
but these do not exist for the circa 2012 imputation files that were used for MDD2. Instead, we open the *.eur.bfile.bim
files.
# Full paths of the per-chromosome EUR reference .bim files
eur_bim_files <- list.files(
  path = "/home/gwas/pgc-samples/hapmap_ref/impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/",
  pattern = "my.ALL_1000G_phase1integrated_v3_aug2012_macGT1_chr.+\\.eur\\.bfile\\.bim",
  full.names = TRUE
)
# Read every .bim file with the same column names and types, then stack them.
# Column types: i = integer, c = character.
# NOTE(review): CM is parsed as integer; confirm the reference files only hold
# integer values in that column.
eur_bim <- eur_bim_files %>%
  lapply(function(bim_file) {
    read_table2(
      bim_file,
      col_names = c("CHR", "snpid", "CM", "BP", "A1", "A2"),
      col_types = "iciicc"
    )
  }) %>%
  bind_rows()
Merge sumstats with the reference on CPIDs in both allele orientations: CHR_POS_A1_A2 (alleles in the same order)
and CHR_POS_A2_A1 (alleles swapped).
# Match sumstats rows to reference rows on chromosome, position and alleles,
# once with the alleles in the same order and once with A1/A2 swapped, and
# stack the two sets of matches.
daner_cpid <-
  bind_rows(
    # same orientation: A1 == A1, A2 == A2
    inner_join(daner, eur_bim, by = c("CHR", "BP", "A1", "A2")),
    # swapped orientation: sumstats A1 == reference A2 and vice versa
    inner_join(
      daner,
      eur_bim,
      by = c("CHR" = "CHR", "BP" = "BP", "A1" = "A2", "A2" = "A1")
    )
  )
Number of sites with different RSIDs
# Count matched rows whose sumstats SNP ID differs from the reference ID.
# Rows where the comparison is NA are not counted.
daner_cpid %>%
  summarise(n = sum(SNP != snpid, na.rm = TRUE))
## # A tibble: 1 x 1
## n
## <int>
## 1 49169
Unmatched sites
# Sumstats rows that found no match in the reference panel.
# daner_cpid carries the sumstats' own CHR, SNP, BP, A1 and A2 columns, so
# comparing on all five picks out exactly the rows that were matched.
# Comparing on CHR and SNP alone would also discard an unmatched row that
# merely shares its SNP ID with a matched row on the same chromosome
# (e.g. multi-allelic sites or placeholder IDs).
daner_cpid_nomatch <-
  daner %>%
  anti_join(daner_cpid, by = c("CHR", "SNP", "BP", "A1", "A2"))
Update SNP ID
# For matched rows, replace the sumstats SNP ID with the reference ID and keep
# only the sumstats columns in their original order; then append the unmatched
# rows unchanged and sort by chromosome and position.
daner_aligned <-
  bind_rows(
    select(
      daner_cpid,
      CHR, SNP = snpid, BP, A1, A2,
      starts_with("FRQ"),
      INFO, OR, SE, P, ngt
    ),
    daner_cpid_nomatch
  ) %>%
  arrange(CHR, BP)

# Write the re-labelled sumstats as a gzipped TSV
write_tsv(daner_aligned, "daner_mdd_genscot_1119a_rmUKBB.aligned.gz")