# Print human_mt to inspect the sequences before check_cds() is applied below.
human_mt
#> DNAStringSet object of length 13:
#> width seq names
#> [1] 681 ATGAACGAAAATCTGTTCGCTTC...CTACCTGCACGACAACACATAA MT-ATP6
#> [2] 346 ATAAACTTCGCCTTAATTTTAAT...AAAGGATTAGACTGAACCGAAT MT-ND3
#> [3] 956 ATACCCATGGCCAACCTCCTACT...CCAGCATTCCCCCTCAAACCTA MT-ND1
#> [4] 207 ATGCCCCAACTAAATACTACCGT...TTCATTGCCCCCACAATCCTAG MT-ATP8
#> [5] 1141 ATGACCCCAATACGCAAAACTAA...AACAAAATACTCAAATGGGCCT MT-CYB
#> ... ... ...
#> [9] 1042 ATTAATCCCCTGGCCCAACCCGT...CCTTTTATACTAATAATCTTAT MT-ND2
#> [10] 525 ATGATGTATGCTTTGTTTCTGTT...TGAGATTGCTCGGGGGAATAGG MT-ND6
#> [11] 1542 ATGTTCGCCGACCGTTGACTATT...ACCCGTATACATAAAATCTAGA MT-CO1
#> [12] 684 ATGGCACATGCAGCGCAAGTAGG...AGGGCCCGTATTTACCCTATAG MT-CO2
#> [13] 784 ATGACCCACCAATCACATGCCTA...TCCATCTATTGATGAGGGTCTT MT-CO3
# Fetch the codon table with gcid "2" (presumably the vertebrate mitochondrial
# genetic code -- confirm against the get_codon_table() documentation).
ctab <- get_codon_table(gcid = "2")

# Incomplete stop codons are common among MT CDSs, so the CDS-length and
# stop-codon checks are switched off and stop codons are not removed here.
# ATG, ATA and ATT are all accepted as start codons.
human_mt_qc <- check_cds(
  human_mt,
  codon_table = ctab,
  start_codons = c("ATG", "ATA", "ATT"),
  check_len = FALSE,
  check_stop = FALSE,
  rm_stop = FALSE
)
human_mt_qc
#> DNAStringSet object of length 13:
#> width seq names
#> [1] 678 AACGAAAATCTGTTCGCTTCATT...CTACCTGCACGACAACACATAA MT-ATP6
#> [2] 343 AACTTCGCCTTAATTTTAATAAT...AAAGGATTAGACTGAACCGAAT MT-ND3
#> [3] 953 CCCATGGCCAACCTCCTACTCCT...CCAGCATTCCCCCTCAAACCTA MT-ND1
#> [4] 204 CCCCAACTAAATACTACCGTATG...TTCATTGCCCCCACAATCCTAG MT-ATP8
#> [5] 1138 ACCCCAATACGCAAAACTAACCC...AACAAAATACTCAAATGGGCCT MT-CYB
#> ... ... ...
#> [9] 1039 AATCCCCTGGCCCAACCCGTCAT...CCTTTTATACTAATAATCTTAT MT-ND2
#> [10] 522 ATGTATGCTTTGTTTCTGTTGAG...TGAGATTGCTCGGGGGAATAGG MT-ND6
#> [11] 1539 TTCGCCGACCGTTGACTATTCTC...ACCCGTATACATAAAATCTAGA MT-CO1
#> [12] 681 GCACATGCAGCGCAAGTAGGTCT...AGGGCCCGTATTTACCCTATAG MT-CO2
#> [13] 781 ACCCACCAATCACATGCCTATCA...TCCATCTATTGATGAGGGTCTT MT-CO3
# Stop codons are still present at this point, so strip them by hand.
# The number of trailing nucleotides to drop is the remainder of the width
# modulo 3, except that a remainder of 0 means a full 3-nt codon is dropped;
# (w - 1) %% 3 + 1 maps remainders 0, 1, 2 to 3, 1, 2 respectively.
cds_width <- width(human_mt_qc)
len_trim <- (cds_width - 1) %% 3 + 1
human_mt_qc <- subseq(
  human_mt_qc,
  start = 1,
  end = cds_width - len_trim
)

# Tabulate codon frequencies of the trimmed sequences.
mt_cf <- count_codons(human_mt_qc)

# Compute ENC from the codon frequencies, using the same codon table as above.
get_enc(mt_cf, codon_table = ctab)
#> MT-ATP6 MT-ND3 MT-ND1 MT-ATP8 MT-CYB MT-ND4L MT-ND4 MT-ND5
#> 46.44871 44.93068 42.42264 50.54562 42.97931 47.29975 42.65228 43.53521
#> MT-ND2 MT-ND6 MT-CO1 MT-CO2 MT-CO3
#> 45.18387 45.56454 44.83277 49.19525 47.07683