Non-standard genetic code

suppressPackageStartupMessages(library(Biostrings))
library(cubar)
# Display the input coding sequences. `human_mt` is not defined in this
# script; presumably it is an example dataset provided by cubar -- confirm.
# The printed object is a DNAStringSet holding 13 sequences with MT-* names.
human_mt
#> DNAStringSet object of length 13:
#>      width seq                                              names               
#>  [1]   681 ATGAACGAAAATCTGTTCGCTTC...CTACCTGCACGACAACACATAA MT-ATP6
#>  [2]   346 ATAAACTTCGCCTTAATTTTAAT...AAAGGATTAGACTGAACCGAAT MT-ND3
#>  [3]   956 ATACCCATGGCCAACCTCCTACT...CCAGCATTCCCCCTCAAACCTA MT-ND1
#>  [4]   207 ATGCCCCAACTAAATACTACCGT...TTCATTGCCCCCACAATCCTAG MT-ATP8
#>  [5]  1141 ATGACCCCAATACGCAAAACTAA...AACAAAATACTCAAATGGGCCT MT-CYB
#>  ...   ... ...
#>  [9]  1042 ATTAATCCCCTGGCCCAACCCGT...CCTTTTATACTAATAATCTTAT MT-ND2
#> [10]   525 ATGATGTATGCTTTGTTTCTGTT...TGAGATTGCTCGGGGGAATAGG MT-ND6
#> [11]  1542 ATGTTCGCCGACCGTTGACTATT...ACCCGTATACATAAAATCTAGA MT-CO1
#> [12]   684 ATGGCACATGCAGCGCAAGTAGG...AGGGCCCGTATTTACCCTATAG MT-CO2
#> [13]   784 ATGACCCACCAATCACATGCCTA...TCCATCTATTGATGAGGGTCTT MT-CO3

# Look up the codon table for genetic code 2 (the vertebrate mitochondrial
# code in NCBI numbering); the standard code does not apply to MT genes.
ctab <- get_codon_table(gcid = "2")

# Quality-control the CDSs against that table. The length check and the
# stop-codon check are switched off, and stop codons are left in place,
# because incomplete stop codons are prevalent among MT CDSs. ATA and ATT
# are accepted as start codons in addition to ATG.
human_mt_qc <- check_cds(
  human_mt,
  codon_table = ctab,
  start_codons = c("ATG", "ATA", "ATT"),
  check_len = FALSE,
  check_stop = FALSE,
  rm_stop = FALSE
)

# Display the QC'd sequences. In the output shown below each sequence is
# 3 nt shorter than its input and no longer begins with its start codon,
# while the 3' end (complete or incomplete stop codon) is still present.
human_mt_qc
#> DNAStringSet object of length 13:
#>      width seq                                              names               
#>  [1]   678 AACGAAAATCTGTTCGCTTCATT...CTACCTGCACGACAACACATAA MT-ATP6
#>  [2]   343 AACTTCGCCTTAATTTTAATAAT...AAAGGATTAGACTGAACCGAAT MT-ND3
#>  [3]   953 CCCATGGCCAACCTCCTACTCCT...CCAGCATTCCCCCTCAAACCTA MT-ND1
#>  [4]   204 CCCCAACTAAATACTACCGTATG...TTCATTGCCCCCACAATCCTAG MT-ATP8
#>  [5]  1138 ACCCCAATACGCAAAACTAACCC...AACAAAATACTCAAATGGGCCT MT-CYB
#>  ...   ... ...
#>  [9]  1039 AATCCCCTGGCCCAACCCGTCAT...CCTTTTATACTAATAATCTTAT MT-ND2
#> [10]   522 ATGTATGCTTTGTTTCTGTTGAG...TGAGATTGCTCGGGGGAATAGG MT-ND6
#> [11]  1539 TTCGCCGACCGTTGACTATTCTC...ACCCGTATACATAAAATCTAGA MT-CO1
#> [12]   681 GCACATGCAGCGCAAGTAGGTCT...AGGGCCCGTATTTACCCTATAG MT-CO2
#> [13]   781 ACCCACCAATCACATGCCTATCA...TCCATCTATTGATGAGGGTCTT MT-CO3

# Stop codons (complete or incomplete) are still attached, so strip them by
# hand. The number of trailing bases to drop is the remainder of the width
# modulo 3 (1 or 2 for an incomplete stop codon); a remainder of 0 means the
# CDS ends in a complete stop codon, so a full 3 bases are dropped instead.
len_trim <- width(human_mt_qc) %% 3
len_trim[len_trim == 0] <- 3
human_mt_qc <- subseq(
  human_mt_qc,
  start = 1,
  end = width(human_mt_qc) - len_trim
)

# Tabulate codon frequencies for the trimmed CDSs.
mt_cf <- cubar::count_codons(human_mt_qc)


# Compute ENC (effective number of codons) for each gene, using the
# mitochondrial codon table obtained above; the result is printed.
cubar::get_enc(mt_cf, codon_table = ctab)
#>  MT-ATP6   MT-ND3   MT-ND1  MT-ATP8   MT-CYB  MT-ND4L   MT-ND4   MT-ND5 
#> 46.44871 44.93068 42.42264 50.54562 42.97931 47.29975 42.65228 43.53521 
#>   MT-ND2   MT-ND6   MT-CO1   MT-CO2   MT-CO3 
#> 45.18387 45.56454 44.83277 49.19525 47.07683