Use the “wsl” command with system2() to run shell commands inside WSL (Windows Subsystem for Linux).
# List the contents of ~/bioinfor inside WSL; stdout = TRUE returns the
# listing to R as a character vector instead of printing it to the console.
system2(
  command = "wsl",
  args = "cd ~/bioinfor/; ls",
  stdout = TRUE
)
## [1] "AF086833.gb" "NC_045512-version1.fa" "RNASeqByExample"
## [4] "chr22.fa" "runinfo.csv"
We can retrieve the SARS-CoV-2 genome record (accession NC_045512) in GenBank format using efetch.
# Fetch the GenBank record for NC_045512 with efetch. Here R itself redirects
# the command's standard output into the .gb file named by `stdout`.
system2(
  command = "wsl",
  args = "efetch -db=nuccore -format=gb -id=NC_045512",
  stdout = "../../../NC_045512.gb"
)
Retrieve the same accession number, NC_045512, in FASTA format.
# Fetch NC_045512 in FASTA format. The `>` redirection is part of the command
# run inside WSL, so the sequence goes to NC_045512.fa and nothing is left for
# stdout = TRUE to capture (hence the empty character vector returned).
system2(
  command = "wsl",
  args = "efetch -db=nuccore -format=fasta -id=NC_045512 > NC_045512.fa",
  stdout = TRUE
)
## character(0)
# Preview the first lines of the downloaded GenBank file and return them to R.
system2(
  command = "wsl",
  args = "cat ../../../NC_045512.gb | head",
  stdout = TRUE
)
## [1] "LOCUS NC_045512 29903 bp ss-RNA linear VRL 18-JUL-2020"
## [2] "DEFINITION Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1,"
## [3] " complete genome."
## [4] "ACCESSION NC_045512"
## [5] "VERSION NC_045512.2"
## [6] "DBLINK BioProject: PRJNA485481"
## [7] "KEYWORDS RefSeq."
## [8] "SOURCE Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)"
## [9] " ORGANISM Severe acute respiratory syndrome coronavirus 2"
## [10] " Viruses; Riboviria; Orthornavirae; Pisuviricota; Pisoniviricetes;"
- GO annotations for the human genes that may be relevant for SARS-CoV-2 infection
# Download the GO annotation file (GAF) for the human target genes with wget.
system2(
  command = "wsl",
  args = "wget http://geneontology.org/data/sars-cov-2_targets.gaf"
)
- GO annotations for the SARS-CoV-2 virus genes
# Download the GO annotation file (GPA) for the SARS-CoV-2 genes with wget.
system2(
  command = "wsl",
  args = "wget http://geneontology.org/data/uniprot_sars-cov-2.gpa"
)
# Show the first 20 lines of the GAF file (header comments plus the first
# annotation rows), captured as a character vector.
system2(
  command = "wsl",
  args = "cat sars-cov-2_targets.gaf | head -20",
  stdout = TRUE
)
## [1] "!gaf-version: 2.1"
## [2] "!"
## [3] "!Generated by GoExporter package"
## [4] "!"
## [5] "!Created on: Tue Apr 07 2020 09:52:20 GMT+0000 (Coordinated Universal Time)"
## [6] "!"
## [7] "!Contact Email: laurent.albou@lbl.gov"
## [8] "!"
## [9] "!Documentation about this header can be found here: http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/"
## [10] "!"
## [11] "UniProtKB\tP50897\tPPT1\t\tGO:0002084\tGO_REF:0000024\tISS\tUniProtKB:P45478\tP\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060925\tUniProt\t\tUniProtKB:P50897"
## [12] "UniProtKB\tP50897\tPPT1\t\tGO:0002084\tPMID:10658183\tIDA\t\tP\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060925\tUniProt\t\tUniProtKB:P50897"
## [13] "UniProtKB\tP50897\tPPT1\t\tGO:0002084\tPMID:10737604\tIDA\t\tP\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060921\tUniProt\t\tUniProtKB:P50897"
## [14] "UniProtKB\tP50897\tPPT1\t\tGO:0005515\tPMID:17237713\tIPI\tUniProtKB:O14773\tF\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20191121\tUniProt\t\tUniProtKB:P50897"
## [15] "UniProtKB\tP50897\tPPT1\t\tGO:0005515\tPMID:19941651\tIPI\tUniProtKB:Q3UMW8\tF\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20160821\tUniProt\t\tUniProtKB:P50897"
## [16] "UniProtKB\tP50897\tPPT1\t\tGO:0005515\tPMID:25544563\tIPI\tUniProtKB:Q76RH3\tF\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20200222\tIntAct\t\tUniProtKB:P50897"
## [17] "UniProtKB\tP50897\tPPT1\t\tGO:0005576\tGO_REF:0000024\tISS\tUniProtKB:P45478\tC\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060925\tUniProt\t\tUniProtKB:P50897"
## [18] "UniProtKB\tP50897\tPPT1\t\tGO:0005576\tPMID:8895569\tIDA\t\tC\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060922\tUniProt\t\tUniProtKB:P50897"
## [19] "UniProtKB\tP50897\tPPT1\t\tGO:0005623\tGO_REF:0000108\tIEA\tGO:0007042\tC\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20200223\tGOC\t\tUniProtKB:P50897"
## [20] "UniProtKB\tP50897\tPPT1\t\tGO:0005634\tPMID:10992246\tIDA\t\tC\tPalmitoyl-protein thioesterase 1\tCLN1|PPT\tprotein\ttaxon:9606\t20060918\tUniProt\t\tUniProtKB:P50897"
# Show the first 30 lines of the GPA file (header comments plus the first
# annotation rows), captured as a character vector.
system2(
  command = "wsl",
  args = "cat uniprot_sars-cov-2.gpa | head -30",
  stdout = TRUE
)
## [1] "!gpa-version: 1.1"
## [2] "!Columns:"
## [3] "!"
## [4] "! name required? cardinality GAF column #"
## [5] "! DB required 1 1"
## [6] "! DB_Object_ID required 1 2 / 17"
## [7] "! Qualifier required 1 or greater 4"
## [8] "! GO ID required 1 5"
## [9] "! DB:Reference(s) required 1 or greater 6"
## [10] "! ECO evidence code required 1 7 + 6 (GO evidence code + reference)"
## [11] "! With optional 0 or greater 8"
## [12] "! Interacting taxon ID optional 0 or 1 13"
## [13] "! Date required 1 14"
## [14] "! Assigned_by required 1 15"
## [15] "! Annotation Extension optional 0 or greater 16"
## [16] "! Annotation Properties optional 0 or 1 n/a"
## [17] "!"
## [18] "!Generated: 2020-03-24 16:33"
## [19] "!GO-version: http://purl.obolibrary.org/obo/go/releases/2020-03-22/extensions/go-plus.owl"
## [20] "!"
## [21] "UniProtKB\tP0DTC1\tenables\tGO:0003723\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0694\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [22] "UniProtKB\tP0DTC1\tenables\tGO:0004518\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0540\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [23] "UniProtKB\tP0DTC1\tenables\tGO:0004519\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0255\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [24] "UniProtKB\tP0DTC1\tenables\tGO:0008233\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0645\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [25] "UniProtKB\tP0DTC1\tenables\tGO:0008234\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0788\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [26] "UniProtKB\tP0DTC1\tenables\tGO:0016787\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0378\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [27] "UniProtKB\tP0DTC1\tenables\tGO:0036459\tGO_REF:0000003\tECO:0000501\tEC:3.4.19.12\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [28] "UniProtKB\tP0DTC1\tenables\tGO:0046872\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0479\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [29] "UniProtKB\tP0DTC1\tinvolved_in\tGO:0006508\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0645\t\t20200321\tUniProt\t\tgo_evidence=IEA"
## [30] "UniProtKB\tP0DTC1\tinvolved_in\tGO:0016032\tGO_REF:0000043\tECO:0000322\tUniProtKB-KW:KW-0945\t\t20200321\tUniProt\t\tgo_evidence=IEA"
GOLGA2 is the most highly annotated protein in the GO dataset.
# Count annotations per gene symbol (column 3 of the GAF file) and list the
# ten most frequent. `sort -rn` compares the counts numerically; a plain
# `sort -r` compares them as text, so the ranking would depend on how
# `uniq -c` pads the counts and on the locale's collation rules.
system2(
  "wsl",
  "cat sars-cov-2_targets.gaf | cut -f 3 | sort | uniq -c | sort -rn | head",
  stdout = TRUE
)
## [1] " 437 GOLGA2" " 270 RHOA" " 260 ITGB1" " 217 RIPK1"
## [5] " 211 RBX1" " 187 NUP62" " 171 TBK1" " 156 PRKACA"
## [9] " 155 HDAC2" " 153 TLE5"