Scripts

Example scripts on how to apply obogaf-parser module respectively to the Gene Ontology (GO script) and to the Human Phenotype Ontology (HPO script)

GO script

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/perl

## loading obogaf::parser and
use strict;
use warnings;
use obogaf::parser qw(:all);

## elapased time
use Time::HiRes qw(time);
my $start= time;

## recursively create directories ([-p] mkdir option in perl does not work)
use File::Path qw(make_path);

## create folder where storing example I/O files
my $basedir= "data/";
make_path($basedir) unless(-d $basedir);

## note: if you want to store data in your home, use File::HomeDir
# use File::HomeDir qw(home);
# my $basedir = File::HomeDir->my_home."/data/";
# mkdir $basedir unless(-e $basedir);

## declare variables
my ($res, $stat, $parentIndex, $childIndex, $geneindex, $classindex, $parlist, $pares, $chdlist, $chdres);

## ~~ GO OBO ~~ ##
## download GO obo file
my $obofile= $basedir."gobasic.obo";
my $gobo= qx{wget --output-document=$obofile http://purl.obolibrary.org/obo/go/go-basic.obo};
print "GO obo file downloaded: done\n\n";

## shrink GO obo file to a subset of terms
my @terms = qw(GO:0000002 GO:0000003 GO:0000018 GO:0000030 GO:0000038);
my $termsfile= $basedir."goterms.txt";
open OUT, "> $termsfile";
foreach my $go (@terms){print OUT "$go\n";}
close OUT;

$res= obo_filter($obofile, $termsfile);
my $newobo= $basedir."go-shrunk.obo";
open OUT, ">", $newobo;
print OUT "${$res}";
close OUT;

## extract edges from GO obo file
my $gores= build_edges($obofile);
my $goedges= $basedir."gobasic-edges.txt"; ## go edges file declared here
open FH, "> $goedges";
print FH "${$gores}"; ## scalar dereferencing
close FH;
print "build GO edges: done\n\n";

## extract GO subontology nodes and relationships
my @domains= qw(biological_process molecular_function cellular_component);
my %aspects= (biological_process => "BP", molecular_function => "MF", cellular_component => "CC");

foreach my $domain (@domains){
    my $outfile= $basedir."gobasic-edges"."$aspects{$domain}".".txt";
    open FH, "> $outfile";
    my $domainres= build_subonto($goedges, $domain);
    print FH "${$domainres}";
    close FH;
}
print "build edges for each GO subontology: done\n\n";

## make stats on the whole GOobo file
($parentIndex, $childIndex)= (1,2);
$res= make_stat($goedges, $parentIndex, $childIndex);
print "$res";
print "\nGO stats: done\n\n";

## make stats on a GO-BP subontology
my $goedgesbp= $basedir."gobasic-edgesBP.txt";
($parentIndex, $childIndex)= (0,1);
$res= make_stat($goedgesbp, $parentIndex, $childIndex);
print "$res";
print "\nGO BP stats: done\n\n";

## compute parents and children list (whole ontology)
$parlist= $basedir."gobasic-parGO.txt";
$pares= get_parents_or_children_list($goedges, 1,2, "parents");
open FH, "> $parlist";
foreach my $k (sort{$a cmp $b} keys %$pares) { print FH "$k $$pares{$k}\n";} ## parents  list
close FH;

$chdlist= $basedir."gobasic-chdGO.txt";
$chdres= get_parents_or_children_list($goedges, 1,2, "children");
open FH, "> $chdlist";
foreach my $k (sort{$a cmp $b} keys %$chdres) { print FH "$k $$chdres{$k}\n";} ## children list
close FH;

print "\nGO parents/children list: done\n\n";

## compute parents and children list (GO-BP subontology)
$parlist= $basedir."gobasic-parGO-BP.txt";
$pares= get_parents_or_children_list($goedgesbp, 0,1, "parents");
open FH, "> $parlist";
foreach my $k (sort{$a cmp $b} keys %$pares) { print FH "$k $$pares{$k}\n";} ## parents  list
close FH;

$chdlist= $basedir."gobasic-chdGO-BP.txt";
$chdres= get_parents_or_children_list($goedgesbp, 0,1, "children");
open FH, "> $chdlist";
foreach my $k (sort{$a cmp $b} keys %$chdres) { print FH "$k $$chdres{$k}\n";} ## children list
close FH;

print "\nGO BP parents/children list: done\n\n";

## ~~ GOA ANNOTATION ~~ ##
## download GO annotation from GOA database (CHICKEN organism)
my $goafile= $basedir."goa_chicken.gaf.gz"; ## goa annotation file declared here
my $goachicken= qx{wget --output-document=$goafile ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/CHICKEN/goa_chicken.gaf.gz};

## extract GO annotation from GOA database (CHICKEN organism)
($geneindex, $classindex)= (1, 4);
($res, $stat)= gene2biofun($goafile, $geneindex, $classindex);
my $goaout= $basedir."chicken.uniprot2go.txt";
open FH, "> $goaout";
foreach my $k (sort{$a cmp $b} keys %$res) { print FH "$k $$res{$k}\n";} ## dereferencing
close FH;
print "${$stat}\n";
print "build GOA annotations (CHICKEN): done\n\n";

## ~~ MAP GO TERMS BETWEEN RELEASE ~~ ##
## download old GOA CHICKEN annotation file
my $goafileOld= $basedir."goa_chicken.gaf.128.gz"; ## goa annotation file declared here
my $goachickenOld= qx{wget --output-document=$goafileOld ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/old/CHICKEN/goa_chicken.gaf.128.gz};

## map GO terms between release
($res, $stat)= map_OBOterm_between_release($obofile, $goafileOld, $classindex);
my $mapfile= $basedir."chicken.goa.mapped.txt";
open FH, "> $mapfile";
print FH "${$res}";
close FH;
print "${$stat}";

## ~~ ELAPSED TIME ~~ ##
print "\n\n";
my $span= time - $start;
$span= sprintf("%.4f", $span);
printf "Elapased Time:\t$span\n";

exit;

HPO script

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
#!/usr/bin/perl

## loading obogaf::parser and useful Perl module
use strict;
use warnings;
use obogaf::parser qw(:all);
## elapased time
use Time::HiRes qw(time);
my $start= time;

## recursively create directories ([-p] mkdir option in perl does not work)
use File::Path qw(make_path);

## create folder where storing example I/O files
my $basedir= "data/";
make_path($basedir) unless(-d $basedir);

## note: if case you want to store data in your home, use File::HomeDir
# use File::HomeDir qw(home);
# my $basedir = File::HomeDir->my_home."/data/";
# mkdir $basedir unless(-e $basedir);

## declare variables
my ($res, $stat, $parentIndex, $childIndex, $geneindex, $classindex, $parlist, $pares, $chdlist, $chdres);

## ~~ HPO OBO ~~ ##
## download HPO obo file
my $obofile= $basedir."hpo.obo";
my $hpobo= qx{wget --output-document=$obofile http://purl.obolibrary.org/obo/hp.obo};
print "HPO obo file downloaded: done\n\n";

## shrink HPO obo file to a subset of terms
my @terms = qw(HP:0001507 HP:0000008 HP:0002719 HP:0000021 HP:0000023);
my $termsfile= $basedir."hpoterms.txt";
open OUT, "> $termsfile";
foreach my $go (@terms){print OUT "$go\n";}
close OUT;

$res= obo_filter($obofile, $termsfile);
my $newobo= $basedir."hpo-shrunk.obo";
open OUT, ">", $newobo;
print OUT "${$res}";
close OUT;

## extract edges from HPO obo file
my $hpores= build_edges($obofile);
my $hpoedges= $basedir."hpo-edges.txt"; ## hpo edges file declared here
open FH, "> $hpoedges";
print FH "${$hpores}"; ## scalar dereferencing
close FH;
print "build HPO edges: done\n\n";

## make stats on HPO
($parentIndex, $childIndex)= (0,1);
$res= make_stat($hpoedges, $parentIndex, $childIndex);
print "$res";
print "\nHPO stats: done\n\n";

## compute parents and children list on HPO ontology
$parlist= $basedir."gobasic-parHPO.txt";
$pares= get_parents_or_children_list($hpoedges, 0,1, "parents");
open FH, "> $parlist";
foreach my $k (sort{$a cmp $b} keys %$pares) { print FH "$k $$pares{$k}\n";} ## parents  list
close FH;

$chdlist= $basedir."gobasic-chdHPO.txt";
$chdres= get_parents_or_children_list($hpoedges, 0,1, "children");
open FH, "> $chdlist";
foreach my $k (sort{$a cmp $b} keys %$chdres) { print FH "$k $$chdres{$k}\n";} ## children list
close FH;

print "\nHPO parents/children list: done\n\n";

## ~~ HPO ANNOTATION ~~ ##
## download HPO annotations
my $hpofile= $basedir."hpo.ann.txt"; ## hpo annotation file declared here
my $hpoann= qx{wget --output-document=$hpofile http://compbio.charite.de/jenkins/job/hpo.annotations.monthly/lastStableBuild/artifact/annotation/ALL_SOURCES_ALL_FREQUENCIES_genes_to_phenotype.txt};

## extract HPO annotations
($geneindex, $classindex)= (1,3);
($res, $stat)= gene2biofun($hpofile, $geneindex, $classindex);
my $hpout= $basedir."hpo.gene2pheno.txt";
open FH, "> $hpout";
foreach my $k (sort{$a cmp $b} keys %$res) { print FH "$k $$res{$k}\n";} ## dereferencing
close FH;
print "${$stat}\n";
print "build HPO annotations: done\n\n";

## ~~ MAP HPO TERMS BETWEEN RELEASE ~~ ##
## download old HPO annotation file
my $hpofileOld= $basedir."hpo.ann.old.txt"; ## goa annotation file declared here
my $hpold= qx{wget --output-document=$hpofileOld http://compbio.charite.de/jenkins/job/hpo.annotations.monthly/139/artifact/annotation/ALL_SOURCES_ALL_FREQUENCIES_genes_to_phenotype.txt};

## map HPO terms between release
($res, $stat)= map_OBOterm_between_release($obofile, $hpofileOld, 3);
my $mapfile= $basedir."hpo.ann.mapped.txt";
open FH, "> $mapfile";
print FH "${$res}";
close FH;
print "${$stat}";

## ~~ ELAPSED TIME ~~ ##
print "\n\n";
my $span= time - $start;
$span= sprintf("%.4f", $span);
printf "Elapased Time:\t$span\n";

exit;