Difference between revisions of "Perl example code"
PaulBoddie (talk | contribs) (Fixed descriptions of exercise 11.) |
|||
(6 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
These are code examples that accompany the [[MBV3070]] Perl lectures. | These are code examples that accompany the [[MBV3070]] Perl lectures. | ||
− | Copy and paste the text to a text file called | + | Copy and paste the text to a text file called Exercise1.plx |
And then, in the command prompt, type | And then, in the command prompt, type | ||
<pre> | <pre> | ||
− | perl | + | perl Exercise1.plx |
</pre> | </pre> | ||
Line 12: | Line 12: | ||
− | + | Exercise1.plx | |
<pre> | <pre> | ||
Line 24: | Line 24: | ||
</pre> | </pre> | ||
− | + | Exercise2.plx | |
<pre> | <pre> | ||
Line 46: | Line 46: | ||
</pre> | </pre> | ||
− | + | Exercise3.plx | |
<pre> | <pre> | ||
Line 74: | Line 74: | ||
</pre> | </pre> | ||
− | + | Exercise4.plx | |
<pre> | <pre> | ||
Line 99: | Line 99: | ||
</pre> | </pre> | ||
− | + | Exercise5.plx | |
<pre> | <pre> | ||
Line 119: | Line 119: | ||
</pre> | </pre> | ||
− | + | Exercise6.plx | |
<pre> | <pre> | ||
Line 154: | Line 154: | ||
− | + | Exercise7.plx | |
<pre> | <pre> | ||
Line 172: | Line 172: | ||
</pre> | </pre> | ||
− | + | Exercise8.plx | |
<pre> | <pre> | ||
Line 218: | Line 218: | ||
</pre> | </pre> | ||
− | + | Exercise9.plx | |
<pre> | <pre> | ||
Line 246: | Line 246: | ||
− | + | Exercise10.plx | |
<pre> | <pre> | ||
Line 295: | Line 295: | ||
− | + | Exercise11.plx | |
<pre> | <pre> | ||
Line 304: | Line 304: | ||
#TASK: Print a list of all Perl programs we did so far. | #TASK: Print a list of all Perl programs we did so far. | ||
#These files can be found in your current directory and | #These files can be found in your current directory and | ||
− | #they | + | #they contain the word "exercise" |
print "List of programs we made today:\n"; | print "List of programs we made today:\n"; | ||
Line 315: | Line 315: | ||
#use foreach to step through the array | #use foreach to step through the array | ||
− | #if a file contains word | + | #if a file contains word "exercise" print it out |
foreach my $file (@files){ | foreach my $file (@files){ | ||
− | if($file =~ / | + | if($file =~ /exercise/){ |
print "$file\n"; | print "$file\n"; | ||
} | } | ||
Line 324: | Line 324: | ||
</pre> | </pre> | ||
+ | Exercise 11 - an alternative | ||
+ | <pre> | ||
+ | #!/usr/bin/perl | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | #TASK: Print a list of all Perl programs we did so far. | ||
+ | #These files can be found in your current directory and | ||
+ | #they contain the word "exercise" | ||
+ | |||
+ | print "List of programs we made today:\n"; | ||
+ | |||
+ | #system call for 'ls' function - the result goes into a string | ||
+ | open(LISTING, "dir|"); | ||
+ | |||
+ | #while we can read a line from the file... | ||
+ | #if a file contains word "exercise" print it out | ||
+ | |||
+ | while (<LISTING>) { | ||
+ | my $file = $_; | ||
+ | if($file =~ /exercise/){ | ||
+ | print "$file\n"; | ||
+ | } | ||
+ | } | ||
+ | close(LISTING); | ||
+ | </pre> | ||
− | + | Exercise12.plx | |
<pre> | <pre> | ||
Line 374: | Line 400: | ||
− | + | Exercise13.plx | |
<pre> | <pre> | ||
Line 385: | Line 411: | ||
#open input and output files | #open input and output files | ||
− | open(IN," | + | open(IN,"exercise13input.txt"); |
− | open(OUT,"> | + | open(OUT,">exercise13output.txt"); |
#read the input file line-by-line | #read the input file line-by-line | ||
Line 405: | Line 431: | ||
</pre> | </pre> | ||
− | + | exercise13input.txt | |
− | + | ||
− | + | This is the regex that we are looking for in the lines below: | |
− | |||
+ | /^ATG?C*[ATCG]+?A{3,10}$/ | ||
+ | And this is our input file: | ||
+ | <pre> | ||
ATGCCCAA | ATGCCCAA | ||
ATGCCCAAAA | ATGCCCAAAA | ||
Line 419: | Line 447: | ||
</pre> | </pre> | ||
+ | Exercise 13 - an alternative | ||
+ | |||
+ | <pre> | ||
+ | #!/usr/bin/perl | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | my $line; | ||
+ | |||
+ | #read the input file line-by-line | ||
+ | #for each line ask if it matches the regex | ||
+ | #print it if it matches | ||
+ | while($line = <>){ | ||
+ | chomp $line; | ||
+ | if ($line =~ /^ATG?C*[ATCG]+?A{3,10}$/) { | ||
+ | print "$line\n"; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | exit(); | ||
+ | </pre> | ||
+ | |||
+ | Exercise14.plx | ||
+ | |||
+ | <pre> | ||
+ | #!/usr/bin/perl | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | |||
+ | #Open a web browser | ||
+ | #Go to http://search.cpan.org/ | ||
+ | #Type in “bioperl Tools BLAST” | ||
+ | #Follow the link to Bio::Tools::Blast | ||
+ | #Browse through this page and the example code | ||
+ | |||
+ | |||
+ | |||
+ | #bioperl example code | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | #make the bioperl module (class) accessible to your program | ||
+ | use Bio::Seq; | ||
+ | |||
+ | print"ok - ready to use Bio::Seq"; | ||
+ | </pre> | ||
+ | |||
+ | Exercise 15 - install BioPerl | ||
+ | |||
+ | Useful references: | ||
+ | |||
+ | * http://www.bioperl.org/wiki/Installing_BioPerl | ||
+ | * http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix#INSTALLING_BIOPERL_THE_EASY_WAY_USING_CPAN | ||
+ | |||
+ | Using ActivePerl's Perl Package Manager: | ||
+ | |||
+ | <ol> | ||
+ | <li>At the command line prompt type... | ||
+ | <pre>ppm</pre></li> | ||
+ | <li>At the <tt>ppm</tt> prompt, type... | ||
+ | <pre>search bioperl</pre></li> | ||
+ | <li>Then type... | ||
+ | <pre>install bioperl</pre></li> | ||
+ | </ol> | ||
+ | |||
+ | Exercise16.plx | ||
<pre> | <pre> | ||
+ | #! /usr/local/bin/perl | ||
+ | # Create and run a program which creates a Seq object and manipulates it: | ||
+ | |||
+ | use Bio::Seq; | ||
+ | |||
+ | # initiation of Seq object | ||
+ | $seq = Bio::Seq->new('-seq' =>'CGGCGTCTGGAACTCTATTTTAAGAACCTCTCAAAACGAAACAAGC', | ||
+ | '-desc' => 'An example', | ||
+ | '-display_id' => 'NM_005476', | ||
+ | '-accession_number' => '6382074', | ||
+ | '-moltype' => 'dna'); | ||
+ | |||
+ | # sequence manipulations | ||
+ | $aa = $seq -> moltype(); # one of 'dna','rna','protein' | ||
+ | $ab = $seq -> subseq(5,10); # part of the sequence as string | ||
+ | |||
+ | $ac = $seq -> revcom; # returns an object of the reverse complemented sequence | ||
+ | $ac1 = $ac -> seq(); | ||
+ | |||
+ | $ad = $seq -> translate; # returns an object of the sequence translation | ||
+ | $ad1 = $ad -> seq(); | ||
+ | |||
+ | $ae = $seq -> translate(undef,undef,1); # returns an object of the sequence translation (using frame 1) (0,1,2 can be used)? | ||
+ | $ae1 = $ae -> seq(); | ||
+ | |||
+ | print "Molecule Type: $aa\n"; | ||
+ | print "Sequence from 5 to 10: $ab\n"; | ||
+ | print "Reverse complemented sequence: $ac1\n"; | ||
+ | print "Translated sequence: $ad1\n"; | ||
+ | print "Translated sequence (using frame 1): $ae1\n"; | ||
+ | </pre> | ||
+ | |||
+ | Exercise17.plx | ||
+ | |||
+ | <pre> | ||
+ | |||
+ | Check out the code of several examples | ||
+ | using BioPerl at: | ||
+ | |||
+ | |||
+ | http://bip.weizmann.ac.il/course/prog2/perlBioinfo/ | ||
+ | |||
+ | |||
+ | </pre> | ||
+ | |||
+ | Another answer to exercise 12 | ||
+ | |||
+ | <pre> | ||
+ | #!/usr/bin/perl | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | #TASK: demonstrate the use of “my” in setting the | ||
+ | #scope of a variable | ||
+ | my $some_variable = 100; | ||
+ | |||
+ | #body of the main program with the function call | ||
+ | print "the value of some_variable is: $some_variable\n"; | ||
+ | subroutine1(); | ||
+ | print "but here, some_variable is still: $some_variable\n"; | ||
+ | |||
+ | #subroutine using $some_variable | ||
+ | sub subroutine1{ | ||
+ | my $some_variable = 0; | ||
+ | print "in subroutine1,some_variable is: $some_variable\n"; | ||
+ | } | ||
+ | |||
+ | |||
+ | #what happens if you comment out "use strict" and | ||
+ | #remove "my" from lines 7 and 16 | ||
+ | </pre> | ||
+ | |||
+ | |||
+ | Another answer to exercise 13 | ||
+ | |||
+ | <pre> | ||
+ | #!/usr/bin/perl | ||
+ | use strict; | ||
+ | use warnings; | ||
+ | |||
+ | #TASK: check your answers to the regex exercise | ||
+ | |||
+ | #open input and output files | ||
+ | open(IN,"myanswers.txt"); | ||
+ | |||
+ | |||
+ | #read the input file line-by-line | ||
+ | #for each line test if it matches a regular expression | ||
+ | while(<IN>){ | ||
+ | chomp; | ||
+ | my $is_correct = does_it_match($_); | ||
+ | if ($is_correct){ | ||
+ | print "$_ is a match\n"; | ||
+ | } | ||
+ | else{ | ||
+ | print "$_ is NOT a match\n"; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | #close input file and exit | ||
+ | close(IN); | ||
+ | exit(); | ||
+ | |||
+ | |||
+ | #does it match | ||
+ | sub does_it_match{ | ||
+ | my($answer) = @_; | ||
+ | my $is_correct = 0; | ||
+ | if ($answer =~ m/^ATG?C*[ATCG]+?A{3,10}$/){ | ||
+ | $is_correct = 1; | ||
+ | } | ||
+ | return $is_correct; | ||
+ | } | ||
+ | |||
</pre> | </pre> |
Latest revision as of 15:22, 22 November 2011
These are code examples that accompany the MBV3070 Perl lectures.
Copy and paste the text into a text file called Exercise1.plx. Then, at the command prompt, type
perl Exercise1.plx
to run the example script.
Exercise1.plx
#!/usr/bin/perl
use strict;
use warnings;

#Print a greeting, then a string that uses the \n (newline)
#and \t (tab) escape sequences.
my $greeting = "My first Perl program\n";
print $greeting;

#Exercise: try single quotes instead of double quotes here
my $escapes_demo = "First line\nsecond line and there is a tab\there\n";
print $escapes_demo;
Exercise2.plx
#!/usr/bin/perl
use strict;
use warnings;

#give $x and $y their starting values and print them out
my ($x, $y) = (4, 2);
print "x is $x and y is $y\n";

#an arithmetic expression: ** (power) is evaluated before +
my $z = $x + $y**3;
$x += 1;
print "x is $x and z is $z\n";

#arithmetic written inside a double-quoted string
print "add 3 to $z: $z + 3\n"; #did it work?

#arithmetic handed to print as a separate value
my $z_plus_three = $z + 3;
print "add 3 to $z:", $z_plus_three, "\n";
Exercise3.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Concatenate two given sequences,
#find the length of the new sequence and
#print out the second codon of the sequence

#the two strings to be joined
my $sequence = "GATTACACAT";
my $tail     = "AAAA";

#join() with an empty separator glues the strings together
my $extended = join('', $sequence, $tail);

#report the new sequence together with its length
my $extended_length = length $extended;
print "Modified DNA: $extended has length $extended_length\n";

#codons are 3 letters long, so the second codon starts at offset 3
my $codon_size   = 3;
my $second_codon = substr($extended, $codon_size, $codon_size);
print "Second codon is $second_codon\n";
Exercise4.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Ask the user for her name and age
#and calculate her age in days

#get a string from the keyboard
print "Please enter your name\n";
my $name = <STDIN>;
die "No name was entered\n" unless defined $name;

#getting rid of the new line character
#try leaving this line out
chomp($name);

#prompt the user for his/her age
#get a number from the keyboard; <STDIN> is used (not <>) so that the
#answer always comes from the keyboard, even when the script is
#started with file names on the command line
print "$name please enter your age\n";
my $age = <STDIN>;
die "No age was entered\n" unless defined $age;
chomp($age);

#refuse anything that is not a plain non-negative number,
#otherwise the multiplication below would silently treat it as 0
die "Age must be a number, but got '$age'\n"
    unless $age =~ /^\d+(?:\.\d+)?$/;

#calculate age in days
my $age_in_days = $age * 365;
print "You are $age_in_days days old\n";
Exercise5.plx
#!/usr/bin/perl
use strict;
use warnings;

#build the array of the four bases; qw() quotes each word for us
my @bases = qw(A C G T);

#print the first and the third element (indices start at 0)
print "$bases[0]$bases[2]\n";

#print every element, one straight after the other
print join('', @bases), "\n"; #try with double quotes

#an array assigned to a scalar gives the number of elements
my $how_many = @bases;
print "$how_many\n";
Exercise6.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Count the frequency of base G
#in a given DNA sequence

my $DNA = "GATTACACAT";

#running total of G bases seen so far
my $g_total = 0;

#visit every position of the sequence, from 0 to length-1,
#and add one to the total each time the letter there is a G
for my $position (0 .. length($DNA) - 1) {
    my $letter = substr($DNA, $position, 1);
    $g_total++ if $letter eq "G";
}

#print out the number of Gs
print "There are $g_total G bases\n";
Exercise7.plx
#!/usr/bin/perl
use strict;
use warnings;

#fill a 20-element array with the numbers 0,...19
#using the range operator
my @array = (0 .. 19);

#print the elements one per line; inside the loop
#each element is available as $_
print "$_\n" foreach @array;
Exercise8.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: For a given DNA sequence find its RNA transcript,
#find its reverse complement and check if
#the reverse complement contains a start codon

my $DNA = "GATTACACAT";

#transcribe DNA to RNA: copy the sequence, then turn every T into U
(my $RNA = $DNA) =~ s/T/U/g;
print "RNA sequence is $RNA\n";

#first attempt at the reverse complement, using the substitution operator:
#reverse the sequence, then run four substitutions one after another
my $rcDNA = scalar reverse $DNA;
for ($rcDNA) {
    s/T/A/g;
    s/A/T/g;
    s/G/C/g;
    s/C/G/g;
}
print "Reverse complement of $DNA is $rcDNA\n"; #did it work?

#second attempt, using the translation operator:
#reverse the sequence, then swap all four letters in a single pass
$rcDNA = scalar reverse $DNA;
$rcDNA =~ tr/ACGT/TGCA/;
print "Reverse complement of $DNA is $rcDNA\n";

#look for a start codon in the reverse complement
print $rcDNA =~ /ATG/ ? "Start codon found\n" : "Start codon not found\n";
Exercise9.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Make a subroutine that calculates
#the reverse complement of a DNA sequence and call it
#from the main program

#main program: hand the sequence to the subroutine and print the answer
my $DNA = "GATTACACAT";
print revcomp($DNA), "\n";
exit;

#revcomp: takes one DNA string (upper-case A, C, G, T) and
#returns its reverse complement as a new string
sub revcomp {
    my $sequence = shift;

    #reverse the letters, then swap each base for its partner
    my $complement = scalar reverse $sequence;
    $complement =~ tr/ACGT/TGCA/;

    return $complement;
}
Exercise10.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read DNA sequences from the 'DNAseq.txt' input file -
#there is one sequence per line
#For each sequence find the reverse complement and
#print it to the 'DNAseqRC.txt' output file

my $input_file  = "DNAseq.txt";
my $output_file = "DNAseqRC.txt";

#open input and output files
#stop with a clear message if a file cannot be opened, instead of
#silently reading nothing or writing nowhere
open(my $in, '<', $input_file)
    or die "Cannot open $input_file for reading: $!\n";
open(my $out, '>', $output_file)
    or die "Cannot open $output_file for writing: $!\n";

#read the input file line-by-line
#for each line find the reverse complement
#print it in the output file
while (my $line = <$in>) {
    chomp $line;
    my $rcDNA = revcomp($line);
    print {$out} "$rcDNA\n";
}

#close input and output files
#errors while writing may only show up when the output file is closed
close($in);
close($out) or die "Cannot finish writing $output_file: $!\n";
exit();

#definition of the function for reverse complement:
#takes one DNA string (upper-case A, C, G, T) and returns
#its reverse complement
sub revcomp {
    my ($DNAin) = @_;
    my $DNAout = reverse($DNAin);
    $DNAout =~ tr/ACGT/TGCA/;
    return $DNAout;
}
DNAseq.txt
ACGACTAGCATCAGCAT AAAAATGATCGACTATATAGCATA AAAGGTGCATCAGCATGG
Exercise11.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#they contain the word "exercise"

print "List of programs we made today:\n";

#run the directory-listing command 'dir' (use 'ls' on Unix)
#the result goes into a string
my $listing = `dir`; #these are back quotes
die "Could not run the 'dir' command\n" unless defined $listing;

#split the string to get individual files
my @files = split(/\n/, $listing);

#use foreach to step through the array
#if a file contains word "exercise" print it out
#the /i makes the match ignore upper/lower case, so that
#"Exercise1.plx" is found as well as "exercise13input.txt"
foreach my $file (@files) {
    if ($file =~ /exercise/i) {
        print "$file\n";
    }
}
Exercise 11 - an alternative
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Print a list of all Perl programs we did so far.
#These files can be found in your current directory and
#they contain the word "exercise"

print "List of programs we made today:\n";

#run the directory-listing command 'dir' (use 'ls' on Unix)
#the '-|' mode lets us read the command's output through a
#filehandle, one line at a time, as if it were a file
open(my $listing, '-|', 'dir')
    or die "Could not run the 'dir' command: $!\n";

#while we can read a line from the command...
#if a file contains word "exercise" print it out
#the /i makes the match ignore upper/lower case, so that
#"Exercise1.plx" is found as well
while (my $file = <$listing>) {
    #remove the newline, otherwise every name is followed by a blank line
    chomp $file;
    if ($file =~ /exercise/i) {
        print "$file\n";
    }
}
close($listing);
Exercise12.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Write a program that has one function.
#Use a variable named "$some_variable" in this
#function and in the main body of the program.
#Prove that you can alter the value of
#$some_variable in the function without
#changing the value of $some_variable in
#the main body of the program

#main routine: this $some_variable belongs to the main body
my $some_variable = 10;
print "i am in the main routine and some_variable is: $some_variable\n";

#pass the value to the subroutine and keep whatever it hands back
my $some_variable_changed_by_subroutine = subroutine($some_variable);

print "i am in the main routine some_variable is now: $some_variable\n";
print "i am in the main routine some_variable_changed_by_subroutine is: $some_variable_changed_by_subroutine\n";

#subroutine: has its own, separate variable that happens to share
#the name $some_variable; receives one number and returns it plus one
sub subroutine {
    my ($some_variable) = @_;
    print "i am in the subroutine and some_variable is: $some_variable\n";

    $some_variable += 1;
    print "i am in the subroutine and some_variable is now: $some_variable\n";

    return $some_variable;
}
Exercise13.plx
#!/usr/bin/perl
use strict;
use warnings;

#TASK: Read lines from input file -
#print lines that match a regular expression

my $input_file  = "exercise13input.txt";
my $output_file = "exercise13output.txt";

#open input and output files
#stop with a clear message if a file cannot be opened
open(my $in, '<', $input_file)
    or die "Cannot open $input_file for reading: $!\n";
open(my $out, '>', $output_file)
    or die "Cannot open $output_file for writing: $!\n";

#read the input file line-by-line
#for each line ask if it matches the regex
#print it in the output file and on the screen
while (my $line = <$in>) {
    chomp $line;
    if ($line =~ /^ATG?C*[ATCG]+?A{3,10}$/) {
        print {$out} "$line\n";
        print "$line\n";
    }
}

#close input and output files
#errors while writing may only show up when the output file is closed
close($in);
close($out) or die "Cannot finish writing $output_file: $!\n";
exit();
exercise13input.txt
This is the regex that we are looking for in the lines below:
/^ATG?C*[ATCG]+?A{3,10}$/
And this is our input file:
ATGCCCAA ATGCCCAAAA ATGCCCAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
Exercise 13 - an alternative
#!/usr/bin/perl
use strict;
use warnings;

#read the input line-by-line; <> takes its lines from the files named
#on the command line, or from the keyboard if no file is named
#skip every line that does not match the regex
#print the lines that do match
while (defined(my $line = <>)) {
    chomp $line;
    next unless $line =~ /^ATG?C*[ATCG]+?A{3,10}$/;
    print "$line\n";
}

exit();
Exercise14.plx
#!/usr/bin/perl
use strict;
use warnings;

#Before running this program:
#Open a web browser
#Go to http://search.cpan.org/
#Type in "bioperl Tools BLAST"
#Follow the link to Bio::Tools::Blast
#Browse through this page and the example code

#bioperl example code

#make the bioperl module (class) accessible to your program
#if BioPerl is not installed, perl stops at this line with an error
use Bio::Seq;

print "ok - ready to use Bio::Seq\n";
Exercise 15 - install BioPerl
Useful references:
- http://www.bioperl.org/wiki/Installing_BioPerl
- http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix#INSTALLING_BIOPERL_THE_EASY_WAY_USING_CPAN
Using ActivePerl's Perl Package Manager:
- At the command line prompt type...
ppm
- At the ppm prompt, type...
search bioperl
- Then type...
install bioperl
Exercise16.plx
#! /usr/local/bin/perl
# Create and run a program which creates a Seq object and manipulates it:
use strict;
use warnings;

use Bio::Seq;

# initiation of Seq object
# '-alphabet' is the current name of the option formerly called '-moltype'
my $seq = Bio::Seq->new(
    '-seq'              => 'CGGCGTCTGGAACTCTATTTTAAGAACCTCTCAAAACGAAACAAGC',
    '-desc'             => 'An example',
    '-display_id'       => 'NM_005476',
    '-accession_number' => '6382074',
    '-alphabet'         => 'dna',
);

# sequence manipulations
my $alphabet = $seq->alphabet();     # one of 'dna','rna','protein'
my $fragment = $seq->subseq(5, 10);  # part of the sequence as string

# revcom returns a new sequence object; seq() gives its letters as a string
my $revcom_object = $seq->revcom;
my $revcom_string = $revcom_object->seq();

# translate returns a new sequence object holding the translation
my $protein_object = $seq->translate;
my $protein_string = $protein_object->seq();

# the third positional argument of translate selects the reading frame
# NOTE(review): BioPerl documents frames 0, 1 and 2 - confirm against the
# installed version
my $frame1_object = $seq->translate(undef, undef, 1);
my $frame1_string = $frame1_object->seq();

print "Molecule Type: $alphabet\n";
print "Sequence from 5 to 10: $fragment\n";
print "Reverse complemented sequence: $revcom_string\n";
print "Translated sequence: $protein_string\n";
print "Translated sequence (using frame 1): $frame1_string\n";
Exercise17.plx
Check out the code of several examples using BioPerl at: http://bip.weizmann.ac.il/course/prog2/perlBioinfo/
Another answer to exercise 12
#!/usr/bin/perl
use strict;
use warnings;

#TASK: demonstrate the use of "my" in setting the
#scope of a variable
my $some_variable = 100;

#main program: show the value, call the subroutine, show the value again
printf "the value of some_variable is: %s\n", $some_variable;
subroutine1();
printf "but here, some_variable is still: %s\n", $some_variable;

#subroutine1 declares its own $some_variable with "my";
#it takes no arguments and only prints that private value
sub subroutine1 {
    my $some_variable = 0;
    printf "in subroutine1,some_variable is: %s\n", $some_variable;
}

#what happens if you comment out "use strict" and
#remove "my" from both declarations of $some_variable
#(the one at the top of the program and the one inside subroutine1)?
Another answer to exercise 13
#!/usr/bin/perl
use strict;
use warnings;

#TASK: check your answers to the regex exercise

#open the input file (the results are printed on the screen)
#stop with a clear message if the file cannot be opened
my $answers_file = "myanswers.txt";
open(my $in, '<', $answers_file)
    or die "Cannot open $answers_file for reading: $!\n";

#read the input file line-by-line
#for each line test if it matches a regular expression
while (my $answer = <$in>) {
    chomp $answer;
    if (does_it_match($answer)) {
        print "$answer is a match\n";
    }
    else {
        print "$answer is NOT a match\n";
    }
}

#close input file and exit
close($in);
exit();

#does_it_match: takes one string and returns 1 if the whole string
#matches the regex of the exercise, otherwise 0
sub does_it_match {
    my ($answer) = @_;
    return $answer =~ m/^ATG?C*[ATCG]+?A{3,10}$/ ? 1 : 0;
}