#!/usr/bin/env perl
# Annotate proteins with hydrophobic amino acids, members of {VWFAMILY}.
#
# Usage: reads a protein sequence from the files named on the command line,
# or from standard input when none are given.  All input lines are joined
# into a single sequence before analysis.
#
# Each residue is reduced to a one-character mask: '-' for a hydrophobic
# residue (V, W, F, A, M, I, L, Y) and '+' for any other letter.  The mask
# is then scanned for segments made of runs of '-' separated by at most two
# '+' characters.  A segment is reported when its length is between 10 and
# 33 characters and at most 43% of it is '+'.
#
# One line is printed per reported segment, with no separators other than
# the literal punctuation shown:
#
#   LENGTH: <PRECURSOR>START[SEGMENT]END score: SCORE
#
# START is the 0-based offset of the segment in the mask, END is START plus
# LENGTH, PRECURSOR is the mask text the match consumed in front of the
# segment, and SCORE is the fraction of '+' characters in the segment.

use strict;
use warnings;

use constant {
    MIN_SEGMENT_LENGTH           => 10,
    MAX_SEGMENT_LENGTH           => 33,
    MAX_NON_HYDROPHOBIC_FRACTION => 0.43,
};

# Convert raw sequence text into a '+'/'-' mask.
#
#   $residues - sequence text; may contain lower case, whitespace, digits or
#               other non-letters, all of which are discarded.  An undefined
#               value is treated as the empty string.
#
# Returns the mask string ('-' = one of VWFAMILY, '+' = any other letter).
sub hydrophobicity_mask {
    my ($residues) = @_;

    my $mask = defined $residues ? $residues : '';

    $mask =~ tr/a-z/A-Z/;       # residues are case-insensitive
    $mask =~ s/[^A-Z]//g;       # drop everything that is not a letter
    $mask =~ tr/VWFAMILY/-/;    # hydrophobic; short replacement list repeats '-'
    $mask =~ tr/A-Z/+/;         # every letter still left is non-hydrophobic

    return $mask;
}

# Scan a '+'/'-' mask and build the report line for every accepted segment.
#
#   $mask - string as produced by hydrophobicity_mask().
#
# Returns a list of newline-terminated report lines, in the order the
# segments occur in the mask (empty list when nothing qualifies).
#
# NOTE(review): the pattern demands at least one precursor character, so a
# segment can never start at offset 0 of the mask.  That is the original
# behaviour and is kept as is.
sub hydrophobic_segment_reports {
    my ($mask) = @_;

    my @reports;

    SEGMENT:
    while (
        $mask =~ m{
            ( [+-]+? )                # $1: precursor, as short as possible
            ( (?: \+{1,2} -+ )+ )     # $2: one or two '+' then a run of '-', repeated
        }gx
      )
    {
        my ( $precursor, $segment ) = ( $1, $2 );
        my $length = length $segment;

        next SEGMENT
          if $length < MIN_SEGMENT_LENGTH || $length > MAX_SEGMENT_LENGTH;

        # tr with an empty replacement list only counts; $segment is not
        # modified.  $length is at least 10 here, so the division is safe.
        my $score = ( $segment =~ tr/+// ) / $length;
        next SEGMENT if $score > MAX_NON_HYDROPHOBIC_FRACTION;

        # pos() is the offset just past the whole match, which is also the
        # end of the segment because the segment is the last thing matched.
        my $start = pos($mask) - $length;

        push @reports,
          sprintf "%s: <%s>%s[%s]%s score: %s\n",
          $length, $precursor, $start, $segment, $start + $length, $score;
    }

    return @reports;
}

# Read the whole sequence from ARGV/STDIN and print the segment reports.
sub main {
    my $residues = '';    # start defined so empty input is handled cleanly

    while ( my $line = <> ) {
        chomp $line;
        $residues .= $line;
    }

    print hydrophobic_segment_reports( hydrophobicity_mask($residues) );

    return;
}

# Run only when executed as a script, so the helpers can be loaded on
# their own (for example from a test).
main() unless caller;