#!/usr/bin/perl -w
#
# Convert a FASTQ file into a "collapsed" FASTA file.
#
# Reads are taken four lines at a time (header, sequence, separator, quality).
# Sequences whose length lies outside [min, max] are discarded; identical
# sequences are counted, and each distinct sequence is written once as
#
#     >seq<rank>_x<count>
#     <sequence>
#
# ordered by decreasing count.  Input and output file names ending in ".gz"
# are read / written through the external `gzip` program.

use strict;
use warnings;
use Getopt::Long;

my $version = "1.2";
my $options = get_options($version);

my $fastqInput  = $$options{'i'};
my $fastaOutput = $$options{'o'};
my $minLen      = $$options{'m'};
my $maxLen      = $$options{'l'};

# Length window defaults (documented in the usage message).
$minLen //= 25;
$maxLen //= 35;

die "minimum length ($minLen) is greater than maximum length ($maxLen)\n"
    if $minLen > $maxLen;

my $in  = open_fastq_reader($fastqInput);
my $out = open_fasta_writer($fastaOutput);

# sequence => number of reads carrying exactly that sequence
my %seqs;

RECORD:
while (defined(my $seqname = <$in>)) {
    # One FASTQ record is four lines; only the sequence line is used.
    my $seq    = <$in>;
    my $strand = <$in>;
    my $qual   = <$in>;

    if (!defined $seq) {
        # End of file in the middle of a record.  A trailing blank line is
        # harmless; anything else means the file is truncated.
        warn "incomplete FASTQ record at end of $fastqInput\n"
            if $seqname =~ /\S/;
        last RECORD;
    }

    # Strips the line terminator as well as any stray whitespace.
    $seq =~ s/\s+//g;

    my $len = length $seq;
    next RECORD if $len < $minLen or $len > $maxLen;

    $seqs{$seq}++;
}

# For the gzip pipe, close() also reports a non-zero exit status of gzip.
close($in)
    or die "error while reading $fastqInput (status $?): $!\n";

# Most abundant sequence first.  Ties are broken on the sequence itself so
# that the numbering is reproducible from run to run.
my $n = 0;
for my $key (sort { $seqs{$b} <=> $seqs{$a} or $a cmp $b } keys %seqs) {
    $n++;
    print {$out} ">seq${n}_x$seqs{$key}\n$key\n";
}

# Buffered write errors (and gzip failures) only surface at close time.
close($out)
    or die "error while writing $fastaOutput (status $?): $!\n";

# Open the FASTQ input for reading and return the file handle.
# A name ending in ".gz" is decompressed through `gzip -dc`; the list form
# of open is used so the file name is never interpreted by a shell.
# Dies if the file (or the gzip process) cannot be opened.
sub open_fastq_reader {
    my ($path) = @_;

    my $fh;
    if ($path =~ /\.gz$/) {
        open($fh, '-|', 'gzip', '-dc', '--', $path)
            or die "cannot open $path: $!\n";
    }
    else {
        open($fh, '<', $path)
            or die "cannot open $path: $!\n";
    }
    return $fh;
}

# Open the FASTA output for writing and return the file handle.
# A name ending in ".gz" is compressed through `gzip`; the redirection needs
# a shell, so the file name is single-quoted for it.
# Dies if the file (or the gzip process) cannot be opened.
sub open_fasta_writer {
    my ($path) = @_;

    my $fh;
    if ($path =~ /\.gz$/) {
        # POSIX shell quoting: close the quote, emit an escaped quote, reopen.
        (my $quoted = $path) =~ s/'/'\\''/g;
        open($fh, '|-', "gzip > '$quoted'")
            or die "cannot open $path: $!\n";
    }
    else {
        open($fh, '>', $path)
            or die "cannot open $path: $!\n";
    }
    return $fh;
}

# Build the usage text.
# The version number is accepted for interface compatibility; the text does
# not currently display it.
# Returns the usage message as a string.
sub usage_message {
    my ($version_num) = @_;

    my $usage_message = "\nConvert fastq to collapsed fasta:
Usage: perl $0 -i <fastq> -o <fasta> -m <min_len> -l <max_len>
    <fastq>   : fastq file without adaptor (.fq .fastq).
    <fasta>   : output fasta file (.fa .fasta).
    <min_len> : discard reads shorter than minimum length (integer, default: 25).
    <max_len> : discard reads longer than maximum length (integer, default: 35).
\n";
    return $usage_message;
}

# Parse the command line.
# Recognised options: -help, -i <file>, -o <file>, -m <int>, -l <int>.
# Dies with the usage message when parsing fails, when -help is given, or
# when the mandatory -i / -o options are missing.
# Returns a reference to the hash of parsed options.
sub get_options {
    my ($v_num) = @_;

    my %options = ();
    my $parsed_ok = GetOptions(\%options,
        'help',
        'i=s',
        'o=s',
        'm=i',
        'l=i',
    );

    if (   !$parsed_ok
        || $options{'help'}
        || !defined $options{'i'}
        || !defined $options{'o'})
    {
        my $usage_message = usage_message($v_num);
        die "$usage_message\n";
    }
    return \%options;
}