在生物信息分析过程中,如果遇到特别大的fa文件,往往需要对文件进行分割。但是如果只是按照序列条数分割,由于各条序列的长度不同,分割之后的文件大小可能差异很大,进而带来一些问题。因此,按照文件大小均分是很有意义的。
小编写了一个按大小均分fa文件的Perl程序,希望可以帮到大家。
#!/usr/bin/perl -w
# Split a FASTA file into at most -num output files whose total sequence
# lengths are roughly equal (splitting by record count alone can give very
# unevenly sized files).
#
# Output files are written to -od as "<input basename>.<index>.fa", with the
# index starting at 1. Each record is written as ">id" followed by the whole
# sequence on a single line; only the first whitespace-delimited token of the
# header is kept as the id.
use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use File::Path qw(make_path);

my $BEGIN_TIME = time();
my $version    = "1.0.0";

#######################################################################################
# ------------------------------------------------------------------
# GetOptions
# ------------------------------------------------------------------
my ($fa, $num, $outdir);
GetOptions(
    "help|?" => \&USAGE,
    "fa:s"   => \$fa,
    "num:s"  => \$num,
    "od:s"   => \$outdir,
) or USAGE();
USAGE() unless ($fa and $num);

# -num is used as a divisor below, so it must be a strictly positive integer.
die "-num must be a positive integer, got '$num'\n"
    unless $num =~ /\A[0-9]+\z/ and $num > 0;

$outdir ||= "./";
# make_path avoids passing the user-supplied directory name through a shell.
make_path($outdir) unless -d $outdir;
die "Cannot create output directory $outdir\n" unless -d $outdir;
my $faname = basename($fa);

############################ main program #############################################
my ($records, $totallen) = read_fasta($fa);
warn "No sequences found in $fa, nothing to write\n" unless @$records;

# Target number of bases per output file. The +1 makes every completed chunk
# strictly longer than total/num, so at most $num files are produced.
my $splitsize = int($totallen / $num) + 1;

my $index = 1;    # suffix of the output file currently being filled
my $mylen = 0;    # bases written to the current output file so far
my ($out, $outfile);
for my $record (@$records) {
    my ($id, $seq) = @$record;

    # Open the next output file lazily so no empty trailing file is created.
    # '>' (not '>>') so a re-run replaces earlier output instead of
    # appending duplicate records to it.
    unless ($out) {
        $outfile = "$outdir/$faname.$index.fa";
        open($out, '>', $outfile) or die "Cannot write $outfile: $!\n";
    }

    # Always write the record first, then decide whether the chunk is full;
    # this way the record that crosses the size threshold is not lost.
    print {$out} ">", $id, "\n", $seq, "\n";
    $mylen += length $seq;

    if ($mylen >= $splitsize) {
        close($out) or die "Cannot close $outfile: $!\n";
        undef $out;
        $mylen = 0;
        $index++;
    }
}
if ($out) {
    close($out) or die "Cannot close $outfile: $!\n";
}

print STDOUT "\nDone. Total elapsed time : ", time() - $BEGIN_TIME, "s\n";

#######################################################################################
# ------------------------------------------------------------------
# sub function
# ------------------------------------------------------------------

# Read every record of a FASTA file into memory.
#   $path - FASTA file to read.
# Returns a two-element list: a reference to an array of [id, sequence] pairs
# in file order, and the summed length of all sequences.
# Dies if the file cannot be opened.
sub read_fasta {
    my ($path) = @_;

    open(my $in, '<', $path) or die "Cannot open $path: $!\n";

    # Records are delimited by a '>' at the start of a line, so a '>' that
    # occurs inside a header description does not split the record.
    # Localised so the separator is restored when this sub returns.
    local $/ = "\n>";

    my @records;
    my $total = 0;
    while (my $chunk = <$in>) {
        chomp $chunk;            # drops the trailing "\n>" separator
        $chunk =~ s/\A>//;       # the first record still carries its own '>'
        next if $chunk =~ /^$/;  # nothing before the first header

        my ($header, @lines) = split(/\n/, $chunk);
        my ($id) = split(/\s+/, defined $header ? $header : '');
        $id = '' unless defined $id;

        my $seq = join("", @lines);
        $seq =~ tr/\r//d;        # tolerate CRLF line endings

        push @records, [$id, $seq];
        $total += length $seq;
    }
    close($in);

    return (\@records, $total);
}

# Placeholder kept from the original script; it has no body and is not called.
sub GetTime {
}

# Print the help text and exit. Also installed as the -help/-? callback.
sub USAGE {
    my $usage = <<"USAGE";
Program: $Script
Version: $version
Usage:   perl $Script -fa <file> -num <int> [-od <dir>]
  Options:
  -fa  <file>  Input fa file, forced
  -num <int>   Number of output files to split into, forced
  -od  <dir>   Dir of output file, default ./
  -h           Help

USAGE
    print $usage;
    exit;
}
欢迎关注生信