#!/usr/bin/perl
use strict;
use warnings;

###############################################################
# Filter that cleans up a tab-separated table of package rows.
#
# Every input line (from the files named on the command line, or
# from STDIN when there are none) is passed through fix_ion_line();
# rows it accepts are printed, rows it rejects are silently dropped.
#
# Row layout as used by the checks below:
#   field 0   entry name (optionally prefixed with "neko_")
#   field 1   package name
#   field 2   internal version (1 to 3 digits)
#   last      MD5 sum
###############################################################

# Entry names (field 0) that were already processed. The first row
# seen for a given name wins; later rows with the same name are dropped.
my %seen;

# Read all input, print every row that fix_ion_line() keeps.
sub main {
    while ( my $line = <> ) {
        my $fixed = fix_ion_line( $line, $. );
        next unless defined $fixed;    # duplicate, blank or malformed row
        print $fixed;
    }
    return 0;
}

###############################################################
# Helpers below

# fix_ion_line( $line [, $line_no] )
#
# Normalises one input row.
#
#   $line     raw input line, with or without trailing newline
#   $line_no  1-based line number of $line; defaults to $. so that the
#             sub can still be called with the line alone from a read loop
#
# Returns the cleaned row as a tab-joined string terminated by "\n",
# or undef when the row must be skipped (blank, duplicate entry name,
# or not 4-5 fields after repair).
#
# Line 1 is treated as the header: it only gets the basic polishing
# (text removal, space-to-tab, re-join) and is never recorded in %seen.
sub fix_ion_line {
    my ( $line, $line_no ) = @_;
    $line_no = $. unless defined $line_no;

    chomp $line;

    # Basic polishing: drop the first "no dependencies" marker and turn
    # every space into a tab so that split() sees uniform separators.
    $line =~ s/no dependencies//;
    $line =~ s/ /\t/g;

    # Split into fields and start work. split() already discards truly
    # empty trailing fields.
    my @fields = split /\t/, $line;

    # Header row: pass through without any of the repairs below.
    if ( defined $line_no and $line_no == 1 ) {
        return join( "\t", @fields ) . "\n";
    }

    # Remove trailing fields that hold nothing but whitespace (split()
    # keeps those, e.g. a stray "\r"). The @fields guard stops the loop
    # on blank lines, and testing /\S/ alone keeps a literal "0" field.
    while ( @fields and $fields[-1] !~ /\S/ ) {
        pop @fields;
    }

    # Nothing left: blank or whitespace-only line.
    return unless @fields;

    # Skip duplicate entries; the first one takes precedence.
    return if $seen{ $fields[0] }++;

    # A single-field row can never be repaired into a 4-5 field row
    # (at most the internal version could be added), so reject it now
    # instead of probing fields that do not exist.
    return if @fields < 2;

    # Remove duplicate MD5 sums (last two fields identical).
    if ( $fields[-1] eq $fields[-2] ) {
        pop @fields;
    }

    # See if package name is missing, and if yes, add one. Field 1 being
    # all digits (a version) or 32 characters long (an MD5 sum) means the
    # name was left out.
    # NOTE: This will work as long as the row with no package name is
    # not also one in which package name contains "-" as part of the name.
    # (If it does, then the s/// below which removes everything after "-"
    # will produce and insert the incomplete name, e.g. c-ares would be "c".)
    if ( $fields[1] =~ /^\d+$/ or length( $fields[1] ) == 32 ) {
        my $pkg_name = $fields[0];
        $pkg_name =~ s/^neko_//;
        $pkg_name =~ s/\-.*//;
        splice( @fields, 1, 0, $pkg_name );
    }

    # Insert the internal version number if missing; a fixed "100" is used.
    unless ( defined $fields[2] and $fields[2] =~ /^\d{1,3}$/ ) {
        splice( @fields, 2, 0, 100 );
    }

    # Check for general well-formedness of rows: 4 or 5 fields.
    return unless @fields >= 4 and @fields <= 5;

    return join( "\t", @fields ) . "\n";
}

# Run the filter only when executed as a script, so the file can also be
# loaded (require/do) to call fix_ion_line() directly.
main() unless caller;

1;