#!/usr/bin/perl
# 2009-04-09
# Converts the R clustering output format into the XML format for clustering (for Ken).
# Bryan Bishop (kanzure@gmail.com) http://heybryan.org/ Automated Design Lab UT Austin
#
# Usage: perl SCRIPT FILENAME_PREFIX [INDEX_FILE]
#
#   FILENAME_PREFIX  The initial part of the data file names, all the way up to
#                    the "$i.dat" part. It is applied as a regex to every entry
#                    of list.txt; entries that do not match are skipped.
#   INDEX_FILE       Optional. File with one name per line; a value's position
#                    in a .dat file selects the name printed for it.
#
# Note: generate list.txt via: ls -1 *.dat > list.txt
#
# For every matching FILE in list.txt, the script writes FILE.cluster.xml and
# prints progress/debug information on standard output.

use strict;
use warnings;

# NOTE(review): the path of the index file was not recoverable from the source
# as received. This default is the file named in the original in-code comment
# ("... or whatever"); confirm it, or pass INDEX_FILE explicitly.
my $DEFAULT_INDEX_FILE = '50-most-different/waterlifting-output-50-most-different.txt';

# Validate the arguments before touching any file, and signal failure through
# the exit status so callers can detect it.
my $prefix = $ARGV[0];
if (!defined $prefix || $prefix eq '') {
    print STDERR "ERROR! ARGV0?!?!\n";
    exit(1);
}
my $index_file = defined $ARGV[1] ? $ARGV[1] : $DEFAULT_INDEX_FILE;

my @myfiles = read_lines('list.txt');
my @indices = read_lines($index_file);

# Debug output: this is the last index of @indices, i.e. one less than its size.
print "the size of the indices array is: " . $#indices . "\n\n\n";

foreach my $filename (@myfiles) {
    # We are reading through list.txt and keeping only the relevant filenames.
    next unless $filename =~ /$prefix/;

    my $data_fh;
    if (!open($data_fh, '<', $filename)) {
        warn "cannot open '$filename': $!; skipping\n";
        next;
    }
    my @lines = <$data_fh>;
    close($data_fh);
    chomp(@lines);

    # Element 0 is a deliberate empty placeholder, so the first value read from
    # the file lands at index 1 and is therefore paired with $indices[1].
    # NOTE(review): this offset is carried over unchanged from the original
    # script; confirm whether the index file has a header line or whether this
    # is an off-by-one.
    my @overallvalues = ('');
    foreach my $line (@lines) {
        # Split on single spaces only, so consecutive spaces yield empty fields
        # that still occupy a position.
        foreach my $val (split(/ /, $line)) {
            print "val: $val ($#overallvalues)\n";
            push(@overallvalues, $val);
        }
    }

    # How many clusters exist in this file? Well, check the filename: whatever
    # is left after removing the prefix and ".dat" is taken as the count.
    my $numberOfClusters = $filename;
    $numberOfClusters =~ s/$prefix//;
    $numberOfClusters =~ s/\.dat//;

    # Use only the leading digits as the loop bound; anything else means zero
    # clusters (the output file is still created, just empty).
    my ($cluster_count) = $numberOfClusters =~ /\A\s*(\d+)/;
    $cluster_count = 0 unless defined $cluster_count;

    my $out_path = "$filename.cluster.xml";
    open(my $out_fh, '>', $out_path)
        or die "cannot open '$out_path' for writing: $!\n";

    print "------ starting a new clusterset (w/ $numberOfClusters clusters)-----------\n";

    # NOTE(review): the strings written to the output file below contain no XML
    # tags. They are reproduced exactly as found; the tag text was presumably
    # lost together with the other "<...>" fragments of this script and has to
    # be restored from a pristine copy.
    for my $i (1 .. $cluster_count) {
        print {$out_fh} "\n";
        print "\n";

        for my $j (0 .. $#overallvalues) {
            # Current cluster ID: $i. Current place in the array: $j.
            # So the name is $indices[$j].
            my $is_member = do {
                # The placeholder and any empty fields are not numeric; they
                # compare as 0 and never match a cluster ID, which starts at 1.
                no warnings 'numeric';
                $overallvalues[$j] == $i;
            };
            next unless $is_member;

            # Positions past the end of the index file print as an empty name.
            my $name = defined $indices[$j] ? $indices[$j] : '';
            print {$out_fh} "$name\n";
            print "value of \$j is $j\n";
        }

        print "\n";
        print {$out_fh} "\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($out_fh) or die "cannot close '$out_path': $!\n";

    # Only the placeholder is present: the data file contributed no values.
    if ($#overallvalues == 0) {
        print "filename: $filename\n";
    }
}

# Read a whole text file and return its lines without trailing newlines.
# Dies with the system error message if the file cannot be opened.
sub read_lines {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "cannot open '$path': $!\n";
    my @lines = <$fh>;
    close($fh);

    # chomp (unlike chop) leaves a final line without a newline intact.
    chomp(@lines);
    return @lines;
}