#!/usr/bin/ruby

# this script searches the 10 directories f0..f9 at the same level as the script location;
# one file will exist in each directory and the script will retrieve two columns from
# each file to add to the output. The columns CVorder, Name and the target column
# (e.g. RI_7) will also be added to the output; these are the same in all files but
# in different row order. The name of the target column (e.g. RI_7) is read from the
# first argument

# the first 13 rows of the input file are ignored; the number of rows to be ignored
# is passed as the second argument

# each file contains the same records, but not in the same order; each input file is
# sorted on CVorder as a common index to keep all of the output in registration

# the particular columns that will be added to the output are read from the file path,
# including the directory; for the path
# f0/V_mae_65.91_E197_f0_r02_399_rprop.out.txt

# the value of f* (here f0) is captured by the first group of the filename regex and
# the 4th "_" delimited token (here E197) by the second group; the columns named
# f0 and E197 are the ones that are output

# output is as follows, columns:
# columns: CVorder  Name  RIexp  
# columns: f0  f1  f2  f3  f4  f5  f6  f7  f8  f9
# columns: f0_E*  f1_E*  f2_E*  f3_E*  f4_E*  f5_E*  f6_E*  f7_E*  f8_E*  f9_E*

# if a file is missing from one of the directories, that column is just missing from the output

# Usage: <script> TARGET_COLUMN [LINES_TO_IGNORE]
#
#   TARGET_COLUMN    name of the column that is copied next to CVorder and Name
#   LINES_TO_IGNORE  number of leading lines to skip in every input file before
#                    the header line (defaults to 0)
#
# Every problem with the arguments or the input files ends the script with a
# one-line message on stderr and exit status 1.

# Glob, relative to the current working directory, selecting the input files.
INPUT_GLOB = 'f[0-9]/*.out.txt'

# Expected layout of an input path, for example
#   f0/V_mae_55.73_E1499_f9_r01_2000-ON-0.25_S3A_v1_36.35.1.out.txt
# \A anchors at the start of the string; "fold" captures the directory (f0) and
# "epoch" captures the E<digits> token (E1499).
FILE_PATTERN = %r{\A(?<fold>f.)/[A-Z]_[a-z0-9]+_\d+\.\d+_(?<epoch>E\d+)_}

# Reads one tab-separated file.
#
# The first ignore_lines lines are dropped; the next line is the header and all
# remaining lines are records.  Lines that split into no fields (blank lines)
# are left out of the records.
#
# Returns [colindex, records]:
#   colindex = {"f1index"=>0, "CVorder"=>1, "Name"=>2, ...}   (column name => position)
#   records  = [["1", "2", "TEMED", ...], ["2", "3", ...], ...]
def read_table(file, ignore_lines)
  rows = File.readlines(file).drop(ignore_lines).map { |line| line.chomp.split(/\t/) }
  header, *records = rows
  abort "#{file}: no header line left after ignoring #{ignore_lines} line(s)" if header.nil?

  [Hash[header.each_with_index.to_a], records.reject { |row| row.empty? }]
end

# Returns the values of the column called name, one per record, in record order.
#
# Values are picked record by record rather than by transposing the whole table:
# String#split drops trailing empty fields, so records may be shorter than the
# header, and a short record simply yields nil (printed as an empty cell).
def column_values(file, colindex, records, name)
  index = colindex.fetch(name) { abort "#{file}: column #{name.inspect} not found in header" }
  records.map { |record| record[index] }
end

usage = "usage: #{File.basename($PROGRAM_NAME)} TARGET_COLUMN [LINES_TO_IGNORE]"

# get the target column name from the first argument
target_name = ARGV[0]
abort usage if target_name.nil? || target_name.empty?

# second argument gives the number of lines to ignore; Integer() rejects
# non-numeric text, which String#to_i would silently turn into 0
ignore_lines =
  begin
    Integer(ARGV[1] || '0', 10)
  rescue ArgumentError
    nil
  end
if ignore_lines.nil? || ignore_lines < 0
  abort "LINES_TO_IGNORE must be a non-negative integer, got #{ARGV[1].inspect}\n#{usage}"
end

# Sorted list of input files; the sort order fixes the order of the output columns.
files = Dir[INPUT_GLOB].sort
abort "no input files match #{INPUT_GLOB}" if files.empty?

# For every file build a pair [shared_columns, fold_columns]; each column is an
# array whose first element is the column title followed by the values.
data_array = files.map do |file|
  name_parts = FILE_PATTERN.match(file) ||
               abort("#{file}: file name does not match the expected pattern")
  fold = name_parts[:fold]
  epoch = name_parts[:epoch]

  colindex, records = read_table(file, ignore_lines)

  # every input file holds the same records in a different row order; sorting
  # on CVorder (numerically) puts the rows of all files in registration
  cv_index = colindex.fetch('CVorder') { abort "#{file}: column \"CVorder\" not found in header" }
  records = records.sort_by { |record| record[cv_index].to_i }

  values = lambda { |name| column_values(file, colindex, records, name) }

  [[['CVorder', *values.call('CVorder')],
    ['Name', *values.call('Name')],
    [target_name, *values.call(target_name)]],
   [[fold, *values.call(fold)],
    ["#{fold}_#{epoch}", *values.call(epoch)]]]
end

# prefixes: the shared columns of every file; data: the per-fold columns of every file
prefixes, data = data_array.transpose

# the shared columns are printed only once, so they must be identical in all files
files.zip(prefixes).each do |file, prefix|
  next if prefix == prefixes[0]

  abort "#{file}: CVorder/Name/#{target_name} columns differ from those in #{files[0]}"
end

# Column order: shared columns, then the fold column of every file, then the
# fold_epoch column of every file.  Transposing turns the columns into rows.
columns = data.transpose.inject(prefixes[0]) { |acc, cols| acc + cols }
print(columns.transpose.map { |row| row.join("\t") + "\n" }.join)

