#!/bin/sh
# use -*- tcl -*- \
exec tclsh "$0" "$@"

# Perform a diff on two CSV files.
# The result is a CSV file
#
# The three lines at the top are the usual sh/Tcl trampoline: /bin/sh
# runs the 'exec' line, while Tcl treats it as the continuation of the
# preceding comment (note the trailing backslash) and skips it.

package require csv
package require cmdline

# ----------------------------------------------------
# csvdiff ?-sep sepchar? ?-key LIST? file1 file2
#
# Argument processing and checks.
#
#   -sep sepchar   field separator handed to the csv package (default ",")
#   -key LIST      LIST=idx,...  with idx in {n, -m, n-, n-m}; selects the
#                  columns which form the comparison key (default "0-",
#                  i.e. the whole record).

set sepChar ,
set usage "Usage: $argv0 ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"

set keySpec "0-"

# cmdline::getopt removes the options it recognizes from argv. The loop
# runs while it returns > 0; a negative result is treated as a usage
# error by the check below.
while {[set ok [cmdline::getopt argv {sep.arg key.arg} opt val]] > 0} {
    switch -exact -- $opt {
        sep {set sepChar $val}
        key {set keySpec $val}
    }
}
if {($ok < 0) || ([llength $argv] != 2)} {
    puts stderr $usage
    exit -1
}

foreach {inA inB} $argv break

# Break the key specification into its items. The usage text documents
# a comma separated list; whitespace is accepted as a separator as well,
# so that specifications written as a plain Tcl list of indices keep
# working. Empty items (",," or stray blanks) are ignored.
set items [list]
foreach i [split $keySpec ", \t"] {
    if {[string length $i] > 0} {
        lappend items $i
    }
}
if {[llength $items] == 0} {
    puts stderr $usage
    exit -1
}

# Translate every item into a {from to} pair suitable for lrange.
# The patterns are anchored so that malformed items are rejected here,
# with the usage message, instead of failing later inside lrange.
# Numbers are normalised with 'scan %d' so that leading zeros ("08")
# are read as decimal.
set idx [list]
foreach i $items {
    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
        # n-m : columns n through m
        scan $f %d f
        scan $t %d t
        lappend idx [list $f $t]
    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
        # n- : column n through the last column
        scan $f %d f
        lappend idx [list $f end]
    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
        # -m : first column through column m
        scan $t %d t
        lappend idx [list 0 $t]
    } elseif {[regexp -- {^[0-9]+$} $i]} {
        # n : a single column
        scan $i %d f
        lappend idx [list $f $f]
    } else {
        puts stderr $usage
        exit -1
    }
}
set keySpec $idx

# Open both inputs. From here on inA/inB hold channels, not file names.
# A file which cannot be opened is reported as a one-line message
# instead of a Tcl stack trace; the exit status stays 1, as for an
# uncaught error.
if {[catch {open $inA r} chanA]} {
    puts stderr "$argv0: $chanA"
    exit 1
}
if {[catch {open $inB r} chanB]} {
    puts stderr "$argv0: $chanB"
    exit 1
}
set inA $chanA
set inB $chanB

# ----------------------------------------------------
# Actual processing, uses the following information from the
# commandline:
#
# inA     - channel for input A
# inB     - channel for input B
# sepChar - separator character
# keySpec - list of {from to} column ranges forming the record key

# We read file2 completely and then go through the records of
# file1. For any record we don't find we write a "deleted" record. If
# we find the matching record we remove it from the internal
# storage. In a second sweep through internal array we write "added"
# records for the remaining data as that was not in file1 but is in
# file2.

# keyof --
#
#	Compute the comparison key of a record.
#
# Arguments:
#	data	The record, a list of field values.
#
# Globals:
#	keySpec	(read) List of {from to} index pairs, each one usable as
#		the index arguments of lrange.
#
# Results:
#	A list containing the fields selected by all the ranges in
#	keySpec, in the order the ranges are listed.

proc keyof {data} {
    global keySpec
    set key [list]
    foreach range $keySpec {
        foreach {f t} $range break
        # Append the selected fields directly. No 'eval' is used, so
        # field contents are never re-parsed as part of a script.
        foreach field [lrange $data $f $t] {
            lappend key $field
        }
    }
    return $key
}

# Pass 1: read input B completely. 'map' records which keys are present
# and 'order' keeps the records in file order for the final sweep.

set order [list]
array set map {}

while {[gets $inB line] >= 0} {
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]
    set map($key) .
    lappend order $data
}
close $inB

# Pass 2: walk input A. A record whose key is known from B is common to
# both files; its key is removed from 'map' and nothing is printed. Any
# other record is written out with a leading "-" field (deleted).

while {[gets $inA line] >= 0} {
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]

    if {[info exists map($key)]} {
        unset map($key)
        continue
    }
    puts stdout [::csv::join [linsert $data 0 -] $sepChar]
}
close $inA

# Pass 3: every key still in 'map' was not matched by a record of A.
# Write the corresponding records of B, in their original order, with a
# leading "+" field (added).

foreach data $order {
    set key [keyof $data]
    if {[info exists map($key)]} {
        puts stdout [::csv::join [linsert $data 0 +] $sepChar]
    }
}
exit