#!/usr/bin/perl

use strict;
use DBI;
use File::Path qw(make_path);
use Getopt::Std;
use MARC::Record;
use MARC::File::USMARC;
use MARC::File::XML;
use MARC::Charset;
use POSIX qw(ceil);
use Unicode::Normalize;

# hard-coded parameters
my $IN_DIR = '/data/idzebra/210/20080119/bcl_bcl';
my $OUT_DIR = '/tmp/recovery/bcl_bcl';
my $FILE_BEGIN = 1;
my $FILE_END = 33 ;
my $sf852_a = 'RMZ';
my $sf852_b = 'RMZU';
#/hard-coded parameters

# The only command line option is -c, the path of the configuration file
# that holds the database connection settings.
my %options = ();
getopts("c:",\%options);
my $configFile = $options{c};
if (!$configFile || ! -f $configFile) {
    print "Usage: $0 -c CONFIG_FILE\n";
    exit 1;
}

# make_path (unlike a bare mkdir) also creates missing parent directories
# and croaks when the directory cannot be created.
make_path($OUT_DIR) unless -d $OUT_DIR;

my $config = loadConfig($configFile);
my $dbh = makeConnection($config)
    or die "Cannot connect to database: "
         . ($DBI::errstr || 'unknown error') . "\n";
END {
    if ($dbh) {
        $dbh->disconnect();
    }
}

# Lookup tables handed to extractRecord(); nothing in this part of the script
# fills them in.
my $barcode_rid;
my $rid_barcode;

# Unbuffered STDOUT so progress output appears immediately.
$| = 1;

# Select every record id that has holdings but whose MARC record has no title.
my $sth = $dbh->prepare(<<'_SQL_');
select distinct r.rid from opl_ge852record r inner join opl_marcRecord m on r.rid=m.rid where m.title is null
_SQL_
die "Cannot prepare query: " . ($dbh->errstr || 'unknown error') . "\n"
    unless $sth;
$sth->execute
    or die "Cannot execute query: " . ($sth->errstr || 'unknown error') . "\n";

# Copy the XML file of each selected record from the input tree to the output
# tree. Files are grouped in sub-directories named ceil(rid / 1000).
while ((my $rid) = $sth->fetchrow_array) {
    my $bucket     = ceil($rid/1000);
    my $src        = "$IN_DIR/$bucket/$rid.xml";
    my $target_dir = "$OUT_DIR/$bucket";
    my $target     = "$target_dir/$rid.xml";

    # cp does not create the destination directory itself.
    make_path($target_dir) unless -d $target_dir;

    # List form bypasses the shell; a failed copy is reported and skipped so
    # that one missing file does not abort the whole run.
    system('cp', $src, $target) == 0
        or warn "Failed to copy $src to $target\n";
}

$sth->finish;


=item

my $barcode_rid = {
    'YGCN09813' => '3',
    'YGCN09814777' => '3',
    'YGCN09814888' => '3',
    'YGCN05710' => '5',
    'YGCN03605' => '11',
    'YGCN03606' => '11',
};

my $rid_barcode = {
    '3' => {
        'YGCN09813' => 'new',
        'YGCN09814777' => 'new',
        'YGCN09814888' => 'new',
    },
    '5' => {
        'YGCN05710' => 'new',
    },
    '11' => {
        'YGCN03605' => 'new',
        'YGCN03606' => 'new',
    },
};
=cut

# Run the extraction over every numbered input file in the configured range
# (inclusive on both ends), then finish with a success status.
foreach my $file ($FILE_BEGIN .. $FILE_END) {
    extractRecord("$IN_DIR/$file", $rid_barcode, $barcode_rid, $sf852_a, $sf852_b);
}


exit 0;
################################################################################


# Open a DBI connection described by a configuration hash.
#
# Parameters:
#   $config - hash ref with the keys db_driver, db_name, db_host, db_port,
#             db_user and db_password. db_driver defaults to 'mysql' and
#             db_port to '3306' when missing or false.
#
# Returns whatever DBI->connect returns, or nothing (empty list / undef) when
# no configuration is given.
sub makeConnection {
    my ($config) = @_;
    return unless $config;

    my $driver = $config->{'db_driver'} || 'mysql';
    my $port   = $config->{'db_port'}   || '3306';

    # Positional DSN: dbi:<driver>:<database>:<host>:<port>
    my $dsn = join(':',
                   'dbi',
                   $driver,
                   $config->{'db_name'},
                   $config->{'db_host'},
                   $port);

    return DBI->connect($dsn, @{$config}{'db_user', 'db_password'});
}
############################################################


# Read a simple "key = value" configuration file.
#
# Parameters:
#   $configFile - path of the file to read.
#
# Returns a hash ref mapping each key (word characters only) to its value with
# surrounding whitespace removed. Everything from a '#' to the end of the line
# is treated as a comment, so a value cannot contain '#'. Blank lines and
# lines that are not of the form "key = value" are ignored.
#
# Dies when the file cannot be opened.
sub loadConfig {
    my ($configFile) = @_;
    my $config = {};

    # Low-precedence "or" so that die is tied to open() itself; with "||" the
    # test would apply to the file name and a failed open would go unnoticed.
    open my $conf, '<', $configFile
        or die "Cannot open file $configFile: $!";
    while (my $line = <$conf>) {
        chomp $line;
        $line =~ s/#.*//;                # remove comments
        next if $line =~ /^\s*$/;        # ignore blank lines

        if ($line =~ /^\s*(\w+)\s*=\s*(.*?)\s*$/) {
            $config->{$1} = $2;
        }
    }
    close $conf;

    return $config;
}
############################################################


# Read every record of a USMARC file, keep only the holdings (field 852) whose
# barcode is wanted, add holdings for wanted barcodes the record does not have
# yet, and write each matching record as MARC XML to /tmp/recovery/<rid>.xml.
#
# Parameters:
#   $input       - path of the USMARC file to read.
#   $rid_barcode - hash ref: record id => { barcode => status }. A status of
#                  'new' marks a barcode not yet seen; when the barcode is
#                  found in a record its status is overwritten with the
#                  record id, so the caller's structure is modified.
#   $barcode_rid - hash ref: barcode => record id.
#   $sf852_a     - value of subfield 'a' for generated 852 fields.
#   $sf852_b     - value of subfield 'b' for generated 852 fields.
#
# Prints a progress indicator to STDOUT. Dies when the input file cannot be
# opened or an output file cannot be written.
sub extractRecord {
    my ($input, $rid_barcode, $barcode_rid, $sf852_a, $sf852_b) = @_;

    print "\n=====================\nInput file: $input\n";

    # in() returns undef when the file cannot be opened; report that clearly
    # instead of failing later on a method call on an undefined value.
    my $marcfile = MARC::File::USMARC->in($input)
        or die "Cannot open MARC file $input: "
             . ($MARC::File::ERROR || $!) . "\n";

    my $count = 1;
    while (my $record = $marcfile->next()) {
        $record = utf8_fromMarc8($record);
        my $rec_matched = 0;
        my $rid;
        my $sample_holding;

        # - check bar codes in record against the given list
        # - remove field 852 if its barcode is not in the given list
        foreach my $field ($record->field('852')) {
            my $bc = $field->subfield('p');

            # The record id is taken from the first barcode that has one.
            if (!$rid) {
                $rid = $barcode_rid->{$bc};
            }

            if ($rid_barcode->{$rid}->{$bc}) {
                $rec_matched++;
                # Mark the barcode as found so it is not generated again below.
                $rid_barcode->{$rid}->{$bc} = $rid;
                $sample_holding = $field->clone();
            }
            else {
                $record->delete_field($field);
            }
        }

        # generate field 852 for new holdings
        foreach my $bc (keys %{$rid_barcode->{$rid}}) {
            if ($rid_barcode->{$rid}->{$bc} eq 'new') {
                if ($sample_holding) {
                    # Reuse an existing holding as template, changing only
                    # the barcode.
                    $sample_holding->update('p' => $bc);
                }
                else {
                    $sample_holding = MARC::Field->new(
                        852, '1', '0',
                        'a' => $sf852_a,
                        'b' => $sf852_b,
                        'p' => $bc,
                        '3' => 'G'
                    );
                }

                $record->append_fields($sample_holding->clone());
                $rec_matched++;
            }
        }

        # Replace any existing 001 field by one holding the record id, placed
        # in front of all other fields.
        foreach my $f001 ($record->field('001')) {
            $record->delete_field($f001);
        }
        my @field = $record->fields();
        if (scalar(@field) > 0) {
            $record->insert_fields_before(
                $field[0],
                MARC::Field->new('001', $rid));
        }

        if ($rec_matched > 0) {
            my $xml = MARC::File::XML::record($record);
            my $out_file = "/tmp/recovery/" . $rid . ".xml";

            # Checked three-argument open on a lexical handle: a record that
            # cannot be written must not be lost silently.
            open my $out, '>', $out_file
                or die "Cannot write $out_file: $!";
            print {$out} mxml_writeSortData($xml);
            # Buffered write errors only surface when the handle is closed.
            close $out
                or die "Cannot close $out_file: $!";
        }

        # Progress indicator: one '#' per 25 records, a line break per 1000.
        if ($count % 1000 == 0) {
            print "\n";
        }
        if ($count % 25 == 1) {
            print "#";
        }
        $count++;
    }
    $marcfile->close();
}
############################################################


# Convert the leader and every field of a MARC record to UTF-8 in Unicode
# normalization form C.
#
# Parameters:
#   $marc - the MARC record; it is modified in place.
#
# Returns the same record object. Position 9 of the leader is set to 'a'.
# Each field is replaced by a newly built field holding the converted tag,
# indicators, subfield codes and data; a data field without subfields is left
# untouched.
sub utf8_fromMarc8 {
    my ($marc) = @_;

    my $charset = MARC::Charset->new();

    # Convert one value; false values (undef, '', '0') are passed through
    # unchanged.
    my $convert = sub {
        my ($text) = @_;
        return $text unless $text;
        return NFC($charset->to_utf8($text));
    };

    my $leader = $marc->leader();
    substr($leader, 9, 1, 'a');
    $marc->leader(NFC($charset->to_utf8($leader))) if $leader;

    for my $old ($marc->fields()) {
        my $tag = $old->tag();
        next if $tag eq '000';
        $tag = $convert->($tag);

        my $replacement;
        if ($tag =~ m/00[1-9]/) { # control field
            my $data = $old->data();
            $data = $convert->($data);
            $replacement = MARC::Field->new($tag, $data);
        }
        else {
            my $ind1 = $old->indicator(1);
            my $ind2 = $old->indicator(2);
            $ind1 = $convert->($ind1);
            $ind2 = $convert->($ind2);

            # Collect the converted code/data pairs in their original order,
            # then build the field in a single call.
            my @pairs;
            for my $subfield ($old->subfields()) {
                my ($code, $data) = @$subfield;
                $code = $convert->($code);
                $data = $convert->($data);
                push @pairs, $code, $data;
            }
            if (@pairs) {
                $replacement = MARC::Field->new($tag, $ind1, $ind2, @pairs);
            }
        }

        $old->replace_with($replacement) if $replacement;
    }

    return $marc;
}
############################################################


# Rebuild the sort subfields (code "-") of a MARC XML record.
#
# Parameters:
#   $xml - MARC XML text of one record.
#
# Returns the XML with all existing code "-" subfields removed and new ones
# added by mxml_addSortData() for the fields 245, 260 and 852, in that order.
sub mxml_writeSortData {
    my ($xml) = @_;

    # Drop whatever sort data is already there.
    $xml =~ s/[\s]*<subfield code="-">.*<\/subfield>//g;

    # Field tag => subfield codes the sort key is built from.
    my @sortSources = (
        [245, ['a', 'b', 'p']],
        [260, ['c']],
        [852, ['k', 'h', 'i']],
    );

    for my $source (@sortSources) {
        my ($tag, $codes) = @$source;
        $xml = mxml_addSortData($xml, $tag, $codes);
    }

    return $xml;
}



############################################################
# Append a sort subfield (code "-") to every data field with the given tag.
#
# Parameters:
#   $xml  - MARC XML text.
#   $tag  - tag of the data fields to process (245, 260 or 852 produce a sort
#           key; any other tag gets an empty one).
#   $code - array ref of subfield codes whose values may feed the sort key.
#
# Returns the modified XML. The sort key is built as follows:
#   245 - subfield a without trailing blanks/colons and without the leading
#         characters counted by the second indicator, followed by ': ' and
#         subfield b (or p when there is no b).
#   260 - the first run of four digits found in subfield c.
#   852 - subfields k, h and i joined by single blanks.
sub mxml_addSortData {
    my ($xml, $tag, $code) = @_;

# OPALS's signature: __OPALS_MARC_XML_f801d6faada54a60faa577bbb60f33__
    # Each pass swaps the next matching field for the signature, extends the
    # field, and puts it back. A field that already carries a code "-"
    # subfield no longer matches, which ends the loop.
    while ($xml =~ s/([\s]*<datafield tag="$tag" ind1="[\d\ ]" ind2="[\d\ ]">([\s]*<subfield code="[\w\d]">.*<\/subfield>)*[\s]*<\/datafield>)/__OPALS_MARC_XML_f801d6faada54a60faa577bbb60f33__/) {
        my $datafield = $1;

        # Value of each requested subfield code, where present.
        my %valueOf;
        for my $wanted (@$code) {
            next unless $datafield =~ m/[\s]*<subfield code="$wanted">(.*)<\/subfield>/;
            $valueOf{$wanted} = $1;
        }

        my $sortKey = '';

        if ($tag == 245) {
            if ($valueOf{'a'}) {
                $sortKey .= $valueOf{'a'};
                $sortKey =~ s/[\ \:]+$//;

                # Second indicator = number of leading characters to skip.
                my $skip = $datafield =~ m/[\s]*<datafield tag="245" ind1="[01]" ind2="([\d])">/
                         ? $1
                         : 0;
                $sortKey = substr($sortKey, $skip);
            }

            # Subfield b wins over p; neither is required.
            my $rest = $valueOf{'b'} || $valueOf{'p'};
            $sortKey .= ': ' . $rest if $rest;
        }
        elsif ($tag == 260) {
            if ($valueOf{'c'} && $valueOf{'c'} =~ m/([\d]{4})/) {
                $sortKey .= $1;
            }
        }
        elsif ($tag == 852) {
            $sortKey = join(' ', grep { $_ } @valueOf{'k', 'h', 'i'});
            $sortKey =~ s/ +/ /g;
            $sortKey =~ s/(^ | $)//g;
        }

        $datafield =~ s/<\/datafield>/  <subfield code="-">$sortKey<\/subfield>\n  <\/datafield>/;
        $xml =~ s/__OPALS_MARC_XML_f801d6faada54a60faa577bbb60f33__/$datafield/;
    }

    return $xml;
}
