#!/usr/bin/perl

use strict;
use DBI;
use Getopt::Std;
use MARC::Record;
use MARC::File::USMARC;
use MARC::File::XML;
use MARC::Charset;
use Unicode::Normalize;

# hard-coded parameters
# Values to be edited by hand before a run.  Apart from the report path
# (which the main loop spells out again as a literal), none of these are
# referenced by the code visible in this file.
#use Opals::Context("/etc/opals/conf/_MY_SITE_");
my ($IN_DIR, $OUT_DIR) = (
    '/data/opals/import/rmz_rmzu',
    '/tmp/marcRecordErrReport',
);
my ($FILE_BEGIN, $FILE_END) = (1, 33);
# NOTE(review): presumably the 852 $a / $b values meant for extractRecord();
# nothing visible passes them on - confirm before relying on them.
my ($sf852_a, $sf852_b) = ('RMZ', 'RMZU');
#/hard-coded parameters





# Site lists.
#
# Each entry is the name of a config file under /etc/opals/conf/.  Exactly one
# "my @sites" declaration may be active: a second "my" silently masks the
# first, so every list that is not in use is kept commented out.  Entries must
# not be empty and must not carry surrounding whitespace, because the name is
# interpolated verbatim into the config path.

# general list (inactive: it used to be declared here and then masked by the
# OPALSV list below, so it never took effect)
#my  @sites=('acsd_acs','acsd_lgm','afs_fl','bcl_bcl','bsd_bhs','bwl_bwl','cce_cce','ccisd_mbm','ccl_ccl','cesu_r391m','cesu_uid','cfs_cfs','cncsd_mce','cssd_hme','demo','dfum_dfum','dqsm_dqsm','ebook_ebook','ELS','ercsd_bdml','ercsd_ces','ercsd_ews','ercsd_fes','ercsd_hes','ercsd_hil','ercsd_lks','ercsd_wujo','ercsd_wujp','ercsd_wujq','ercsd_wujr','ercsd_wujs','ersd_mes','esp_esp','esu6_dps','fc_fcl','fcf_fcf','fcsd_famd','fhsm_fhsm','fpc_fpc','ges','gvsd_gvc','hca_hca','hcl_fs','hcs_nse','hfrs_hfrs','hpps_hphs','iccsd_ice','infolep_infolep','jame_jame','jbha_jbha','kajh','kash','lpc_lpc','lus_vwl','lvcc_lvcc','mcps_rlhs','msd_0202','msd_4124','msd_4125','msd_6532','msd_7580','msd_7818','msd_7819','msd_7869','msd_7870','mts_mte','mumc_mumc','mwcl_mwcl','neci_neci','nlnz_nzbn','nufsd_wzy2','nufsd_wzy3','nufsd_wzy4','nufsd_wzyp','onc_mi','onc_mie','oslc_oslc','pcsd_kgr','pmc_pmc','pnwp','ppl_ppl','pps_phs','psut_psut','qhch_hcd','qmsb','rcs_amhs','rice_rice','risd_rhhs','rnb','rnh','rnw','rpc_rpc','rps_rps','sccoe_schs','sifs_fse','sjcs','sjp_sjp','sll_bfe','sll_bfh','sll_dcm','SLL_dmt','sll_hmc','snm_snm','snpl_snpl','snps_ecg','snps_ilt','snps_jam','snps_jch','snps_osk','spe','swcs_swhs','sw_ips','uds_uds','vfp_vfp','wapa_wapa','wbc_wbc','wcc_wcc','wpc_wpc','wps_eis','wps_ris','wps_wsts','wsd_whs','wuja_pro','wuja_rkca','wuja_wuj','wuja_wuj4','wuja_wuja','xaaa_rfrc','xaaa_xaal','xaaa_xaar','xana_olps','ztest');

#BDT
#my @sites =('bdt_bna','bdt_bnf','bdt_bnk','bdt_bnm','bdt_bno','bdt_btb','bdt_dpa','bdt_hce','bdt_hea','bdt_lmc','bdt_mee','bdt_mem','bdt_nvh','bdt_oaa','bdt_oaj','bdt_tca','bdt_tcj','bdt_ueo','bdt_vle','bdt_vlk','bdt_vln','bdt_wra','bdt_wrj','bdt_wrlbdt_bne','bdt_bnj','bdt_bnl','bdt_bnn','bdt_bnp','bdt_btl','bdt_dpj','bdt_hcs','bdt_hej','bdt_mea','bdt_mel','bdt_nva','bdt_nvk','bdt_oae','bdt_oal','bdt_tch','bdt_tck','bdt_vla','bdt_vlj','bdt_vlm','bdt_vlo','bdt_wre','bdt_wrk');

#cay
#my @sites=('cay_age','cay_ah','cay_cay','cay_cmh','cay_cmm','cay_me','cay_mh','cay_pbe','cay_pbs','cay_sce','cay_sch','cay_use','cay_usec','cay_ush','cay_we','cay_wh');

#DCMO

#my @sites=('dcmo','',' dcmo_ahs','','dcmo_dhcat','dcmo_dtcat','dcmo_hfs','dcmo_nms','dcmo_nsg','dcmo_om','dcmo_ovg','dcmo_see','','dcmo_sie','dcmo_uh',' dcmo_uve','dcmo_ae','dcmo_decat','dcmo_dmcat','dcmo_fam','','dcmo_nhs','dcmo_npb','dcmo_oh',' dcmo_op','dcmo_ovh','dcmo_setrc','dcmo_ueu','dcmo_uoe','dcmo_uvh');

#ONC
#my @sites=('onc_an','onc_crrf','onc_cvrf','onc_demo','onc_ed','onc_gc','onc_kwc','onc_ncoc','onc_oaoc','onc_om','onc_os','onc_ro','onc_setrc','onc_sl');

#OSW

#my @sites=('osw_apw','osw_sce','osw_vbo');
#SLL
#my @sites =('sll_bfe','sll_bfh','sll_cce','sll_cch','sll_ccm','sll_cfh','sll_cpe','sll_cpp','sll_dmt','sll_eks','sll_gce','sll_gcs','sll_gcw','sll_gfe','sll_hds','sll_hmc','sll_hrs');
#my  @sites=('acsd_acs','acsd_lgm','afs_fl','bcl_bcl','bsd_bhs','bwl_bwl');

#OPALSV (active list; same sites in the same order as before, with the empty
# entries removed and the stray leading spaces stripped)
my @sites = qw(
    bab_bab      cbi_cbict    habt_habt    halb_ska      hfs_hfs
    jpps_bialik  maimo_mus    mis_tas      myh_myh       rasg_ems
    rmba_rmba    rmz_rmzm     ssdseu_wol   ssg_ssg       tfs_tfs
    ybh_ybh      zdra_zdra    bcds_bcds    chat_chat     haftr_hus
    hanc_bhs     jbha_jbha    jpps_jpp     mchs_mchs     mjbha_mjbha
    nshds_ihs    rasg_fhh     rmz_rmz      rmz_rmzu      ssdseu_wou
    sssw_mmsu    utt_sno      yof_yof      bjec_akiva    dkja_dkja
    halb_drs     hcl_hcl      jiqg_jiqg    laes_laes     mdy_ceh
    mjbha_mjbls  nsh_nsh      rky_rky      rmz_rmzl      ssdseu_cfl
    ssdsrv_ssdsrv tabc_tabc   utt_utt      yula_yula
);


# Main report loop.
#
# For every site in @sites: load /etc/opals/conf/<site>, connect to its
# database, count the rows of opl_ge852record joined to opl_marcRecord whose
# title is NULL, and append a section to the report for each site that has at
# least one such row.  The final line of the report is the number of sites
# that had any.

# Flush STDOUT after every print so the per-site progress line shows up while
# the remaining sites are still being processed.
$| = 1;

my $dbh;# = Opals::Context->dbh();
my $n=0;

my $report_path = '/tmp/marcRecordErrReport';
open my $report, '>', $report_path
    or die "Cannot open report file $report_path: $!";

foreach my $site (@sites){
    # Work on a trimmed copy (foreach aliases the array element) and skip
    # blank entries, so a hand-pasted list cannot produce a bogus config path.
    (my $s = $site) =~ s/^\s+|\s+$//g;
    next unless length $s;

    my $config = loadConfig("/etc/opals/conf/$s");

    $dbh = makeConnection($config);
    print "/etc/opals/conf/$s\n";
    next unless $dbh;

    my $sth = $dbh->prepare(<<_SQL_);
select  count(r.rid) as count
from    opl_ge852record r inner join opl_marcRecord  m on r.rid=m.rid
where   m.title is null 
_SQL_
    # prepare/execute return false on failure unless RaiseError is set, so
    # check them instead of calling methods on an undefined handle.
    if ($sth && $sth->execute) {
        my($count) = $sth->fetchrow_array;
        if($count){
            $n++;
            print {$report} "---- $s ---- \n";
            print {$report} "    $count records effected.\n\n";
        }
        $sth->finish;
    }
    else {
        warn "Query failed for site $s: " . ($dbh->errstr || 'unknown error') . "\n";
    }

    # Release this site's connection before moving on; otherwise every
    # handle but the last one stays open until the process exits.
    $dbh->disconnect();
    $dbh = undef;
}
print {$report} "TOTAL $n effected \n";

# Buffered write errors only surface at close time.
close $report
    or die "Cannot close report file $report_path: $!";



# Safety net: disconnect a handle that is still open if the script dies in
# the middle of a site.
END {
    if ($dbh) {
        $dbh->disconnect();
    }
}
# Not referenced by any code visible in this file (extractRecord takes its
# own parameters of the same names); kept in case other code relies on them.
my $barcode_rid;
my $rid_barcode;


exit 0;
################################################################################


# makeConnection($config)
#
# Opens a DBI connection described by the hash reference $config.
#
# Keys read: db_driver (defaults to 'mysql'), db_name, db_host,
# db_port (defaults to '3306'), db_user, db_password.
#
# Returns whatever DBI->connect returns, or nothing (empty list / undef) when
# $config is false.
sub makeConnection {
    my ($config) = @_;
    return unless $config;

    my $driver = $config->{'db_driver'} || 'mysql';
    my $port   = $config->{'db_port'}   || '3306';

    # Colon-separated DSN: dbi:<driver>:<database>:<host>:<port>
    my $dsn = join ':',
        'dbi', $driver, $config->{'db_name'}, $config->{'db_host'}, $port;

    return DBI->connect($dsn, $config->{'db_user'}, $config->{'db_password'});
}
############################################################


# loadConfig($configFile)
#
# Reads a "key = value" configuration file and returns a hash reference
# mapping each key to its value.
#
#   - everything from '#' to the end of a line is treated as a comment
#   - blank lines are ignored
#   - keys are runs of word characters; whitespace around the key and the
#     value is dropped
#   - lines that are not of the form key = value are skipped
#
# Returns nothing (undef in scalar context) after printing a warning when the
# file cannot be opened or is a directory.  makeConnection() already treats a
# false config as "no connection", so one bad site name is skipped instead of
# aborting the whole run.
sub loadConfig {
    my ($configFile) = @_;
#    print "Enter the config filename of Opals: ";
#    $configFile = <STDIN>;
    my $config = {};

    if (!defined $configFile || -d $configFile) {
        warn "Cannot open file "
           . (defined $configFile ? $configFile : '(undef)')
           . ": not a regular file\n";
        return;
    }

    # The original "open CONF, $configFile || die ..." could never die:
    # "||" binds to $configFile, not to open().  Use the three-argument form
    # with a lexical handle and test the result of open() itself.
    my $fh;
    if (!open $fh, '<', $configFile) {
        warn "Cannot open file $configFile: $!\n";
        return;
    }

    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/#.*//;                # remove comments
        next if $line =~ /^\s*$/;        # ignore blank lines

        if ($line =~ /^\s*(\w+)\s*=\s*(.*?)\s*$/) {
            $config->{$1} = $2;
        }
    }
    close $fh;

    return $config;
}
############################################################


# extractRecord($input, $rid_barcode, $barcode_rid, $sf852_a, $sf852_b)
#
# Walks every record of the USMARC file $input and writes the ones that own
# at least one wanted barcode to /tmp/recovery/<rid>.xml as MARC XML with
# sort data added.
#
#   $input        path of the USMARC file
#   $rid_barcode  hash ref: rid -> { barcode -> marker }.  A true marker means
#                 the barcode is wanted; the marker 'new' means an 852 field
#                 has to be generated for it.  Markers of barcodes found in
#                 the file are overwritten with the rid.
#   $barcode_rid  hash ref: barcode -> rid
#   $sf852_a,
#   $sf852_b      852 $a / $b values used when an 852 has to be built from
#                 scratch
#
# Dies when the input file cannot be opened or an output file cannot be
# written.  Prints a '#' progress mark to STDOUT for every 25 records.
sub extractRecord {
    my ($input, $rid_barcode, $barcode_rid, $sf852_a, $sf852_b) = @_;

    print "\n=====================\nInput file: $input\n";

    # Fail with a readable message instead of a method call on undef below.
    my $marcfile = MARC::File::USMARC->in($input)
        or die "Cannot open MARC file $input\n";
    my $count = 1;
#    while ($count < 9835) {
#        $count++;
#        $marcfile->skip();
#    }
    while (my $record = $marcfile->next()) {
        #print $record->as_formatted,"\n***************************************$count\n\n";
        $record = utf8_fromMarc8($record);
        my $rec_matched = 0;
        my $rid;
        my $sample_holding;

        # - check bar codes in record against the given list
        # - remove field 852 if its barcode is not in the given list
        foreach my $field ($record->field('852')) {
            my $bc = $field->subfield('p');
            # The first barcode that maps to a rid decides the record's rid.
            if (!$rid) {
                $rid = $barcode_rid->{$bc};
            }

            if ($rid_barcode->{$rid}->{$bc}) {
                $rec_matched++;
                $rid_barcode->{$rid}->{$bc} = $rid;
                # Remember a wanted holding as template for generated ones.
                $sample_holding = $field->clone();
            }
            else {
                $record->delete_field($field);
            }
        }

        # generate field 852 for new holdings
        foreach my $bc (keys %{$rid_barcode->{$rid}}) {
            if ($rid_barcode->{$rid}->{$bc} eq 'new') {
                if ($sample_holding) {
                    $sample_holding->update('p' => $bc);
                }
                else {
                    $sample_holding = MARC::Field->new(
                        852, '1', '0',
                        'a' => $sf852_a,
                        'b' => $sf852_b,
                        'p' => $bc,
                        '3' => 'G'
                    );
                }

                $record->append_fields($sample_holding->clone());
                $rec_matched++;
            }
        }

        # Replace any existing 001 by a single one holding the rid, placed in
        # front of all remaining fields.
        foreach my $f001 ($record->field('001')) {
            $record->delete_field($f001);
        }
        my @field = $record->fields();
        if (scalar(@field) > 0) {
            $record->insert_fields_before(
                $field[0],
                MARC::Field->new('001', $rid));
        }

        if ($rec_matched > 0) {
            #print $record->as_formatted, "\n***************************\n";
            my $xml = MARC::File::XML::record($record);
            my $out_path = "/tmp/recovery/" . $rid . ".xml";

            # An unchecked open would silently drop the record, e.g. when
            # /tmp/recovery does not exist.
            open my $out, '>', $out_path
                or die "Cannot write $out_path: $!";
            print {$out} mxml_writeSortData($xml);
            close $out
                or die "Cannot close $out_path: $!";
        }

        # Progress display: one '#' per 25 records, line break every 1000.
        if ($count % 1000 == 0) {
            print "\n";
        }
        if ($count % 25 == 1) {
            print "#";
        }
        $count++;
#        if ($count > 5) {
#            last;
#        }
    }
    $marcfile->close();
}
############################################################


# utf8_fromMarc8($marc)
#
# Passes the leader and every field of the MARC record $marc through
# MARC::Charset's to_utf8 followed by NFC normalisation, replacing each field
# in place, and returns the same record object.
#
# Position 9 of the leader is set to 'a' before the leader is converted.
# Values that are false ('', '0', undef) are left untouched.  A field with
# tag '000' is skipped, and a data field without subfields is left as it is.
sub utf8_fromMarc8 {
    my ($marc) = @_;

    my $charset = MARC::Charset->new();

    # Convert one value; false values are handed back unchanged.
    my $to_utf8 = sub {
        my ($value) = @_;
        return $value unless $value;
        return NFC($charset->to_utf8($value));
    };

    my $leader = $marc->leader();
    substr($leader, 9, 1, 'a');
    $marc->leader($to_utf8->($leader)) if $leader;

    FIELD:
    foreach my $field ($marc->fields()) {
        my $tag = $field->tag();
        next FIELD if $tag eq '000';
        $tag = $to_utf8->($tag);

        my $replacement;
        if ($tag =~ m/00[1-9]/) { # control field
            my $data = $field->data();
            $replacement = MARC::Field->new($tag, $to_utf8->($data));
        }
        else {
            my $ind1 = $field->indicator(1);
            my $ind2 = $field->indicator(2);
            $ind1 = $to_utf8->($ind1);
            $ind2 = $to_utf8->($ind2);

            # Convert every (code, data) pair first, then build the new field
            # from the first pair and append the others in order.
            my @pairs;
            foreach my $subfield ($field->subfields()) {
                my ($code, $data) = @$subfield;
                push @pairs, [ $to_utf8->($code), $to_utf8->($data) ];
            }

            if (@pairs) {
                my $first = shift @pairs;
                $replacement = MARC::Field->new($tag,
                                                $ind1, $ind2,
                                                $first->[0], $first->[1]);
                foreach my $pair (@pairs) {
                    $replacement->add_subfields($pair->[0], $pair->[1]);
                }
            }
        }

        $field->replace_with($replacement) if $replacement;
    }

    return $marc;
}
############################################################


# mxml_writeSortData($xml)
#
# Rebuilds the sort-data subfields (code "-") of a MARC XML string and
# returns the result: existing code "-" subfields are removed, then
# mxml_addSortData() is applied for
#
#   245  using subfields a, b, p
#   260  using subfield  c
#   852  using subfields k, h, i
sub mxml_writeSortData {
    my ($xml) = @_;

    # Remove existing sort data.  The content is matched with [^<]* rather
    # than a greedy .* so that only the sort subfield itself is removed, even
    # when further subfields follow on the same line.
    $xml =~ s{\s*<subfield code="-">[^<]*</subfield>}{}g;

    # Write sort data
    my @sortSpec = (
        [ 245, [ 'a', 'b', 'p' ] ],
        [ 260, [ 'c' ] ],
        [ 852, [ 'k', 'h', 'i' ] ],
    );
    foreach my $spec (@sortSpec) {
        my ($tag, $sfCode) = @$spec;
        $xml = mxml_addSortData($xml, $tag, $sfCode);
    }

    return $xml;
}



############################################################
# mxml_addSortData($xml, $tag, $code)
#
# Appends a sort-data subfield (code "-") to every <datafield tag="$tag">
# element of the MARC XML string $xml and returns the modified string.
#
#   $xml   MARC XML text
#   $tag   245, 260 or 852 (any other tag gets an empty sort subfield)
#   $code  array ref of the subfield codes to read from the field
#
# Sort value per tag:
#   245  $a without trailing blanks/colons and without its first <ind2>
#        characters (ind2 is only honoured when ind1 is 0 or 1), followed by
#        ": $b", or by ": $p" when there is no $b
#   260  the first run of four digits in $c
#   852  $k, $h and $i joined by single blanks
#
# A datafield is only touched when all of its subfields have a one-character
# word code, so a field that already carries a code "-" subfield is skipped.
#
# Subfield content is matched with [^<]* instead of a greedy .*; with .* two
# subfields on one line were captured as one value, and on XML without line
# breaks the freshly added sort subfield was swallowed by the match, so the
# field matched again forever.  Every field is now rewritten in one pass.
sub mxml_addSortData {
    my ($xml, $tag, $code) = @_;

    my $subfield_re = qr{\s*<subfield code="\w">[^<]*</subfield>};
    my $field_re    = qr{\s*<datafield tag="\Q$tag\E" ind1="[\d ]" ind2="[\d ]">(?:$subfield_re)*\s*</datafield>};

    $xml =~ s{($field_re)}{ _mxml_fieldWithSortData($1, $tag, $code) }ge;

    return $xml;
}

# Returns the text of one datafield with its sort subfield inserted just
# before the closing tag.
sub _mxml_fieldWithSortData {
    my ($field, $tag, $code) = @_;

    # First occurrence of each requested subfield code.
    my %value;
    foreach my $c (@$code) {
        if ($field =~ m{<subfield code="\Q$c\E">([^<]*)</subfield>}) {
            $value{$c} = $1;
        }
    }

    # "Present" means defined and non-empty, so a value of "0" still counts.
    my $has = sub {
        my ($c) = @_;
        return defined $value{$c} && length $value{$c};
    };

    my $dataSort = '';

    if ($tag == 245) {
        if ($has->('a')) {
            $dataSort = $value{'a'};
            $dataSort =~ s/[ :]+$//;

            my $ind2 = 0;
            if ($field =~ m{<datafield tag="245" ind1="[01]" ind2="(\d)">}) {
                $ind2 = $1;
            }

            # substr() past the end of the string would yield undef.
            $dataSort = $ind2 < length($dataSort)
                      ? substr($dataSort, $ind2)
                      : '';
        }

        if ($has->('b')) {
            $dataSort .= ': ' . $value{'b'};
        }
        elsif ($has->('p')) {
            $dataSort .= ': ' . $value{'p'};
        }
    }
    elsif ($tag == 260) {
        if ($has->('c') && $value{'c'} =~ m/(\d{4})/) {
            $dataSort = $1;
        }
    }
    elsif ($tag == 852) {
        $dataSort = join ' ',
                    grep { defined($_) && length($_) } @value{qw(k h i)};
        $dataSort =~ s/ +/ /g;
        $dataSort =~ s/(^ | $)//g;
    }

    $field =~ s{</datafield>}{  <subfield code="-">$dataSort</subfield>\n  </datafield>};

    return $field;
}
