#!/usr/bin/perl

use strict;
use DBI;
use Getopt::Std;
use POSIX qw(
    ceil
);

my %options = ();
getopts("c:",\%options);
my $configFile = $options{c};
if (!$configFile || ! -f $configFile) {
    print "Usage: $0 -c CONFIG_FILE\n";
    exit 1;
}

my $config = loadConfig($configFile);
my $dbh = makeConnection($config);
END {
    if ($dbh) {
        $dbh->disconnect();
    }
}

$| = 1;
# Codes start...

my $zdbDir = $config->{'zRoot'} .'/'
           . $config->{'zPort'} .'/'
           . 'record' .'/'
           . $config->{'zDatabase'};

my $indexDir;
my $attTbl;
my $query_insert ;
#if(!createIndexTable($dbh) || !createTable_arl($dbh)){
#    print "fail to create table opl_recordindex or opl_arl.\n";;
#}
#else{
    $attTbl=loadAttTable($dbh);
    $query_insert = createInsertQuery();
    my $sth = $dbh->prepare(<<_SQL_);
select  m.rid
from    opl_marcRecord m left outer join opl_recordindex i using(rid) 
        inner join opl_item t on t.rid=m.rid
where i.rid is null && t.available=1
order by m.rid asc
_SQL_

    my $sth_arl = get_sth_arl($dbh);

    $sth->execute;
    my $dir;
    my $arl_spec = get_arl_spec();

    while (my ($rid) = $sth->fetchrow_array) {
        print "index record $rid\n";
        #parseMarc2mysql($dbh,getXmlRecord($zdbDir, $rid),$rid);
        parseAppendRecord($dbh, getXmlRecord($zdbDir, $rid),$rid, $sth_arl, $arl_spec);
        #updateLeader_008($dbh, $rid,getXmlRecord($zdbDir, $rid));
        #if ($rid > 20) {
        #    last;
        #}
    }
    $sth_arl->finish;
    $sth->finish;
#}


# Codes end.

exit 0;
################################################################################


sub makeConnection {
    my ($config) = @_;
    if (!$config) {
        return;
    }
    my ($db_driver, $db_name, $db_host, $db_port, $db_user, $db_password);

    $db_driver   = $config->{'db_driver'} || 'mysql';
    $db_name     = $config->{'db_name'};
    $db_host     = $config->{'db_host'};
    $db_port     = $config->{'db_port'}   || '3306';
    $db_user     = $config->{'db_user'};
    $db_password = $config->{'db_password'};

    my $dsn = "dbi:$db_driver:$db_name:$db_host:$db_port";

    return DBI->connect($dsn, $db_user, $db_password);
}
############################################################


sub loadConfig {
    my ($configFile) = @_;
#    print "Enter the config filename of Opals: ";
#    $configFile = <STDIN>;
    my $config = {};

    open CONF, $configFile || die "Cannot open file $configFile";
    while (<CONF>) {
        chomp;
        s/#.*//;                # remove comments
        next if /^\s*$/;        # ignore blank lines

        if (/^\s*(\w+)\s*=\s*(.*?)\s*$/) {
            $config->{$1} = $2;
        }
    }
    close CONF;
    
    return $config;
}
################################################################################


sub createInsertQuery(){
    my $queryStr="insert into opl_recordindex set rid=? ";
    foreach my $attr (sort keys %$attTbl){
        $queryStr .= ", attr_$attr= ? ";
    }
    return  $queryStr;
}
############################################################


sub createIndexTable(){
    my ($dbh) =@_;
    my $query = createIndexTblQuery($dbh); 
    my $rv = $dbh->do($query);
    return $rv;
}
############################################################


sub createIndexTblQuery(){
    my ($dbh) =@_;
    my $sth = $dbh->prepare(<<_SQL_);
drop table /*!32312 IF EXISTS*/ opl_recordindex;
_SQL_

    $sth->execute;
    $sth = $dbh->prepare(<<_SQL_);     
select distinct attrVal,attrList from opl_index_map order by attrVal;
_SQL_

    my $fieldList="`rid` int(10) unsigned NOT NULL";
    my $indexList="PRIMARY KEY  (`rid`)";
    
    $sth->execute;
    while (my ($attrVal,$attrList) = $sth->fetchrow_array) {
        if($attrVal ne '1016'){
            $fieldList .= ", `attr_$attrVal` text";
            $indexList .= ", FULLTEXT KEY `index_a$attrVal` (attr_$attrVal)";
        }
        else{
            $indexList .= ", FULLTEXT KEY `index_a1016` ($attrList)";            
        }
    }

    my $queryCreateTbl = "create table opl_recordindex ($fieldList, $indexList
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8 ";
    return $queryCreateTbl;
}
############################################################


sub createTable_arl {
    my ($dbh) = @_;

    $dbh->do(<<_SQL_);
drop table if exists opl_arl
_SQL_

    my $rv = $dbh->do(<<_SQL_);
create table opl_arl (
    rid     int(10) unsigned not null default 0,
    tag     char(3) not null default '',
    ind1    char(1) not null default '',
    ind2    char(1) not null default '',
    code    char(1) not null default '',
    data1   varchar(255) not null default '',
    data2   varchar(255) not null default '',
    fulltext key ft_data (data1(255), data2(255)),
    key i_rid           (rid),
    key i_rti1c_data    (rid, tag, ind1, code, data1(160), data2(160))
) ENGINE=MyISAM DEFAULT CHARSET=utf8
_SQL_

    return $rv;
}
############################################################


sub get_sth_arl {
    my ($dbh) = @_;

    my $sth_arl = $dbh->prepare(<<_SQL_);
insert into opl_arl
set     rid = ?,
        tag = ?,
        ind1 = ?,
        code = ?,
        data1 = ?,
        data2 = ?
_SQL_

    return $sth_arl;
}
############################################################


sub get_arl_spec {
    my $arl_spec;
    $arl_spec->{'521'}->{'0'}->{'a'} = 'range data';
    $arl_spec->{'521'}->{'1'}->{'a'} = 'range data';
    $arl_spec->{'521'}->{'2'}->{'a'} = 'range data';
    $arl_spec->{'526'}->{'0'}->{'a'} = 'data';
    $arl_spec->{'526'}->{'0'}->{'b'} = 'range data';
    $arl_spec->{'526'}->{'0'}->{'c'} = 'range data';
    $arl_spec->{'526'}->{'0'}->{'d'} = 'data';

    return $arl_spec;
}
############################################################


sub add_arl {
    my ($sth_arl, $arl_data) = @_;

    my $rv = $sth_arl->execute(
        $arl_data->{'rid'},
        $arl_data->{'tag'},
        $arl_data->{'ind1'},
        $arl_data->{'code'},
        $arl_data->{'data1'},
        $arl_data->{'data2'}
    );

    return $rv;
}
############################################################


sub getXmlRecord {
    my ($zdbDir, $rid) = @_;

    my $record = '';
    my $dir = ceil($rid/1000);
    if (! -f "$zdbDir/$dir/$rid.xml") {
        print "ERROR: $zdbDir/$dir/$rid.xml: not found.\n";
        return;
    }

    #print "$rid\n";
    #return;

    open MARCXML, "<$zdbDir/$dir/$rid.xml";
    while (<MARCXML>) {
        $record .= $_;
    }
    close MARCXML;
    return $record;
}
############################################################


sub parseAppendRecord {
    my ($dbh, $xml, $rid, $sth_arl, $arl_spec) = @_;
    my $rv;
    my ($fXml, $ind1Xml, $ind2Xml, $sfXml, 
        $tag, $sflist, $sfCode, $sfdata);

    my $rec;
    my @val;
    push @val, $rid;
    my $read;
    foreach my $att (sort keys %$attTbl) {
        $rec->{$att} = "";
        $fXml = $xml;
        foreach my $tag_sf (@{$attTbl->{$att}}) {
            $tag    = $tag_sf->{'tag'};
            $sflist = $tag_sf->{'subfields'};
            if($tag =~ m/00[\d]/) {
                $fXml =~ s/[\s]*<controlfield tag="$tag">(.*)<\/controlfield>//;
                $rec->{$att} .= "$1\n";   
            }
            else {
                while ($fXml =~ s/[\s]*<datafield tag="$tag" ind1="([\d ])" ind2="([\d ])">(([\s]*<subfield code="[\w-]">.*<\/subfield>)*)[\s]*<\/datafield>//) {   
                    $ind1Xml = $1;
                    $ind2Xml = $2;
                    $sfXml = $3;
                    while ($sfXml =~ s/[\s]*<subfield code="([\w|-])">(.*)<\/subfield>//) {
                        ($sfCode, $sfdata) = ($1, $2);

                        if ($tag eq '020' && $sfCode eq 'a') {
                            if ($sfdata =~ m/([\d\-]{9,}[xX]?)/) {
                                $sfdata = $1;
                                $sfdata =~ s/-//g;

                                if (length($sfdata) != 10 &&
                                    length($sfdata) != 13) {
                                    $sfdata = '';
                                }
                            }
                            else {
                                $sfdata = '';
                            }
                        }

                        if (!$read->{$tag} 
                            && $arl_spec->{$tag}->{$ind1Xml}->{$sfCode}) {
                            my $arl_data;
                            $arl_data->{'rid'} = $rid;
                            $arl_data->{'tag'} = $tag;
                            $arl_data->{'ind1'} = $ind1Xml;
                            $arl_data->{'code'} = $sfCode;

                            if ($arl_spec->{$tag}->{$ind1Xml}->{$sfCode} 
                                    eq 'range data'
                                && $sfdata =~ 
                                    m/(K|[\d]+[\.]?[\d]*)-([\d]+[\.]?[\d]*)/) {
                                $arl_data->{'data1'} = $1;
                                $arl_data->{'data2'} = $2;
                            }
                            else {
                                $arl_data->{'data1'} = $sfdata;
                                $arl_data->{'data2'} = $sfdata;
                            }

                            #add_arl($sth_arl, $arl_data);
                        }

                        if ($sflist eq '' || $sflist =~ m/$sfCode/) {
                            if ($tag eq '020') {
                                $rec->{$att} .= "$sfdata ";
                            }
                            else {
                                $rec->{$att} .= "$sfdata\n";
                            }
                        }
                    }
                }
            }

            $read->{$tag} = 1;
        }
        push @val, $rec->{$att};
    }
    $rv = $dbh->do($query_insert, undef, @val);
}
############################################################


sub updateLeader_008 {
   my ($dbh, $rid, $xml) = @_;
   my ($leader, $cf_008)=('','');
   my $sth = $dbh->prepare(<<_SQL_);
update opl_marcRecord
set leader=?, cf_008=?
where rid =?
_SQL_
   
    # Extract leader
    if ($xml =~ s/[\s]*<leader>([\w ]{24})<\/leader>//) {
        $leader=$1;
    }
    
    # Extract control field -- 008
    if($xml =~ s/[\s]*<controlfield tag="008">(.*)<\/controlfield>// ){
        $cf_008=$1;
    }
    $sth->execute($leader,$cf_008,$rid);
 
}
############################################################


sub parseMarc2mysql{

    my ($dbh, $xml, $rid) = @_;

    my ($tag, $ind1, $ind2, $sfCode, $data);
    my $tagId=1;
    my $sth_leader = $dbh->prepare(<<_SQL_);
insert into opl_fullMarcRecord 
set rid= $rid, tag_id=1, tag='000',  data=? 
_SQL_
    my $sth_control = $dbh->prepare(<<_SQL_);
insert into opl_fullMarcRecord 
set rid= $rid, tag_id=?, tag=?,  data=? 
_SQL_

    my $sth_data=$dbh->prepare(<<_SQL_);
insert into opl_fullMarcRecord 
set rid= $rid, tag_id=?, tag=?, subfield=?, indicator1 =?, indicator2=?, data=? ;
_SQL_
   
    # Extract leader
    if ($xml =~ s/[\s]*<leader>([\w ]{24})<\/leader>//) {
        $sth_leader->execute($1);
    }
    $tagId +=1;
    # Extract control field
    while ($xml =~ s/[\s]*<controlfield tag="(00[\d])">(.*)<\/controlfield>//) {
        $tag  = $1;
        $data = $2;
        $sth_control->execute($tagId,$tag, $data);
        $tagId +=1;
    }

    # Extract data field
    my $sfXml;
    while ($xml =~ s/[\s]*<datafield tag="([\d]{3})" ind1="([\d ])" ind2="([\d ])">(([\s]*<subfield code="[\w-]">.*<\/subfield>)*)[\s]*<\/datafield>//) {
        $tag   = $1;
        $ind1  = $2;
        $ind2  = $3;
        $sfXml = $4;

        my @subfield = ();
        while ($sfXml =~ s/[\s]*<subfield code="([\w])">(.*)<\/subfield>//) {
            ($sfCode, $data) = ($1, $2);
            ($data) || ($data = ' '); # fix empty (sub)field
            $sth_data->execute($tagId,$tag,$sfCode,$ind1,$ind2,$data);
        }
        $tagId +=1;

    }

    
}
#////////////////////////////////////////////////////////////////////////////
sub loadAttTable{
    my ($dbh) =@_;
    my $attTbl;
    my $sth = $dbh->prepare(<<_SQL_);
select  attrval, tag, subfields
from    opl_index_map
where   tag is not null
     && subfields is not null
_SQL_
    $sth->execute;
    while (my ($attval, $tag, $subfields) = $sth->fetchrow_array) {
        push @{$attTbl->{$attval}}, {
            tag         => $tag,
            subfields   => $subfields
        };
    }
    return $attTbl;
}

#////////////////////////////////////////////////////////////////////////////
