#!/usr/bin/perl

use strict;
use DBI;
use Getopt::Std;
use POSIX qw(
    ceil
);

# Command line: -c <path>, where <path> is a single XML file or a directory.
my %options;
getopts('c:', \%options);
my $indexDir = $options{c};

# Stop with a usage message unless -c names an existing file or directory.
unless ($indexDir && ((-f $indexDir) || (-d $indexDir))) {
    print "Usage: $0 -c directory|xml file\n";
    exit 1;
}

# Autoflush the selected output handle.
$| = 1;

# Main program: load the field map that createSolrRecord reads through the
# file-level $fieldMapTbl, then index the requested path.
my $fieldMapTbl = loadFieldMapTable();
doIndex($indexDir);

exit 0;

# Index one path.
#
# A directory is walked recursively: entries whose names start with "." are
# skipped and the remaining ones are visited in sorted order. Anything that is
# not a directory is treated as a record file: it is converted with
# createSolrRecord and the result is written with saveSolrRec.
#
#   $fName - path of a directory or of a single record file
#
# Returns nothing. An unreadable directory is reported as an "ERROR:" line on
# STDOUT and skipped.
sub doIndex{
    my($fName)=@_;
    if(-d $fName){
        # Read the directory directly instead of globbing "$fName/*": glob()
        # splits its pattern on whitespace and expands metacharacters, so a
        # directory name containing a space or "[" was mis-expanded.
        my $dh;
        if(!opendir($dh, $fName)){
            print "ERROR: $fName: cannot read directory: $!\n";
            return;
        }
        # Like the "*" pattern, ignore dot-files (this also drops "." and "..").
        my @entries = sort grep { !/^\./ } readdir($dh);
        closedir($dh);

        foreach my $entry (@entries){
            doIndex("$fName/$entry");
        }
    }
    else{
        print "file:$fName\n";
        my($rid,$solrXmlRec)=createSolrRecord($fName);
        saveSolrRec($rid,$solrXmlRec);
    }
}

############################################################
# Read a whole record file into one string.
#
#   $fname - path of the file to read
#
# Returns the file content. When the path is not a regular file or cannot be
# opened, prints an "ERROR:" line on STDOUT and returns nothing (undef in
# scalar context).
sub getXmlRecord {
    my ($fname) = @_;

    if (! -f $fname) {
        print "ERROR: $fname: not found.\n";
        return;
    }

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters, no whitespace trimming) and a failure is
    # reported instead of silently yielding an empty record.
    my $fh;
    if (!open($fh, '<', $fname)) {
        print "ERROR: $fname: cannot open: $!\n";
        return;
    }

    my $record = '';
    while (my $line = <$fh>) {
        $record .= $line;
    }
    close $fh;
    return $record;
}

############################################################


#####################################################
# Build the Solr <add><doc> XML for one record file.
#
#   $fileName - path of the record file; it is also emitted as the "path" field
#
# Returns the list ($rid, $solrXmlRec): the content of controlfield 001
# (0 when the record has none) and the generated XML text.
#
# Reads the file-level $fieldMapTbl (Solr field name => list of rules with the
# keys tag, subfields, subfieldExclude, sfConcatStr and repeatable). For each
# Solr field, every matching datafield is removed from a working copy of the
# record and turned into one <field> element. The Solr fields study_program,
# content_note, illustrator and uri are not built generically: their
# datafields are handed to getStudyProgram (526), getContentNote (505),
# getIllustrator (700) or getURI (856), and other tags mapped to those names
# produce no output.
sub createSolrRecord {
    my ($fileName) = @_;
    my $xml=getXmlRecord($fileName);
    $xml = '' if !defined $xml;      # unreadable file: continue with an empty record
    my $rid=0;
    my ($fXml, $sfXml, $fieldData,
        $tag, $sflist, $sfCode, $sfdata,$sflistExcl,$repeatable,$sfConcatStr);

    # The record id is taken from controlfield 001, which is removed from $xml.
    if($xml =~ s/[\s]*<controlfield tag="001">(.*)<\/controlfield>//){
        $rid=$1;
    }
    my $solrXmlRec="<add>\n<doc>\n<field name=\"rid\">$rid</field>\n<field name=\"path\">$fileName</field>\n";

    my $recType= getRecFormat($xml);
    foreach my $format(@$recType){
        $solrXmlRec .="<field name=\"format\">$format</field>\n";
    }

    foreach my $fName (sort keys %$fieldMapTbl) {
        # Each Solr field starts again from the full record.
        $fXml = $xml;

        # Decide once per field whether it has a dedicated builder. This must
        # not use /g: in scalar context /g remembers pos() on $fName, which
        # made the test fail on every second datafield of the same field.
        my $hasOwnBuilder =
            ($fName =~ m/^study_program$|^content_note$|^illustrator$|^uri$/i) ? 1 : 0;

        foreach my $tag_sf (@{$fieldMapTbl->{$fName}}) {
            $tag    = $tag_sf->{'tag'};
            $sflist = $tag_sf->{'subfields'};
            $sflistExcl = $tag_sf->{'subfieldExclude'};
            $sfConcatStr= $tag_sf->{'sfConcatStr'};
            $repeatable = $tag_sf->{'repeatable'};

            # Consume the datafields with this tag one at a time.
            while ($fXml =~ s/[\s]*<datafield tag="$tag" ind1="([\d ])" ind2="([\d ])">(([\s]*<subfield code="[\w-]">.*<\/subfield>)*)[\s]*<\/datafield>//) {
                $sfXml = $3;         # all <subfield> elements of this datafield
                $fieldData="";

                if($hasOwnBuilder){
                    if($tag eq '505'){
                        $solrXmlRec .=getContentNote($sfXml);
                    }
                    elsif($tag eq '526'){
                        $solrXmlRec .=getStudyProgram($sfXml);
                    }
                    elsif($tag eq '700'){
                        $solrXmlRec .=getIllustrator($sfXml);
                    }
                    elsif($tag eq '856'){
                        $solrXmlRec .=getURI($sfXml);
                    }
                    # Note: this also bypasses the "repeatable" check below.
                    next;
                }

                while ($sfXml =~ s/[\s]*<subfield code="([\w|-])">(.*)<\/subfield>//) {
                    ($sfCode, $sfdata) = ($1, $2);

                    # 020$a: keep only a run of at least nine digits/hyphens
                    # with an optional trailing X, strip the hyphens, and blank
                    # the value unless 10 or 13 characters remain.
                    if ($tag eq '020' && $sfCode eq 'a') {
                        if ($sfdata =~ m/([\d\-]{9,}[xX]?)/) {
                            $sfdata = $1;
                            $sfdata =~ s/-//g;

                            if (length($sfdata) != 10 &&
                                length($sfdata) != 13) {
                                $sfdata = '';
                            }
                        }
                        else {
                            $sfdata = '';
                        }
                    }

                    # An empty include list means "every subfield". The code is
                    # quoted because "|" and "-" are accepted as codes above
                    # and would otherwise act as regex syntax.
                    if (($sflist eq '' || $sflist =~ m/\Q$sfCode\E/) &&  $sflistExcl !~ m/\Q$sfCode\E/) {
                        $fieldData .= $sfConcatStr if($fieldData ne "");
                        $fieldData .= "$sfdata ";
                    }
                }
                $solrXmlRec .= "<field name=\"$fName\" >$fieldData</field>\n" if($fieldData ne "");
                last if(!$repeatable);
            }
        }
    }
    $solrXmlRec .="</doc>\n</add>";
    return ($rid,$solrXmlRec);

 }
#####################################################
# Write one generated Solr document to /tmp/sll_oms_solr/<bucket>/<rid>.xml,
# where <bucket> is ceil(rid / 1000). The bucket directory is created with
# mode 0775 when missing; directory and file are chown'ed to apache.apache and
# the file is set to mode 0664.
#
#   $rid        - record id; used for the bucket number and the file name
#   $solrXmlRec - XML text to write
#
# Returns nothing. Failures are reported as "ERROR:" lines on STDOUT.
sub saveSolrRec{
    my ($rid,$solrXmlRec)=@_;

    # The id comes from record content and becomes part of a path, so an id
    # with a path separator must not be allowed to leave the bucket directory.
    if (defined $rid && $rid =~ m{/}) {
        print "ERROR: record id '$rid' contains '/', record not saved.\n";
        return;
    }

    my $dir = "/tmp/sll_oms_solr/" . ceil($rid/1000);
    if (! -d $dir) {
        # mkdir is not recursive; re-test -d in case the directory appeared
        # between the check and the call.
        if (!mkdir($dir, 0775) && ! -d $dir) {
            print "ERROR: $dir: cannot create: $!\n";
            return;
        }
        # List form: no shell is involved, so the path is passed literally.
        system('chown', 'apache.apache', $dir);
    }

    my $file = "$dir/$rid.xml";
    my $fh;
    if (!open($fh, '>', $file)) {
        print "ERROR: $file: cannot write: $!\n";
        return;
    }
    print {$fh} $solrXmlRec;
    # Buffered write errors only surface at close.
    if (!close($fh)) {
        print "ERROR: $file: write failed: $!\n";
    }

    system('chown', 'apache.apache', $file);
    chmod 0664, $file;

    return;
}
#####################################################
# Load the Solr field map from a configuration file.
#
#   $confFile - optional path of the configuration file; defaults to
#               /tmp/solrIndexConf.conf
#
# File format, as accepted by the patterns below:
#   - everything from "#" to the end of a line is dropped; blank lines are
#     skipped;
#   - a rule line is "fieldName = value"; the same fieldName may appear on
#     several lines, each adding one rule;
#   - value options are only read when "[tag:NNN]" (three digits) is present:
#       [subfields:xyz ]        subfield codes to include (empty = all);
#                               note the space before the closing bracket
#       [subfieldExclude:xyz ]  subfield codes to leave out
#       [repeatable:false]      use only the first matching datafield
#       [sfConcatStr:...]       text inserted between subfield values; the
#                               match is greedy, so it runs to the last "]"
#                               on the line
#     Defaults: tag '', subfields '', subfieldExclude '', repeatable 1,
#     sfConcatStr ' '.
#
# Returns a hash reference: fieldName => array of rule hashes with the keys
# tag, subfields, subfieldExclude, repeatable and sfConcatStr. The hash is
# empty when the file cannot be opened (an "ERROR:" line is printed).
sub loadFieldMapTable{
    my ($confFile) = @_;
    $confFile = '/tmp/solrIndexConf.conf'
        if !defined $confFile || $confFile eq '';

    my $fieldTbl = {};

    my $conf;
    if (!open($conf, '<', $confFile)) {
        print "ERROR: $confFile: cannot open: $!\n";
        return $fieldTbl;
    }

    while (my $line = <$conf>) {
        chomp $line;
        $line =~ s/#.*//;                # remove comments
        next if $line =~ /^\s*$/;        # ignore blank lines
        next unless $line =~ /^\s*(\w+)\s*=\s*(.*?)\s*$/;

        my ($fieldName, $val) = ($1, $2);
        my ($tag, $subfields, $subfieldExclude, $repeatable, $sfConcatStr)
            = ('', '', '', 1, ' ');

        if($val =~ m/\[tag:([\d]{3})\]/){
            $tag=$1;
            # No /g on these matches: with /g each search resumed after the
            # previous hit, so the options only worked in one particular order.
            if($val =~ m/\[subfields:([0-9a-z]*) \]/){
                $subfields=$1;
            }
            if($val =~ m/\[subfieldExclude:([0-9a-z]*) \]/){
                $subfieldExclude=$1;
            }
            if($val =~ m/\[repeatable:(false)\]/){
                $repeatable=0;
            }
            if($val =~ m/\[sfConcatStr:(.*)\]/){
                $sfConcatStr=$1;
            }
        }
        push @{$fieldTbl->{$fieldName}}, {
            tag             => $tag,
            subfields       => $subfields,
            subfieldExclude => $subfieldExclude,
            repeatable      => $repeatable,
            sfConcatStr     => $sfConcatStr
        };
    }
    close $conf;
    return $fieldTbl;

}
#////////////////////////////////////////////////////////////////////////////
# Database-backed variant of the field-map loader: reads the rules from the
# opl_solrFieldMap table, echoes each one to STDOUT, writes it to
# /tmp/solrIndexConf.conf and returns the table.
#
#   $dbh - database handle providing prepare(); the returned statement handle
#          must provide execute() and fetchrow_array()
#
# Returns a hash reference: fieldName => array of rule hashes with the keys
# tag, subfields, subfieldExclude, repeatable and sfConcatStr (undef when the
# query yields no rows). In this variant "subfields" is 'all' for an empty
# column and "repeatable" is the string 'true' or 'false', as selected by the
# SQL below.
#
# NOTE(review): the lines written here ("name= tag:245 subfields:...") carry no
# square brackets, while loadFieldMapTable looks for "[tag:NNN]" style options;
# the two formats look incompatible - confirm before relying on this sub.
sub loadFieldMapTable_bk{
    my ($dbh) =@_;
    my $fieldTbl;
    my $sth = $dbh->prepare(<<_SQL_);
select  fieldName, tag,if(subfields='','all',subfields) as subfields ,subfieldExclude,if(repeatable = 1,'true','false') as repeatable ,sfConcatStr 
from    opl_solrFieldMap
where   tag is not null
     && subfields is not null
_SQL_

    # Lexical handle and three-argument open; when the file cannot be written
    # the rules are still echoed and returned, only the file output is skipped.
    my $confFile = '/tmp/solrIndexConf.conf';
    my $confFh;
    if (!open($confFh, '>', $confFile)) {
        print "ERROR: $confFile: cannot write: $!\n";
        undef $confFh;
    }

    $sth->execute;
    while (my ($fieldName, $tag, $subfields,$subfieldExclude,$repeatable,$sfConcatStr) = $sth->fetchrow_array) {
        print "$fieldName= tag:$tag subfields:$subfields subfieldExclude:$subfieldExclude repeatable:$repeatable sfConcatStr:$sfConcatStr\n";
        print {$confFh} "$fieldName= tag:$tag subfields:$subfields  subfieldExclude:$subfieldExclude repeatable:$repeatable sfConcatStr:$sfConcatStr\n"
            if $confFh;
        push @{$fieldTbl->{$fieldName}}, {
            tag             => $tag,
            subfields       => $subfields,
            subfieldExclude => $subfieldExclude,
            repeatable      => $repeatable,
            sfConcatStr     => $sfConcatStr
        };
    }

    # Buffered write errors only surface at close.
    if ($confFh && !close($confFh)) {
        print "ERROR: $confFile: write failed: $!\n";
    }
    return $fieldTbl;
}

#////////////////////////////////////////////////////////////////////////////
# Build the "illustrator" Solr field from the subfields of one 700 datafield.
#
#   $f700xml - the <subfield> elements of the datafield
#
# Returns '<field name="illustrator" >NAME</field>' plus a newline, where NAME
# is subfield "a", when subfield "e" starts with "ill" (case-insensitive) and
# a subfield "a" exists; otherwise returns the empty string.
sub getIllustrator{
    my ($f700xml)=@_;
    my $solrField="";
    if($f700xml=~ m/[\s]*<subfield code="e">(.*)<\/subfield>/){
        # Copy the capture before the next match overwrites $1.
        my $role = $1;
        if($role =~ m/^ill/i
           && $f700xml=~ m/[\s]*<subfield code="a">(.*)<\/subfield>/){
            $solrField .= "<field name=\"illustrator\" >$1</field>\n" ;
        }
    }
    # Explicit return: the result no longer depends on the value of whichever
    # condition happened to be evaluated last.
    return $solrField;
}
#////////////////////////////////////////////////////////////////////////////
# Turn the subfields of one 856 datafield into Solr fields.
#
#   $f856xml - the <subfield> elements of the datafield
#
# Subfield "u" becomes a "uri" field and subfield "3" a "uri_description"
# field, in the order they are found; empty values and all other subfields
# are ignored. Returns the concatenated <field> lines ('' when there are none).
sub getURI{
    my ($f856xml)=@_;

    my %solrNameFor = (
        'u' => 'uri',
        '3' => 'uri_description',
    );

    my @lines;
    # Each pass removes the first remaining u/3 subfield from the local copy.
    while ($f856xml =~ s/[\s]*<subfield code="([u3])">(.*)<\/subfield>//) {
        my ($code, $value) = ($1, $2);
        next if $value eq '';
        push @lines, '<field name="' . $solrNameFor{$code} . '" >' . $value . "</field>\n";
    }

    return join('', @lines);
}
#////////////////////////////////////////////////////////////////////////////
# Turn the subfields of one 526 datafield into Solr fields.
#
#   $f526xml - the <subfield> elements of the datafield
#
# Every non-empty subfield a/b/c/d/z becomes its own field named stdPrgm_name,
# stdPrgm_interestLevel, stdPrgm_readingLevel, stdPrgm_pointValue or
# stdPrgm_quizNumber. In addition one "stdPrgm" summary field is appended,
# built from the parts that are present, e.g.
#   NAME: Interest level: B / Reading level: C / Point value: D / Quiz number : Z
# When a subfield code occurs more than once, the last value is the one used
# in the summary.
#
# Returns the concatenated <field> lines ('' when there is nothing to emit).
sub getStudyProgram{
    my ($f526xml)=@_;
    my $solrField="";
    my $fieldMap={'a'=>'name',
                  'b'=>'interestLevel',
                  'c'=>'readingLevel',
                  'd'=>'pointValue',
                  'z'=>'quizNumber'
                 };
    my $field={};
    while ($f526xml =~ s/[\s]*<subfield code="([abcdz])">(.*)<\/subfield>//) {
        my ($code, $value) = ($1, $2);
        if($value ne ''){
            $solrField .= "<field name=\"stdPrgm_" . $fieldMap->{$code} ."\" >$value</field>\n" ;
        }
        $field->{$fieldMap->{$code}}=$value;
    }

    # "Present" means a non-empty string, the same test used for the
    # per-subfield fields above, so a value of "0" is kept in the summary.
    my $has = sub {
        my ($key) = @_;
        return defined $field->{$key} && $field->{$key} ne '';
    };

    my $stdPrgm="";
    if($has->('name')){
        $stdPrgm = $field->{'name'};
    }
    if($has->('interestLevel')){
        $stdPrgm .= ": " if ($stdPrgm ne "");
        $stdPrgm .= "Interest level: " . $field->{'interestLevel'};
    }
    if($has->('readingLevel')){
        $stdPrgm .= " / " if ($stdPrgm ne "");
        $stdPrgm .= "Reading level: " . $field->{'readingLevel'};
    }
    if($has->('pointValue')){
        # Same " / " separator as the neighbouring parts (was "/ ").
        $stdPrgm .= " / " if ($stdPrgm ne "");
        $stdPrgm .= "Point value: " . $field->{'pointValue'};
    }
    if($has->('quizNumber')){
        # Only separate from preceding text; this used to compare against the
        # literal "quizNumber" and so always added a leading " / ".
        $stdPrgm .= " / " if ($stdPrgm ne "");
        $stdPrgm .= "Quiz number : " . $field->{'quizNumber'};
    }
    if($stdPrgm ne ""){
        $solrField .= "<field name=\"stdPrgm\" >$stdPrgm</field>\n" ;
    }
    return $solrField;

}
#////////////////////////////////////////////////////////////////////////////
# Turn the subfields of one 505 datafield into Solr fields.
#
#   $f505xml - the <subfield> elements of the datafield
#
# Subfields a, g, t, r and u become content_note_format,
# content_note_miscInfo, content_note_title, content_note_resp and
# content_note_uri fields, in the order they are found; empty values and all
# other subfields are ignored. Returns the concatenated <field> lines ('' when
# there are none).
sub getContentNote{
    my ($f505xml)=@_;

    my %solrNameFor = (
        'a' => 'content_note_format',
        'g' => 'content_note_miscInfo',
        't' => 'content_note_title',
        'r' => 'content_note_resp',
        'u' => 'content_note_uri',
    );

    my @lines;
    # Each pass removes the first remaining mapped subfield from the local copy.
    while ($f505xml =~ s/[\s]*<subfield code="([agtru])">(.*)<\/subfield>//) {
        my ($code, $value) = ($1, $2);
        next if $value eq '';
        push @lines, '<field name="' . $solrNameFor{$code} . '" >' . $value . "</field>\n";
    }

    return join('', @lines);
}

#////////////////////////////////////////////////////////////////////////////
# Derive the "format" labels of a record from its leader and control fields.
#
#   $xml - the record XML (only a local copy is modified)
#
# Inputs used: leader characters 6 and 7, the first two characters of
# controlfield 007 and character 26 of controlfield 008; a missing or too
# short source leaves the corresponding value empty.
#
# Returns a reference to the list of labels, in rule order; ["Book"] when no
# rule applies. The label texts (including the spellings "Music Cassettte" and
# "Libray Kit") are emitted exactly as they always were.
sub getRecFormat{
    my($xml)=@_;

    # Leader characters 6 and 7 (both need a leader longer than six chars).
    my ($ldr06, $ldr07) = ('', '');
    if($xml =~ m/[\s]*<leader>(.*)<\/leader>/){
        my $leader = $1;
        if(length($leader) > 6){
            $ldr06 = substr($leader, 6, 1);
            $ldr07 = substr($leader, 7, 1);
        }
    }

    # Controlfield 007, characters 0 and 1.
    my ($cf007_00, $cf007_01) = ('', '');
    if($xml =~ s/[\s]*<controlfield tag="007">(.*)<\/controlfield>//){
        my $cf007 = $1;
        $cf007_00 = substr($cf007, 0, 1) if length($cf007) > 0;
        $cf007_01 = substr($cf007, 1, 1) if length($cf007) > 1;
    }

    # Controlfield 008, character 26.
    my $cf008_26 = '';
    if($xml =~ s/[\s]*<controlfield tag="008">(.*)<\/controlfield>//){
        my $cf008 = $1;
        $cf008_26 = substr($cf008, 26, 1) if length($cf008) > 26;
    }

    my @recType;

    # Rules that look at both leader characters.
    push @recType, "Journal" if $ldr06 =~ m/[a]/  && $ldr07 =~ m/[sb]/;    # journal, serial
    push @recType, "Book"    if $ldr06 =~ m/[at]/ && $ldr07 =~ m/[acdm]/;

    # Rules that look at leader character 6 only, in output order.
    # (A former "Videocassette" rule on 007 character 0 = "v" is disabled.)
    my @ldr06Rules = (
        [ qr/[r]/,  "Artifacts"       ],
        [ qr/[k]/,  "Poster"          ],   # photos, posters
        [ qr/[j]/,  "Music Cassettte" ],
        [ qr/[i]/,  "Book on Tape"    ],
        [ qr/[g]/,  "Movie"           ],
        [ qr/[op]/, "Libray Kit"      ],   # library kit
        [ qr/[ef]/, "Map"             ],
        [ qr/[cd]/, "Sheet Music"     ],
    );
    foreach my $rule (@ldr06Rules){
        my ($pattern, $label) = @$rule;
        push @recType, $label if $ldr06 =~ $pattern;
    }

    # Leader character 6 = "m": Internet when 008 character 26 is "j",
    # Electronic Media otherwise.
    if($ldr06 =~ m/[m]/){
        push @recType, ($cf008_26 =~ m/[j]/) ? "Internet" : "Electronic Media";
    }

    # Rules that look at controlfield 007.
    push @recType, "CD Music"     if $cf007_00 =~ m/[s]/ && $cf007_01 =~ m/[d]/;
    push @recType, "Compact Disc" if $cf007_00 =~ m/[c]/ && $cf007_01 =~ m/[o]/;

    # Nothing matched: default to Book.
    push @recType, "Book" unless @recType;

    return \@recType;
}


