package Opals::MarcXmlParser;

# Package version number. The assignment sits above `use strict`, so the
# package variable needs no declaration here.
$VERSION   = 0.01;

use strict;
use Encode;
use XML::SAX;
use base qw( XML::SAX::Base );

use Opals::Constant;
use Opals::Utility qw(
    util_restoreLiteral
);
# File-scoped handle on the record-type definition table; _getRecFormat()
# walks it to classify a record.
# NOTE(review): RECTYPE_DEF_TBL is presumably exported by Opals::Constant -- confirm.
my $recTypeDef =RECTYPE_DEF_TBL;
#===========================================================================================
#Thu, Jul 22, 2010 @ 15:37:35 EDT
#
# public methods:
#       new                     :constructor
#       loadConfig              :load solr index config file
#       getSolrRecXml           :parse and return solr xml record from marcXml
#       getSolrRecXml_file      :parse and return solr xml record from marcXml file
#       getRecInfoGeneral       :parse and return general info record from marcXml
#       getRecInfoGeneral_file  :parse and return general info record from marcXml file 
#
#============================================================================================


# File-scoped lookup tables. They are declared empty here so that the subs
# below can refer to them; the contents are assigned further down the file.
my $langCodeMap={};      # language code => language name, read by _getRecLanguage() (table presumably at the end of this file)
my $genInfoRecFielMap={};# MARC tag => list of general-info field definitions (table at the end of this file)
my $literaryFormMap={};  # literary-form code => form name, read by _getRecLiteraryForm() (table at the end of this file)
#////////////////////////////////////////////////////////////////////////////

# Constructor.
#   $type     : class name (or an existing object, whose class is reused)
#   $confFile : path of the Solr index configuration file; may be omitted
# Returns a new parser object holding the path under the 'confFile' key.
sub new{
    my ($type,$confFile)=@_;
    # Two-argument bless so that subclasses get objects of their own class;
    # fall back to this package when new() is called as a plain function.
    my $class = ref($type) || $type || __PACKAGE__;
    my $self  = { 'confFile' => $confFile };
    return bless($self,$class);
}
#////////////////////////////////////////////////////////////////////////////
# Load the Solr index configuration into $self->{'fieldMap'}.
#   $confFile : optional path; when omitted, the path given to new() is used.
# When the chosen path is not a plain file the field map is set to an empty
# hash and nothing is parsed. A path that is successfully chosen is remembered
# in $self->{'confFile'} so later calls keep using the same configuration.
# Returns the field map.
sub loadConfig{
    my ($self,$confFile)=@_;
    # An explicit argument wins; otherwise fall back to the stored path.
    if(!defined $confFile || $confFile eq ''){
        $confFile = $self->{'confFile'};
    }
    if(!defined $confFile || ! -f $confFile){
        $self->{'fieldMap'}={};
        return $self->{'fieldMap'};
    }
    $self->{'confFile'} = $confFile;
    $self->{'fieldMap'} = _loadIndexConfig($confFile);
    return $self->{'fieldMap'};
}
#////////////////////////////////////////////////////////////////////////////
# Convert one MARCXML record (given as a string) into Solr XML.
# Returns undef when the object has no configuration file path; otherwise the
# field map is loaded on first use and the parsed Solr XML is returned.
sub getSolrRecXml{
    my $self = shift;
    my $xml  = shift;

    unless(defined $self->{'confFile'}){
        return undef;
    }
    unless(defined $self->{'fieldMap'}){
        $self->{'fieldMap'} = _loadIndexConfig($self->{'confFile'});
    }
    return _parse2SolrXml($self,$xml);
}
#////////////////////////////////////////////////////////////////////////////
# Same as getSolrRecXml(), but the MARCXML is read from the file $file.
# The file is read with the UTF-8 flag (second argument of _getXmlFile) on.
sub getSolrRecXml_file{
    my $self = shift;
    my $file = shift;

    my $content = _getXmlFile($file,1);
    return getSolrRecXml($self,$content);
}
#////////////////////////////////////////////////////////////////////////////
# Parse a MARCXML string into the general-info hash.
# $self->{'genRecInfo'} is reset first; an empty string yields undef.
sub getRecInfoGeneral{
    my $self = shift;
    my $xml  = shift;

    $self->{'genRecInfo'}={};
    return undef if($xml eq '');
    return _parse2generalInfoRec($self,$xml);
}
#////////////////////////////////////////////////////////////////////////////
# Same as getRecInfoGeneral(), but the MARCXML is read from the file $file.
sub getRecInfoGeneral_file{
    my $self = shift;
    my $file = shift;

    my $content = _getXmlFile($file);
    return getRecInfoGeneral($self,$content);
}

#////////////////////////////////////////////////////////////////////////////
# Fill $self->{'genRecInfo'} from one MARCXML record string and return it.
#   leader / control fields : rid (001), bid (002, ebook id), cid (004, OPALS
#                             book cover id), langCode and literaryForm (008),
#                             format (leader + 006/007/008)
#   data fields             : every tag present in the general-info map, plus
#                             all 5xx/6xx tags, is passed to _getGeneralDatafield
# All patterns are non-greedy with /s and without /g in the `if` tests: a
# greedy `.*` swallows several elements when the record is on a single line,
# and a scalar-context /g match leaves pos() set so that the next /g match on
# the same string only searches the remainder.
sub _parse2generalInfoRec{
    my ($self,$marcXml)=@_;

    my ($leader,$cf008)=("","");

#get leader
    if($marcXml =~ m/<leader>(.*?)<\/leader>/s){
        $leader=$1;
    }
#get control fields 001 (record id), 002 (ebook id), 004 (OPALS book cover id)
    my %infoKeyFor=('001'=>'rid','002'=>'bid','004'=>'cid');
    foreach my $cfTag(sort keys %infoKeyFor){
        if($marcXml =~ m/<controlfield tag="$cfTag">(.*?)<\/controlfield>/s){
            $self->{'genRecInfo'}->{$infoKeyFor{$cfTag}}=$1;
        }
    }
#get control fields 006 and 007 (both repeatable); list-context /g collects
#every occurrence in document order
    my @cf006=($marcXml =~ m/<controlfield tag="006">(.*?)<\/controlfield>/sg);
    my @cf007=($marcXml =~ m/<controlfield tag="007">(.*?)<\/controlfield>/sg);

#get control field 008
    if($marcXml =~ m/<controlfield tag="008">(.*?)<\/controlfield>/s){
        $cf008 =$1;
    }
    $self->{'genRecInfo'}->{'langCode'}     = _getRecLangCode($cf008);
    $self->{'genRecInfo'}->{'literaryForm'} = _getRecLiteraryForm($cf008);

    my $recFormat  = _getRecFormat($leader,\@cf006,\@cf007,$cf008);
    foreach my $format(@$recFormat){
         push @{$self->{'genRecInfo'}->{'format'}}, {item=>$format} if($format ne '');
    }

#parse data fields; scalar-context /g walks the record once instead of
#copying the remainder of the string on every iteration
    while($marcXml =~ m/<datafield tag="([\d]{3})" ind1="([\d ])" ind2="([\d ])">(.*?)<\/datafield>/sg){
        # copy the captures before any other match can overwrite them
        my ($tag,$ind1,$ind2,$sfXml)=($1,$2,$3,$4);
        if(defined $genInfoRecFielMap->{$tag} || $tag =~ m/6..|5../) {
            _getGeneralDatafield($self,$tag,$sfXml,$ind1,$ind2);
        }
    }
    return $self->{'genRecInfo'};

}


#////////////////////////////////////////////////////////////////////////////
# Build the Solr XML for one MARCXML record string.
# The result holds, in this order: the main <doc>, one <doc> per distinct
# subject, one <doc> per main author, and one ARL <doc> per 526 field that
# produced study-program data. Returns the concatenated XML string.
# All patterns are non-greedy with /s and without /g in the `if` tests: a
# greedy `.*` swallows several elements when the record is on a single line,
# and a scalar-context /g match leaves pos() set so that the next /g match on
# the same string only searches the remainder.
sub _parse2SolrXml{
    my ($self,$marcXml)=@_;
    _init($self);
    my ($leader,$cf008,$rid)=("","","");
    my @arlList=();

#get leader
    if($marcXml =~ m/<leader>(.*?)<\/leader>/s){
        $leader=$1;
    }
#get control field 001
    if($marcXml =~ m/<controlfield tag="001">(.*?)<\/controlfield>/s){
        $rid=$1;
    }
#get control fields 006 and 007 (both repeatable)
    my @cf006=($marcXml =~ m/<controlfield tag="006">(.*?)<\/controlfield>/sg);
    my @cf007=($marcXml =~ m/<controlfield tag="007">(.*?)<\/controlfield>/sg);

#get control field 008
    if($marcXml =~ m/<controlfield tag="008">(.*?)<\/controlfield>/s){
        $cf008 =$1;
    }
    my $langCode     = _getRecLangCode($cf008);
    my $literaryForm = _getRecLiteraryForm($cf008);
    my $recFormat    = _getRecFormat($leader,\@cf006,\@cf007,$cf008);

    my $bibRec = "<field name=\"rid\">$rid</field>\n"
               . "<field name=\"language\">$langCode</field>\n"
               . "<field name=\"literaryForm\">$literaryForm</field>\n";

    foreach my $format(@$recFormat){
         $bibRec .= "<field name=\"format\">$format</field>\n";
    }

#parse data fields; only tags present in the index configuration are used
    while($marcXml =~ m/<datafield tag="([\d]{3})" ind1="([\d ])" ind2="([\d ])">(.*?)<\/datafield>/sg){
        # copy the captures before the inner match overwrites them
        my ($tag,$ind1,$ind2,$sfXml)=($1,$2,$3,$4);
        next if(!defined $self->{'fieldMap'}->{$tag});

        my @sfList=();
        while($sfXml =~ m/<subfield code="([\w-])">(.*?)<\/subfield>/sg){
            push @sfList,{code=>$1,data=>$2};
        }
        _getDatafield($self,$tag,$ind1,$ind2,\@sfList);
#parse ARL record
        if($tag eq '526'){
            push @arlList, _createSolrARL(\@sfList);
        }
    }

    my ($authorDoc,$sbjDoc,$sbjCount,$authCount)=("","",0,0);
    foreach my $f(@{$self->{'field'}}){
        $f->{"fieldVal"} =~ s/^\s+|\s+$//g;
        next if($f->{"fieldVal"} eq '');
        if($f->{"field"} eq 'isbn'){
            $f->{"fieldVal"}=_fixISBN($f->{"fieldVal"});
        }
        if($f->{"field"} eq 'datePublication'){
            # extra sortable copy of the publication date
            $bibRec  .="<field name=\"datePubSort\">". _fixPubDate($f->{"fieldVal"}) . "</field>\n";
        }
        $bibRec  .=sprintf("<field name=\"%s\">%s</field>\n",$f->{"field"},$f->{"fieldVal"});

        # subjects and main authors additionally get their own small documents
        if($f->{"field"} eq "subject"){
            my $sbjId ="sbj_$rid" . "_$sbjCount";
            $sbjDoc    .= _createSolrDistinctSbjDoc($sbjId,$rid,$f->{"fieldVal"});
            $sbjCount++;
        }
        elsif($f->{"field"} eq "author_main"){
            my $authId ="aut_$rid" ."_$authCount";
            $authorDoc .= _createSolrDistinctAuthorDoc($authId,$rid,$f->{"fieldVal"});
            $authCount++;
        }
    }
    $self->{'field'}=undef;

    my $solrRec ="<doc>\n<field name=\"id\">$rid</field>\n<field name=\"db\">main</field>$bibRec</doc>\n";
    $solrRec .=$sbjDoc;
    $solrRec .=$authorDoc;
    my $arlCount=0;
    foreach my $arl(@arlList){
        if($arl ne ''){
            $solrRec .="<doc>\n<field name=\"id\">arl_$rid" ."_$arlCount</field>\n<field name=\"db\">ARL</field>\n$bibRec$arl</doc>\n";
            $arlCount++;
        }
    }
    $self->{'firstFieldOnly'}=undef;
    return $solrRec;

}

#////////////////////////////////////////////////////////////////////////////
# Extract the ISBN from a raw 020 value and strip its hyphens and blanks.
# The ISBN is the first run of at least ten characters made of digits, 'x',
# 'X', blanks and hyphens that starts and ends with a digit or x/X; leading
# non-digit text (e.g. a label) is skipped.
# Returns the normalised ISBN, or '' when the value contains none. The match
# result is checked explicitly: reading a capture variable after a failed
# match would return whatever an earlier, unrelated match left behind.
sub _fixISBN{
    my($isbn)=@_;
    return '' if(!defined $isbn);
    if($isbn =~ m/\D*([\dxX][\dxX -]{8,}[\dxX])/){
        my $digits=$1;
        $digits =~ s/[- ]//g;
        return $digits;
    }
    return '';
}

#////////////////////////////////////////////////////////////////////////////
# Return the publication date with every leading non-digit character removed
# (for example "c1999." becomes "1999."); used for the sortable date field.
sub _fixPubDate{
    my $sortable = $_[0];
    $sortable =~ s/^[\D]+//g;
    return $sortable;
}
#////////////////////////////////////////////////////////////////////////////
# Build the small Solr document (db "subject") that carries one distinct
# subject heading of record $rid under the document id $id.
sub _createSolrDistinctSbjDoc{
    my $docId   = shift;
    my $recId   = shift;
    my $heading = shift;
    my $template = "<doc><field name=\"db\">subject</field><field name=\"id\">%s</field>
                    <field name=\"rid\">%s</field>
                    <field name=\"subject_grp\">%s</field>
                    </doc>\n";
    return sprintf($template,$docId,$recId,$heading);
}
#////////////////////////////////////////////////////////////////////////////
# Build the small Solr document (db "author") that carries one main author
# of record $rid under the document id $id.
sub _createSolrDistinctAuthorDoc{
    my $docId  = shift;
    my $recId  = shift;
    my $person = shift;
    my $template = "<doc><field name=\"db\">author</field>
                    <field name=\"id\">%s</field>
                    <field name=\"rid\">%s</field>
                    <field name=\"author_grp\">%s</field>
                    </doc>\n";
    return sprintf($template,$docId,$recId,$person);
}


#////////////////////////////////////////////////////////////////////////////
# Build the Solr <field> lines of one study-program (tag 526) entry.
#   $sfList : array ref of {code=>..., data=>...} subfields
# Subfields a/b/c/d/z map to studyPrgm / interestLevel / readingLevel /
# pointValue / quizNumber. An interest level written as a grade range
# ("K-3", "4-8", "9-12") is normalised to LG, MG or UG; a range ending above
# grade 12 becomes ''. Returns '' unless the program name mentions
# "Accelerated Reader" or "Reading Counts". Fields are emitted in sorted name
# order so the output is stable from run to run.
sub _createSolrARL{
    my($sfList)=@_;
    my $map={a=>'studyPrgm',
             b=>'interestLevel',
             c=>'readingLevel' ,
             d=>'pointValue',
             z=>'quizNumber'
             };
    my $arl={};
    foreach my $sf(@$sfList){
        my ($code,$data)=($sf->{'code'},$sf->{'data'});
#normalize grade
        if($code eq 'b' && defined $data
           && $data =~ m/(K|[\d]+[\.]?[\d]*)-([\d]+[\.]?[\d]*)/i){
            # kindergarten counts as grade 0; grades are compared as numbers,
            # because a string comparison ranks '12' below '8'
            my $low  = (lc($1) eq 'k') ? 0 : $1;
            my $high = $2;
            if($low<=3 && $high<=3){
                $data='LG';
            }
            elsif($high<=8){
                $data='MG';
            }
            elsif($high<=12){
                $data='UG';
            }
            else{
                $data='';
            }
        }
#/normalize grade

        if(defined $map->{$code}){
            $arl->{$map->{$code}} = $data;
        }
    }

    my $program=$arl->{'studyPrgm'};
    return "" if(!defined $program || $program !~ m/Accelerated Reader|Reading Counts/i);

    my $arlDoc ="";
    foreach my $fName(sort keys %$arl){
        $arlDoc .= "<field name=\"$fName\">".$arl->{$fName} ."</field>\n";
    }
    return $arlDoc;
}
#////////////////////////////////////////////////////////////////////////////
# Apply every general-info field definition registered for a data field.
# Definitions are looked up under the exact tag and under its wildcard form
# (first digit followed by "xx"). A definition is skipped when its 'tagExcl'
# pattern matches the tag or when it names an indicator value different from
# the field's own. The subfield XML is only parsed when at least one
# definition applies to the tag.
sub _getGeneralDatafield{
    my ($self,$tag,$sfXml,$ind1,$ind2)=@_;

    my $wildTag=substr($tag,0,1) ."xx";
    my @defs=();
    foreach my $key($tag,$wildTag){
        push @defs,@{$genInfoRecFielMap->{$key}} if(defined $genInfoRecFielMap->{$key});
    }
    return if(scalar(@defs)==0);

    my $subfields=_getSubfieldList($sfXml);
    DEF: foreach my $def(@defs){
        next DEF if(defined $def->{'tagExcl'} && $def->{'tagExcl'} =~ m/$tag/);
        next DEF if(defined $ind1 && defined $def->{'ind1'} &&  $ind1 ne $def->{'ind1'});
        next DEF if(defined $ind2 && defined $def->{'ind2'} &&  $ind2 ne $def->{'ind2'});

        _getdataField($self,$def,$subfields,$ind1,$ind2) ;
    }
}
#////////////////////////////////////////////////////////////////////////////
# Store the value of one general-info field definition in $self->{'genRecInfo'}.
#   $prop   : definition hash (name, funcName, subfieldIncl, subfieldExcl,
#             sfConcatStr, multiple)
#   $sfList : array ref of {code=>..., data=>...} subfields
# A definition with a 'funcName' is delegated to _getFieldByFuncName().
# Otherwise the value is built from the subfields listed in 'subfieldIncl'
# (in the listed order), or, when that list is empty, from every non-empty
# subfield whose code is not in 'subfieldExcl'. Parts are joined with
# 'sfConcatStr'. With multiple eq 'true' values are appended as {item=>...};
# otherwise only the first value seen for the field is kept.
sub _getdataField{
    my ($self,$prop,$sfList,$ind1,$ind2)=@_;

    my $fieldName = $prop->{'name'};
    my $funcName  = $prop->{'funcName'};
    if(defined $funcName && $funcName ne ''){
        _getFieldByFuncName($self,$fieldName,$funcName,$sfList,$ind1,$ind2);
        return;
    }

    my @sfIncl   = split(/,/,defined $prop->{'subfieldIncl'} ? $prop->{'subfieldIncl'} : '');
    my $sfExcl   = defined $prop->{'subfieldExcl'} ? $prop->{'subfieldExcl'} : '';
    my $sfConStr = defined $prop->{'sfConcatStr'}  ? $prop->{'sfConcatStr'}  : '';
    my $multiple = defined $prop->{'multiple'}     ? $prop->{'multiple'}     : '';
    my $fieldVal = "";

    if(scalar(@sfIncl)>0){
        foreach my $sfCode(@sfIncl){
            foreach my $sf(@$sfList){
                if($sf->{'code'} eq $sfCode){
                    $fieldVal .= $sfConStr if($fieldVal ne "");
                    $fieldVal .= $sf->{'data'};
                }
            }
        }
    }
    else{
        foreach my $sf(@$sfList){
            my ($code,$data)=($sf->{'code'},$sf->{'data'});
            # plain substring test: the code is data, not a pattern
            if(index($sfExcl,$code)<0 && defined $data && $data ne ''){
                $fieldVal .= $sfConStr if($fieldVal ne "");
                $fieldVal .=$data;
            }
        }
    }

    # Nothing to store. This must be `return`: there is no loop in this sub,
    # so `next` here would either die or jump into a loop of the caller.
    return if($fieldVal eq '');

    $fieldVal=util_restoreLiteral($fieldVal);
    if($multiple eq 'true'){
        push @{$self->{'genRecInfo'}->{$fieldName}},{item=>$fieldVal} ;
    }
    elsif(!defined $self->{'genRecInfo'}->{$fieldName}){
        $self->{'genRecInfo'}->{$fieldName} =$fieldVal;
    }
    return;
}

#////////////////////////////////////////////////////////////////////////////
# Split the inner XML of a data field into its subfields.
# Returns an array ref of {code=>..., data=>...} hashes in document order;
# the reference points to an empty array when no subfield is found.
sub _getSubfieldList{
    my $remaining = shift;
    my @subfields = ();
    while($remaining =~ m/<subfield code="([\w\-])">(.*?)<\/subfield>(.*)/s){
        # $3 is whatever follows the subfield just consumed
        push @subfields,{code=>$1,data=>$2};
        $remaining=$3;
    }
    return \@subfields;
}
#////////////////////////////////////////////////////////////////////////////
# Older variant of the general data field extraction, working on a subfield
# list that is already parsed. For each definition registered under $tag the
# value is either delegated to _getFieldByFuncName() (when 'funcName' is set)
# or concatenated from the included / non-excluded subfields and stored in
# $self->{'genRecInfo'}.
# NOTE(review): no caller is visible in this part of the file; the "_bk"
# suffix suggests a kept backup -- confirm before removing.
sub _getGeneralDatafield_bk{
    my ($self,$tag,$sfList,$ind1,$ind2)=@_;

    DEF: foreach my $def(@{$genInfoRecFielMap->{$tag}}){
        my ($fieldName,$funcName)=($def->{'name'},$def->{'funcName'});
        my @wanted   = split(/,/,$def->{'subfieldIncl'});
        my $excluded = $def->{'subfieldExcl'};
        my $glue     = $def->{'sfConcatStr'};
        my $multiple = $def->{'multiple'};

        if($funcName ne ''){
            _getFieldByFuncName($self,$fieldName,$funcName,$sfList,$ind1,$ind2);
            next DEF;
        }

        my $value="";
        if(scalar(@wanted)>0){
            # explicit subfield list: honour the listed order
            foreach my $wantedCode(@wanted){
                foreach my $sf(@$sfList){
                    next if($sf->{'code'} ne $wantedCode);
                    $value .= $glue if($value ne "");
                    $value .= $sf->{'data'};
                }
            }
        }
        else{
            # no explicit list: take everything that is not excluded
            foreach my $sf(@$sfList){
                my ($code,$data)=($sf->{'code'},$sf->{'data'});
                if($excluded !~ m/$code/){
                    $value .= $glue if($value ne "");
                    $value .=$data;
                }
            }
        }
        next DEF if($value eq '');

        if($multiple eq 'true'){
            push @{$self->{'genRecInfo'}->{$fieldName}},{item=>$value} ;
        }
        elsif(!defined $self->{'genRecInfo'}->{$fieldName}){
            $self->{'genRecInfo'}->{$fieldName} =$value;
        }
    }

}

#////////////////////////////////////////////////////////////////////////////
# Dispatch a general-info field to the extraction routine named $funcName.
#   $sfList      : array ref of {code=>..., data=>...} subfields
#   $ind1, $ind2 : indicators of the data field
# Does nothing when the subfield list is undefined or empty, or when the
# name is not one of the known routines. Every routine receives the same
# argument list; the ones that do not use the indicators ignore them.
sub _getFieldByFuncName{
    my ($self,$fieldName,$funcName,$sfList,$ind1,$ind2)=@_;
    return if(!defined $sfList || scalar(@$sfList)==0);
    return if(!defined $funcName);

    my %handlerFor=(
        getContentNote    => \&_getContentNote,
        getStudyProgram   => \&_getStudyProgram,
        getLexileMeasure  => \&_getLexileMeasure,
        getFountasPinnell => \&_getFountasPinnell,
        getIllustrator    => \&_getIllustrator,
        getHoldingInfo    => \&_getHoldingInfo,
        getUri            => \&_getUri,
        getAuthor880      => \&_getAuthor880,
        getTitle880       => \&_getTitle880,
        getPlace880       => \&_getPlace880,
        getPublisher880   => \&_getPublisher880,
    );
    my $handler=$handlerFor{$funcName};
    return if(!defined $handler);

    $handler->($self,$fieldName,$sfList,$ind1,$ind2);
    return;
}
#////////////////////////////////////////////////////////////////////////////
# Append one holding entry to $self->{'genRecInfo'}->{$fieldName}.
# Subfield mapping: a=sysCode, b=libCode, c=location, p=barcode, 3=itemType,
# 9=price; k, h, i and m are the call number parts. The call number is the
# non-empty parts in k/h/i/m order with single blanks and no leading or
# trailing blank. As before, each subfield's data is replaced in $sfList by
# the result of util_restoreLiteral().
sub  _getHoldingInfo{
    my ($self,$fieldName,$sfList)=@_;
    my %slotFor=(a=>'sysCode', b=>'libCode', c=>'location', p=>'barcode',
                 k=>'sf_k',    h=>'sf_h',    i=>'sf_i',     m=>'sf_m',
                 '3'=>'itemType', '9'=>'price');
    my %val=map { ($_ => '') } values %slotFor;

    foreach my $sf(@$sfList){
        $sf->{'data'}=util_restoreLiteral($sf->{'data'});
        my $slot=$slotFor{$sf->{'code'}};
        $val{$slot}=$sf->{'data'} if(defined $slot);
    }

    my $callnumber=join(' ',@val{qw(sf_k sf_h sf_i sf_m)});
    $callnumber =~ s/ {2,}/ /g;        # gaps left by missing parts
    $callnumber =~ s/^\s+|\s+$//g;     # no blank at either end
    push @{$self->{'genRecInfo'}->{$fieldName}},{sysCode=>$val{'sysCode'},
                                                 libCode=>$val{'libCode'},
                                                 location=>$val{'location'},
                                                 barcode=>$val{'barcode'},
                                                 callnumber=>$callnumber,
                                                 itemType=>$val{'itemType'},
                                                 price=>$val{'price'}
                                                };

}
#////////////////////////////////////////////////////////////////////////////
# Append one contents note to $self->{'genRecInfo'}->{$fieldName}.
# The note is a hash with
#   format : text of subfield a (a single blank when there is none)
#   toc    : list of entries, present only when the field has g/t/r/u data.
#            Each entry may hold miscInfo (subfield g), titleRespList (pairs
#            of cnTitle / cnResp from subfields t / r) and cnUriList
#            (subfield u). A subfield g starts a new entry.
# Double quotes in the data are replaced by &quot;. Titles and statements of
# responsibility are trimmed on both sides.
sub _getContentNote{
    my ($self,$fieldName,$sfList)=@_;
    my $curNote=undef;
    my @contNoteList=();
    my $title='';                     # title still waiting for its subfield r
    my $contentNote={format=>" "};

    foreach my $sf(@$sfList){
        my ($code,$data)=($sf->{'code'},$sf->{'data'});
        $data='' if(!defined $data);
        $data =~ s/"/&quot;/g;
        if($code eq 'a'){
            $contentNote->{'format'}=$data;
        }
        elsif($code eq 'g'){
            # close the running entry (a pending title goes in without
            # a statement of responsibility) and start a new one
            if($title ne ''){
                push @{$curNote->{'titleRespList'}},{cnTitle=>$title,cnResp=>''};
                $title='';
            }
            push @contNoteList, $curNote if(defined $curNote);
            $curNote={miscInfo=>$data};
        }
        elsif($code eq 't'){
            if($title ne ''){
                push @{$curNote->{'titleRespList'}},{cnTitle=>$title,cnResp=>''};
            }
            $title = $data;
            $title =~ s/^\s+|\s+$//g;   # /g so that both ends are trimmed
        }
        elsif($code eq 'r'){
            my $resp = $data;
            $resp =~ s/^\s+|\s+$//g;
            push @{$curNote->{'titleRespList'}},{cnTitle=>$title,cnResp=>$resp};
            $title='';
        }
        elsif($code eq 'u'){
            push @{$curNote->{'cnUriList'}}, {uri=>$data};
        }

    }
    push @{$curNote->{'titleRespList'}},{cnTitle=>$title,cnResp=>''} if($title ne '');
    push @contNoteList, $curNote if(defined $curNote);
    $contentNote->{'toc'}=\@contNoteList if(scalar(@contNoteList)>0);

    push @{$self->{'genRecInfo'}->{$fieldName}},$contentNote;

}
#////////////////////////////////////////////////////////////////////////////
# Append one study-program entry to $self->{'genRecInfo'}->{$fieldName}.
# Subfields a/b/c/d/z become studyPrgm_name / interestLevel / readingLevel /
# pointValue / quizNumber; other subfields are ignored. Nothing is appended
# when none of the mapped subfields is present.
sub _getStudyProgram{
    my ($self,$fieldName,$sfList)=@_;
    my %keyFor=(
        a => 'studyPrgm_name',
        b => 'interestLevel',
        c => 'readingLevel',
        d => 'pointValue',
        z => 'quizNumber',
    );

    my $program;
    for my $sf(@$sfList){
        my $key=$keyFor{$sf->{'code'}};
        next unless(defined $key);
        $program->{$key}=$sf->{'data'};
    }
    push @{$self->{'genRecInfo'}->{$fieldName}},$program if(defined $program);

}
# Store the Lexile measure of a field whose first indicator is '8':
# when subfield b mentions "lexile" (any case), subfield a is stored in
# $self->{'genRecInfo'}->{$fieldName}. The last a / b subfields win.
sub _getLexileMeasure{
    my ($self,$fieldName,$sfList,$ind1)=@_;
    return unless($ind1 && $ind1 eq '8');

    my %last=(a=>"",b=>"");
    foreach my $sf(@$sfList){
        my $code=$sf->{'code'};
        $last{$code}=$sf->{'data'} if($code eq 'a' || $code eq 'b');
    }
    if($last{'b'} =~ m/lexile/i){
        $self->{'genRecInfo'}->{$fieldName}=$last{'a'};
    }

}
# Store the Fountas & Pinnell level of a field whose first indicator is '8':
# when subfield b names the system (full names or the f/p initials, joined
# by "and", "&" or nothing), subfield a is stored in
# $self->{'genRecInfo'}->{$fieldName}. The last a / b subfields win.
sub _getFountasPinnell{
    my ($self,$fieldName,$sfList,$ind1)=@_;
    return unless($ind1 && $ind1 eq '8');

    my %last=(a=>"",b=>"");
    foreach my $sf(@$sfList){
        my $code=$sf->{'code'};
        $last{$code}=$sf->{'data'} if($code eq 'a' || $code eq 'b');
    }
    my $source=$last{'b'};
    $source =~ s/&amp;/&/g;
    if($source =~ m/.*((Fountas|f)(\s*)(and|&|&amp;)*(\s*)(p|Pinnell)).*/i){
        $self->{'genRecInfo'}->{$fieldName}=$last{'a'};
    }

}

#////////////////////////////////////////////////////////////////////////////
# Append the name in subfield a to $self->{'genRecInfo'}->{$fieldName} when
# the relator term in subfield e starts with "ill" (any case, e.g. "ill." or
# "illustrator"). The first a and the first e subfield of the field are used.
sub _getIllustrator{
    my ($self,$fieldName,$sfList)=@_;
    my($sf_a,$sf_e)=('','');
    foreach my $sf(@$sfList){
        next if(!defined $sf->{'data'});
        # keep the first occurrence: only fill a slot that is still empty
        $sf_a = $sf->{'data'} if($sf_a eq '' && $sf->{'code'} eq 'a');
        $sf_e = $sf->{'data'} if($sf_e eq '' && $sf->{'code'} eq 'e');
    }
    if($sf_a ne '' && $sf_e =~ m/^ill/i){
        push @{$self->{'genRecInfo'}->{$fieldName}}, $sf_a;
    }

}
#////////////////////////////////////////////////////////////////////////////
# Older variant of the URI extraction that keeps every subfield u of a field
# (subfields may repeat): appends {uri_desc, uri_list} to
# $self->{'genRecInfo'}->{$fieldName}, where uri_desc is the last subfield 3
# and uri_list holds one {uri=>...} per subfield u. Fields without a
# subfield u add nothing.
sub _getUri_bk{# subfield may repeated
    my ($self,$fieldName,$sfList)=@_;
    my $description='';
    my @links=();
    for my $sf(@$sfList){
        my $code=$sf->{'code'};
        if($code eq '3'){
            $description=$sf->{'data'};
        }
        if($code eq 'u'){
            push @links,{uri=>$sf->{'data'}};
        }
    }
    return unless(@links);
    push  @{$self->{'genRecInfo'}->{$fieldName}},{uri_desc=>$description, uri_list=>\@links}
}
#////////////////////////////////////////////////////////////////////////////
# Store one link of a field in $self->{'genRecInfo'}->{$fieldName} as
# {uri_desc, uri}.
#   uri      : subfield u
#   uri_desc : subfield 3, or else the first y or z seen while no
#              description is set
# Scanning stops as soon as both values are known. A field without a
# subfield u stores nothing. Links of fields with indicators 4 and 0 are put
# at the front of the list, all others at the end.
sub _getUri{
    my ($self,$fieldName,$sfList,$ind1,$ind2)=@_;
    my $uri_desc='';
    my $uri="";
    foreach my $sf(@$sfList){
        next if(!defined $sf->{'data'});
        $uri_desc = $sf->{'data'} if($sf->{'code'} eq '3');
        $uri_desc = $sf->{'data'} if($sf->{'code'} eq 'y' && $uri_desc eq '');
        $uri_desc = $sf->{'data'} if($sf->{'code'} eq 'z' && $uri_desc eq '');
        $uri =$sf->{'data'}  if($sf->{'code'} eq 'u');
        last if($uri_desc ne '' && $uri ne '');
    }
    # $uri starts as "" and is therefore always defined; test its content
    return if($uri eq '');

    my $entry={uri_desc=>$uri_desc, uri=>$uri};
    if(defined $ind1 && defined $ind2 && $ind1 eq "4" && $ind2 eq "0"){
        unshift @{$self->{'genRecInfo'}->{$fieldName}},$entry;
    }
    else{
        push  @{$self->{'genRecInfo'}->{$fieldName}},$entry;
    }
    return;
}

#////////////////////////////////////////////////////////////////////////////
# Alternate-script author: when subfield 6 (linkage) of the field contains
# "100", subfield a is stored in $self->{'genRecInfo'}->{$fieldName}.
# The last a / 6 subfields win.
sub _getAuthor880{
    my ($self,$fieldName,$sfList)=@_;
    my ($value,$linkage)=("","");
    for my $sf(@$sfList){
        my $code=$sf->{'code'};
        if($code eq 'a'){
            $value=$sf->{'data'};
        }
        if($code eq '6'){
            $linkage=$sf->{'data'};
        }
    }
    return unless($linkage && $linkage =~ m/100/g);
    $self->{'genRecInfo'}->{$fieldName}   = $value;

}
#////////////////////////////////////////////////////////////////////////////
# Alternate-script title: when subfield 6 (linkage) of the field contains
# "245", subfield a is stored in $self->{'genRecInfo'}->{$fieldName}.
# The last a / 6 subfields win.
sub _getTitle880{
    my ($self,$fieldName,$sfList)=@_;
    my ($value,$linkage)=("","");
    for my $sf(@$sfList){
        my $code=$sf->{'code'};
        if($code eq 'a'){
            $value=$sf->{'data'};
        }
        if($code eq '6'){
            $linkage=$sf->{'data'};
        }
    }
    return unless($linkage && $linkage =~ m/245/g);
    $self->{'genRecInfo'}->{$fieldName}   = $value;

}
#////////////////////////////////////////////////////////////////////////////
# Alternate-script place of publication: when subfield 6 (linkage) of the
# field contains "260", subfield a is stored in
# $self->{'genRecInfo'}->{$fieldName}. The last a / 6 subfields win.
sub _getPlace880{
    my ($self,$fieldName,$sfList)=@_;
    my ($value,$linkage)=("","");
    for my $sf(@$sfList){
        my $code=$sf->{'code'};
        if($code eq 'a'){
            $value=$sf->{'data'};
        }
        if($code eq '6'){
            $linkage=$sf->{'data'};
        }
    }
    return unless($linkage && $linkage =~ m/260/g);
    $self->{'genRecInfo'}->{$fieldName}   = $value;

}
#////////////////////////////////////////////////////////////////////////////
# Alternate-script publisher: when subfield 6 (linkage) of the field contains
# "260", subfield b is stored in $self->{'genRecInfo'}->{$fieldName}.
# The last b / 6 subfields win.
sub _getPublisher880{
    my ($self,$fieldName,$sfList)=@_;
    my ($value,$linkage)=("","");
    for my $sf(@$sfList){
        my $code=$sf->{'code'};
        if($code eq 'b'){
            $value=$sf->{'data'};
        }
        if($code eq '6'){
            $linkage=$sf->{'data'};
        }
    }
    return unless($linkage && $linkage =~ m/260/g);
    $self->{'genRecInfo'}->{$fieldName}   = $value;
}


#////////////////////////////////////////////////////////////////////////////
# Collect the Solr index values of one data field.
# For every index definition registered under $tag in $self->{'fieldMap'}
# the matching subfields are concatenated (joined by 'sfConcatStr') and the
# non-empty result is appended to $self->{'field'} as {field, fieldVal}.
#   subfieldIncl : codes to use; empty means all
#   subfieldExcl : codes to leave out
#   firstFieldOnly eq '1' : later fields are skipped for this index name
# For the 'title_sort' index the second indicator gives the number of
# leading characters to drop.
sub _getDatafield{
    my ($self,$tag,$ind1,$ind2,$sfList)=@_;

    DEF: for my $def(@{$self->{'fieldMap'}->{$tag}}){
        my $indexName = $def->{'name'};
        my $included  = $def->{'subfieldIncl'};
        my $excluded  = $def->{'subfieldExcl'};
        my $glue      = $def->{'sfConcatStr'};
        my $firstOnly = $def->{'firstFieldOnly'};

        next DEF if(defined $self->{'firstFieldOnly'}->{$indexName});

        my $value="";
        for my $sf(@$sfList){
            my ($code,$data)=($sf->{'code'},$sf->{'data'});
            next if($code eq '+');
            next unless($included =~ m/$code/ || $included eq "");
            next if($excluded =~ m/$code/);
            $value .= $glue if($value ne "");
            $value .=$data;
        }

        if($indexName eq 'title_sort' && $ind2>0 && $value ne ''){#title sort
            $value =substr($value,$ind2);
        }
        if($value ne ''){
            push @{$self->{'field'}},{field=>$indexName,fieldVal=>$value};
        }
        $self->{'firstFieldOnly'}->{$indexName} =1 if($firstOnly eq '1');
    }

}
# Reset the per-record state of the parser: the collected index values
# ('field') and the first-field-only markers ('firstFieldOnly').
sub _init{
    my $self = shift;
    foreach my $slot('field','firstFieldOnly'){
        $self->{$slot}=undef;
    }
    return undef;
}
#////////////////////////////////////////////////////////////////////////////
# Read a whole file into a string.
#   $file  : path of the file
#   $toUTF : when 1, the file is read through the ":utf8" layer
# Returns the content, or "" when the path is not a plain file or cannot be
# opened (a warning is issued in the latter case).
sub _getXmlFile{
    my ($file,$toUTF)=@_;
    return "" if(!defined $file || ! -f $file);

    # Three-argument open with a lexical handle: the name is taken literally
    # (no mode characters, no stripping of blanks) and the handle cannot
    # clash with another FILE handle in the program.
    my $fh;
    if(!open($fh,'<',$file)){
        warn "cannot open $file: $!";
        return "";
    }
    binmode($fh,":utf8") if(defined $toUTF && $toUTF==1);
    my $xml=do { local $/; <$fh> };
    close($fh);
    return defined $xml ? $xml : "";
}
#////////////////////////////////////////////////////////////////////////////
# Return the three-character language code found at offset 35 of control
# field 008, or 'eng' when the field is shorter than 38 characters.
sub _getRecLangCode{
    my ($cf008)=@_;
    return (length($cf008)>=38) ? substr($cf008,35,3) : 'eng';#default;
}
#////////////////////////////////////////////////////////////////////////////
# Return the language name for the code at offset 35 of control field 008.
# 'English' is returned when the field is shorter than 38 characters or the
# code is not in the language map.
sub _getRecLanguage{
    my ($cf008)=@_;
    my $fallback='English';#default;
    return $fallback if(length($cf008)<38);

    my $code=substr($cf008,35,3);
    return defined $langCodeMap->{$code} ? $langCodeMap->{$code} : $fallback;
}

#////////////////////////////////////////////////////////////////////////////
# Return the literary form name for the character at offset 33 of control
# field 008. "unknown" is returned when the field is shorter than 34
# characters or the character is not in the literary form map.
sub _getRecLiteraryForm{
    my ($cf008)=@_;
    my $fallback="unknown";
    return $fallback if(length($cf008)<34);

    my $formCode=substr($cf008,33,1);
    return defined $literaryFormMap->{$formCode} ? $literaryFormMap->{$formCode} : $fallback;
}

#////////////////////////////////////////////////////////////////////////////
# Parse the Solr index configuration file with a SAX parser driven by
# Opals::ConfigHandler and return the field map the handler collected.
# Returns nothing when $configFile is not a plain file.
sub _loadIndexConfig{
    my $configFile = shift;
    unless(-f $configFile){
        return;
    }
    my $handler = Opals::ConfigHandler->new();
    my $parser  = XML::SAX::ParserFactory->new()->parser( Handler =>$handler );
    $parser->parse_file($configFile);
    return $parser->{'Handler'}->{'fieldMap'};
}
#////////////////////////////////////////////////////////////////////////////
# Classify a record by comparing its leader and control fields with the
# record-type definition table.
#   $leader              : leader string
#   $cf006List,$cf007List: array refs of 006 / 007 values; only the first
#                          element of each is used, lower-cased
#   $cf008               : 008 string
# A type matches when every (position, value) pair listed for its 'leader'
# and 00x entries equals the character at that position. The matching type
# with the most pairs wins; "book" is used when nothing matches. Types and
# their entries are visited in sorted order so that equal scores always
# resolve the same way (hash order changes from run to run).
# Returns an array ref holding the single chosen format name.
sub _getRecFormat{
    my ($leader,$cf006List,$cf007List,$cf008) = @_;

    my $cf006= (defined $cf006List->[0])?lc($cf006List->[0]) :"";
    my $cf007= (defined $cf007List->[0])?lc($cf007List->[0]) :"";
    my $recProp={leader=>$leader,
                  '006'=>$cf006,
                  '007'=>$cf007,
                  '008'=>$cf008
                 };
    my $lastScore=0;
    my $recFormat="book";
    TYPE: foreach my $mType(sort keys %$recTypeDef){
        next TYPE if( $mType eq '_default');
        my $typeDef=$recTypeDef->{$mType};
        my $score=0;
        foreach my $f(sort keys %$typeDef){
            next if($f !~ m/leader|00[1-9]/);
            my @pArr=@{$typeDef->{$f}->{'prop'}} ;
            foreach my $e(@pArr){
                # one mismatch rejects the whole type; no need to look further
                next TYPE if($e->{'v'} ne substr($recProp->{$f},$e->{'p'},1));
                $score++;
            }
        }
        if($score>$lastScore){
            $lastScore=$score ;
            $recFormat = $mType;
        }
    }
    return [$recFormat];
}


###################################
# END package Opals::MarcXmlParser;
###################################



1;
#################################################################
package Opals::ConfigHandler;
use strict;
# Constructor: returns an empty hash-based handler object blessed into the
# class the method was invoked on.
sub new {
    my ($class) = @_;
    my $self = {};
    return bless($self, $class);
}

# SAX start-tag callback.
#
# For a <field> element the 'tag' attribute is remembered in $self->{'tag'}
# and the attributes name, subfieldIncl, subfieldExcl, sfConcatStr and
# firstFieldOnly are copied into $self->{'indexProp'} (an attribute that is
# absent is stored as undef). Attributes are looked up under the key
# '{}' . attribute name. Any other element is ignored.
sub start_element {
    my ($self, $element) = @_;
    if ($element->{'Name'} eq 'field') {
        my $attrs = $element->{'Attributes'};
        $self->{'tag'} = $attrs->{'{}tag'}{'Value'};
        foreach my $prop (qw(name subfieldIncl subfieldExcl sfConcatStr firstFieldOnly)) {
            $self->{'indexProp'}->{$prop} = $attrs->{'{}'.$prop}{'Value'};
        }
    }
}

#################################################################
# SAX end-tag callback.
#
# When a <field> element closes, the index properties collected for it are
# appended to the list kept in $self->{'fieldMap'} under the field's tag,
# and $self->{'indexProp'} is reset to undef. Any other element is ignored.
sub end_element {
    my ($self, $element) = @_;
    if ($element->{'Name'} eq 'field') {
        my $entries = ($self->{'fieldMap'}->{ $self->{'tag'} } ||= []);
        push @$entries, $self->{'indexProp'};
        $self->{'indexProp'} = undef;
    }
}

####################################
# END package Opals::ConfigHandler;
####################################
#################################################################
#
# Literary form map
#
# Maps the single character found at offset 33 of the 008 control field
# to the literary form label used for the record. Characters that are not
# listed have no entry.
#
# NOTE(review): the label for 'm' is spelled 'mixedDorms' (probably meant
# to be 'mixedForms'). It is kept as is because the label may be stored or
# matched elsewhere -- confirm before renaming.
#

$literaryFormMap = {
    '0' => 'nonFiction',
    '1' => 'fiction',
    'c' => 'comicStrips',
    'd' => 'dramas',
    'e' => 'essays',
    'f' => 'novels',
    'h' => 'humor',
    'i' => 'letters',
    'j' => 'shortStories',
    'm' => 'mixedDorms',
    'p' => 'poetry',
    's' => 'speeches',
    'u' => 'unknown',
};

#################################################################
#
# General record info field map
#
# Keyed by MARC tag; '5xx' and '6xx' are group keys (presumably applied to
# every tag of that range by the code that walks this table -- confirm
# against the consumer earlier in this file). Each value is a list of
# output field definitions. The keys that appear in a definition are:
#   name         - name of the output field
#   subfieldIncl - comma separated subfield codes ("-" and "" also occur)
#   subfieldExcl - comma separated subfield codes
#   sfConcatStr  - string placed between the collected subfield values
#   multiple     - "true" or "false"
#   ind2         - second indicator value (only on the 264 definitions)
#   tagExcl      - comma separated tags (only on the 5xx definition)
#   funcName     - name of a function, given instead of the subfield keys
# NOTE(review): the exact meaning of each key is decided by the consumer,
# which is not part of this section -- verify there before relying on it.
#
$genInfoRecFielMap={
                    '010'=>[{name=>'lccn',              subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}],
                    '020'=>[{name=>'isbn_first',        subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'isbn',              subfieldIncl=>"a,z",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"true"}],
                    '100'=>[{name=>'author',            subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}],
                    '245'=>[{name=>'title',             subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'subtitle',          subfieldIncl=>"b",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'titleSort',         subfieldIncl=>"-",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'responsibleStmnt',  subfieldIncl=>"c",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'medium',            subfieldIncl=>"h",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'nameOfPart',        subfieldIncl=>"p",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"true"},
                            {name=>'numOfPart',         subfieldIncl=>"n",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"true"},
                            {name=>'version',           subfieldIncl=>"s",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}
                           ],
                    '260'=>[{name=>'placePublication',  subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'namePublisher',     subfieldIncl=>"b",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'datePublication',   subfieldIncl=>"c",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'pubDateSort',       subfieldIncl=>"-",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}
                           ],
                    '264'=>[{name=>'placePublication',  subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ", ind2=>'1',   multiple=>"false"},
                            {name=>'namePublisher',     subfieldIncl=>"b",  subfieldExcl=>"", sfConcatStr=>" ", ind2=>'1',   multiple=>"false"},
                            {name=>'datePublication',   subfieldIncl=>"c",  subfieldExcl=>"", sfConcatStr=>" ", ind2=>'1',   multiple=>"false"}
                           ],
                    # 'multiple' was misspelled 'ultiple' here, which left this entry without the flag.
                    '300'=>[{name=>'page',              subfieldIncl=>"a,b,c",  subfieldExcl=>"", sfConcatStr=>"; ",    multiple=>"false"}],
                    '440'=>[{name=>'title_series_a',    subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'title_series_v',    subfieldIncl=>"v",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}
                           ],
                    '490'=>[{name=>'title_series_a',    subfieldIncl=>"a",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'title_series_v',    subfieldIncl=>"v",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"}
                           ],
                    '505'=>[{name=>'contentNote',       funcName=>'getContentNote',                                 multiple=>"true"}],
                    '510'=>[{name=>'citationRefNote',   subfieldIncl=>"a,c",  subfieldExcl=>"", sfConcatStr=>" --",  multiple=>"true"}],
                    '520'=>[{name=>'summary',           subfieldIncl=>"a,c",  subfieldExcl=>"", sfConcatStr=>" --",  multiple=>"true"}],
                    '521'=>[{name=>'lexileMeasure',     funcName=>'getLexileMeasure',                               multiple=>"false"},
                            {name=>'fountasPinnell',    funcName=>'getFountasPinnell',                              multiple=>"false"},
                            {name=>'note',              subfieldIncl=>"a,b",   subfieldExcl=>"", sfConcatStr=>" ",  multiple=>"true"}],
                    '526'=>[{name=>'studyProgram',      funcName=>'getStudyProgram',                                multiple=>"true"}],
                    '586'=>[{name=>'awards',            subfieldIncl=>"a",   subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"true"}],
                    '5xx'=>[{name=>'note',              subfieldIncl=>"a",   subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"true", tagExcl=>"505,510,520,521,526,586"}],
                    '650'=>[{name=>'era',               subfieldIncl=>"y",   subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"true"}],
                    '651'=>[{name=>'era',               subfieldIncl=>"y",   subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"true"}],
                    '6xx'=>[{name=>'subject',           subfieldIncl=>"",   subfieldExcl=>"2", sfConcatStr=>" -- ", multiple=>"true"}],
                    '700'=>[{name=>'illustrator',       funcName=>'getIllustrator',                                 multiple=>"true"}],
                    '852'=>[{name=>'callnum1St',        subfieldIncl=>"k,h,i,m",  subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"false"},
                            {name=>'852_h',             subfieldIncl=>"h",  subfieldExcl=>"", sfConcatStr=>" ",     multiple=>"false"},
                            {name=>'callnumberPrefix_first',     subfieldIncl=>"k",  subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"false"},
                            {name=>'classificationPart_first',   subfieldIncl=>"h",  subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"false"},
                            {name=>'itemPart_first',             subfieldIncl=>"i",  subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"false"},
                            {name=>'callnumberSubfix_first',     subfieldIncl=>"m",  subfieldExcl=>"", sfConcatStr=>" ",    multiple=>"false"},
                            {name=>'itemList',          funcName=>'getHoldingInfo',                                 multiple=>"true"}],
                    '856'=>[{name=>'uriList',           funcName=>'getUri',                                         multiple=>"true"}],
                    '880'=>[{name=>'author_880',        funcName=>'getAuthor880',                                   multiple=>"false"},
                            {name=>'title_880',         funcName=>'getTitle880',                                    multiple=>"false"},
                            {name=>'place_880',         funcName=>'getPlace880',                                    multiple=>"false"},
                            {name=>'publisher_880',     funcName=>'getPublisher880',                                multiple=>"false"},
                           ]
    };


#################################################################
#
# Language code map for the 008 control field, character positions 35-37.
# Reference: http://www.science.co.il/language/Codes.asp?s=code3
#
# Keys are three-letter language codes, values are the language names.
# Several languages are listed under two codes (for example alb / sqi);
# both codes map to the same name.
#

$langCodeMap = {
    'aar' => "Afar",
    'abk' => "Abkhazian",
    'afr' => "Afrikaans",
    'alb' => "Albanian",
    'sqi' => "Albanian",
    'amh' => "Amharic",
    'ara' => "Arabic",
    'arg' => "Aragonese",
    'arm' => "Armenian",
    'hye' => "Armenian",
    'asm' => "Assamese",
    'ave' => "Avestan",
    'aym' => "Aymara",
    'aze' => "Azerbaijani",
    'bak' => "Bashkir",
    'baq' => "Basque",
    'eus' => "Basque",
    'bel' => "Belarusian",
    'ben' => "Bengali",
    'bih' => "Bihari",
    'bis' => "Bislama",
    'bos' => "Bosnian",
    'bre' => "Breton",
    'bul' => "Bulgarian",
    'bur' => "Burmese",
    'mya' => "Burmese",
    'cat' => "Catalan",
    'cha' => "Chamorro",
    'che' => "Chechen",
    'chi' => "Chinese",
    'zho' => "Chinese",
    'chv' => "Chuvash",
    'cor' => "Cornish",
    'cos' => "Corsican",
    'cze' => "Czech",
    'ces' => "Czech",
    'dan' => "Danish",
    'div' => "Divehi",
    'dut' => "Dutch",
    'nld' => "Dutch",
    'dzo' => "Dzongkha",
    'eng' => "English",
    'epo' => "Esperanto",
    'est' => "Estonian",
    'fao' => "Faroese",
    'fij' => "Fijian",
    'fin' => "Finnish",
    'fre' => "French",
    'fra' => "French",
    'fry' => "Western Frisian",
    'geo' => "Georgian",
    'kat' => "Georgian",
    'ger' => "German",
    'deu' => "German",
    'gla' => "Gaelic",
    'gle' => "Irish",
    'glg' => "Galician",
    'glv' => "Manx",
    'gre' => "Greek",
    'ell' => "Greek",
    'grn' => "Guarani",
    'guj' => "Gujarati",
    'hat' => "Haitian",
    'hau' => "Hausa",
    'heb' => "Hebrew",
    'her' => "Herero",
    'hin' => "Hindi",
    'hmo' => "Hiri Motu",
    'hun' => "Hungarian",
    'ice' => "Icelandic",
    'isl' => "Icelandic",
    'ido' => "Ido",
    'iii' => "Sichuan Yi",
    'iku' => "Inuktitut",
    'ile' => "Interlingue",
    'ina' => "Interlingua",
    'ind' => "Indonesian",
    'ipk' => "Inupiaq",
    'ita' => "Italian",
    'jav' => "Javanese",
    'jpn' => "Japanese",
    'kal' => "Kalaallisut",
    'kan' => "Kannada",
    'kas' => "Kashmiri",
    'kaz' => "Kazakh",
    'khm' => "Khmer",
    'kik' => "Kikuyu",
    'kin' => "Kinyarwanda",
    'kir' => "Kirghiz",
    'kom' => "Komi",
    'kor' => "Korean",
    'kua' => "Kuanyama",
    'kur' => "Kurdish",
    'lao' => "Lao",
    'lat' => "Latin",
    'lav' => "Latvian",
    'lim' => "Limburgan",
    'lin' => "Lingala",
    'lit' => "Lithuanian",
    'ltz' => "Luxembourgish",
    'mac' => "Macedonian",
    'mkd' => "Macedonian",
    'mah' => "Marshallese",
    'mal' => "Malayalam",
    'mao' => "Maori",
    'mri' => "Maori",
    'mar' => "Marathi",
    'may' => "Malay",
    'msa' => "Malay",
    'mlg' => "Malagasy",
    'mlt' => "Maltese",
    'mol' => "Moldavian",
    'mon' => "Mongolian",
    'nau' => "Nauru",
    'nav' => "Navaho, Navajo",
    'nbl' => "Ndebele, South",
    'nde' => "Ndebele, North",
    'ndo' => "Ndonga",
    'nep' => "Nepali",
    'nno' => "Norwegian Nynorsk",
    'nob' => "Norwegian Bokmal",
    'nor' => "Norwegian",
    'nya' => "Nyanja",
    'oci' => "Occitan",
    'ori' => "Oriya",
    'orm' => "Oromo",
    'oss' => "Ossetian",
    'pan' => "Panjabi",
    'per' => "Persian",
    'fas' => "Persian",
    'pli' => "Pali",
    'pol' => "Polish",
    'por' => "Portuguese",
    'pus' => "Pushto",
    'que' => "Quechua",
    'roh' => "Raeto-Romance",
    'rum' => "Romanian",
    'ron' => "Romanian",
    'run' => "Rundi",
    'rus' => "Russian",
    'sag' => "Sango",
    'san' => "Sanskrit",
    'scc' => "Serbian",
    'srp' => "Serbian",
    'scr' => "Croatian",
    'hrv' => "Croatian",
    'sin' => "Sinhala",
    'slo' => "Slovak",
    'slk' => "Slovak",
    'slv' => "Slovenian",
    'sme' => "Northern Sami",
    'smo' => "Samoan",
    'sna' => "Shona",
    'snd' => "Sindhi",
    'som' => "Somali",
    'sot' => "Sotho,Southern",
    'spa' => "Spanish",
    'srd' => "Sardinian",
    'ssw' => "Swati",
    'sun' => "Sundanese",
    'swa' => "Swahili",
    'swe' => "Swedish",
    'tah' => "Tahitian",
    'tam' => "Tamil",
    'tat' => "Tatar",
    'tel' => "Telugu",
    'tgk' => "Tajik",
    'tgl' => "Tagalog",
    'tha' => "Thai",
    'tib' => "Tibetan",
    'bod' => "Tibetan",
    'tir' => "Tigrinya",
    'ton' => "Tonga",
    'tsn' => "Tswana",
    'tso' => "Tsonga",
    'tuk' => "Turkmen",
    'tur' => "Turkish",
    'twi' => "Twi",
    'uig' => "Uighur",
    'ukr' => "Ukrainian",
    'urd' => "Urdu",
    'uzb' => "Uzbek",
    'vie' => "Vietnamese",
    'vol' => "Volapuk",
    'wel' => "Welsh",
    'cym' => "Welsh",
    'wln' => "Walloon",
    'wol' => "Wolof",
    'xho' => "Xhosa",
    'yid' => "Yiddish",
    'yor' => "Yoruba",
    'zha' => "Zhuang; Chuang",
    'zul' => "Zulu",
};


1;
