#!/usr/bin/perl -w

use strict;
use DBI;
use Getopt::Std;
use POSIX qw(
    ceil
);
use Digest::SHA qw(
    sha1_hex
    sha512_hex
);
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;

#use Opals::SolrIndex;

#my $dirtoget="/data/idzebra/210/record/opals/";

my $thesaurus={
    "0"=>"Library of Congress Subject Headings",
    "1"=>"LC subject headings for children's literature",
    "2"=>"Medical Subject Headings",
    "3"=>"National Agricultural Library subject authority file",
    "4"=>"Source not specified",
    "5"=>"Canadian Subject Headings",
    "6"=>"Répertoire de vedettes-matière"
};

my $sbjRecFmt="<doc><field name=\"id\">%s</field><field name=\"source\">%s</field><field name=\"tag\">%s</field><field name=\"ind1\">%s</field><field name=\"ind2\">%s</field><field name=\"subject\">%s</field><field name=\"subfields\">%s</field></doc>\n";
parseSbjFromXml("/tmp/loc_sbjHeading/part01.xml");
parseSbjFromXml("/tmp/loc_sbjHeading/part02.xml");
postRequest("<commit/>");


exit 0;

############################################################

sub getSubjectList {
    my ($xml) = @_;
    my $sbjRecList;
    my ($id,$subj,$subfield,$tag,$ind1,$ind2,$field, $code, $data,$src)=("","","","","","","","","","");

    while ($xml =~ s/([\s]*<datafield tag="([1-5]([\d]{2}))" ind1="([\d ])" ind2="([\d ])">(([\s]*<subfield code="[\w\-]">.*<\/subfield>)+)[\s]*<\/datafield>)//) {
        $tag  =  "6$3";
        $ind2 = 0;
        $field= $6;
        $subj="";
        $subfield="";
        $src= $thesaurus->{$ind2} if($thesaurus->{$ind2});
        next if($3 !~ m/00|11|30|48|50|51|53|55|56|57|58|62/g);
        while ($field =~ s/<subfield code="([\w\-])">(.*)<\/subfield>//) {
            $code = $1;
            $data = $2;
            $data =~ s/[\.|,|:|;]*$//g;
            $subfield .= "\$$code$data" if ($code !~ m/^(-|w)$/) ;

            if ($code !~ m/^(2|-|w)$/) {
                $subj =$subj eq ""? "$data":"$subj" . "--$data";
            print "$subj\n";
            }
            if($code eq '2' && $ind2 eq '7'){
                $src=$data;
            }
        }
        $ind1="#" if($ind1 eq '' || $ind1 eq ' ');
        $ind2="#" if($ind2 eq '' || $ind2 eq ' ');
        $id =sha1_hex("$tag$ind1$ind2$subfield");
        my $sbjRec   = sprintf($sbjRecFmt,$id,$src,$tag,$ind1,$ind2,$subj,$subfield) if($subj ne '');
        $sbjRecList .= $sbjRec;
    }
   return $sbjRecList;;
}

sub parseSbjFromXml{
    my ($path)=@_;
    my $xml = '';
    my $count=0;
    open MARCXML, "<$path";
    while (<MARCXML>) {
        $xml .= $_;
        if($_ =~ m/<record>/g){
            $xml = $_;
        }
        elsif($_ =~ m/<\/record>/){
            my $sbjRecList = getSubjectList($xml);
            postRequest("<add>$sbjRecList</add>") if($sbjRecList && $sbjRecList ne "");
            #last if($count++ >10000);
        }
    }
    close MARCXML;
    
}


sub postRequest{
    my ($xml)=@_;
    my($sHost,$sPort,$sDatabase) =("localhost","8983","sbjAuthority");

    my $url="http://$sHost:$sPort/solr/$sDatabase/update" ;
    #my $url="http://$sHost:$sPort/solr/update" ;

    my $req = HTTP::Request->new( POST => $url );
    my $timeout = 600;
    my $ua        = LWP::UserAgent->new(agent => 'OPALS');
    $ua->timeout($timeout);
    $ua->agent("SolrHTTPUpdateHandlerAgent");
    $req->content_type('Content-type:text/xml; charset=utf-8');
    $req->content($xml);
    my $res = $ua->request($req);

    
}

