#!/usr/bin/perl -w

use strict;
use DBI;
use Getopt::Std;
use POSIX qw(
    ceil
);
use Digest::SHA qw(
    sha1_hex
    sha512_hex
);
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;

#use Opals::SolrIndex;

#my $dirtoget="/data/idzebra/210/record/opals/";
my $dirtoget="/mnt/calypso/data/idzebra/210/record/union";
opendir(IMD, $dirtoget) || die("Cannot open directory");
my @thefiles= readdir(IMD);

closedir(IMD);

my $thesaurus={
    "0"=>"Library of Congress Subject Headings",
    "1"=>"LC subject headings for children's literature",
    "2"=>"Medical Subject Headings",
    "3"=>"National Agricultural Library subject authority file",
    "4"=>"Source not specified",
    "5"=>"Canadian Subject Headings",
    "6"=>"Répertoire de vedettes-matière"
};

my $sbjRecFmt="<doc><field name=\"id\">%s</field><field name=\"source\">%s</field><field name=\"tag\">%s</field><field name=\"ind1\">%s</field><field name=\"ind2\">%s</field><field name=\"subject\">%s</field><field name=\"subfields\">%s</field></doc>\n";

foreach my $d(@thefiles){
    #next if($d ne '1');
    opendir(IMD, "$dirtoget/$d") || die("Cannot open directory");
    print "$dirtoget/$d\n";

    my @xmlRecs= readdir(IMD);
    foreach my $f(@xmlRecs){
       if($f =~ m/.xml$/g && (-f "$dirtoget/$d/$f")){
            my $sbjRecList = getSubjectList(getXmlRec("$dirtoget/$d/$f"));
            postRequest("<add>$sbjRecList</add>") if($sbjRecList && $sbjRecList ne "");
       }
    }
    postRequest("<commit/>");
}

exit 0;

############################################################

sub getSubjectList {
    my ($xml) = @_;
    
    my $sbjRecList;
    my ($id,$subj,$subfield,$tag,$ind1,$ind2,$field, $code, $data,$src)=("","","","","","","","","","");
    while ($xml =~ s/([\s]*<datafield tag="(6[\d]{2})" ind1="([\d ])" ind2="([\d ])">(([\s]*<subfield code="[\w\-]">.*<\/subfield>)+)[\s]*<\/datafield>)//) {
        $tag  = $2;
        $ind1 = $3;
        $ind2 = $4;
        $field= $5;
        $subj="";
        $subfield="";
        $src= $thesaurus->{$ind2} if($thesaurus->{$ind2});
        while ($field =~ s/<subfield code="([\w\-])">(.*)<\/subfield>//) {
            $code = $1;
            $data = $2;
            $data =~ s/[\.|,|:|;]*$//g;
            $subfield .= "\$$code$data" if ($code !~ m/^(-)$/) ;

            if ($code !~ m/^(2|-)$/) {
                $subj =$subj eq ""? "$data":"$subj" . "--$data";
            }
            if($code eq '2' && $ind2 eq '7'){
                $src=$data;
            }
        }
        $ind1="#" if($ind1 eq '' || $ind1 eq ' ');
        $ind2="#" if($ind2 eq '' || $ind2 eq ' ');
        $id =sha1_hex("$tag$ind1$ind2$subfield");
        my $sbjRec   = sprintf($sbjRecFmt,$id,$src,$tag,$ind1,$ind2,$subj,$subfield) if($subj ne '');
        $sbjRecList .= $sbjRec;
    }
   return $sbjRecList;;
}

sub getXmlRec{
    my ($path)=@_;
    my $xml = '';
    open MARCXML, "<$path";
    while (<MARCXML>) {
        $xml .= $_;
    }
    close MARCXML;
    return $xml

    
}


sub postRequest{
    my ($xml)=@_;
    my($sHost,$sPort,$sDatabase) =("localhost","8983","sbjAuthority");

    my $url="http://$sHost:$sPort/solr/$sDatabase/update" ;
    #my $url="http://$sHost:$sPort/solr/update" ;

    my $req = HTTP::Request->new( POST => $url );
    my $timeout = 600;
    my $ua        = LWP::UserAgent->new(agent => 'OPALS');
    $ua->timeout($timeout);
    $ua->agent("SolrHTTPUpdateHandlerAgent");
    $req->content_type('Content-type:text/xml; charset=utf-8');
    $req->content($xml);
    my $res = $ua->request($req);

    
}

