package Opals::Epub;

# Version number
$VERSION   = 0.01;

use strict;
use Encode;
use XML::SAX;
use base qw( XML::SAX::Base );
use utf8;
use strict;
#################################################################
# Constructor.
# Returns a new object blessed into the given class, pre-populated with an
# empty title, an empty author and an empty table of contents.
sub new {
    my $type = shift;
    my $self = { title => '', author => '', toc => [] };
    # Bless the prepared hash itself; blessing a fresh {} would silently
    # throw the defaults above away.
    return bless $self, $type;
}
#################################################################
# Unpacks the EPUB archive $epubFile, locates and parses its OPF package
# document, copies the handler's metadata / manifest / spine onto the object,
# loads the NCX table of contents when the manifest lists one, and finally
# registers files found on disk that the manifest does not mention.
sub parse {
    my $self     = shift;
    my $epubFile = shift;

    $self->unpackEpub($epubFile);
    _getOPFPath($self);

    # The OPF lives at <rootDir>/<opfPath>/<opfFileName>.
    my $opfCont = _getFileCont(
        sprintf("%s/%s/%s", $self->{'rootDir'}, $self->{'opfPath'}, $self->{'opfFileName'})
    );

    my $parser = XML::SAX::ParserFactory->new()->parser( Handler => OPFHandler->new() );
    $parser->parse_string($opfCont);

    # <metadata>, <manifest> and <spine> are collected by the handler.
    my $handler = $parser->{'Handler'};
    $self->{'metadata'} = $handler->{'metadata'};
    $self->{'manifest'} = $handler->{'manifest'};
    $self->{'spine'}    = $handler->{'spine'};

    my $tocFileName = $self->getTocFileName();
    if (defined $tocFileName) {
        _getTOC($self, sprintf("%s/%s/%s", $self->{'rootDir'}, $self->{'opfPath'}, $tocFileName));
    }

    $self->_getMissingManifestItem($self->{'opfPath'});
}
#------------------------------------------------------------------------------

# Extracts the EPUB archive $epubFile into a freshly created temporary
# directory and remembers that directory in $self->{'rootDir'}.
# Dies when the temporary directory cannot be created.
# Returns the directory path.
sub unpackEpub {
    my ($self, $epubFile) = @_;
    umask 002;

    my $dir = `/bin/mktemp -d`;
    $dir = '' unless defined $dir;
    $dir =~ s/\n//g;    # strip the trailing newline before the path is used
    die "Cannot create temporary directory for $epubFile" if $dir eq '';

    # List-form system() bypasses the shell, so archive names containing
    # spaces or shell metacharacters are passed through verbatim.
    # chown is best-effort (it fails harmlessly when not running as root).
    system('chown', 'apache.apache', $dir);
    # unzip's exit status is deliberately not treated as fatal: it is
    # non-zero for mere warnings as well.
    system('unzip', $epubFile, '-d', $dir);

    $self->{'rootDir'} = $dir;
    return $dir;
}

#################################################################
# Returns the href of a manifest item whose href ends in ".ncx",
# or undef when the manifest contains no such item.
sub getTocFileName {
    my $self = shift;
    foreach my $itemId (keys %{ $self->{'manifest'} }) {
        my $href = $self->{'manifest'}->{$itemId}->{'href'};
        next unless $href =~ m/\.ncx$/;
        return $href;
    }
    return undef;
}
#################################################################
# Accessor: returns the value stored under the object's 'toc' key.
sub getTOC {
    my $self = shift;
    return $self->{'toc'};
}
#################################################################
# Accessor: returns the value stored under the object's 'manifest' key.
sub getManifest {
    my $self = shift;
    return $self->{'manifest'};
}

#################################################################
# Accessor: returns the value stored under the object's 'spine' key.
sub getSpine {
    my $self = shift;
    return $self->{'spine'};
}

#################################################################
# Returns the book author.
# An explicitly stored, non-empty $self->{'author'} wins; otherwise the
# 'author' entry of the parsed metadata hash is returned, because parse()
# stores the OPF values under $self->{'metadata'} only.
sub getAuthor {
    my ($self) = @_;
    my $author = $self->{'author'};
    return $author if defined $author && $author ne '';

    my $metadata = $self->{'metadata'};
    if (ref($metadata) eq 'HASH' && defined $metadata->{'author'}) {
        return $metadata->{'author'};
    }
    return $author;
}
#################################################################
# Returns the book title.
# An explicitly stored, non-empty $self->{'title'} wins; otherwise the
# 'title' entry of the parsed metadata hash is returned, because parse()
# stores the OPF values under $self->{'metadata'} only.
sub getTitle {
    my ($self) = @_;
    my $title = $self->{'title'};
    return $title if defined $title && $title ne '';

    my $metadata = $self->{'metadata'};
    if (ref($metadata) eq 'HASH' && defined $metadata->{'title'}) {
        return $metadata->{'title'};
    }
    return $title;
}
#################################################################
# Returns the book description.
# An explicitly stored, non-empty $self->{'description'} wins; otherwise the
# 'description' entry of the parsed metadata hash is returned, because
# parse() stores the OPF values under $self->{'metadata'} only.
sub getDescription {
    my ($self) = @_;
    my $description = $self->{'description'};
    return $description if defined $description && $description ne '';

    my $metadata = $self->{'metadata'};
    if (ref($metadata) eq 'HASH' && defined $metadata->{'description'}) {
        return $metadata->{'description'};
    }
    return $description;
}
#################################################################
# Returns the book language.
# An explicitly stored, non-empty $self->{'language'} wins; otherwise the
# 'language' entry of the parsed metadata hash is returned, because parse()
# stores the OPF values under $self->{'metadata'} only.
sub getLanguage {
    my ($self) = @_;
    my $language = $self->{'language'};
    return $language if defined $language && $language ne '';

    my $metadata = $self->{'metadata'};
    if (ref($metadata) eq 'HASH' && defined $metadata->{'language'}) {
        return $metadata->{'language'};
    }
    return $language;
}
#################################################################
# Returns the book ISBN.
# An explicitly stored, non-empty $self->{'ISBN'} wins; otherwise the 'ISBN'
# entry of the parsed metadata hash is returned, because parse() stores the
# OPF values under $self->{'metadata'} only.
sub getISBN {
    my ($self) = @_;
    my $isbn = $self->{'ISBN'};
    return $isbn if defined $isbn && $isbn ne '';

    my $metadata = $self->{'metadata'};
    if (ref($metadata) eq 'HASH' && defined $metadata->{'ISBN'}) {
        return $metadata->{'ISBN'};
    }
    return $isbn;
}
#################################################################
# Returns the list (image content, media type) for the book cover.
# When no cover id can be determined the result is (undef, "").
sub getBookCoverImg {
    my $self    = shift;
    my $coverId = $self->getBookCoverId();

    # No cover known: nothing to look up.
    return (undef, "") if $coverId eq "";

    my $mediaType = $self->{'manifest'}->{$coverId}->{'media-type'};
    my $img       = $self->getImgFile($coverId);
    return ($img, $mediaType);
}


#################################################################
# Determines the manifest id of the cover image.
#
# 1. If the metadata names a cover id ("bookCover") that exists in the
#    manifest, that id is used; when it points at an XHTML page, the image
#    referenced by that page is looked up instead.
# 2. Otherwise the manifest is scanned for an item whose href starts with
#    "cover" or contains "/cover" (case-insensitive); an image item is used
#    directly, an XHTML item is resolved to the image it references.
#
# Returns the id, or "" when no cover can be found.
sub getBookCoverId {
    my ($self) = @_;
    my $bookCoverId = $self->getMetadataField("bookCover");

    if ($bookCoverId ne "" && defined $self->{'manifest'}->{$bookCoverId}) {
        my $mediaType = $self->{'manifest'}->{$bookCoverId}->{'media-type'};
        if (defined $mediaType && $mediaType eq "application/xhtml+xml") {
            $bookCoverId = $self->getCoverIdFromHtmlFile($bookCoverId);
        }
        return $bookCoverId;
    }

    # The metadata id (if any) is not in the manifest: forget it so that a
    # dangling id is never handed back to the caller.
    $bookCoverId = "";

    my $manifest = $self->getManifest();
    # Sorted keys make the choice deterministic when several items qualify.
    foreach my $fid (sort keys %$manifest) {
        my $href      = $manifest->{$fid}->{'href'};
        my $mediaType = $manifest->{$fid}->{'media-type'};
        next unless defined $href && defined $mediaType;
        next unless $href =~ m/^cover|\/cover/i;

        if ($mediaType =~ m/image\//) {
            $bookCoverId = $fid;
            last;
        }
        elsif ($mediaType eq "application/xhtml+xml") {
            $bookCoverId = $self->getCoverIdFromHtmlFile($fid);
            # Keep scanning when the page did not lead to a manifest image.
            last if $bookCoverId ne "";
        }
    }
    return $bookCoverId;
}
#################################################################
# Given the manifest id of an (X)HTML cover page, returns the manifest id of
# the image that page displays, or "" when none can be identified.
#
# The first <img src=...> of the page is used (single or double quotes);
# if the page has no <img>, the first SVG <image href=... / xlink:href=...>
# is used instead. The reference is matched against manifest hrefs first
# verbatim, then resolved relative to the directory of the cover page
# (so "../Images/cover.jpg" inside "Text/cover.xhtml" matches
# "Images/cover.jpg").
sub getCoverIdFromHtmlFile {
    my ($self, $fid) = @_;
    my $fileCont = $self->getFile($fid);
    return "" unless defined $fileCont;

    my $src;
    if ($fileCont =~ m/<img\b[^>]*?(?<![\w\-])src\s*=\s*(["'])(.*?)\1/is) {
        $src = $2;
    }
    elsif ($fileCont =~ m/<image\b[^>]*?(?<![\w\-])(?:xlink:)?href\s*=\s*(["'])(.*?)\1/is) {
        $src = $2;
    }
    return "" unless defined $src && $src ne '';

    # Candidate 1: the reference exactly as written.
    my @candidates = ($src);

    # Candidate 2: the reference without fragment/query, resolved against
    # the directory of the cover page's own href.
    my $clean = $src;
    $clean =~ s/[#?].*\z//s;
    my @parts;
    if ($clean !~ m{^/}) {
        my $pageHref = $self->{'manifest'}->{$fid}->{'href'};
        @parts = split m{/}, (defined $pageHref ? $pageHref : '');
        pop @parts;    # drop the page's file name, keep its directory
    }
    foreach my $segment (split m{/}, $clean) {
        next if $segment eq '' || $segment eq '.';
        if ($segment eq '..') {
            pop @parts;
            next;
        }
        push @parts, $segment;
    }
    push @candidates, join('/', @parts);

    foreach my $candidate (@candidates) {
        foreach my $id (sort keys %{ $self->{'manifest'} }) {
            my $href = $self->{'manifest'}->{$id}->{'href'};
            return $id if defined $href && $href eq $candidate;
        }
    }
    return "";
}
#################################################################
# Returns the metadata value stored under $fieldName, or "" when that value
# is missing or false.
sub getMetadataField {
    my $self      = shift;
    my $fieldName = shift;
    my $value     = $self->{'metadata'}->{$fieldName};
    return $value ? $value : "";
}

#################################################################
# Returns the content of the manifest item $fid, read through _getFileCont,
# or "" when the item's path (<rootDir>/<opfPath>/<href>) is not a plain file.
sub getFile {
    my ($self, $fid) = @_;
    my $href     = $self->{'manifest'}->{$fid}->{'href'};
    my $filePath = join("/", $self->{'rootDir'}, $self->{'opfPath'}, $href);

    return "" unless -f $filePath;
    my $content = _getFileCont($filePath);
    return $content;
}
#################################################################
# Returns the content of the manifest item $fid, read through _getImgFile,
# or "" when the item's path (<rootDir>/<opfPath>/<href>) is not a plain file.
sub getImgFile {
    my ($self, $fid) = @_;
    my $href     = $self->{'manifest'}->{$fid}->{'href'};
    my $filePath = join("/", $self->{'rootDir'}, $self->{'opfPath'}, $href);

    return "" unless -f $filePath;
    my $content = _getImgFile($filePath);
    return $content;
}

#################################################################
# Writes $fileCont to the path of manifest item $fid
# (<rootDir>/<opfPath>/<href>) through _saveFileCont and returns that
# helper's result.
sub saveFile {
    my ($self, $fid, $fileCont) = @_;
    my $href   = $self->{'manifest'}->{$fid}->{'href'};
    my $target = join("/", $self->{'rootDir'}, $self->{'opfPath'}, $href);
    my $result = _saveFileCont($target, $fileCont);
    return $result;
}
#################################################################
# Rewrites references using $uriPrefix: first in the table of contents,
# then inside the content files. Returns what reAssignFileUri returns.
sub reAssignUri {
    my $self      = shift;
    my $uriPrefix = shift;
    $self->reAssignTocUri($uriPrefix);
    return $self->reAssignFileUri($uriPrefix);
}

#################################################################
# Rewrites the 'src' of every table-of-contents entry: the first manifest
# href found inside the src is replaced by "<uriPrefix><manifest id>", and
# any "//" in the result is collapsed to "/".
#
# Hrefs are matched literally (regex metacharacters such as "." or "(" in a
# file name are not interpreted), and longer hrefs are tried first so that
# "sub/ch1.html" is preferred over "ch1.html".
sub reAssignTocUri {
    my ($self, $uriPrefix) = @_;
    $uriPrefix = '' unless defined $uriPrefix;

    my $manifest = $self->{'manifest'} || {};
    # Ignore items without an href: an empty pattern would not mean
    # "match nothing" in a Perl substitution.
    my @fids = grep {
        defined $manifest->{$_}->{'href'} && $manifest->{$_}->{'href'} ne ''
    } keys %$manifest;
    @fids = sort {
        length($manifest->{$b}->{'href'}) <=> length($manifest->{$a}->{'href'})
            || $a cmp $b
    } @fids;

    foreach my $nav (@{ $self->{'toc'} || [] }) {
        next unless defined $nav->{'src'};
        foreach my $fid (@fids) {
            my $uri    = $manifest->{$fid}->{'href'};
            my $newUri = "$uriPrefix$fid";
            if ($nav->{'src'} =~ s/\Q$uri\E/$newUri/) {
                # NOTE(review): this also collapses the "//" of an absolute
                # prefix such as "http://host/" - confirm prefixes are
                # always path-only.
                $nav->{'src'} =~ s/\/\//\//g;
                last;
            }
        }
    }
}
#################################################################
# Rewrites, inside every XHTML manifest item, each occurrence of a manifest
# href with "<uriPrefix><manifest id>" and saves the file back.
#
# All hrefs are replaced in a single pass with literal (quoted) matching,
# longest href first, so that file names containing regex metacharacters
# work and text inserted by one replacement is never rewritten by another.
# Items whose content is empty or unreadable are left untouched.
sub reAssignFileUri {
    my ($self, $uriPrefix) = @_;
    $uriPrefix = '' unless defined $uriPrefix;

    my $manifest = $self->getManifest();
    return unless ref($manifest) eq 'HASH';

    # href -> id lookup; for duplicate hrefs the first id (sorted) wins.
    my %idOf;
    foreach my $id (sort keys %$manifest) {
        my $href = $manifest->{$id}->{'href'};
        next unless defined $href && $href ne '';
        $idOf{$href} = $id unless exists $idOf{$href};
    }

    my $pattern;
    if (%idOf) {
        my $alternation = join '|',
            map { quotemeta($_) }
            sort { length($b) <=> length($a) || $a cmp $b } keys %idOf;
        $pattern = qr/($alternation)/;
    }

    foreach my $fid (sort keys %$manifest) {
        my $mediaType = $manifest->{$fid}->{'media-type'};
        next unless defined $mediaType && $mediaType eq 'application/xhtml+xml';

        # Only XHTML items are read; other files are never touched.
        my $fileCont = $self->getFile($fid);
        next unless defined $fileCont && $fileCont ne '';

        $fileCont =~ s/$pattern/$uriPrefix$idOf{$1}/g if defined $pattern;
        $self->saveFile($fid, $fileCont);
    }
    return;
}

#------------------------------------------------------------------------------
# Parses the NCX file $tocFile with TOCHandler and stores the resulting
# entries, ordered by their numeric 'playOrder', in $self->{'toc'}.
# An NCX without any navPoint yields an empty table of contents.
sub _getTOC {
    my ($self, $tocFile) = @_;
    my $tocCont = _getFileCont($tocFile);
    my $parser  = XML::SAX::ParserFactory->new()->parser( Handler => TOCHandler->new() );
    $parser->parse_string($tocCont);

    # The handler only creates its 'toc' list when it sees a navPoint.
    my $toc = $parser->{'Handler'}->{'toc'} || [];
    my @ordered = sort {
        ($a->{'playOrder'} || 0) <=> ($b->{'playOrder'} || 0)
    } @$toc;
    $self->{'toc'} = \@ordered;
}

#------------------------------------------------------------------------------
# Reads <rootDir>/META-INF/container.xml, takes the package document path
# reported by CONTHandler and splits it at its last "/" into
# $self->{'opfPath'} (directory part, "" when there is none) and
# $self->{'opfFileName'} (file name).
sub _getOPFPath {
    my ($self) = @_;
    my $containerFile = $self->{'rootDir'} . "/META-INF/container.xml";

    my $parser = XML::SAX::ParserFactory->new()->parser( Handler => CONTHandler->new() );
    $parser->parse_string( _getFileCont($containerFile) );
    my $fullPath = $parser->{'Handler'}->{"fullPath"};

    # A slash at position 0 (or no slash at all) means "no directory part".
    my $slashAt = rindex($fullPath, "/");
    my ($relPath, $name) = $slashAt > 0
        ? ( substr($fullPath, 0, $slashAt), substr($fullPath, $slashAt + 1) )
        : ( "", $fullPath );

    $self->{'opfPath'}     = $relPath;
    $self->{'opfFileName'} = $name;
}
#------------------------------------------------------------------------------
# Adds a manifest entry for every file found below the OPF directory that
# the manifest does not list yet.
#
# The media type is derived from the file extension (case-insensitively);
# files with an unknown extension are registered as
# "application/xhtml+xml". New ids come from _genItemId and are assigned in
# sorted file-name order so the result is reproducible.
# NOTE(review): the xhtml default also applies to fonts, CSS etc. found on
# disk - confirm that is intended, since XHTML items get rewritten later.
sub _getMissingManifestItem {
    my ($self) = @_;
    $self->_getFileList($self->{'opfPath'});

    my $mediaTypeMap = {
        html  => "application/xhtml+xml",
        htm   => "application/xhtml+xml",
        xhtml => "application/xhtml+xml",
        jpg   => "image/jpeg",    # "image/jpg" is not a registered media type
        jpeg  => "image/jpeg",
        gif   => "image/gif",
        png   => "image/png",
        bmp   => "image/bmp",
    };

    # Mark every file the manifest already knows about.
    foreach my $fid (keys %{ $self->{'manifest'} }) {
        my $uri = $self->{'manifest'}->{$fid}->{'href'};
        next unless defined $uri;
        $self->{'fileList'}->{$uri} = 1;
    }

    foreach my $f (sort keys %{ $self->{'fileList'} }) {
        next if $self->{'fileList'}->{$f};

        my $mediaType = "application/xhtml+xml";
        if ($f =~ m/\.(html|htm|xhtml|jpg|jpeg|png|bmp|gif)$/i) {
            # The match is case-insensitive, so the lookup key must be
            # lower-cased (".JPG" would otherwise map to undef).
            $mediaType = $mediaTypeMap->{ lc $1 };
        }

        my $itemId = $self->_genItemId();
        $self->{'manifest'}->{$itemId}->{'href'}       = $f;
        $self->{'manifest'}->{$itemId}->{'media-type'} = $mediaType;
    }
    return;
}

#------------------------------------------------------------------------------
# Returns an id of the form "item_<n>" that is not yet defined in the
# manifest. The running counter is kept in $self->{'tmpId'} (starting at 0);
# it is only advanced while candidates collide, each candidate being built
# from the counter value before it is incremented.
sub _genItemId {
    my $self = shift;
    $self->{'tmpId'} = 0 unless defined $self->{'tmpId'};

    my $candidate = sprintf("item_%d", $self->{'tmpId'});
    until (!defined $self->{'manifest'}->{$candidate}) {
        my $current = $self->{'tmpId'};
        $self->{'tmpId'} = $current + 1;
        $candidate = sprintf("item_%d", $current);
    }
    return $candidate;
}
#------------------------------------------------------------------------------
# Recursively collects every non-directory entry below $path into
# $self->{'fileList'}, keyed by the path relative to
# "<rootDir>/<opfPath>/" and with value 0 ("not in the manifest yet").
#
# $path may be relative to rootDir (first call) or already start with
# rootDir (recursive calls). Hidden entries (names starting with ".") are
# skipped. Directory contents are read with readdir and all path
# comparisons are literal, so names containing spaces or regex
# metacharacters are handled.
sub _getFileList {
    my ($self, $path) = @_;
    my $rootDir = defined $self->{'rootDir'} ? $self->{'rootDir'} : '';
    my $opfPath = defined $self->{'opfPath'} ? $self->{'opfPath'} : '';
    $path = '' unless defined $path;

    # Prefix with rootDir unless the path already starts with it.
    my $fname = $path;
    $fname = sprintf("%s/%s", $rootDir, $path) if index($path, $rootDir) != 0;
    $fname =~ s/\/+/\//g;

    if (-d $fname) {
        opendir(my $dh, $fname) or return;
        my @entries = sort grep { !/^\./ } readdir($dh);
        closedir($dh);
        foreach my $entry (@entries) {
            $self->_getFileList("$fname/$entry");
        }
        return;
    }

    # Plain file: store it relative to the OPF directory.
    my $prefix = sprintf("%s/%s/", $rootDir, $opfPath);
    $prefix =~ s/\/+/\//g;
    my $relative = $path;
    $relative =~ s/\/+/\//g;
    $relative = substr($relative, length $prefix) if index($relative, $prefix) == 0;
    $self->{'fileList'}->{$relative} = 0;
    return;
}
#------------------------------------------------------------------------------
# Returns the raw (binary) content of the file $path.
# Dies when the file cannot be opened.
sub _getImgFile {
    my ($path) = @_;
    # Three-argument open with "or": with "||" the die was attached to the
    # path string and could never fire.
    open(my $fh, '<', $path) or die "Cannot open file $path: $!";
    binmode($fh);
    my $cont = do { local $/; <$fh> };
    close($fh);
    return defined $cont ? $cont : "";
}
#------------------------------------------------------------------------------
# Returns the content of the text file $path, decoded through the :utf8
# input layer. Dies when the file cannot be opened.
sub _getFileCont {
    my ($path) = @_;
    # "or" instead of "||": with "||" the die was attached to the path
    # string and could never fire.
    open(my $fh, '<:utf8', $path) or die "Cannot open file $path: $!";
    my $cont = do { local $/; <$fh> };
    close($fh);
    return defined $cont ? $cont : "";
}
#------------------------------------------------------------------------------
# Writes $cont to the file $path through the :utf8 output layer, replacing
# any previous content. Dies when the file cannot be opened or when the
# final close (which flushes buffered data) fails. Returns 1 on success.
sub _saveFileCont {
    my ($path, $cont) = @_;
    # "or" instead of "||": with "||" the die was attached to the path
    # string and could never fire.
    open(my $fh, '>:utf8', $path) or die "Cannot open file $path: $!";
    print {$fh} $cont;
    close($fh) or die "Cannot close file $path: $!";
    return 1;
}

#################################################################
#  OPFHandler
#
#
#################################################################
package OPFHandler;
use strict;

# Constructor.
# The handler is table driven: $self->{'section'} maps an OPF section name
# (metadata / manifest / spine) and an element name (namespace prefix
# stripped) to a property description:
#   fieldName - key under which the value is stored
#   fieldType - 'text', 'hashOfHash', 'arrayOfHash' or 'arrayOfText'
#   valFrom   - { text => 1 } (element text) or { attr => NAME | [NAMES] }
#   key       - for hashOfHash: attribute that supplies the hash key
#   attrCond  - optional { AND => {...}, OR => {...} } attribute conditions;
#               an expected value may be a string or an array reference of
#               alternative strings (see checkAttrCond)
sub new {
    my ($class) = @_;
    my $self = {
        section => {
            'metadata' => {
                'title' => {
                    fieldName => 'title',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                },
                'creator' => {
                    fieldName => 'author',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                    attrCond  => { AND => { role => 'aut' } },
                },
                'description' => {
                    fieldName => 'description',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                },
                'language' => {
                    fieldName => 'language',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                },
                'identifier' => {
                    fieldName => 'ISBN',
                    fieldType => 'text',
                    # Two alternatives for the same attribute must be given
                    # as a list: repeating the hash key 'id' would silently
                    # keep only the last value.
                    attrCond  => { OR => { id => [ 'isbn', 'isbn10' ] } },
                    valFrom   => { 'text' => 1 },
                },
                'meta' => {
                    fieldName => 'bookCover',
                    fieldType => 'text',
                    attrCond  => { AND => { name => 'cover' } },
                    valFrom   => { attr => 'content' },
                },
                'publisher' => {
                    fieldName => 'publisher',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                },
                'date' => {
                    fieldName => 'pubDate',
                    fieldType => 'text',
                    valFrom   => { 'text' => 1 },
                },
            },
            'manifest' => {
                'item' => {
                    fieldName => 'manifest',
                    fieldType => 'hashOfHash',
                    key       => 'id',
                    valFrom   => { attr => [ 'href', 'media-type' ] },
                },
            },
            'spine' => {
                'itemref' => {
                    fieldName => 'spine',
                    fieldType => 'arrayOfText',
                    valFrom   => { attr => 'idref' },
                },
            },
        },
    };

    bless $self, $class;
    return $self;
}
#------------------------------------------------------------------------------
# SAX callback for an opening tag.
# Section elements (metadata, manifest, spine, guide) switch the current
# parsing section. For any other element described in the section table,
# the element's attributes are remembered (end_element needs them for its
# attribute condition) and, when the value comes from attributes, the value
# is stored right away.
sub start_element {
    my ($self, $element) = @_;
    my $name = $element->{'Name'};
    $name =~ s/^.*?://g;    # drop the namespace prefix
    $self->{text} = "";
    my $attr = fixAttrName($element->{'Attributes'});

    if (   $name eq 'metadata'
        || $name eq 'manifest'
        || $name eq 'spine'
        || $name eq 'guide') {
        $self->{'parsingSection'} = $name;
        return;
    }

    my $section = $self->{'parsingSection'};
    return if !defined $section;
    my $props = $self->{'section'}->{$section};
    return if !defined $props;
    my $prop = $props->{$name};
    return if !defined $prop;

    my $fieldName = $prop->{'fieldName'};
    $self->{'attr'}->{$section}->{$fieldName} = $attr;
    if (defined($prop->{'attrCond'}) && !checkAttrCond($attr, $prop->{'attrCond'})) {
        return;
    }

    # Only attribute-sourced values are handled here; text-sourced values
    # are stored by end_element.
    my $valFrom = $prop->{'valFrom'};
    return unless defined $valFrom && $valFrom->{'attr'};
    my $source    = $valFrom->{'attr'};
    my $fieldType = $prop->{'fieldType'};

    if ($fieldType eq 'hashOfHash') {
        my $key = $attr->{ $prop->{'key'} }->{'Value'};
        my $val = {};
        foreach my $attrName (@$source) {
            $val->{$attrName} = $attr->{$attrName}->{'Value'};
        }
        $self->{$fieldName}->{$key} = $val;
    }
    elsif ($fieldType eq 'arrayOfHash') {
        my $val = {};
        foreach my $attrName (@$source) {
            $val->{$attrName} = $attr->{$attrName}->{'Value'};
        }
        $self->{$fieldName} = [] if (!defined $self->{$fieldName});
        push @{ $self->{$fieldName} }, $val;
    }
    elsif ($fieldType eq 'arrayOfText') {
        push @{ $self->{$fieldName} }, $attr->{$source}->{'Value'};
    }
    else {    # fieldType=text
        my $value = $attr->{$source}->{'Value'};
        if ($section eq 'metadata') {
            $self->{'metadata'}->{$fieldName} = $value;
        }
        else {
            $self->{$fieldName} = $value;
        }
    }
    return;
}

#------------------------------------------------------------------------------
# SAX callback for a closing tag.
# For elements whose value comes from their text, the text collected by
# characters() since the last opening tag is stored - provided the
# attributes remembered by start_element satisfy the element's attribute
# condition. Metadata values go to $self->{'metadata'}->{fieldName}, all
# others to $self->{fieldName}.
sub end_element {
    my ($self, $element) = @_;
    my $name = $element->{'Name'};
    $name =~ s/^.*?://g;    # drop the namespace prefix

    my $section = $self->{'parsingSection'};
    return if !defined $section;
    my $props = $self->{'section'}->{$section};
    return if !defined $props;
    my $prop = $props->{$name};
    return if !defined $prop;

    my $valFrom = $prop->{'valFrom'};
    return unless ref($valFrom) eq 'HASH' && $valFrom->{'text'};

    my $fieldName = $prop->{'fieldName'};
    my $attr      = $self->{'attr'}->{$section}->{$fieldName};
    if (defined($prop->{'attrCond'}) && !checkAttrCond($attr, $prop->{'attrCond'})) {
        return;
    }

    if ($prop->{'fieldType'} eq 'arrayOfText') {
        # The field type is a plain string; it must not be dereferenced.
        push @{ $self->{$fieldName} }, $self->{text};
    }
    elsif ($section eq 'metadata') {    # fieldType=text
        $self->{'metadata'}->{$fieldName} = $self->{text};
    }
    else {
        $self->{$fieldName} = $self->{text};
    }
    return;
}
#------------------------------------------------------------------------------
# Returns true when attribute $attrName of $attr (a hash of
# { name => { Value => ... } }) equals $expected. $expected is either a
# string or an array reference of acceptable strings. A missing attribute
# compares as the empty string.
sub _attrMatches {
    my ($attr, $attrName, $expected) = @_;
    my $value = exists $attr->{$attrName} ? $attr->{$attrName}->{'Value'} : undef;
    $value = '' unless defined $value;
    foreach my $candidate (ref($expected) eq 'ARRAY' ? @$expected : ($expected)) {
        return 1 if defined $candidate && $value eq $candidate;
    }
    return 0;
}
#------------------------------------------------------------------------------
# Evaluates an attribute condition against an element's attributes.
#   $attr     - hash of { name => { Value => ... } } (may be undef)
#   $attrCond - { AND => { name => expected, ... },
#                 OR  => { name => expected, ... } }, both parts optional
# Every AND entry must match; if an OR part is present at least one of its
# entries must match as well. Returns 1 when satisfied, 0 otherwise.
sub checkAttrCond {
    my ($attr, $attrCond) = @_;
    $attr = {} unless ref($attr) eq 'HASH';
    my $andCond = $attrCond->{'AND'};
    my $orCond  = $attrCond->{'OR'};

    if (defined $andCond) {
        foreach my $attrName (keys %$andCond) {
            return 0 unless _attrMatches($attr, $attrName, $andCond->{$attrName});
        }
    }
    if (defined $orCond) {
        foreach my $attrName (keys %$orCond) {
            return 1 if _attrMatches($attr, $attrName, $orCond->{$attrName});
        }
        return 0;
    }
    return 1;
}
#------------------------------------------------------------------------------
# Earlier, hard-coded variant of start_element (not table driven; element
# names are compared including any namespace prefix).
# Tracks the current section, collects manifest items (an item whose href
# ends in ".ncx" is remembered as 'tocFileName' instead), collects spine
# idrefs, and remembers the attributes of creator / identifier elements
# for end_element_bk.
sub start_element_bk {
    my ($handler, $element) = @_;
    my $tag   = $element->{'Name'};
    my $attrs = fixAttrName($element->{'Attributes'});

    if ($tag eq 'metadata') {
        $handler->{'parsingSection'} = 'metadata';
    }
    elsif ($tag eq 'manifest') {
        $handler->{'parsingSection'} = 'manifest';
    }
    elsif ($tag eq 'spine') {
        $handler->{'parsingSection'} = 'spine';
    }
    elsif ($tag eq 'guide') {
        $handler->{'parsingSection'} = 'guide';
    }
    elsif (   ($tag eq 'item' || $tag eq 'opf:item')
           && $handler->{'parsingSection'} eq 'manifest') {
        my $id        = $attrs->{'id'}->{'Value'};
        my $href      = $attrs->{'href'}->{'Value'};
        my $mediaType = $attrs->{'media-type'}->{'Value'};
        if ($href =~ m/\.ncx$/) {
            $handler->{'tocFileName'} = $href;
        }
        else {
            $handler->{'manifest'}->{$id} = { href => $href, mediaType => $mediaType };
        }
    }
    elsif ($tag eq 'itemref' && $handler->{'parsingSection'} eq 'spine') {
        push @{ $handler->{'spine'} }, $attrs->{'idref'}->{'Value'};
    }
    elsif (   ($tag eq 'dc:creator' || $tag eq 'creator')
           && $handler->{'parsingSection'} eq 'metadata') {
        $handler->{'creatorAttr'} = $attrs;
    }
    elsif (   ($tag eq 'dc:identifier' || $tag eq 'identifier')
           && $handler->{'parsingSection'} eq 'metadata') {
        $handler->{'identifierAttr'} = $attrs;
    }

}
#------------------------------------------------------------------------------
# Returns a new hash with the same values as $attr but simplified keys:
# a "{...}" part is removed from each key, then everything up to and
# including the first ":".
sub fixAttrName {
    my $attr = shift;
    my %fixed;
    foreach my $rawKey (keys %$attr) {
        (my $shortKey = $rawKey) =~ s/{.*}//g;
        $shortKey =~ s/^.*?://g;
        $fixed{$shortKey} = $attr->{$rawKey};
    }
    return \%fixed;
}

#------------------------------------------------------------------------------
# Earlier, hard-coded variant of end_element.
# Copies the collected element text into author (creator with role "aut"),
# title, description (metadata section only, surrounding whitespace
# removed), language, or ISBN (identifier whose id contains "isbn";
# everything but digits removed), depending on the element name.
sub end_element_bk {
    my ($handler, $element) = @_;
    my $tag = $element->{ Name };

    if (   ($tag =~ m/dc:creator/ || $tag =~ m/creator/)
        && $handler->{'creatorAttr'}->{"role"}->{'Value'} eq "aut") {
        $handler->{'author'} = $handler->{text};
    }
    elsif ($tag eq 'dc:title' || $tag eq 'title') {
        $handler->{'title'} = $handler->{text};
    }
    elsif (   ($tag eq 'dc:description' || $tag eq 'description')
           && $handler->{'parsingSection'} eq 'metadata') {
        # Assign first, then trim the stored copy in place.
        ($handler->{'description'} = $handler->{text}) =~ s/^\s*|\s*$//g;
    }
    elsif ($tag =~ m/dc:language/ || $tag =~ m/language/) {
        $handler->{'language'} = $handler->{text};
    }
    elsif (   ($tag =~ m/dc:identifier/ || $tag =~ m/identifier/)
           && $handler->{'identifierAttr'}->{"id"}->{'Value'} =~ m/isbn/) {
        # Assign first, then strip non-digits from the stored copy.
        ($handler->{'ISBN'} = $handler->{text}) =~ s/\D//g;
    }

}
#------------------------------------------------------------------------------
# SAX callback for character data: appends the chunk's Data to the text
# buffer $self->{text}.
sub characters {
    my ($self, $chunk) = @_;
    $self->{text} .= $chunk->{Data};
}



#////////////////////////////////////////////////////////////////////////////
#  TOCHandler
#
#
#################################################################
package TOCHandler;
use strict;

# Constructor.
# State kept on the object:
#   level - current navPoint nesting depth (0 outside any navPoint)
#   stack - one frame { playOrder, label, src } per open navPoint
#   toc   - finished entries { playOrder, label, src, level }, appended
#           when a navPoint closes
sub new {
    my $type = shift;
    my $self = { level => 0, stack => [], toc => [] };
    # Bless the prepared hash itself; blessing a fresh {} would discard it.
    return bless $self, $type;
}

# SAX callback for an opening tag.
# navPoint opens a new frame, content records the frame's src, and a text
# element directly inside navLabel starts label collection.
sub start_element {
    my ($self, $element) = @_;
    my $name = $element->{'Name'};
    $name =~ s/^.*?://g;    # drop the namespace prefix
    my $attr      = fixAttrName($element->{'Attributes'});
    my $parentTag = defined $self->{'parentTag'} ? $self->{'parentTag'} : '';
    my $stack     = $self->{'stack'} ||= [];

    if ($name eq "navPoint") {
        my $playOrder = exists $attr->{'playOrder'} ? $attr->{'playOrder'}->{'Value'} : undef;
        push @$stack, { playOrder => int($playOrder || 0), label => undef, src => undef };
        $self->{'level'} += 1;
    }
    elsif ($name eq "content") {
        # The src belongs to the innermost open navPoint; keeping it in
        # that navPoint's frame prevents a navPoint without <content> from
        # taking its parent's src.
        my $frame = @$stack ? $stack->[-1] : undef;
        if ($frame && !defined $frame->{'src'} && exists $attr->{'src'}) {
            $frame->{'src'} = $attr->{'src'}->{'Value'};
        }
    }
    elsif ($name eq "text" && $parentTag eq "navLabel") {
        $self->{'isLabel'} = 1;
        $self->{text} = "";
    }
    $self->{'parentTag'} = $name;

}

#################################################################
# SAX callback for a closing tag.
# Closing a label text stores the collected text in the innermost open
# navPoint's frame; closing a navPoint turns its frame into a toc entry
# (label without newlines and surrounding whitespace).
sub end_element {
    my ( $self, $element ) = @_;
    my $name = $element->{ Name };
    $name =~ s/^.*?://g;    # drop the namespace prefix
    my $stack = $self->{'stack'} ||= [];

    if ($name eq 'navPoint') {
        my $frame = pop @$stack;
        return unless $frame;    # unbalanced closing tag: nothing to emit

        my $label = defined $frame->{'label'} ? $frame->{'label'} : '';
        $label =~ s/\n//g;
        $label =~ s/^\s+|\s+$//g;
        push @{ $self->{'toc'} }, {
            playOrder => $frame->{'playOrder'},
            label     => $label,
            src       => $frame->{'src'},
            level     => $self->{'level'},
        };
        $self->{'level'} -= 1;
    }
    elsif ($name eq "text" && $self->{'isLabel'}) {
        my $frame = @$stack ? $stack->[-1] : undef;
        $frame->{'label'} = $self->{text} if $frame && !defined $frame->{'label'};
        $self->{'isLabel'} = 0;
    }

}
#################################################################
# SAX callback for character data: appends the chunk's Data to the text
# buffer $self->{text}.
sub characters {
    my $self = shift;
    my $text = shift;
    $self->{text} .= $text->{Data};
}
# Returns a new hash with the same values as $attr but simplified keys:
# a "{...}" part is removed from each key, then everything up to and
# including the first ":".
sub fixAttrName{
    my ( $attr ) = @_;
    my $ret={};
    foreach my $rawKey (keys %$attr){
        my $k = $rawKey;
        $k =~ s/\{.*\}//g;
        $k =~ s/^.*?://g;
        $ret->{$k} = $attr->{$rawKey};
    }
    return $ret;
}
#////////////////////////////////////////////////////////////////////////////
#  Container Handler
#
#
#################################################################
package CONTHandler;
use strict;

# Constructor: returns an empty handler object.
sub new {
    my $type = shift;
    return bless {}, $type;
}

# SAX callback for an opening tag.
# Records the 'full-path' of the package document in $self->{'fullPath'}.
# The first <rootfile> whose media-type is
# "application/oebps-package+xml" is authoritative; until one is seen, the
# first <rootfile> of any kind is kept as a fallback. Later rootfiles
# (additional renditions) no longer overwrite the choice.
sub start_element {
    my ($self, $element) = @_;
    my $name = $element->{'Name'};
    $name =~ s/^.*?://g;    # drop the namespace prefix
    my $attr = fixAttrName($element->{'Attributes'});
    return unless $name eq "rootfile";

    my $fullPath = $attr->{'full-path'};
    return unless defined $fullPath && $fullPath ne '';

    my $mediaType = $attr->{'media-type'};
    if (defined $mediaType && $mediaType eq 'application/oebps-package+xml') {
        if (!$self->{'opfRootfileSeen'}) {
            $self->{'fullPath'}        = $fullPath;
            $self->{'opfRootfileSeen'} = 1;
        }
    }
    elsif (!defined $self->{'fullPath'}) {
        $self->{'fullPath'} = $fullPath;
    }
    return;
}

# Returns a new hash mapping simplified attribute names to the attribute's
# 'Value': a "{...}" part is removed from each key, then everything up to
# and including the first ":".
sub fixAttrName{
    my ( $attr ) = @_;
    my $ret={};
    foreach my $rawKey (keys %$attr){
        my $k = $rawKey;
        $k =~ s/\{.*\}//g;
        $k =~ s/^.*?://g;
        $ret->{$k} = $attr->{$rawKey}->{'Value'};
    }
    return $ret;
}

1;
