package Opals::RSS;

# Module version. Declared with `our` so the assignment remains valid under
# `use strict` wherever the pragma is placed, and quoted so the value keeps
# its exact textual form.
our $VERSION = '0.01';

use strict;
use Encode;
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;

#===========================================================================================
#Tue, Aug 17, 2010 @ 14:10:44 EDT
#
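# public methods:
#       new                     :constructor
#       read                    :fetch an RSS or Atom feed and parse it
#
#       params: {rssUrl=>url,nItemShow=>maximum number of items,fieldsShow=>"title,description,..."}
#       return: {channel=>{title=>title,link=>link,description=>description},
#                 itemList=>[{title=>title,link=>link,description=>description},...]
#                }
#               (an empty hash ref {} is returned when the feed cannot be fetched)
#   example
#    my $rss=Opals::RSS->new();
#    my $feed=$rss->read({rssUrl=>"http://rss.cbc.ca/lineup/topstories.xml",
#                         nItemShow=>5,fieldsShow=>"title,description"});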
#
#============================================================================================

#-------------------------------------------------------------------------------------------
# new()
#   Constructor. Returns an empty blessed hash.
#   The object is blessed into the invocant's class (two-argument bless) so that
#   subclasses get instances of their own class; calling new on an existing object
#   creates a fresh object of the same class. When no invocant is supplied the
#   current package is used.
#-------------------------------------------------------------------------------------------
sub new{
    my $type  = shift;
    my $class = ref($type) || $type || __PACKAGE__;
    my $self  = {};
    return bless($self, $class);
}

#-------------------------------------------------------------------------------------------
# read($params)
#   Fetch a feed over HTTP and parse it as RSS (<rss> root) or Atom (<feed> root).
#
#   $params is a hash ref with:
#       rssUrl      URL of the feed
#       nItemShow   maximum number of items to return
#       fieldsShow  comma separated list of item fields to return
#                   ("link" is always returned in addition)
#
#   Returns {} when no URL is given or the HTTP request fails, otherwise
#       {channel=>{...}, itemList=>[{field=>value,...},...]}
#   Requested fields that an item does not have are present with an undef value.
#-------------------------------------------------------------------------------------------
sub read{
    my($self,$params)=@_;
    my $timeout = 30;

    # The parsers accumulate their results on $self; start from a clean slate so
    # that reusing one object for several feeds does not mix their content.
    delete @{$self}{qw(channel itemList)};

    my $url = $params->{'rssUrl'};
    return {} unless defined $url && length $url;

    my $userAgent = LWP::UserAgent->new(agent   => 'OPALS',
                                        timeout => $timeout);
    my $response  = $userAgent->request(HTTP::Request->new(GET => $url));
    return {} unless $response->is_success;

    # Pick the parser from the document's root element.
    my $content = $response->content;
    if($content =~ m/<rss\s.*>.*<\/rss>/s){
        _parseRssXml($self,$content);
    }
    elsif($content =~ m/<feed\s.*<\/feed>/s){
        _parseRssFeedXml($self,$content);
    }

    my $fieldsShow = defined $params->{'fieldsShow'} ? $params->{'fieldsShow'} : '';
    my @fields     = (split(",",$fieldsShow),"link");
    my $nItemShow  = defined $params->{'nItemShow'} ? $params->{'nItemShow'} : 0;

    # Copy only the requested fields of the first $nItemShow items.
    my @retArr = ();
    my $items  = $self->{'itemList'};
    if('ARRAY' eq ref($items)){
        foreach my $i (0 .. $#{$items}){
            last unless $i < $nItemShow;
            my %item;
            @item{@fields} = @{$items->[$i]}{@fields};
            push @retArr,\%item;
        }
    }
    return {channel  =>$self->{'channel'},
            itemList =>\@retArr};
}

#-------------------------------------------------------------------------------------------
# _parseRssXml($self,$xml)
#   Parse the <channel> element of an RSS document.
#   Stores title, description and link of the channel in $self->{channel} and appends
#   one hash ref per <item> (as returned by _parseRssItem) to $self->{itemList}.
#   Does nothing when $xml has no <channel>...</channel> element.
#-------------------------------------------------------------------------------------------
sub _parseRssXml{
    my ($self,$xml)=@_;
    return unless defined $xml && $xml =~ m/<channel>(.*)<\/channel>/s;
    my $channelXml = $1;

    # Items have their own title/description/link elements. Remove them before
    # looking for the channel's metadata so that a field missing from the channel
    # is not silently filled with the value of the first item.
    (my $headerXml = $channelXml) =~ s/<item>.*?<\/item>//sg;

    foreach my $tag (qw(title description link)){
        if($headerXml =~ m/<$tag>(.*?)<\/$tag>/s){
            $self->{'channel'}->{$tag}=_extractFromembededData($1);
        }
    }
    # Attribute form (<link ... href="..."/>) takes precedence over the text form;
    # only the quoted href value is kept, without quotes or neighbouring attributes.
    if($headerXml =~ m/<link\s[^>]*?\bhref\s*=\s*(["'])(.*?)\1/s){
        $self->{'channel'}->{'link'}=$2;
    }

    # Walk the items with /g instead of re-copying the remaining document each time.
    while($channelXml =~ m/<item>(.*?)<\/item>/sg){
        my $item=_parseRssItem($1);
        push @{$self->{'itemList'}},$item if($item);
    }
    return;
}
#-------------------------------------------------------------------------------------------
# _extractFromembededData($data)
#   Unwrap CDATA sections.
#   When $data contains one or more <![CDATA[...]]> sections, the concatenated content
#   of those sections is returned (text outside the sections is dropped). Several
#   adjacent sections are how a literal "]]>" is encoded, so they are matched one by
#   one rather than as a single greedy span.
#   When $data contains no CDATA section (or is undef) it is returned unchanged.
#-------------------------------------------------------------------------------------------
sub _extractFromembededData{
    my ($data)=@_;
    return $data unless defined $data;
    my @sections = $data =~ m/<!\[CDATA\[(.*?)\]\]>/sg;
    return @sections ? join('',@sections) : $data;
}
#-------------------------------------------------------------------------------------------
# _parseRssFeedXml($self,$xml)
#   Parse an Atom document (<feed ...> root element).
#   Stores the feed's title and link in $self->{channel} and appends one hash ref per
#   <entry> (as returned by _parseRssFeedItem) to $self->{itemList}.
#   Does nothing when $xml has no <feed ...>...</feed> element.
#-------------------------------------------------------------------------------------------
sub _parseRssFeedXml{
    my ($self,$xml)=@_;
    return unless defined $xml && $xml =~ m/<feed\s(.*)<\/feed>/s;
    my $feedXml = $1;
    my $entryRe = qr/<entry(?:\s[^>]*)?>(.*?)<\/entry>/s;

    # Entries carry their own <title> and <link>; remove them so that only
    # feed-level metadata is left to search.
    (my $headerXml = $feedXml) =~ s/$entryRe//g;

    # [^>]* keeps the match inside the opening tag. A greedy ".*" here would run
    # to the last title of the document.
    if($headerXml =~ m/<title(?:\s[^>]*)?>(.*?)<\/title>/s){
        $self->{'channel'}->{'title'}=_extractFromembededData($1);
    }

    # Text form first; an href attribute, when present, takes precedence.
    if($headerXml =~ m/<link>(.*?)<\/link>/s){
        $self->{'channel'}->{'link'}=$1;
    }
    my $href;
    while($headerXml =~ m/<link\s([^>]*?)\/?>/sg){
        my $attrs = $1;
        next unless $attrs =~ m/\bhref\s*=\s*(["'])(.*?)\1/s;
        my $candidate = $2;
        # A link without rel is an "alternate" link; prefer that kind over
        # rel="self" and friends, and fall back to the first href seen.
        if($attrs !~ m/\brel\s*=/ || $attrs =~ m/\brel\s*=\s*["']alternate["']/){
            $href = $candidate;
            last;
        }
        $href = $candidate unless defined $href;
    }
    $self->{'channel'}->{'link'}=$href if defined $href;

    # Walk the entries with /g instead of re-copying the remaining document each time.
    while($feedXml =~ m/$entryRe/g){
        my $item=_parseRssFeedItem($1);
        push @{$self->{'itemList'}},$item if($item);
    }
    return;
}

#-------------------------------------------------------------------------------------------
# _parseRssItem($itemXml)
#   Parse the inner XML of one RSS <item>.
#   Returns a hash ref holding whichever of title, link, description and pubDate were
#   found (CDATA unwrapped), or undef when none of them is present.
#-------------------------------------------------------------------------------------------
sub _parseRssItem{
    my ($itemXml)=@_;
    my $item=undef;
    return $item unless defined $itemXml;

    foreach my $tag (qw(title link description pubDate)){
        # Non-greedy: stop at the first closing tag instead of spanning to the last one.
        if($itemXml =~ m/<$tag>(.*?)<\/$tag>/s){
            $item->{$tag}=_extractFromembededData($1);
        }
    }
    # Attribute form (<link ... href="..."/>) takes precedence over the text form;
    # only the quoted href value is kept, without quotes or neighbouring attributes.
    if($itemXml =~ m/<link\s[^>]*?\bhref\s*=\s*(["'])(.*?)\1/s){
        $item->{'link'}=$2;
    }
    return $item;
}
#-------------------------------------------------------------------------------------------
# _parseRssFeedItem($itemXml)
#   Parse the inner XML of one Atom <entry>.
#   Returns a hash ref using the same field names as RSS items, or undef when nothing
#   was found:
#       title     <- <title>
#       id        <- <id>
#       description <- <summary>
#       pubDate   <- <published>
#       link      <- href of a <link> element
#   Element content is CDATA-unwrapped.
#-------------------------------------------------------------------------------------------
sub _parseRssFeedItem{
    my ($itemXml)=@_;
    my $item=undef;
    return $item unless defined $itemXml;

    my $fieldMap={title=>'title',id=>'id',summary=>'description',published=>'pubDate'};
    foreach my $t (qw(title summary published id)){
        # [^>]* keeps the attribute match inside the opening tag and (.*?) stops at
        # the first closing tag, so content containing ">" (markup, CDATA) is kept whole.
        if($itemXml =~ m/<$t(?:\s[^>]*)?>(.*?)<\/$t>/s){
            $item->{$fieldMap->{$t}}=_extractFromembededData($1);
        }
    }

    my $href;
    while($itemXml =~ m/<link\s([^>]*?)\/?>/sg){
        my $attrs = $1;
        next unless $attrs =~ m/\bhref\s*=\s*(["'])(.*?)\1/s;
        my $candidate = $2;
        # A link without rel is an "alternate" link; prefer that kind over
        # rel="self", rel="edit" etc., and fall back to the first href seen.
        if($attrs !~ m/\brel\s*=/ || $attrs =~ m/\brel\s*=\s*["']alternate["']/){
            $href = $candidate;
            last;
        }
        $href = $candidate unless defined $href;
    }
    # Text form (<link>url</link>) is only used when no href attribute exists.
    if(!defined $href && $itemXml =~ m/<link>(.*?)<\/link>/s){
        $href = $1;
    }
    $item->{'link'}=$href if defined $href;

    return $item;
}


1;
