package Opals::Utf8;

require Exporter;
@ISA       = qw(Exporter);
# Symbols to be exported by default
#@EXPORT    = qw(
#    opl_
#);
# Symbols to be exported on request
@EXPORT_OK = qw(
    utf8_fromMarc8
    utf8_normalize
    utf8_encode
);
#    utf8_toMarc8
# Version number
$VERSION   = 0.01;      


#use utf8;
use strict;
use MARC::Charset;
use MARC::File::USMARC;
use MARC::File::XML;
use MARC::Field;
use Unicode::Normalize;

#use Opals::_Some_Module_;

################################################################################
# Note on unicode normalization:
# - Both NFD and NFC maintain compatibility composites.
# - Neither NFKD nor NFKC maintain compatibility composites.
# - None of the forms generate compatibility composites that were not in the 
#   source text.
# 
# More info at:
# - http://www.unicode.org/reports/tr15/
# - http://perldoc.perl.org/Unicode/Normalize.html
#
# MARC::Charset must be changed to continue if it cannot find an equivalent 
# MARC8 character for a UTF-8 character.
#   See MARC/Charset.pm for more information.
################################################################################

sub utf8_fromMarc8 {
    my ($marc) = @_;
    my ($tag, $ind1, $ind2, $code, $data);

    my $charset = MARC::Charset->new();

    $data = $marc->leader();
    substr $data, 9, 1, 'a';
    $marc->leader(NFC($charset->to_utf8($data))) if ($data);

    foreach my $field ($marc->fields()) {
        my $field_new;
        $tag = $field->tag();
        $tag = NFC($charset->to_utf8($tag)) if ($tag);
        if ($tag =~ m/00[1-9]/) { # control field
            $data = $field->data();
            $data = NFC($charset->to_utf8($data)) if ($data);
            $field_new = MARC::Field->new($tag, $data);
        }
        else {
            $ind1 = $field->indicator(1);
            $ind2 = $field->indicator(2);
            
            $ind1 = NFC($charset->to_utf8($ind1)) if ($ind1);
            $ind2 = NFC($charset->to_utf8($ind2)) if ($ind2);
            
            my $firstSubfield = 1;
            foreach my $subfield ($field->subfields()) {
                ($code, $data) = @$subfield;
                $code = NFC($charset->to_utf8($code)) if ($code);
                $data = NFC($charset->to_utf8($data)) if ($data);
                if ($firstSubfield) {
                    $firstSubfield = 0;
                    $field_new = MARC::Field->new($tag,
                                                  $ind1, $ind2,
                                                  $code, $data);
                }
                else {
                    $field_new->add_subfields($code, $data);
                }
            }
        }
        $field->replace_with($field_new) if ($field_new);
    }

    return $marc;
}

sub utf8_normalize {
    my ($marc) = @_;
    my ($tag, $ind1, $ind2, $code, $data);

    $data = $marc->leader();
    substr $data, 9, 1, 'a';
    $marc->leader(NFC($data)) if ($data);

    foreach my $field ($marc->fields()) {
        my $field_new;
        $tag = $field->tag();
        $tag = NFC($tag) if ($tag);
        if ($tag =~ m/00[1-9]/) { # control field
            $data = $field->data();
            $data = NFC($data) if ($data);
            $field_new = MARC::Field->new($tag, $data);
        }
        else {
            $ind1 = $field->indicator(1);
            $ind2 = $field->indicator(2);
            
            $ind1 = NFC($ind1) if ($ind1);
            $ind2 = NFC($ind2) if ($ind2);
            
            my $firstSubfield = 1;
            foreach my $subfield ($field->subfields()) {
                ($code, $data) = @$subfield;
                $code = NFC($code) if ($code);
                $data = NFC($data) if ($data);
                if ($firstSubfield) {
                    $firstSubfield = 0;
                    $field_new = MARC::Field->new($tag,
                                                  $ind1, $ind2,
                                                  $code, $data);
                }
                else {
                    $field_new->add_subfields($code, $data);
                }
            }
        }
        $field->replace_with($field_new) if ($field_new);
    }

    return $marc;
}


sub utf8_toMarc8 {
    # Disable conversion to MARC-8
    return;

#    my ($marcxml) = @_;
#
#    # Remove all of line-feeds which MARC8 does not recognize.
#    my $char_0a = chr(0x0a);
#    $marcxml =~ s/$char_0a//g;
#
#    my $charset = MARC::Charset->new();
#
#    my $marc = MARC::Record->new_from_xml($marcxml);
#    my ($tag, $ind1, $ind2, $code, $data);
#
#    $data = $marc->leader();
#    substr $data, 9, 1, ' ';
#    $marc->leader($charset->to_marc8(NFD($data))) if ($data);
#
#    foreach my $field ($marc->fields()) {
#        my $field_new;
#        $tag = $field->tag();
#        $tag = $charset->to_marc8(NFD($tag)) if ($tag);
#        if ($tag =~ m/00[1-9]/) { # control field
#            $data = $field->data();
#            $data = $charset->to_marc8(NFD($data)) if ($data);
#            $field_new = MARC::Field->new($tag, $data);
#        }
#        else {
#            $ind1 = $field->indicator(1);
#            $ind2 = $field->indicator(2);
#
#            $ind1 = $charset->to_marc8(NFD($ind1)) if ($ind1);
#            $ind2 = $charset->to_marc8(NFD($ind2)) if ($ind2);
#
#            my $firstSubfield = 1;
#            foreach my $subfield ($field->subfields()) {
#                ($code, $data) = @$subfield;
#                $code = $charset->to_marc8(NFD($code)) if ($code);
#                $data = $charset->to_marc8(NFD($data)) if ($data);
#                if ($firstSubfield) {
#                    $firstSubfield = 0;
#                    $field_new = MARC::Field->new($tag,
#                                                  $ind1, $ind2,
#                                                  $code, $data);
#                }
#                else {
#                    $field_new->add_subfields($code, $data);
#                }
#            }
#        }
#        $field->replace_with($field_new);
#    }
#
#    return $marc;
}


sub utf8_encode {
    my ($obj) = @_;

    my $obj_utf8;

    my $type = ref($obj);
    if ($type eq '') {
        utf8::encode($obj);
        $obj_utf8 = $obj;
    }
    elsif ($type eq 'ARRAY') {
        my @a;
        foreach my $i (@{$obj}) {
            push @a, utf8_encode($i);
        }
        $obj_utf8 = \@a;
    }
    elsif ($type eq 'HASH') {
        foreach my $i (sort keys %{$obj}) {
            $obj_utf8->{$i} = utf8_encode($obj->{$i});
        }
    }

    return $obj_utf8;
}


1;
