#!/usr/bin/perl -w

# ----------------------------------------------------------------------------
# Generate a dom-filter indexing stylesheet based upon an .abs file
# Should be called either this way
#
#   idzebra-abs2dom something.abs > something.xsl
#
# or in a streaming way
#
#   something | idzebra-abs2dom > something.xsl
#
# The output xslt stylesheet generally needs a little bit of tweaking to be
# ready for indexing. In particular, watch out for the precedence rules of
# xslt templates which work differently from xelm declarations in an .abs file!
#
# Good luck!

use strict;

# Namespace prefix used for MARC21 slim elements in the generated stylesheet.
my $marc_prefix = 'marc';

# Dispatch table keyed by the first word of an .abs line.
#   - a code reference is a handler called as handler(rule, match) whose
#     return value is used as the xpath match expression of the template;
#   - a plain 0 marks a declaration that is recognised but ignored.
my $supported_rules = {

    # Supported indexing types:
    melm => \&melm_handler,
    xelm => sub {
        my ($rule, $match) = @_;

        # An xelm pattern is handed back as it was given.
        return $match;
    },

    # Declarations to ignore:
    ( map { $_ => 0 } qw(attset encoding esetname marc name xpath) ),

};

# Stylesheet prologue: the XML declaration, the namespace declarations
# (the MARC prefix is interpolated from $marc_prefix), the output settings
# and the root template that wraps every result in a <z:record> element.
print <<"XSLT_PROLOGUE";
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:z="http://indexdata.com/zebra-2.0"
                xmlns:$marc_prefix="http://www.loc.gov/MARC21/slim"
                version="1.0">

  <xsl:output indent="yes"
        method="xml"
        version="1.0"
        encoding="UTF-8"/>

  <xsl:template match="/">
    <z:record>
      <xsl:apply-templates/>
    </z:record>
  </xsl:template>

XSLT_PROLOGUE


# Main loop: translate every indexing rule of the .abs input (the files named
# on the command line, or STDIN) into one <xsl:template> written to STDOUT.
# Problems are reported on STDERR and the offending line is skipped.
while (<>) {
    chomp;
    s/^\s+//;
    s/\s+$//;
    next unless length;     # blank line
    next if /^#/;           # comment line

    # A rule line reads:  <rule> <match pattern> <index>[,<index>...]
    my ($rule, $arguments) = split ' ', $_, 2;

    my $handler = $supported_rules->{$rule};

    unless ( defined $handler ) {
        print STDERR "$0: Unsupported indexing rule: '", $rule, "'\n\n";
        next;
    }

    # Recognised declarations that are to be ignored carry a plain 0 instead
    # of a code reference; they are dropped without a message.
    next unless ref($handler) eq 'CODE';

    # The index list is the last blank-separated token; everything in front
    # of it is the match pattern, which may itself contain blanks.
    my ($match, $index) = defined $arguments
                        ? ( $arguments =~ /^(.*\S)\s+(\S+)$/ )
                        : ();

    # Without both parts the same token would end up being used as pattern
    # and as index name, so refuse to emit a template for such a line.
    unless ( defined $index ) {
        print STDERR "$0: Malformed '", $rule, "' rule on input line ", $.,
                     ": expected a match pattern followed by an index list\n\n";
        next;
    }

    my $xpath   = $handler->($rule, $match);
    my @indexes = split /,/, $index;

    # To avoid screwing up the <xsl:template match="...."> instruction:
    # the expression lands inside a double-quoted XML attribute, where a
    # literal '"' would end the value and '&' / '<' are not allowed as is.
    $xpath =~ s/"/'/g;
    $xpath =~ s/&/&amp;/g;
    $xpath =~ s/</&lt;/g;

    print "  <xsl:template match=\"$xpath\">\n";
    print "    <z:index name=\"", join(" ", @indexes), "\">\n";
    print "      <xsl:value-of select=\".\"/>\n";
    print "    </z:index>\n";
    print "  </xsl:template>\n\n";
}

print "</xsl:stylesheet>\n";


# Build the xpath match expression for a 'melm' rule.
#
#   $rule   the rule keyword (not used here)
#   $match  the MARC element specification, written as field or
#           field$subfield
#
# Returns an xpath below the document element: a controlfield selected by
# tag when the field starts with '00', otherwise the subfield children of
# the datafield with that tag, narrowed to one subfield code when a
# subfield was given.
sub melm_handler {
    my ($rule, $match) = @_;

    # Everything up to the first '$' is the tag, the remainder (possibly
    # empty) is the subfield code.
    my ($tag, $code) = ($match =~ /([^\$]+)\$?(.*)/);

    # Tags starting with '00' are addressed as control fields; any subfield
    # part is ignored for them.
    if ( $tag =~ /^00/ ) {
        return sprintf q{/*/%s:controlfield[@tag='%s']}, $marc_prefix, $tag;
    }

    # Only restrict on @code when a subfield was actually specified.
    my $code_test = $code ne '' ? sprintf( q{[@code='%s']}, $code ) : '';

    return sprintf q{/*/%s:datafield[@tag='%s']/%s:subfield%s},
                   $marc_prefix, $tag, $marc_prefix, $code_test;
}



