Split A Node If It Has Certain Children
Solution 1:
It looks like your input is a little inconsistent with your output. (Is that the expected output, or the output you're getting now?) Chunks a-02 and a-03 have no <highlight> elements in the input, yet the output has <span class="highlight..."> elements. Also, chunk a-03 has text duplicated after the blockquote.
I believe I've produced a working solution that does everything in your example. Could you give this a try?
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="/">
  <!-- Entry point: emits the HTML page skeleton and places whatever the
       input document produces inside the body. -->
  <html>
    <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
      <title>Test</title>
    </head>
    <body>
      <!-- Process the children of the document node in the default mode. -->
      <xsl:apply-templates select="node()"/>
    </body>
  </html>
</xsl:template>
<xsl:template match="p | div">
  <!-- Child elements that split the surrounding block. -->
  <xsl:variable name="splitters" select="note | pgBreak | quote"/>
  <!-- The first child that is not itself a splitter: start of the leading
       run of ordinary content. -->
  <xsl:variable name="leadingContent"
                select="node()[not(self::note or self::pgBreak or self::quote)][1]"/>
  <!-- The node immediately after each splitter, kept only when it is not a
       splitter too: start of each later run of ordinary content. -->
  <xsl:variable name="contentAfterSplitter"
                select="$splitters/following-sibling::node()[1][not(self::note or self::pgBreak or self::quote)]"/>
  <!-- The union is visited in document order, so splitters and content runs
       are emitted interleaved as they appear in the input. -->
  <xsl:apply-templates select="$splitters | $leadingContent | $contentAfterSplitter"
                       mode="sectChild"/>
</xsl:template>
<!-- ChunkId: adds a data-chunkid attribute to the element currently being
     built, taken from the xml:id of the context node's ancestor that sits
     directly under a root element. Emits nothing when no such xml:id exists. -->
<xsl:template name="ChunkId">
  <xsl:variable name="chunk" select="ancestor::*[parent::root]/@xml:id"/>
  <xsl:if test="$chunk">
    <xsl:attribute name="data-chunkid">
      <xsl:value-of select="$chunk"/>
    </xsl:attribute>
  </xsl:if>
</xsl:template>
<!-- Splitting elements (notes, page breaks, quotes).
     A page break becomes a standalone marker div labelled with its page id. -->
<xsl:template match="pgBreak" mode="sectChild">
  <div id="pg-{@pgId}">
    <xsl:text>Page </xsl:text>
    <xsl:value-of select="@pgId"/>
  </div>
</xsl:template>
<xsl:template match="quote | note" mode="sectChild">
  <!-- Quotes and notes emit no markup here; their children are processed in
       the default mode, and the wrapper is produced later by the Nest
       template when the inner content is rebuilt. -->
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- Handles the first node of a run of ordinary content (anything outside the
     splitting elements). The whole run is gathered through the buildContent
     mode and, unless it is blank, wrapped in a rebuilt copy of its ancestors. -->
<xsl:template match="text() | highlight | note.ref | super" mode="sectChild">
  <xsl:variable name="blockContent">
    <xsl:apply-templates select="self::node()" mode="buildContent"/>
  </xsl:variable>
  <!-- Whitespace-only runs produce no output at all. -->
  <xsl:if test="normalize-space($blockContent) != ''">
    <xsl:call-template name="Nest">
      <!-- Every ancestor element except root, outermost first. -->
      <xsl:with-param name="hierarchy" select="ancestor::*[not(self::root)]"/>
      <xsl:with-param name="content" select="$blockContent"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Nest: rebuilds the chain of ancestor elements around a block of content,
     working from the outermost ancestor inwards.
       topLevel  - true while no wrapper has been emitted yet; a plain element
                   only receives the chunk id while this is true
       hierarchy - node-set of ancestors still to be emitted, outermost first
       content   - the already-built content placed at the innermost level -->
<xsl:template name="Nest">
  <xsl:param name="topLevel" select="true()"/>
  <xsl:param name="hierarchy"/>
  <xsl:param name="content"/>

  <!-- Split the chain into the outermost ancestor and everything below it. -->
  <xsl:variable name="outer" select="$hierarchy[1]"/>
  <xsl:variable name="inner" select="$hierarchy[position() != 1]"/>

  <!-- While a quote or note is still further down the chain, plain ancestors
       above it are not emitted as tags. -->
  <xsl:variable name="deferTags" select="count($inner[self::quote or self::note]) &gt; 0"/>

  <!-- Result of the recursive step, captured first and copied into whichever
       wrapper is chosen below. -->
  <xsl:variable name="nested">
    <xsl:if test="$hierarchy">
      <xsl:call-template name="Nest">
        <xsl:with-param name="topLevel" select="$topLevel and $deferTags"/>
        <xsl:with-param name="hierarchy" select="$inner"/>
        <xsl:with-param name="content" select="$content"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:variable>

  <xsl:choose>
    <!-- Bottom of the chain: emit the content itself. -->
    <xsl:when test="not($hierarchy)">
      <xsl:copy-of select="$content"/>
    </xsl:when>
    <!-- A quote is rendered as a blockquote carrying the chunk id. -->
    <xsl:when test="$outer[self::quote]">
      <blockquote>
        <xsl:call-template name="ChunkId"/>
        <xsl:copy-of select="$nested"/>
      </blockquote>
    </xsl:when>
    <!-- A note is rendered as a div identified by the note's id. -->
    <xsl:when test="$outer[self::note]">
      <div id="note-{$outer/@id}">
        <xsl:call-template name="ChunkId"/>
        <xsl:copy-of select="$nested"/>
      </div>
    </xsl:when>
    <!-- Plain ancestor above a pending quote/note: pass the nested result
         through without a tag of its own. -->
    <xsl:when test="$deferTags">
      <xsl:copy-of select="$nested"/>
    </xsl:when>
    <!-- Plain ancestor: recreate it under its original name. -->
    <xsl:otherwise>
      <xsl:element name="{name($outer)}">
        <xsl:if test="$topLevel">
          <xsl:call-template name="ChunkId"/>
        </xsl:if>
        <xsl:copy-of select="$nested"/>
      </xsl:element>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="node()" mode="buildContent">
  <!-- Walks a run of siblings one node at a time, emitting each through the
       contentOutput mode until a splitting element is reached. -->
  <xsl:choose>
    <xsl:when test="self::note or self::quote or self::pgBreak">
      <!-- A splitting element ends the run: emit nothing and stop walking. -->
    </xsl:when>
    <xsl:otherwise>
      <!-- Nodes whose string value is blank are skipped but do not end the run. -->
      <xsl:if test="normalize-space(.) != ''">
        <xsl:apply-templates select="self::node()" mode="contentOutput"/>
      </xsl:if>
      <!-- Continue with the immediately following sibling, if there is one. -->
      <xsl:apply-templates select="following-sibling::node()[1]" mode="buildContent"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Leaf-level content: text, note references, superscript, highlight.
     Text nodes are written to the output unchanged. -->
<xsl:template match="text()" mode="contentOutput">
  <xsl:value-of select="."/>
</xsl:template>
<xsl:template match="note.ref" mode="contentOutput">
  <!-- A note reference becomes a span that carries the reference's id. -->
  <span class="noteRef">
    <xsl:attribute name="id">
      <xsl:value-of select="@id"/>
    </xsl:attribute>
    <xsl:apply-templates select="node()" mode="contentOutput"/>
  </span>
</xsl:template>
<xsl:template match="super" mode="contentOutput">
  <!-- Superscript maps directly onto the HTML sup element. -->
  <sup>
    <xsl:apply-templates select="node()" mode="contentOutput"/>
  </sup>
</xsl:template>
<xsl:template match="highlight" mode="contentOutput">
  <!-- The class is the element name and its rend attribute joined by a
       hyphen, e.g. highlight-italic. -->
  <span class="{name()}-{@rend}">
    <xsl:apply-templates select="node()" mode="contentOutput"/>
  </span>
</xsl:template>
</xsl:stylesheet>
I believe the unclosed meta tags are a result of using method="html". You may need to use method="xml" to get closed meta tags. With method="html", the above transform produces the following output from your sample input:
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Test</title>
</head>
<body>
<p data-chunkid="a-01"><span class="highlight-italic">Bacon ipsum dolor sit amet</span> bacon chuck pastrami swine pork rump, shoulder beef ribs doner tri-tip
tongue. Tri-tip ground round short ribs capicola meatloaf shank drumstick short loin pastrami t-
bone. Sirloin turducken short ribs t-bone andouille strip steak pork loin corned beef hamburger
bacon filet mignon pork chop tail.
<span class="noteRef" id="0001">
<sup>1</sup>
</span></p>
<div id="note-0001" data-chunkid="a-01">
<p>
You may need to consult a <span class="highlight-italic">latin</span> butcher. Good Luck.
</p>
</div>
<p data-chunkid="a-01">
Pork loin </p>
<div id="pg-01">Page 01</div>
<p data-chunkid="a-01"> ribeye bacon pastrami drumstick sirloin, shoulder pig jowl. Salami brisket rump ham, tail
hamburger strip steak pig ham hock short ribs jerky shank beef spare ribs. Capicola short ribs swine
beef meatball jowl pork belly. Doner leberkas short ribs, flank chuck pancetta bresaola bacon ham
hock pork hamburger fatback.
</p>
<p data-chunkid="a-02">
Bacon ipsum dolor sit amet bacon chuck pastrami swine pork rump, shoulder beef ribs doner tri-tip
tongue. Tri-tip ground round short ribs capicola meatloaf shank drumstick short loin pastrami t-
bone. Sirloin turducken short ribs t-bone andouille strip steak pork loin corned beef hamburger
bacon filet mignon pork chop tail.
</p>
<p data-chunkid="a-03">
Bacon ipsum dolor sit amet bacon chuck pastrami swine pork rump, shoulder beef ribs doner tri-tip
tongue.
</p>
<blockquote data-chunkid="a-03">
<p>
Tri-tip ground round short ribs capicola meatloaf shank drumstick short loin pastrami t-
bone. Sirloin </p>
</blockquote>
<div id="pg-02">Page 02</div>
<blockquote data-chunkid="a-03">
<p>turducken short ribs t-bone andouille strip steak pork loin corned beef hamburger
bacon filet mignon pork chop tail.
</p>
</blockquote>
</body>
</html>
By changing the method to "xml" and manually adding the meta element to the transform, you can obtain the same result, but with the following <head>:
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Test</title>
</head>
Post a Comment for "Split A Node If It Has Certain Children"