Java tutorial
/***************************************************************************
   Copyright 2014 Emily Estes

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
***************************************************************************/
options {
    LOOKAHEAD = 3;
    FORCE_LA_CHECK = true;
    STATIC = false;
}

PARSER_BEGIN(MarkupParser)
package net.metanotion.contentstore.markup;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Stack;

import org.apache.commons.text.StringEscapeUtils;

import net.metanotion.util.MapDictionaryAdapter;
import net.metanotion.web.html.Html;
import net.metanotion.web.html.RawString;
import net.metanotion.web.html.SafeString;
import net.metanotion.web.html.Tag;
import net.metanotion.contentstore.markup.TagTransformer;
import net.metanotion.contentstore.markup.UriEncoder;

/**
 * Parser for a whitelisted subset of HTML mixed with plain text. Line breaks in the text are
 * folded into paragraphs (and optionally {@code <br>} tags) after parsing, and every resulting
 * node is passed through the caller-supplied {@link TagTransformer}s.
 */
final class MarkupParser {
    /**
     * Sentinel inserted for every line break found in the input text. It is compared by
     * reference in {@link #foldTags}, so a {@code <br>} written by the user is never confused
     * with it.
     */
    private static final Html _BR = new net.metanotion.web.html.Tag("br");

    /** Tags that terminate the paragraph in progress and are emitted at the top level. */
    private static final String[] PARAGRAPH_BREAK_TAGS = {
        "div", "h1", "h2", "h3", "h4", "h5", "h6", "hr"
    };

    /**
     * Tells whether a tag name is one of the paragraph-breaking tags.
     *
     * @param name the tag name to test; may be null, in which case the answer is false.
     * @return true if the name matches a paragraph-breaking tag, ignoring case.
     */
    private static boolean isParagraphBreak(final String name) {
        for (final String candidate : PARAGRAPH_BREAK_TAGS) {
            if (candidate.equalsIgnoreCase(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether an attribute's value is a URI that must go through the {@link UriEncoder}.
     *
     * @param tag the name of the tag carrying the attribute.
     * @param attr the attribute name.
     * @return true for {@code img/src} and {@code a/href}, ignoring case.
     */
    private static boolean isUriAttribute(final String tag, final String attr) {
        return ("img".equalsIgnoreCase(tag) && "src".equalsIgnoreCase(attr))
            || ("a".equalsIgnoreCase(tag) && "href".equalsIgnoreCase(attr));
    }

    /**
     * Attribute whitelist: {@code class} on any tag; {@code src}, {@code width}, {@code height},
     * {@code alt}, {@code srcset} and {@code sizes} on {@code img}; {@code href} on {@code a}.
     *
     * @param tag the name of the tag carrying the attribute.
     * @param attr the attribute name.
     * @return true if the attribute may be kept on the tag.
     */
    private static boolean isAllowedAttribute(final String tag, final String attr) {
        if ("class".equalsIgnoreCase(attr)) {
            return true;
        }
        if ("img".equalsIgnoreCase(tag)) {
            return "src".equalsIgnoreCase(attr)
                || "width".equalsIgnoreCase(attr)
                || "height".equalsIgnoreCase(attr)
                || "alt".equalsIgnoreCase(attr)
                || "srcset".equalsIgnoreCase(attr)
                || "sizes".equalsIgnoreCase(attr);
        }
        return "a".equalsIgnoreCase(tag) && "href".equalsIgnoreCase(attr);
    }

    /**
     * Applies the pending line breaks to the paragraph being built.
     *
     * @param transformers transformers applied to a paragraph when it is flushed.
     * @param brCt number of consecutive line breaks seen since the last content node.
     * @param currentPara the paragraph under construction.
     * @param output the list receiving finished paragraphs.
     * @param allowBrs whether a single line break becomes a br tag (true) or a space (false).
     * @return the list to keep appending to: the same paragraph, or a fresh empty one when the
     *     current paragraph was flushed to {@code output}.
     */
    private static ArrayList<Html> newPara(final Map<String, TagTransformer> transformers,
            final int brCt,
            ArrayList<Html> currentPara,
            final ArrayList<Html> output,
            final boolean allowBrs) {
        // Nothing to separate: leading breaks and "no break at all" leave the paragraph as is.
        if (currentPara.isEmpty() || (brCt <= 0)) {
            return currentPara;
        }
        if (brCt == 1) {
            // A single break stays inside the paragraph.
            if (allowBrs) {
                currentPara.add(_BR);
            } else {
                currentPara.add(new SafeString(" "));
            }
            return currentPara;
        }
        // Two or more breaks close the paragraph.
        output.add(transform(transformers, new Tag("p").addAll(currentPara)));
        return new ArrayList<Html>();
    }

    /**
     * Runs a single node through the transformer registered for it.
     *
     * Text nodes are looked up under the key {@code "<TEXT>"}, tags under their lower-cased
     * name; in both cases {@code "*"} is the fallback key.
     *
     * @param transformers the registered transformers.
     * @param tag the node to transform.
     * @return the transformer's result, or the node itself when no transformer applies.
     */
    private static Html transformTag(final Map<String, TagTransformer> transformers, final Html tag) {
        final String key;
        if (tag instanceof SafeString) {
            key = "<TEXT>";
        } else {
            // Locale.ROOT keeps the lookup key independent of the JVM's default locale.
            key = tag.tag().toLowerCase(java.util.Locale.ROOT);
        }
        TagTransformer t = transformers.get(key);
        if (t == null) {
            t = transformers.get("*");
        }
        if (t == null) {
            return tag;
        }
        return t.transform(tag);
    }

    /**
     * Transforms a node and all of its Html descendants, children first.
     *
     * @param transformers the registered transformers.
     * @param tag the root of the subtree to transform.
     * @return the transformed subtree.
     */
    private static Html transform(final Map<String, TagTransformer> transformers, final Html tag) {
        if (tag instanceof SafeString) {
            return transformTag(transformers, tag);
        }
        final ArrayList<Object> children = new ArrayList<Object>();
        final Iterator<Object> it = tag.children();
        while (it.hasNext()) {
            final Object child = it.next();
            if (child instanceof Html) {
                children.add(transform(transformers, (Html) child));
            } else {
                children.add(child);
            }
        }
        // Rebuild the tag around the transformed children, copying its attributes.
        final Map<String, Object> attribs = new HashMap<String, Object>();
        for (final String key : (Set<String>) tag.keySet()) {
            attribs.put(key, tag.get(key));
        }
        return transformTag(transformers, new Tag(tag.tag(), new MapDictionaryAdapter(attribs), children));
    }

    /**
     * Groups the flat list of top-level nodes into paragraphs.
     *
     * @param raw the top-level nodes produced by the parser, with {@link #_BR} marking each
     *     line break of the input.
     * @param allowBrs whether a single line break becomes a br tag (true) or a space (false).
     * @param transformers transformers applied to every emitted node.
     * @return the transformed paragraphs and paragraph-breaking tags, in input order.
     */
    private static ArrayList<Html> foldTags(final ArrayList<Html> raw,
            final boolean allowBrs,
            final Map<String, TagTransformer> transformers) {
        final ArrayList<Html> output = new ArrayList<Html>();
        ArrayList<Html> currentPara = new ArrayList<Html>();
        int brCt = 0;
        for (final Html t : raw) {
            if (t == _BR) {
                /* This is reference equality *because* we are respecting "br" tags in the
                   input already. The only way for a <br/> tag to equal *this* value is
                   because we generated it, not the end user. */
                brCt++;
            } else if (isParagraphBreak(t.tag())) {
                // Behave as if a blank line preceded the tag, then emit the tag on its own.
                currentPara = newPara(transformers, 2, currentPara, output, allowBrs);
                brCt = 0;
                output.add(transform(transformers, t));
            } else {
                currentPara = newPara(transformers, brCt, currentPara, output, allowBrs);
                brCt = 0;
                currentPara.add(t);
            }
        }
        // Flush whatever is left in the final paragraph.
        newPara(transformers, 2, currentPara, output, allowBrs);
        return output;
    }
}
PARSER_END(MarkupParser)

/*
    Tag whitelist
        a, p, strong, em, ul, ol, li, dl, dt, dd, blockquote, h1-6, hr, pre, code, b, mark, q, s,
        span, sub, sup, div, u, img, br

    "p break" tags
        h1-6, div, hr
*/

/*
    OPEN and TAG_NAME are private (#) regular expressions: they exist only as building blocks
    for the tokens below and are never matched on their own. A "<" that does not start a
    whitelisted tag is therefore lexed as PLAIN text.
*/
TOKEN : {
    < #OPEN : "<" >
  | < #TAG_NAME : <OPEN> (
          "a" | "p" | "strong" | "em" | "ul" | "ol" | "li" | "dl" | "dt" | "dd" | "blockquote"
        | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr" | "pre" | "code" | "b" | "mark" | "q"
        | "s" | "span" | "sub" | "sup" | "div" | "u" | "img" | "br"
      ) >
}

/* Opening tag followed by whitespace: attributes may follow, so switch to INTAG. */
TOKEN : { < TAG : <TAG_NAME> ( " " | "\t" | "\n" | "\r" ) > : INTAG }

/* Opening tag without attributes. */
TOKEN : { < NORMAL_TAG : <TAG_NAME> ">" > }

/* Self-closing tag without attributes. */
TOKEN : { < SELF_TAG : <TAG_NAME> "/>" > }

/* Line break in text. */
TOKEN : { < BR : ( "\n" | "\r\n" ) > }

/* Closing tag. The alternatives are parenthesised so that the leading "<" applies to all of them. */
TOKEN : {
    < CLOSE_TAG : <OPEN> (
          "/a>" | "/p>" | "/strong>" | "/em>" | "/ul>" | "/ol>" | "/li>" | "/dl>" | "/dt>" | "/dd>"
        | "/blockquote>" | "/h1>" | "/h2>" | "/h3>" | "/h4>" | "/h5>" | "/h6>" | "/hr>" | "/pre>"
        | "/code>" | "/b>" | "/mark>" | "/q>" | "/s>" | "/span>" | "/sub>" | "/sup>" | "/div>"
        | "/u>" | "/img>" | "/br>"
      ) >
}

/* Any other single character. */
TOKEN : { < PLAIN : ~[] > }

<INTAG> SKIP : { " " | "\t" | "\n" | "\r" }
<INTAG> TOKEN : { < ATTR : ["a"-"z","A"-"Z","_","-",":"] ( ["a"-"z","A"-"Z","0"-"9","_","-",":"] )* > }
<INTAG> TOKEN : { < EQ : "=" > }
<INTAG> TOKEN : { < ATTR_VAL : ( "\"" ( ~["\""] )* "\"" ) | ( "'" ( ~["'"] )* "'" ) > }
<INTAG> TOKEN : { < TAGEND : ">" > : DEFAULT }
<INTAG> TOKEN : { < SELFCLOSE : "/>" > : DEFAULT }

/**
 * Entry point: parses the whole input and returns the paragraph-folded, transformed content.
 *
 * @param allowBrs whether a single line break becomes a br tag (true) or a space (false).
 * @param encoder encoder applied to the values of img/src and a/href attributes.
 * @param transformers transformers applied to every emitted node.
 */
Iterable<Html> Content(final boolean allowBrs, final UriEncoder encoder, final Map<String,TagTransformer> transformers) :
{
    final ArrayList<Html> raw = new ArrayList<Html>();
    final Stack<ArrayList<Html>> tagStack = new Stack<ArrayList<Html>>();
    // The child list currently being appended to; starts at the top level.
    ArrayList<Html> current = raw;
}
{
    (
        Raw(current)
      | current = Tag(encoder, current, tagStack)
      | current = SlashTag(current, tagStack)
    )*
    <EOF>
    { return foldTags(raw, allowBrs, transformers); }
}

/**
 * Handles the tags that carry no attributes: closing tags, plain opening tags and plain
 * self-closing tags.
 *
 * @param current the child list currently being appended to.
 * @param tagStack the child lists of the enclosing open tags.
 * @return the child list subsequent content must be appended to.
 */
ArrayList<Html> SlashTag(final ArrayList<Html> current, final Stack<ArrayList<Html>> tagStack) :
{
    Token tag = null;
}
{
    (
        <CLOSE_TAG>
        {
            // A closing tag pops one level whatever its name is; with nothing open it is ignored.
            if (tagStack.empty()) {
                return current;
            } else {
                return tagStack.pop();
            }
        }
    )
  | (
        tag = <NORMAL_TAG>
        {
            final ArrayList<Html> children = new ArrayList<Html>();
            // Strip the leading "<" and the trailing ">".
            final Tag start = new Tag(tag.image.substring(1, tag.image.length() - 1),
                new MapDictionaryAdapter<String,Object>(new HashMap<String,Object>()),
                children);
            current.add(start);
            tagStack.push(current);
            return children;
        }
    )
  | (
        tag = <SELF_TAG>
        {
            // Strip the leading "<" and the trailing "/>".
            current.add(new Tag(tag.image.substring(1, tag.image.length() - 2)));
            return current;
        }
    )
}

/**
 * Appends one piece of plain content: the line-break sentinel for a line break, or a one
 * character text node for anything else.
 *
 * @param raw the child list to append to.
 */
void Raw(final ArrayList<Html> raw) :
{
    Token tok;
}
{
    ( <BR> { raw.add(_BR); } )
  | ( tok = <PLAIN> { raw.add(new SafeString(tok.image)); } )
}

/**
 * Handles an opening tag that is followed by whitespace and, possibly, attributes.
 *
 * @param encoder encoder applied to the values of img/src and a/href attributes.
 * @param raw the child list currently being appended to.
 * @param tagStack the child lists of the enclosing open tags.
 * @return the new tag's child list, or {@code raw} when the tag was self-closed.
 */
ArrayList<Html> Tag(final UriEncoder encoder, final ArrayList<Html> raw, final Stack<ArrayList<Html>> tagStack) :
{
    Token tag;
    String tagName;
    final HashMap<String,Object> attrs = new HashMap<String,Object>();
    boolean close = false;
}
{
    tag = <TAG>
    {
        // The image is "<name" plus one whitespace character.
        tagName = tag.image.substring(1).trim();
    }
    ( Attrs(tagName, encoder, attrs) )*
    ( <TAGEND> | <SELFCLOSE> { close = true; } )
    {
        final ArrayList<Html> children = new ArrayList<Html>();
        raw.add(new Tag(tagName, new MapDictionaryAdapter<String,Object>(attrs), children));
        if (!close) {
            tagStack.push(raw);
            return children;
        } else {
            return raw;
        }
    }
}

/**
 * Parses one attribute and records it if it is whitelisted for the tag and has a value.
 *
 * @param tag the name of the tag the attribute belongs to.
 * @param encoder encoder applied to the values of img/src and a/href attributes.
 * @param attrs the map receiving accepted attributes, keyed by the name as written.
 */
void Attrs(final String tag, final UriEncoder encoder, final HashMap<String,Object> attrs) :
{
    Token name = null;
    Token val = null;
}
{
    name = <ATTR> [ <EQ> val = <ATTR_VAL> ]
    {
        // Attributes without a value, or outside the whitelist, are silently dropped.
        if ((val != null) && isAllowedAttribute(tag, name.image)) {
            // Strip the surrounding quotes.
            final String value = val.image.substring(1, val.image.length() - 1);
            if (isUriAttribute(tag, name.image)) {
                attrs.put(name.image, encoder.encode(value));
            } else {
                attrs.put(name.image, value);
            }
        }
    }
}