package pt.ist.renderers.extensions.htmlEditor;

import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import javax.servlet.http.HttpServletRequest;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;

import pt.ist.renderers.plugin.RenderersRequestProcessorImpl;

/**
 * Tidy-based converter that filters the parsed document before printing it:
 * forbidden elements are dropped, untrusted attributes are removed, and links
 * and images that point to private application URLs are neutralized.
 */
public class SafeHtmlConverter extends TidyConverter {

    private static final String TIDY_PROPERTIES = "HtmlEditor-Tidy-MathJax.properties";

    /**
     * Default serial id.
     */
    private static final long serialVersionUID = 1L;

    /** A numerical entity whose leading ampersand was escaped by tidy. */
    private static final Pattern ESCAPED_NUMERIC_ENTITY = Pattern.compile("&amp;#([0-9xA-Fa-f]+);");

    /**
     * Elements that are removed together with their whole subtree.
     * NOTE(review): "object" and "embed" are not in this list - confirm
     * whether that is intentional.
     */
    private static final List<String> FORBIDDEN_ELEMENTS = Arrays.asList("script", "iframe", "element", "applet",
            "form", "frame", "frameset", "link", "style");

    /** Context-relative paths that user supplied content must not reference. */
    private static final Pattern[] FORBIDDEN_PATHS = { Pattern.compile("^/dotIstPortal\\.do.*"),
            Pattern.compile("^/home\\.do.*") };

    /**
     * @return the name of the Tidy properties resource used by this converter
     */
    public String getTidyProperties() {
        return TIDY_PROPERTIES;
    }

    /**
     * Filters the parsed document in place and then pretty-prints it.
     *
     * @param outStream
     *            the stream that receives the printed document
     * @param tidy
     *            the Tidy instance used for printing
     * @param document
     *            the parsed document; it is modified by the filtering
     */
    @Override
    protected void parseDocument(OutputStream outStream, Tidy tidy, Document document) {
        filterDocument(document);
        tidy.pprint(document, outStream);
    }

    /**
     * Restores numerical entities in the printed output.
     *
     * @param output
     *            the text printed by tidy
     * @return the output with every <code>&amp;amp;#N;</code> turned back into
     *         <code>&amp;#N;</code>
     */
    @Override
    protected String filterOutput(String output) {
        // tidy escapes the ampersand when used with numerical entities.
        return ESCAPED_NUMERIC_ENTITY.matcher(output).replaceAll("&#$1;");
    }

    /**
     * Recursively filters a node: documents only have their children
     * filtered, elements have their attributes and then their children
     * filtered, and every other node type is left untouched.
     */
    private void filterDocument(Node node) {
        switch (node.getNodeType()) {
        case Node.DOCUMENT_NODE:
            filterChildren(node);
            break;
        case Node.ELEMENT_NODE:
            Element element = (Element) node;
            filterAttributes(element);
            filterChildren(element);
            break;
        default:
            break;
        }
    }

    /**
     * Removes every attribute of the element that is not trusted.
     */
    private void filterAttributes(Element element) {
        NamedNodeMap attributes = element.getAttributes();
        if (attributes == null) {
            return;
        }

        // The attribute map is live: removing entries while iterating over it
        // by index would skip the attribute that follows each removed one, so
        // the decisions are taken over a snapshot.
        List<Attr> snapshot = new ArrayList<Attr>(attributes.getLength());
        for (int i = 0; i < attributes.getLength(); i++) {
            snapshot.add((Attr) attributes.item(i));
        }

        for (Attr attribute : snapshot) {
            if (!isTrustedAttribute(element, attribute)) {
                element.removeAttribute(attribute.getNodeName());
            }
        }
    }

    /**
     * Filters the trusted children of the node and removes the others.
     */
    private void filterChildren(Node node) {
        for (Node child : snapshotOfChildren(node)) {
            if (isTrustedNode(child)) {
                filterDocument(child);
            } else {
                node.removeChild(child);
            }
        }
    }

    /**
     * Copies the (live) child list of a node so that children can be removed
     * safely while iterating.
     */
    private List<Node> snapshotOfChildren(Node node) {
        NodeList children = node.getChildNodes();
        List<Node> snapshot = new ArrayList<Node>(children.getLength());
        for (int i = 0; i < children.getLength(); i++) {
            snapshot.add(children.item(i));
        }
        return snapshot;
    }

    /**
     * Decides whether a child node may stay in the document. For
     * <code>img</code> elements that are kept, the <code>longdesc</code>,
     * <code>usemap</code> and <code>ismap</code> attributes are removed as a
     * side effect.
     *
     * @return <code>false</code> when the node must be removed from its parent
     */
    private boolean isTrustedNode(Node child) {
        switch (child.getNodeType()) {
        case Node.PROCESSING_INSTRUCTION_NODE:
        case Node.TEXT_NODE:
        case Node.COMMENT_NODE:
        case Node.DOCUMENT_TYPE_NODE:
        case Node.ENTITY_NODE:
        case Node.ENTITY_REFERENCE_NODE:
        case Node.NOTATION_NODE:
            return true;
        case Node.ELEMENT_NODE:
            // processed below
            break;
        case -1:
            // HACK: when xHTML is requested JTidy inserts a node equivalent to
            // the XML declaration (<?xml version="..." encoding="..."?>), a
            // node with the type org.w3c.tidy.Node.XML_DECL. Nevertheless,
            // this node answers with -1 to getNodeType(). This node needs to
            // be accepted since nothing of the document is printed if this
            // node is removed.
            return isXmlDeclaration(child);
        default:
            return false;
        }

        Element element = (Element) child;
        // Locale.ROOT: with some default locales (e.g. Turkish) "SCRIPT" would
        // not lower-case to "script" and would slip through the list.
        String name = element.getNodeName().toLowerCase(Locale.ROOT);

        if (FORBIDDEN_ELEMENTS.contains(name)) {
            return false;
        }

        if (name.equals("img")) {
            String source = element.getAttribute("src");

            try {
                URL url = new URL(source);

                if (isPrivateURL(url)) {
                    return false;
                }
            } catch (MalformedURLException e) {
                // images without an absolute, well formed source are dropped
                return false;
            }

            element.removeAttribute("longdesc");
            element.removeAttribute("usemap");
            element.removeAttribute("ismap");
        }

        return true;
    }

    /**
     * Checks whether the node carries both the <code>encoding</code> and the
     * <code>version</code> attributes.
     */
    private boolean isXmlDeclaration(Node node) {
        NamedNodeMap attributes = node.getAttributes();
        return attributes != null && attributes.getNamedItem("encoding") != null
                && attributes.getNamedItem("version") != null;
    }

    /**
     * Checks whether the URL targets the server and context path of the
     * current request.
     */
    private boolean isRelative(URL url) {
        HttpServletRequest currentRequest = RenderersRequestProcessorImpl.getCurrentRequest();
        String serverName = currentRequest.getServerName();

        // host names are case-insensitive
        return serverName != null && serverName.equalsIgnoreCase(url.getHost())
                && url.getPath().startsWith(currentRequest.getContextPath());
    }

    /**
     * Decides whether an attribute may stay in its element. Accepted
     * <code>class</code> attributes are rewritten to contain only the allowed
     * classes. When an <code>href</code> points to a private URL the whole
     * element is replaced, in its parent, by a text node holding the URL.
     *
     * @param parent
     *            the element that owns the attribute
     * @param attribute
     *            the attribute to check
     * @return <code>false</code> when the attribute must be removed
     */
    private boolean isTrustedAttribute(Node parent, Attr attribute) {
        String name = attribute.getName().toLowerCase(Locale.ROOT);
        String value = attribute.getValue() == null ? "" : attribute.getValue();

        if (name.startsWith("on")) {
            // intrinsic events; the prefix check also covers handlers that an
            // explicit list would miss (oninput, onmouseenter, ...)
            return false;
        }

        if (hasJavascriptScheme(value)) {
            // javascript: doSomething()
            return false;
        }

        if (name.equals("class")) {
            // don't allow to use application styles
            String allowedClasses = filterAllowedClasses(value);

            if (allowedClasses.length() > 0) {
                attribute.setValue(allowedClasses);
                return true;
            }
            return false;
        }

        if (name.equals("href")) {
            try {
                URL url = new URL(value);

                if (isPrivateURL(url)) {
                    replaceWithText(parent, value);
                    return false;
                }
                return true;
            } catch (MalformedURLException e) {
                return false;
            }
        }

        return true;
    }

    /**
     * Checks whether the value starts with the <code>javascript:</code>
     * scheme, ignoring case as well as any whitespace or control characters,
     * which could otherwise be used to disguise the scheme.
     */
    private boolean hasJavascriptScheme(String value) {
        StringBuilder compact = new StringBuilder(value.length());
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c > ' ') {
                compact.append(c);
            }
        }
        return compact.toString().toLowerCase(Locale.ROOT).startsWith("javascript:");
    }

    /**
     * Empties the given node and replaces it, in its parent, by a text node
     * with the given text.
     */
    private void replaceWithText(Node link, String text) {
        // iterate over a snapshot: the child list is live and removing by
        // index would skip every other child
        for (Node child : snapshotOfChildren(link)) {
            link.removeChild(child);
        }

        Node linkParent = link.getParentNode();
        if (linkParent != null) {
            Text replacement = link.getOwnerDocument().createTextNode(text);
            linkParent.replaceChild(replacement, link);
        }
    }

    /**
     * Keeps only the classes that start with <code>pub-</code>.
     *
     * @param value
     *            the value of a <code>class</code> attribute
     * @return the allowed classes separated by a single space, or an empty
     *         string when none is allowed
     */
    private String filterAllowedClasses(String value) {
        StringBuilder classes = new StringBuilder();

        // split on any whitespace so that a tab or line break cannot glue a
        // forbidden class to an allowed one
        for (String cssClass : value.trim().split("\\s+")) {
            if (cssClass.startsWith("pub-")) {
                if (classes.length() > 0) {
                    classes.append(' ');
                }
                classes.append(cssClass);
            }
        }

        return classes.toString();
    }

    /**
     * Checks whether the URL targets this application and its path, without
     * the context path, matches one of the forbidden paths.
     */
    private boolean isPrivateURL(URL url) {
        if (!isRelative(url)) {
            return false;
        }

        String path = url.getPath();

        HttpServletRequest currentRequest = RenderersRequestProcessorImpl.getCurrentRequest();
        String contextPath = currentRequest.getContextPath();

        if (path.startsWith(contextPath)) {
            path = path.substring(contextPath.length());
        }

        for (Pattern forbiddenPath : FORBIDDEN_PATHS) {
            if (forbiddenPath.matcher(path).matches()) {
                return true;
            }
        }

        return false;
    }
}