package net.sc.common.util;import java.io.File;import java.util.ArrayList;import java.util.LinkedHashMap;import java.util.List;import java.util.Map;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.jsoup.Jsoup;import org.jsoup.nodes.Attribute;import org.jsoup.nodes.Attributes;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.nodes.Node;import org.jsoup.select.Elements;/** * @author Aaron * @createTime 2015-08-07 * @desc 支持多属性的选择 */public class JsoupUtil { public static final String _REGEX = "\\[(?\\w+)=(? \\w+)\\]"; Pattern p; public JsoupUtil() { p = Pattern.compile(_REGEX); } public Elements findElement(Element queryElement, String... attrs) { return this.findElement(queryElement, false, attrs); } // TODO only只匹配了等于的情况,未匹配开始和结束的情况 public Elements findElement(Element queryElement, boolean only, String... attrs) { Elements result = new Elements(); if (queryElement == null) { return result; } if (attrs.length == 0) { return result; } Elements searchElements = new Elements(queryElement); Elements middleResults = new Elements(); for (int i = 0; i < attrs.length; i++) { for (Element search : searchElements) { middleResults.addAll(search.select(attrs[i])); } if (middleResults.size() == 0 && i < attrs.length - 1) { return result; } searchElements = middleResults; middleResults = new Elements(); } result = searchElements; if (!only) { return result; } Map searchAttrs = new LinkedHashMap (); for (int i = 0; i < attrs.length; i++) { Matcher m = p.matcher(attrs[i]); // 未发现属性的情况下,返回原结果集 if (m.find()) { searchAttrs.put(m.group("name"), m.group("value")); } else { return result; } } result = new Elements(); for (Element element : searchElements) { Attributes attriList = element.attributes(); if (attriList.size() != searchAttrs.size()) { continue; } boolean pass = false; for (Attribute attri : attriList) { if (!searchAttrs.containsKey(attri.getKey())) { pass = true; break; } } if (pass) { continue; } result.add(element); } return result; } // 获取子元素的相关数据 public Map getPropertyEle(Elements eles) { Map map = new LinkedHashMap<>(); for (Element ele : eles) { Elements childEles = ele.children(); if (childEles.size() != 2) { continue; } String name = childEles.get(0).text(), value = childEles.get(1).text(); if (name.endsWith(":") || name.endsWith(":")) { name = name.substring(0, name.length() - 1); } map.put(name.trim(), value.trim()); } return map; } // 获取以冒号分隔的相关数据 public Map getPropertyColon(List nodeList) { Map map = new LinkedHashMap<>(); for (Node node : nodeList) { String text = node.toString(); String name = "", value = ""; if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } else if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } if (StringUtil.isEmpty(name)) { continue; } if (name.indexOf("onclick=") != -1) { continue; } map.put(name.trim(), value.trim()); } return map; } // 获取以冒号分隔的相关数据 public Map getPropertyColon(Elements eles) { Map map = new LinkedHashMap<>(); for (Element ele : eles) { String text = ele.text(); String name = "", value = ""; if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } else if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } if (StringUtil.isEmpty(name)) { continue; } map.put(name.trim(), value.trim()); } return map; } public Map getTableColumnData(Element table) { return this.getTableColumnData(table, ""); } public Map getTableColumnData(Element table, String rowSelectRange) { Map map = new LinkedHashMap<>(); Elements trs = table.select("tr"); if (!StringUtil.isEmpty(rowSelectRange)) { String[] deleteRows = rowSelectRange.split(","); System.out.println(deleteRows); int offsetIndex = 0; for (int i = deleteRows.length - 1; i >= 0; i--) { int index = Integer.parseInt(deleteRows[i]); if (index < 0) { index = Math.abs(index); index = trs.size() - (index - offsetIndex); trs.remove(index); offsetIndex++; } else { trs.remove(index - 1); } } } for (Element tr : trs) { Elements tds = tr.select("td"); // th 和 td 混合的情况下,取子元素 if (tr.select("th").size() > 0) { tds = tr.children(); } int index = 0; String name = ""; for (Element td : tds) { index++; if (index % 2 == 0) { if (StringUtil.isEmpty(name)) { continue; } map.put(name, td.text().trim()); } else { name = td.text(); if (name.endsWith(":") || name.endsWith(":")) { name = name.substring(0, name.length() - 1); } name = name.trim(); } } } return map; } public List