博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Java实现有规律的字符串转JSON格式
阅读量:6083 次
发布时间:2019-06-20

本文共 7483 字,大约阅读时间需要 24 分钟。

hot3.png

package net.sc.common.util;import java.io.File;import java.util.ArrayList;import java.util.LinkedHashMap;import java.util.List;import java.util.Map;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.jsoup.Jsoup;import org.jsoup.nodes.Attribute;import org.jsoup.nodes.Attributes;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.nodes.Node;import org.jsoup.select.Elements;/** * @author Aaron * @createTime 2015-08-07 * @desc 支持多属性的选择 */public class JsoupUtil {	public static final String _REGEX = "\\[(?
\\w+)=(?
\\w+)\\]"; Pattern p; public JsoupUtil() { p = Pattern.compile(_REGEX); } public Elements findElement(Element queryElement, String... attrs) { return this.findElement(queryElement, false, attrs); } // TODO only只匹配了等于的情况,未匹配开始和结束的情况 public Elements findElement(Element queryElement, boolean only, String... attrs) { Elements result = new Elements(); if (queryElement == null) { return result; } if (attrs.length == 0) { return result; } Elements searchElements = new Elements(queryElement); Elements middleResults = new Elements(); for (int i = 0; i < attrs.length; i++) { for (Element search : searchElements) { middleResults.addAll(search.select(attrs[i])); } if (middleResults.size() == 0 && i < attrs.length - 1) { return result; } searchElements = middleResults; middleResults = new Elements(); } result = searchElements; if (!only) { return result; } Map
searchAttrs = new LinkedHashMap
(); for (int i = 0; i < attrs.length; i++) { Matcher m = p.matcher(attrs[i]); // 未发现属性的情况下,返回原结果集 if (m.find()) { searchAttrs.put(m.group("name"), m.group("value")); } else { return result; } } result = new Elements(); for (Element element : searchElements) { Attributes attriList = element.attributes(); if (attriList.size() != searchAttrs.size()) { continue; } boolean pass = false; for (Attribute attri : attriList) { if (!searchAttrs.containsKey(attri.getKey())) { pass = true; break; } } if (pass) { continue; } result.add(element); } return result; } // 获取子元素的相关数据 public Map
getPropertyEle(Elements eles) { Map
map = new LinkedHashMap<>(); for (Element ele : eles) { Elements childEles = ele.children(); if (childEles.size() != 2) { continue; } String name = childEles.get(0).text(), value = childEles.get(1).text(); if (name.endsWith(":") || name.endsWith(":")) { name = name.substring(0, name.length() - 1); } map.put(name.trim(), value.trim()); } return map; } // 获取以冒号分隔的相关数据 public Map
getPropertyColon(List
nodeList) { Map
map = new LinkedHashMap<>(); for (Node node : nodeList) { String text = node.toString(); String name = "", value = ""; if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } else if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } if (StringUtil.isEmpty(name)) { continue; } if (name.indexOf("onclick=") != -1) { continue; } map.put(name.trim(), value.trim()); } return map; } // 获取以冒号分隔的相关数据 public Map
getPropertyColon(Elements eles) { Map
map = new LinkedHashMap<>(); for (Element ele : eles) { String text = ele.text(); String name = "", value = ""; if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } else if (text.indexOf(":") != -1) { name = text.substring(0, text.indexOf(":")); value = text.substring(text.indexOf(":") + 1); } if (StringUtil.isEmpty(name)) { continue; } map.put(name.trim(), value.trim()); } return map; } public Map
getTableColumnData(Element table) { return this.getTableColumnData(table, ""); } public Map
getTableColumnData(Element table, String rowSelectRange) { Map
map = new LinkedHashMap<>(); Elements trs = table.select("tr"); if (!StringUtil.isEmpty(rowSelectRange)) { String[] deleteRows = rowSelectRange.split(","); System.out.println(deleteRows); int offsetIndex = 0; for (int i = deleteRows.length - 1; i >= 0; i--) { int index = Integer.parseInt(deleteRows[i]); if (index < 0) { index = Math.abs(index); index = trs.size() - (index - offsetIndex); trs.remove(index); offsetIndex++; } else { trs.remove(index - 1); } } } for (Element tr : trs) { Elements tds = tr.select("td"); // th 和 td 混合的情况下,取子元素 if (tr.select("th").size() > 0) { tds = tr.children(); } int index = 0; String name = ""; for (Element td : tds) { index++; if (index % 2 == 0) { if (StringUtil.isEmpty(name)) { continue; } map.put(name, td.text().trim()); } else { name = td.text(); if (name.endsWith(":") || name.endsWith(":")) { name = name.substring(0, name.length() - 1); } name = name.trim(); } } } return map; } public List
> getTableRowData(Element table) { return getTableRowData(table, null, "", ""); } public List
> getTableRowData(Element table, String rowSelectRange, String columnSelectRange) { return getTableRowData(table, null, rowSelectRange, columnSelectRange); } // rowSelectRange 表示要去除的行 public List
> getTableRowData(Element table, List
selfNameList, String rowSelectRange, String columnSelectRange) { Elements elements = table.select("tr"); if (!StringUtil.isEmpty(rowSelectRange)) { String[] deleteRows = rowSelectRange.split(","); int offsetIndex = 0; for (int i = deleteRows.length - 1; i >= 0; i--) { int index = Integer.parseInt(deleteRows[i]); if (index < 0) { index = Math.abs(index); index = elements.size() - (index - offsetIndex); elements.remove(index); offsetIndex++; } else { elements.remove(index - 1); } } } int counter = 0; List
nameList = new ArrayList<>(); if (selfNameList != null && selfNameList.size() > 0) { nameList = selfNameList; } List
> valueList = new ArrayList<>(); for (Element element : elements) { counter++; Elements tds = element.select("td"); if (tds == null || tds.size() == 0) { tds = element.select("th"); } if (!StringUtil.isEmpty(columnSelectRange)) { String[] deleteColumns = columnSelectRange.split(","); int offsetIndex = 0; for (int i = deleteColumns.length - 1; i >= 0; i--) { int index = Integer.parseInt(deleteColumns[i]); if (index < 0) { index = Math.abs(index); index = tds.size() - (index - offsetIndex); tds.remove(index); offsetIndex++; } else { tds.remove(index - 1); } } } Map
pvm = new LinkedHashMap<>(); int index = 0; for (Element td : tds) { if (counter == 1 && (selfNameList == null || selfNameList.size() == 0)) { nameList.add(td.text().trim()); } else if (counter == 1 && selfNameList != null && selfNameList.size() > 0) { pvm.put(nameList.get(index), td.text()); } else { pvm.put(nameList.get(index), td.text()); } index++; } if (pvm.size() > 0) { valueList.add(pvm); } } return valueList; } public static void main(String args[]) throws Exception {// JsoupUtil ju = new JsoupUtil();// String path = ju.getClass().getResource("").getPath() + "JsoupUtil.html";// Document doc = Jsoup.parse(new File(path), "UTF-8");// Elements results = ju.findElement(doc, true, "tr[width=100]");// for (Element result : results) {// System.out.println(result.text());// } // JsoupUtil ju = new JsoupUtil();// String path = ju.getClass().getResource("").getPath() + "JsoupUtil.html";// Document doc = Jsoup.parse(new File(path), "UTF-8");// List
> list = ju.getTableRowData(doc.select("table[class=tb6]").get(0));// System.out.println(list); }}

 

转载于:https://my.oschina.net/AaronDMC/blog/750796

你可能感兴趣的文章
我的友情链接
查看>>
把LYNC从评估版升级到正式版
查看>>
我的友情链接
查看>>
eclipse 不能建立maven项目
查看>>
Session死亡讲解
查看>>
八周三次课(1月31日)
查看>>
我的友情链接
查看>>
关于linux中 变量相关 学习小白总结
查看>>
文德数据启动国庆中秋大优惠——现在购买立省三千
查看>>
每天一个python 小案例——循环和列表
查看>>
结构体/struct
查看>>
用VC++开发Oracle数据库应用程序详解
查看>>
CCS初学那点事(二)
查看>>
机器学习:数据预处理之独热编码(One-Hot)
查看>>
我的友情链接
查看>>
apache之虚拟主机
查看>>
dedeCMS5.7在任意栏目获取顶级栏目名称及链接的方法
查看>>
linux之文本搜索工具(grep、egrep)用法
查看>>
活动目录中组的类型和可用范围
查看>>
子网掩码划分随手记
查看>>