快递100物流公司列表
/**
 * Matches one company link on the list page. Group 1 is the anchor's href (the
 * detail-page URL); group 2 is the text inside the following {@code <b>} element
 * and is currently unused. Compiled once because Pattern instances are immutable
 * and reusable.
 */
private static final Pattern COMPANY_LINK_PATTERN = Pattern.compile(
        "<a href=\"(.*?)\" target=\"_blank\"><h4>.*?</h4><b>(.*?)</b>");

/**
 * Matches the "ex-title" block of a company detail page.
 * Group 1: content of the {@code <h1>} element (used as the name).
 * Group 2: value of the hidden {@code companyCode} input (used as the key).
 * Group 3: text of the {@code allcompanytel} element (used as the telephone).
 * Group 4: href of the {@code allcompanyurl} anchor (used as the site).
 * Compiled once instead of on every {@link #parse(String)} call, which runs once
 * per company link found by {@code main}.
 */
private static final Pattern COMPANY_DETAIL_PATTERN = Pattern.compile(
        "<div class=\"ex-title\">[\\s\\S]*?<h1>(.*?)</h1>[\\s\\S]*?"
        + "<input type=\"hidden\" id=\"companyCode\" value=\"(.*?)\" />[\\s\\S]*?"
        + "<font id=\"allcompanytel\" class=\"tel-icon\" title=\"拨打客服电话\">(.*?)</font> "
        + "<a target=\"_blank\" rel=\"nofollow\" id=\"allcompanyurl\" class=\"url-icon\" title=\"访问官网\" href=\"(.*?)\">.*?</a> "
        + "<a target=\"_blank\" class=\"net-icon\" rel=\"nofollow\" id=\"serversite\" title=\"查看快递网点\" href=\".*?\">服务网点</a>"
        + "[\\s\\S]*?</div>");

/**
 * Downloads the company list page, follows every company link found on it and
 * prints a JSON object that maps each company's "key" to its parsed details.
 * Links whose detail page could not be parsed are printed on their own line
 * before the final JSON output.
 *
 * @param args unused
 * @throws ClientProtocolException propagated from {@code SimpleHttpClient.get}
 * @throws IOException propagated from {@code SimpleHttpClient.get}
 */
public static void main(String[] args) throws ClientProtocolException, IOException {
    String html = SimpleHttpClient.get("https://www.kuaidi100.com/network/plist.shtml");
    Matcher matcher = COMPANY_LINK_PATTERN.matcher(html);
    JSONObject json = new JSONObject();
    while (matcher.find()) {
        String url = matcher.group(1);
        JSONObject item = parse(url);
        if (item != null) {
            // A later company with the same key replaces the earlier entry.
            json.put(item.getString("key"), item);
        } else {
            // Report the link whose detail page did not match the expected markup.
            System.out.println(url);
        }
    }
    System.out.println(json);
}

/**
 * Downloads a company detail page and extracts its key, name, telephone and
 * site from the first match of the detail-page pattern.
 *
 * @param url address of the detail page, passed as-is to {@code SimpleHttpClient.get}
 * @return a JSON object with the entries "key", "name", "tel" and "site", or
 *         {@code null} when the page does not match the expected markup
 * @throws ClientProtocolException propagated from {@code SimpleHttpClient.get}
 * @throws IOException propagated from {@code SimpleHttpClient.get}
 */
public static JSONObject parse(String url) throws ClientProtocolException, IOException {
    String html = SimpleHttpClient.get(url);
    Matcher matcher = COMPANY_DETAIL_PATTERN.matcher(html);
    if (!matcher.find()) {
        return null;
    }
    String name = matcher.group(1);
    String key = matcher.group(2);
    String tel = matcher.group(3);
    String site = matcher.group(4);
    JSONObject item = new JSONObject();
    item.put("key", key);
    item.put("name", name);
    item.put("tel", tel);
    item.put("site", site);
    return item;
}
版权声明:本文为rubekid原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。