JAVA-替换html中图片的路径-从html代码中提取图片路径并下载(完整版)
transHtml方法实现提取html中网络图片的路径,将图片重新下载到本地,并用本地路径替换html中原来的路径
package com.googosoft.until; import java.io.BufferedInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.http.HttpServletRequest; /** * @author sonyan * @version 2019年11月25日 下午2:12:32 * @desc */ public class Test { /** * 将html中的图片下载到服务器,并且使用服务器上图片的地址替换图片的网络路径 * @param html 要处理的html * @param request * @param uploadFolder 服务器上保存图片的目录 * @return */ public static String transHtml(String html,HttpServletRequest request,String uploadFolder){ List<String> imgList = getImgStrList(html,request); for (String imgStr : imgList) { try { String newUrl = reSaveImage(imgStr, request,uploadFolder); html = html.replace(imgStr, newUrl); } catch (Exception e) { e.printStackTrace(); } } return html; } /** * 将指定的网络图片保存到本地指定目录 * @param httpUrl 图片原来的网络路径 * @param request * @param uploadFolder 服务器上保存图片的目录 * @return httpUrl newPath */ private static String reSaveImage(String httpUrl, HttpServletRequest request,String uploadFolder){ FileOutputStream out = null; BufferedInputStream in = null; HttpURLConnection connection = null; Map<String, Object> urlMap = new HashMap<>(); byte[] buf = new byte[1024]; int len = 0; try { URL url = new URL(httpUrl); connection = (HttpURLConnection) url.openConnection(); connection.connect(); in = new BufferedInputStream(connection.getInputStream()); urlMap = getNewPath(httpUrl, request,uploadFolder); out = new FileOutputStream(urlMap.get("newPath").toString()); while ((len = in.read(buf)) != -1) { out.write(buf, 0, len); } out.flush(); } catch (Exception e) { e.printStackTrace(); } finally { try { in.close(); out.close(); connection.disconnect(); } catch (IOException e) { e.printStackTrace(); } } return urlMap.get("newUrl").toString(); } /** * 
获取保存在服务器上的图片的实际存储地址以及访问地址 * @param httpUrl 图片原来的网络路径 * @param request * @param uploadFolder服务器上保存图片的目录 * @return */ private static Map<String, Object> getNewPath(String httpUrl, HttpServletRequest request,String uploadFolder) { Map<String, Object> relMap = new HashMap<>(); String fileName = getFileName(); String filefix = fileName.substring(fileName.lastIndexOf(".") + 1).toLowerCase();// 文件后缀 fileName = UuidUtil.get32UUID() + "." + filefix; String prefix = getUrlPrefix(request) + "/iconimg/"; relMap.put("newUrl", prefix + fileName); relMap.put("newPath", uploadFolder + fileName); return relMap; } /** * 设置图片的名称(时间+用户编号) * @return */ private static String getFileName() { return "reload"+File.pathSeparator+new Date().getTime()+".jpg"; } /** * 提取HTML字符串中的img * @param htmlStr 要处理的html字符串 * @return */ private static List<String> getImgStrList(String htmlStr,HttpServletRequest request) { List<String> list = new ArrayList<>(); String img = ""; Pattern p_image; Matcher m_image; String regEx_img = "<img.*src\\s*=\\s*(.*?)[^>]*?>"; p_image = Pattern.compile(regEx_img, Pattern.CASE_INSENSITIVE); m_image = p_image.matcher(htmlStr); while (m_image.find()) { img = m_image.group(); Matcher m = Pattern.compile("src\\s*=\\s*\"?(.*?)(\"|>|\\s+)").matcher(img); while (m.find()) { String path = m.group(1); if(!path.startsWith(getUrlPrefix(request))){ list.add(handleSrc(path)); } } } return list; } /** * 去除src路径中的前后引号 * @param src 图片的src路径 * @return */ private static String handleSrc(String src) { if (src != null) { if (src.startsWith("\'")|| src.startsWith("\"")) { return src.substring(1, src.length()); } if (src.endsWith("\'")|| src.endsWith("\"")) { return src.substring(0, src.length()); } } return src; } /** * 获取网站的URL * @param request * @return 例如:http://192.168.11.3:8089 */ public static String getUrlPrefix(HttpServletRequest request) { StringBuffer str = new StringBuffer(); str.append(request.getScheme()); str.append("://"); str.append(request.getServerName()); if (80 != 
request.getServerPort()) { str.append(":" + request.getServerPort()); } str.append(request.getContextPath()); return str.toString(); } }
版权声明:本文为excellencesy原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。