利用自己的技能,把某个网站的PPT全部下载下来的过程
1、该网站的PPT链接全部都在页面上,用正则提取出所有链接,以txt形式保存到指定位置(例如 E:\test\downinfo.txt)。文件中每个PPT占两行:一行以"【标题】"开头,后面是标题;一行以"【下载地址】"开头,后面是下载链接。
2、写个java文件处理一下,如下:
package platform;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;

/**
 * Batch downloader: reads a text file of title / download-link pairs, resolves
 * each link's redirect target and saves the target as a PDF named after the title.
 *
 * <p>Input format: a line starting with {@code 【标题】} sets the title used for
 * the following download; a line starting with {@code 【下载地址】} carries the
 * link to resolve and download.
 */
public class TestQConDownload {

    /** Link list read when no command-line argument is given. */
    private static final String DEFAULT_LINK_FILE = "E:\\test\\downinfo.txt";

    /** Target directory used when no second command-line argument is given. */
    private static final String DEFAULT_DOWNLOAD_DIR = "E:\\test\\download\\";

    /** Prefix of a line that carries a title. */
    private static final String TITLE_PREFIX = "【标题】";

    /** Prefix of a line that carries a download link. */
    private static final String URL_PREFIX = "【下载地址】";

    // NOTE(review): this is a hard-coded login session (dx_token). It should be
    // supplied from outside the source (argument / environment) and the token rotated.
    private static final String SESSION_COOKIE =
            "dx_un=%E5%B9%B4%E8%BD%BB%E7%9A%84%E7%96%AF%E5%AD%90; dx_avatar=http%3A%2F%2F7xil0e.com1.z0.glb.clouddn.com%2Fuser_580d84f25ea61.png; dx_token=0c6b719ffff50f3746b64f058cb4e719";

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the link-list file and
     *             {@code args[1]} the download directory; both fall back to the
     *             built-in defaults when absent
     */
    public static void main(String[] args) {
        String linkFile = args.length > 0 ? args[0] : DEFAULT_LINK_FILE;
        String downloadDir = args.length > 1 ? args[1] : DEFAULT_DOWNLOAD_DIR;
        if (!downloadDir.endsWith("\\") && !downloadDir.endsWith("/")) {
            downloadDir = downloadDir + File.separator;
        }

        BufferedReader bufferedReader = null;
        try {
            bufferedReader = readTxtFile(linkFile);
            // Used as the file name if a link appears before any title line.
            String title = "1";
            boolean firstLine = true;
            String lineTxt;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // A UTF-8 byte-order mark would otherwise hide the first prefix.
                    if (lineTxt.startsWith("\uFEFF")) {
                        lineTxt = lineTxt.substring(1);
                    }
                }
                if (lineTxt.startsWith(TITLE_PREFIX)) {
                    title = sanitizeFileName(lineTxt.substring(TITLE_PREFIX.length()));
                    System.out.println(title);
                } else if (lineTxt.startsWith(URL_PREFIX)) {
                    String url = lineTxt.substring(URL_PREFIX.length()).trim();
                    try {
                        // Resolve the address the link redirects to.
                        url = getRedirectLocation(url);
                        System.out.println(url);
                        // Save it under the current title.
                        downloadFile(url, downloadDir + title + ".pdf");
                    } catch (IOException e) {
                        // One broken link must not abort the remaining downloads.
                        System.err.println("Skipping \"" + title + "\": " + e);
                        e.printStackTrace();
                    }
                }
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(bufferedReader);
        }
    }

    /**
     * Sends a request to {@code url} and returns the value of the response's
     * {@code Location} header.
     *
     * @param url the link to resolve
     * @return the redirect target announced by the server
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException if the request fails or the response carries no
     *                     {@code Location} header
     */
    public static String getRedirectLocation(String url) throws ClientProtocolException, IOException {
        // NOTE(review): the request is deliberately a POST; presumably this is what
        // keeps HttpClient from following the redirect itself, so the Location header
        // is still visible on the response. Confirm before switching to GET.
        HttpPost request = new HttpPost(url);
        request.setHeader("Cookie", SESSION_COOKIE);
        request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        // The value is a language list, so it belongs in Accept-Language
        // (it was previously sent as Accept-Encoding).
        request.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        request.setHeader("Connection", "keep-alive");
        request.setHeader("Host", "ppt.geekbang.org");
        request.setHeader("Referer", "http://2016.qconshanghai.com/schedule");
        request.setHeader("Upgrade-Insecure-Requests", "1");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36");

        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(request);
            if (!response.containsHeader("Location")) {
                throw new IOException("No Location header in response (HTTP "
                        + response.getStatusLine().getStatusCode() + ") for " + url);
            }
            return response.getFirstHeader("Location").getValue();
        } finally {
            // Release on every path, including failures, so no connection leaks.
            request.releaseConnection();
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads a remote file and stores it locally. Failures are reported on
     * standard error and do not propagate to the caller.
     *
     * @param remoteFilePath URL of the remote file
     * @param localFilePath  path of the local file to write
     */
    public static void downloadFile(String remoteFilePath, String localFilePath) {
        HttpURLConnection httpUrl = null;
        BufferedInputStream bis = null;
        BufferedOutputStream bos = null;
        try {
            File target = new File(localFilePath);
            File parent = target.getParentFile();
            if (parent != null && !parent.exists() && !parent.mkdirs()) {
                throw new IOException("Cannot create directory " + parent);
            }

            httpUrl = (HttpURLConnection) new URL(remoteFilePath).openConnection();
            httpUrl.connect();
            bis = new BufferedInputStream(httpUrl.getInputStream());
            bos = new BufferedOutputStream(new FileOutputStream(target));

            byte[] buffer = new byte[2048];
            int read;
            while ((read = bis.read(buffer)) != -1) {
                bos.write(buffer, 0, read);
            }
            bos.flush();
        } catch (Exception e) {
            // Kept broad on purpose: this method has always been best-effort
            // and callers do not expect it to throw.
            System.err.println("Download failed: " + remoteFilePath + " -> " + localFilePath);
            e.printStackTrace();
        } finally {
            // Each resource is closed independently; either may still be null
            // when the failure happened before it was opened.
            closeAndReport(bis);
            closeAndReport(bos);
            if (httpUrl != null) {
                httpUrl.disconnect();
            }
        }
    }

    /**
     * Opens a text file as a UTF-8 {@link BufferedReader}. The caller is
     * responsible for closing the returned reader.
     *
     * @param filePath path of the file to read
     * @return a buffered reader over the file's content
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static BufferedReader readTxtFile(String filePath) throws UnsupportedEncodingException, FileNotFoundException {
        String encoding = "UTF-8";
        File file = new File(filePath);
        InputStreamReader read = new InputStreamReader(new FileInputStream(file), encoding);
        return new BufferedReader(read);
    }

    /** Removes the characters that are not allowed in Windows file names. */
    private static String sanitizeFileName(String rawTitle) {
        return rawTitle.replaceAll("[\\\\/:*?\"<>|]", "");
    }

    /** Closes {@code resource} if it is non-null, reporting (not rethrowing) a failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
版权声明:本文为flying607原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。