C#.NET实现Word或Excel文件转为HTML文件
Word文件转html,返回相对路径
/// <summary>
/// Converts a Word document to filtered HTML by automating Microsoft Word,
/// normalizes the encoding of the result via <c>TransHTMLEncoding</c>, and
/// returns the relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Word document to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative
/// path "../html/yyyyMMdd/yyyyMMddHHmmssfff.html" of the generated HTML file.
/// </returns>
private string GetPathByDocToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Read the clock once and zero-pad every component: the folder and the file
    // name then agree with each other, and names such as 2024-1-12 and 2024-11-2
    // can no longer collapse into the same string.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string filename = now.ToString("yyyyMMddHHmmssfff", invariant);
    string strFilePath = "../html/" + now.ToString("yyyyMMdd", invariant) + "/";

    // CreateDirectory does nothing when the folder already exists.
    Directory.CreateDirectory(Server.MapPath(strFilePath));

    // Physical location where the converted HTML document is saved.
    string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
    object saveFileName = ConfigPath;
    object fileName = strFile;

    Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Microsoft.Office.Interop.Word.Documents docs = null;
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        docs = word.Documents;

        // Late-bound call so the remaining optional arguments of Open can be omitted.
        // Arguments: file name, then two flags passed as true
        // (ConfirmConversions and ReadOnly per the Word object model).
        doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new object[] { fileName, true, true });

        // Save a copy as filtered HTML (other WdSaveFormat members select other formats).
        doc.GetType().InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
            null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
    }
    finally
    {
        // Always close the document and quit Word, even when Open/SaveAs failed;
        // otherwise every failed conversion leaves a WINWORD.EXE process behind.
        try
        {
            if (doc != null)
            {
                doc.GetType().InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                    null, doc, new object[] { null, null, null });
            }
        }
        finally
        {
            try
            {
                word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
            }
            finally
            {
                // Release the COM references so the Word process can actually exit.
                if (doc != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                }
                if (docs != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                }
                System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
            }
        }
    }

    // Give the generated page a consistent charset declaration/encoding.
    TransHTMLEncoding(ConfigPath);

    return (strFilePath + filename + ".html");
}
Excel文件转HTML,返回相对路径
/// <summary>
/// Converts an Excel workbook to HTML by automating Microsoft Excel and returns
/// the relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Excel workbook to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative
/// path "../html/yyyyMMdd/yyyyMMddHHmmssfff.html" of the generated HTML file.
/// </returns>
private string GetPathByXlsToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Read the clock once and zero-pad every component so the folder and file
    // name are consistent and unambiguous.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string filename = now.ToString("yyyyMMddHHmmssfff", invariant);
    string strFilePath = "../html/" + now.ToString("yyyyMMdd", invariant) + "/";

    // CreateDirectory does nothing when the folder already exists.
    Directory.CreateDirectory(Server.MapPath(strFilePath));

    string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
    object savefilename = ConfigPath;
    object ofmt = Microsoft.Office.Interop.Excel.XlFileFormat.xlHtml;

    // Remembered so the fallback below only considers Excel processes that were
    // started after this point.
    DateTime launchedAt = DateTime.Now;

    Microsoft.Office.Interop.Excel.Application repExcel = new Microsoft.Office.Interop.Excel.Application();
    Microsoft.Office.Interop.Excel.Workbooks workbooks = null;
    Microsoft.Office.Interop.Excel.Workbook workbook = null;
    try
    {
        // Keep the Workbooks collection in a local so the exact reference that
        // was used can be released afterwards.
        workbooks = repExcel.Workbooks;
        workbook = workbooks.Open(strFile, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);

        // Save a copy of the workbook as HTML.
        workbook.SaveAs(savefilename, ofmt, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);
    }
    finally
    {
        // Tear everything down even when Open/SaveAs failed, so a failed
        // conversion does not leave an EXCEL.EXE process behind.
        try
        {
            if (workbook != null)
            {
                object osave = false; // close without saving changes to the source
                workbook.Close(osave, Type.Missing, Type.Missing);
            }
        }
        finally
        {
            try
            {
                repExcel.Quit();
            }
            finally
            {
                // Every COM reference obtained above is released explicitly,
                // so no forced garbage collection is needed.
                if (workbook != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                }
                if (workbooks != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbooks);
                }
                System.Runtime.InteropServices.Marshal.ReleaseComObject(repExcel);

                KillExcelStartedSince(launchedAt);
            }
        }
    }

    return (strFilePath + filename + ".html");
}

/// <summary>
/// Best-effort fallback: terminates EXCEL processes that were started at or after
/// <paramref name="launchedAt"/> and have not exited after a short grace period.
/// </summary>
/// <param name="launchedAt">Time captured just before Excel was launched.</param>
/// <remarks>
/// Full timestamps are compared; comparing only the seconds component gives wrong
/// answers across minute boundaries. NOTE(review): selection is still by start
/// time, so an Excel instance launched by a concurrent request in the same window
/// would also match.
/// </remarks>
private static void KillExcelStartedSince(DateTime launchedAt)
{
    foreach (System.Diagnostics.Process p in System.Diagnostics.Process.GetProcessesByName("EXCEL"))
    {
        try
        {
            // Give Excel a moment to finish its normal shutdown before killing it.
            if (p.StartTime >= launchedAt && !p.WaitForExit(2000))
            {
                p.Kill();
            }
        }
        catch (InvalidOperationException)
        {
            // The process exited in the meantime; nothing left to do.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // The process could not be inspected or terminated; skip it.
        }
        finally
        {
            p.Dispose();
        }
    }
}
这里可能会遇到一个问题:转换得到的HTML文件所使用的页面编码可能导致浏览器无法正确解读,所以需要进行转码。转换代码如下:
/// <summary>
/// Rewrites the generated HTML file so that its meta tags declare the gb2312
/// charset and the file content is stored in that same encoding.
/// </summary>
/// <param name="strFilePath">Physical path of the HTML file to rewrite in place.</param>
/// <remarks>
/// Failures are not propagated; the exception message is reported to the browser
/// through a startup script alert.
/// </remarks>
private void TransHTMLEncoding(string strFilePath)
{
    try
    {
        string html;
        // using-blocks guarantee the file handle is released even if reading fails.
        using (System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath, Encoding.GetEncoding(0)))
        {
            html = sr.ReadToEnd();
        }

        html = System.Text.RegularExpressions.Regex.Replace(html, @"<meta[^>]*>", "<meta http-equiv=Content-Type content=\'text/html; charset=gb2312\'>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        // Write with the encoding that the meta tag declares; using the machine's
        // default code page would contradict the declaration on servers whose
        // default is not gb2312.
        using (System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.GetEncoding("gb2312")))
        {
            sw.Write(html);
        }
    }
    catch (Exception ex)
    {
        // Escape the message so quotes, backslashes, line breaks or a closing
        // script tag inside it cannot break out of the JavaScript string literal.
        string message = ex.Message
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n")
            .Replace("</", "<\\/");
        Page.RegisterStartupScript("alt", "<script>alert(\'" + message + "\')</script>");
    }
}
这样就可以在页面上正常显示了