POI解析word文档
poi解析word的表格:
提前先准备需要的jar包:
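<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.freehep</groupId>
    <artifactId>freehep-graphicsio-emf</artifactId>
    <version>2.1.3</version>
</dependency>
<dependency>
    <groupId>org.freehep</groupId>
    <artifactId>freehep-io</artifactId>
    <version>2.0.5</version>
</dependency>
<dependency>
    <groupId>xml-apis</groupId>
    <artifactId>xml-apis-ext</artifactId>
    <version>1.3.04</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>net.arnx</groupId>
    <artifactId>wmf2svg</artifactId>
    <version>0.9.5</version>
</dependency>
<dependency>
    <groupId>org.w3c</groupId>
    <artifactId>dom</artifactId>
    <version>2.3.0-jaxb-1.0.6</version>
</dependency>
<dependency>
    <groupId>ru.ilb.w3c</groupId>
    <artifactId>w3c</artifactId>
    <version>1.1</version>
</dependency>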
注:此方法是将word文档中的表格解析出来,存放到一个集合中,集合中的数据是
XXX
...
...
/**
* 获得文档的表格
* path 文件路径
* @return
*/
public static List
解析图片(包括emf,wmf,wmf转svg最终转换为png)
/**
 * Extracts the pictures embedded in a Word document opened with HWPFDocument,
 * writes each one to disk under a year/month directory, and then passes the
 * saved paths to emfConversionPng.
 *
 * NOTE(review): generic type arguments in this listing appear to have been
 * stripped when the article was extracted (e.g. "List>"); restore them from
 * the original source before compiling.
 *
 * @param path
 *            path of the Word document
 * @return the list created at the top of the method, or null if any exception
 *         was thrown. NOTE(review): nothing in this method adds to that list,
 *         so as written it is always returned empty - confirm what was meant
 *         to be stored in it.
 */
public static List> getWordImageUrl(String path) {
File file = null;
List> list = new ArrayList>();
// Paths of the image files written to disk; handed to emfConversionPng below.
List emfs = new ArrayList();
try {
file = new File(path);
FileInputStream f = new FileInputStream(file.getAbsolutePath());
HWPFDocument doc = new HWPFDocument(f);
// Get the document's pictures table.
PicturesTable pTable = doc.getPicturesTable();
// Number of characters in the document.
int length = doc.characterLength();
String directory = "";// Output directory; the original note says it is passed as the path when deleting emf files (no such use is visible here).
if (length > 0) {
String[] ym = getYearAndMonth();// Presumably { year, month } - helper is defined elsewhere; verify.
// Create the year/month sub-directory if it does not exist yet.
// NOTE(review): the base directory "G:/upload/img/" is hard-coded.
directory = "G:/upload/img/" + ym[0] + "/" + ym[1];
File fp1 = new File(directory);
if (!fp1.exists() && !fp1.isDirectory()) {
fp1.mkdirs();
}
// Walk the document one character at a time.
for (int i = 0; i < length; i++) {
Range range = new Range(i, i + 1, doc);
// Get the character run at this index.
CharacterRun cr = range.getCharacterRun(0);
// Check whether this character run references a picture.
if (pTable.hasPicture(cr)) {
// Extract the picture found at this position and save it to disk.
Picture pic = pTable.extractPicture(cr, false);
String afileName = pic.suggestFullFileName();
// Keep only the last four characters - presumably the dot plus a
// three-letter extension; a longer extension would be truncated.
afileName = afileName.substring(afileName.length() - 4);
// Random, dash-free UUID used as the file's base name.
String fileMainName = UUID.randomUUID().toString()
.replace("-", "");
String saveUrl = "G:/upload/img/" + ym[0] + "/" + ym[1]
+ "/" + fileMainName + afileName;
// Open an output stream on the target file.
OutputStream out = new FileOutputStream(new File(
saveUrl));
// Write the image content to disk.
pic.writeImageContent(out);
if (out != null) {
out.close();
}
if (!emfs.contains(saveUrl)) {
emfs.add(saveUrl);
}
}
}
}
// NOTE(review): doc and f are closed only on the success path; an exception
// thrown above skips these calls.
if (doc != null) {
doc.close();
}
if (f != null) {
f.close();
}
// Convert the saved emf/wmf images to png.
emfConversionPng(emfs);
return list;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
/**
 * Converts every EMF or WMF image in the list to a PNG file written next to
 * the source image (same path, extension replaced by ".png").
 *
 * Entries with any other extension (for example images that are already PNG)
 * and entries without an extension are skipped. The extension check is
 * case-insensitive and only looks at the text after the last dot of the file
 * name, so directory names containing "emf" or "wmf" do not trigger a
 * conversion and upper-case ".WMF" files no longer cause a
 * StringIndexOutOfBoundsException.
 *
 * @param list
 *            paths of the image files to convert; null or empty is a no-op
 * @throws IOException
 *             if an EMF file cannot be read or its PNG cannot be written
 */
public static void emfConversionPng(List<String> list) throws IOException {
    if (list == null || list.isEmpty()) {
        return;
    }
    for (String saveUrl : list) {
        // Locate the extension; a dot inside a directory name does not count.
        int lastSeparator = Math.max(saveUrl.lastIndexOf('/'),
                saveUrl.lastIndexOf('\\'));
        int dot = saveUrl.lastIndexOf('.');
        if (dot <= lastSeparator) {
            continue;
        }
        String basePath = saveUrl.substring(0, dot);
        String extension = saveUrl.substring(dot + 1);
        String pngFile = basePath + ".png";

        // Pictures extracted from a doc may already be png, hence the checks.
        if (extension.equalsIgnoreCase("emf")) {
            try (InputStream is = new FileInputStream(saveUrl);
                    EMFInputStream eis = new EMFInputStream(is,
                            EMFInputStream.DEFAULT_VERSION)) {
                EMFRenderer emfRenderer = new EMFRenderer(eis);
                final int width = (int) eis.readHeader().getBounds()
                        .getWidth();
                final int height = (int) eis.readHeader().getBounds()
                        .getHeight();
                // Canvas is slightly larger than the EMF bounds (+60 x +40).
                final BufferedImage result = new BufferedImage(width + 60,
                        height + 40, BufferedImage.TYPE_4BYTE_ABGR);
                Graphics2D g2 = (Graphics2D) result.createGraphics();
                try {
                    emfRenderer.paint(g2);
                } finally {
                    g2.dispose();
                }
                // Written as png; the original author notes that this keeps
                // the background from turning orange.
                ImageIO.write(result, "png", new File(pngFile));
            }
        } else if (extension.equalsIgnoreCase("wmf")) {
            // wmf -> svg -> png
            String svgFile = basePath + ".svg";
            wmfToSvg(saveUrl, svgFile);
            svgToJpg(svgFile, pngFile);
        }
    }
}
/**
 * Converts a WMF file to SVG.
 *
 * The WMF is parsed with WmfParser into an SvgGdi, and the resulting DOM
 * document is serialised to dest via output(Document, OutputStream). When
 * dest ends with ".svgz" the output is gzip-compressed. Any exception is
 * printed to stderr and not propagated, so callers cannot tell from this
 * method alone whether the conversion succeeded.
 *
 * @param src
 *            path of the WMF file to read
 * @param dest
 *            path of the SVG (or .svgz) file to write
 */
public static void wmfToSvg(String src, String dest) {
    boolean compatible = false;
    // try-with-resources guarantees the input is closed even if parsing fails.
    try (InputStream in = new FileInputStream(src)) {
        WmfParser parser = new WmfParser();
        final SvgGdi gdi = new SvgGdi(compatible);
        parser.parse(in, gdi);
        Document doc = gdi.getDocument();

        // The destination is opened only after a successful parse, so a
        // broken WMF does not leave an empty output file behind.
        OutputStream out = new FileOutputStream(dest);
        try {
            if (dest.endsWith(".svgz")) {
                out = new GZIPOutputStream(out);
            }
            output(doc, out);
        } finally {
            // Closing again after output(...) has closed the stream is harmless.
            out.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Serialises a DOM document as indented UTF-8 XML with the SVG 1.0 DOCTYPE,
 * then flushes and closes the target stream.
 *
 * @param doc
 *            document to serialise
 * @param out
 *            stream that receives the XML; it is closed by this method
 * @throws Exception
 *             if the transformer cannot be created or the transformation or
 *             stream handling fails
 */
private static void output(Document doc, OutputStream out) throws Exception {
    final Transformer serializer = TransformerFactory.newInstance()
            .newTransformer();

    // Output properties, applied in the listed order.
    final String[][] settings = {
            { OutputKeys.METHOD, "xml" },
            { OutputKeys.ENCODING, "UTF-8" },
            { OutputKeys.INDENT, "yes" },
            { OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN" },
            { OutputKeys.DOCTYPE_SYSTEM,
                    "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" } };
    for (String[] setting : settings) {
        serializer.setOutputProperty(setting[0], setting[1]);
    }

    final DOMSource source = new DOMSource(doc);
    final StreamResult sink = new StreamResult(out);
    serializer.transform(source, sink);

    if (out == null) {
        return;
    }
    out.flush();
    out.close();
}
/**
 * Rasterises an SVG file and writes the image to dest, then deletes the SVG.
 *
 * Despite the method name, the image is produced by a PNGTranscoder, so the
 * bytes written to dest are PNG data. The transcoded image is buffered in
 * memory and dest is only created after transcoding has succeeded. The source
 * SVG is treated as a temporary file and is deleted whether or not the
 * conversion succeeded. Any exception is printed to stderr and not
 * propagated.
 *
 * @param src
 *            path of the SVG file to read (deleted afterwards)
 * @param dest
 *            path of the image file to write
 */
public static void svgToJpg(String src, String dest) {
    File svg = null;
    try {
        svg = new File(src);

        // Load the whole SVG in blocks rather than one read() call per byte.
        ByteArrayOutputStream svgOut = new ByteArrayOutputStream();
        try (FileInputStream svgStream = new FileInputStream(svg)) {
            byte[] buffer = new byte[8192];
            int noOfBytesRead;
            while ((noOfBytesRead = svgStream.read(buffer)) != -1) {
                svgOut.write(buffer, 0, noOfBytesRead);
            }
        }

        ImageTranscoder it = new PNGTranscoder();
        // NOTE(review): a JPEG quality hint on a PNG transcoder is presumably
        // ignored; kept so the hint set matches the original - confirm.
        it.addTranscodingHint(JPEGTranscoder.KEY_QUALITY, Float.valueOf(1f));
        it.addTranscodingHint(ImageTranscoder.KEY_WIDTH, Float.valueOf(500f));

        ByteArrayOutputStream jpg = new ByteArrayOutputStream();
        it.transcode(new TranscoderInput(new ByteArrayInputStream(
                svgOut.toByteArray())), new TranscoderOutput(jpg));

        try (FileOutputStream jpgOut = new FileOutputStream(dest)) {
            jpg.writeTo(jpgOut);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // All streams are closed by now, so the delete is not blocked by an
        // open handle and is never skipped by a failing close().
        if (svg != null) {
            svg.delete();
        }
    }
}
使用到的所有的jar包:
下载地址csdn:http://download.csdn.net/detail/www1056481167/9762899
百度网盘(推荐下载地址): https://pan.baidu.com/s/1V6BfrtQTmqdniauhbr_BBQ 提取码: nvqm (不需要积分)
注:本文为作者搜集poi3.5的api总结出来的,还有参考其他博主的文章,如有冒犯,请与作者联系,读者在使用的过程中有什么不明白的都可以联系我,尽自己最大的能力去帮助学习POI的同学们。
免责声明:由于无法甄别是否为投稿用户创作以及文章的准确性,本站尊重并保护知识产权,根据《信息网络传播权保护条例》,如我们转载的作品侵犯了您的权利,请您通知我们,请将本侵权页面网址发送邮件到qingge@88.com,深感抱歉,我们会做删除处理。
