当前位置:   article > 正文

java 解压zip文件,java.lang.IllegalArgumentException: MALFORMED[1]

java.lang.illegalargumentexception: malformed[1]

在做zip文件解压缩的时候碰到一个异常信息,见下面的异常详情:


java.lang.IllegalArgumentException: MALFORMED[1]
    at java.util.zip.ZipCoder.toString(ZipCoder.java:65) ~[na:1.8.0_181]
    at java.util.zip.ZipFile.getZipEntry(ZipFile.java:583) ~[na:1.8.0_181]
    at java.util.zip.ZipFile.access$900(ZipFile.java:60) ~[na:1.8.0_181]
    at java.util.zip.ZipFile$ZipEntryIterator.next(ZipFile.java:539) ~[na:1.8.0_181]
    at java.util.zip.ZipFile$ZipEntryIterator.nextElement(ZipFile.java:514) ~[na:1.8.0_181]
    at java.util.zip.ZipFile$ZipEntryIterator.nextElement(ZipFile.java:495) ~[na:1.8.0_181]
 

代码:

  1. <!-- ZIP操作 -->
  2. <dependency>
  3. <groupId>org.apache.commons</groupId>
  4. <artifactId>commons-compress</artifactId>
  5. <version>1.16.1</version>
  6. </dependency>
ZipFile zipFile = new ZipFile(new File(filePath));

这种异常在网上查一下,很快就能定位问题:是编码问题。java.util.zip.ZipFile 默认按 UTF-8 解码压缩包内的条目名称,如果压缩包是用其他编码(例如 GBK)生成的,读取条目时就会抛出这个异常。

然后到处搜索如何获取zip文件的编码,被提到最多的就是cpdetector工具:

https://blog.csdn.net/u014052432/article/details/79243496

pom:

  1. <dependency>
  2. <groupId>cpdetector</groupId>
  3. <artifactId>cpdetector</artifactId>
  4. <version>1.0.10</version>
  5. </dependency>

如果依赖下载不下来,可以自行在网上下载jar并放到本地仓库对应的目录内。现在网上还真不好找这个jar,我是在github上找到的。

代码都编写好了,进行测试,确实能获取zip文件的编码,但是异常现象依旧存在。

然后又是各种 “zip获取编码”的查找,终于发现一篇:

guying4875大神写的博客:https://blog.csdn.net/guying4875/article/details/81034022

拷贝代码测试,顺利通过

  1. /**
  2. *
  3. */
  4. import java.io.BufferedInputStream;
  5. import java.io.BufferedReader;
  6. import java.io.File;
  7. import java.io.FileInputStream;
  8. import java.io.FileOutputStream;
  9. import java.io.InputStreamReader;
  10. import java.io.OutputStreamWriter;
  11. import java.io.Writer;
  12. import java.util.BitSet;
  13. import org.slf4j.Logger;
  14. import org.slf4j.LoggerFactory;
  15. /**
  16. * @author 自动识别文件编码格式
  17. *
  18. */
  19. public class EncodeUtil {
  20. private static Logger logger = LoggerFactory.getLogger(EncodeUtil.class);
  21. private static int BYTE_SIZE = 8;
  22. public static String CODE_UTF8 = "UTF-8";
  23. public static String CODE_UTF8_BOM = "UTF-8_BOM";
  24. public static String CODE_GBK = "GBK";
  25. /**
  26. * 通过文件全名称获取编码集名称
  27. *
  28. * @param fullFileName
  29. * @param ignoreBom
  30. * @return
  31. * @throws Exception
  32. */
  33. public static String getEncode(String fullFileName, boolean ignoreBom) throws Exception {
  34. logger.debug("fullFileName ; {}", fullFileName);
  35. BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName));
  36. return getEncode(bis, ignoreBom);
  37. }
  38. /**
  39. * 通过文件缓存流获取编码集名称,文件流必须为未曾
  40. *
  41. * @param bis
  42. * @param ignoreBom 是否忽略utf-8 bom
  43. * @return
  44. * @throws Exception
  45. */
  46. public static String getEncode(BufferedInputStream bis, boolean ignoreBom) throws Exception {
  47. bis.mark(0);
  48. String encodeType = "未识别";
  49. byte[] head = new byte[3];
  50. bis.read(head);
  51. if (head[0] == -1 && head[1] == -2) {
  52. encodeType = "UTF-16";
  53. } else if (head[0] == -2 && head[1] == -1) {
  54. encodeType = "Unicode";
  55. } else if (head[0] == -17 && head[1] == -69 && head[2] == -65) { //带BOM
  56. if (ignoreBom) {
  57. encodeType = CODE_UTF8;
  58. } else {
  59. encodeType = CODE_UTF8_BOM;
  60. }
  61. } else if ("Unicode".equals(encodeType)) {
  62. encodeType = "UTF-16";
  63. } else if (isUTF8(bis)) {
  64. encodeType = CODE_UTF8;
  65. } else {
  66. encodeType = CODE_GBK;
  67. }
  68. logger.info("result encode type : " + encodeType);
  69. return encodeType;
  70. }
  71. /**
  72. * 是否是无BOM的UTF8格式,不判断常规场景,只区分无BOM UTF8和GBK
  73. *
  74. * @param bis
  75. * @return
  76. */
  77. private static boolean isUTF8( BufferedInputStream bis) throws Exception {
  78. bis.reset();
  79. //读取第一个字节
  80. int code = bis.read();
  81. do {
  82. BitSet bitSet = convert2BitSet(code);
  83. //判断是否为单字节
  84. if (bitSet.get(0)) {//多字节时,再读取N个字节
  85. if (!checkMultiByte(bis, bitSet)) {//未检测通过,直接返回
  86. return false;
  87. }
  88. } else {
  89. //单字节时什么都不用做,再次读取字节
  90. }
  91. code = bis.read();
  92. } while (code != -1);
  93. return true;
  94. }
  95. /**
  96. * 检测多字节,判断是否为utf8,已经读取了一个字节
  97. *
  98. * @param bis
  99. * @param bitSet
  100. * @return
  101. */
  102. private static boolean checkMultiByte(BufferedInputStream bis, BitSet bitSet) throws Exception {
  103. int count = getCountOfSequential(bitSet);
  104. byte[] bytes = new byte[count - 1];//已经读取了一个字节,不能再读取
  105. bis.read(bytes);
  106. for (byte b : bytes) {
  107. if (!checkUtf8Byte(b)) {
  108. return false;
  109. }
  110. }
  111. return true;
  112. }
  113. /**
  114. * 检测单字节,判断是否为utf8
  115. *
  116. * @param b
  117. * @return
  118. */
  119. private static boolean checkUtf8Byte(byte b) throws Exception {
  120. BitSet bitSet = convert2BitSet(b);
  121. return bitSet.get(0) && !bitSet.get(1);
  122. }
  123. /**
  124. * 检测bitSet中从开始有多少个连续的1
  125. *
  126. * @param bitSet
  127. * @return
  128. */
  129. private static int getCountOfSequential( BitSet bitSet) {
  130. int count = 0;
  131. for (int i = 0; i < BYTE_SIZE; i++) {
  132. if (bitSet.get(i)) {
  133. count++;
  134. } else {
  135. break;
  136. }
  137. }
  138. return count;
  139. }
  140. /**
  141. * 将整形转为BitSet
  142. *
  143. * @param code
  144. * @return
  145. */
  146. private static BitSet convert2BitSet(int code) {
  147. BitSet bitSet = new BitSet(BYTE_SIZE);
  148. for (int i = 0; i < BYTE_SIZE; i++) {
  149. int tmp3 = code >> (BYTE_SIZE - i - 1);
  150. int tmp2 = 0x1 & tmp3;
  151. if (tmp2 == 1) {
  152. bitSet.set(i);
  153. }
  154. }
  155. return bitSet;
  156. }
  157. /**
  158. * 将一指定编码的文件转换为另一编码的文件
  159. *
  160. * @param oldFullFileName
  161. * @param oldCharsetName
  162. * @param newFullFileName
  163. * @param newCharsetName
  164. */
  165. public static void convert(String oldFullFileName, String oldCharsetName, String newFullFileName, String newCharsetName) throws Exception {
  166. logger.info("the old file name is : {}, The oldCharsetName is : {}", oldFullFileName, oldCharsetName);
  167. logger.info("the new file name is : {}, The newCharsetName is : {}", newFullFileName, newCharsetName);
  168. StringBuffer content = new StringBuffer();
  169. BufferedReader bin = new BufferedReader(new InputStreamReader(new FileInputStream(oldFullFileName), oldCharsetName));
  170. String line;
  171. while ((line = bin.readLine()) != null) {
  172. content.append(line);
  173. content.append(System.getProperty("line.separator"));
  174. }
  175. newFullFileName = newFullFileName.replace("\\", "/");
  176. File dir = new File(newFullFileName.substring(0, newFullFileName.lastIndexOf("/")));
  177. if (!dir.exists()) {
  178. dir.mkdirs();
  179. }
  180. Writer out = new OutputStreamWriter(new FileOutputStream(newFullFileName), newCharsetName);
  181. out.write(content.toString());
  182. }
  183. }

修改后的代码:

  1. String fileEncode = EncodeUtil.getEncode(filePath,true);
  2. ZipFile zipFile = new ZipFile(new File(filePath), Charset.forName(fileEncode));

问题解决

再次感谢@guying4875

 

 

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/80488
推荐阅读
相关标签
  

闽ICP备14008679号