import net.sf.sevenzipjbinding.*;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
import net.sf.sevenzipjbinding.util.ByteArrayStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.txt.UniversalEncodingDetector;
// Shared Commons Compress factory; unpackZip uses it to open an archive stream over a buffered input.
private static final ArchiveStreamFactory FACTORY = new ArchiveStreamFactory();
// for Zip
/**
 * Walks the entries of the archive contained in {@code resource} and passes every readable,
 * non-directory entry to {@code doSomeWork} together with the entry's bare file name
 * (the last segment of the entry path).
 *
 * <p>{@link IOException} and {@link ArchiveException} are logged and not rethrown, so the
 * method returns normally on those failures; entries handled before the failure stay handled.
 *
 * @param resource source of the archive bytes; its input stream is opened and closed here
 */
private void unpackZip(Resource resource) {
    try (InputStream bufferedInputStream = new BufferedInputStream(resource.getInputStream());
         ArchiveInputStream archiveInputStream = FACTORY.createArchiveInputStream(bufferedInputStream)) {
        ArchiveEntry archiveEntry;
        while ((archiveEntry = archiveInputStream.getNextEntry()) != null) {
            // The stream is created through the generic factory overload, so an entry is not
            // guaranteed to be a zip entry. Only zip entries expose the raw name bytes that
            // convertToText needs; anything else falls back to the already-decoded name.
            byte[] rawName = archiveEntry instanceof ZipArchiveEntry
                    ? ((ZipArchiveEntry) archiveEntry).getRawName()
                    : null;
            String entryPath = rawName != null ? convertToText(rawName) : archiveEntry.getName();
            log.info("archiveEntry is {}", entryPath);
            if (!archiveEntry.isDirectory() && archiveInputStream.canReadEntryData(archiveEntry)) {
                // FilenameUtils.getName splits on both '/' and '\\' and does not go through
                // java.nio.file.Path, so names that are not valid paths on the host file system
                // no longer abort the loop with an unchecked InvalidPathException.
                // NOTE(review): the live archive stream is handed to doSomeWork; presumably it
                // must not close it, otherwise the next getNextEntry() call fails - confirm.
                doSomeWork(new InputStreamResource(archiveInputStream), FilenameUtils.getName(entryPath));
            }
        }
    } catch (IOException | ArchiveException e) {
        log.error("Произошла ошибка при попытке чтения вложенного архива", e);
    }
}
/**
 * Decodes raw archive-entry name bytes into a string.
 *
 * <p>The bytes are decoded as UTF-8 only when the Tika {@link UniversalEncodingDetector}
 * reports UTF-8. Every other outcome (a different charset, no result, or a detection
 * failure) falls back to IBM866.
 *
 * @param name raw name bytes as stored in the archive
 * @return the decoded entry name
 */
private String convertToText(byte[] name) {
    Charset detectedCharset = IBM_866;
    try {
        detectedCharset = new UniversalEncodingDetector().detect(new ByteArrayInputStream(name), new Metadata());
    } catch (IOException e) {
        // Keep the cause in the log; the IBM866 default assigned above still applies.
        log.error("Error on charset detecting", e);
    }
    // Only a positive UTF-8 detection switches the charset; anything else stays IBM866.
    Charset decodingCharset = UTF_8.equals(detectedCharset) ? UTF_8 : IBM_866;
    return new String(name, decodingCharset);
}
// for Rar
/**
 * Opens the archive contained in {@code resource} with 7-Zip-JBinding, extracts every
 * non-folder item into memory and passes its bytes to {@code doSomeWork} together with the
 * item's bare file name (the last segment of the item path).
 *
 * <p>An item whose extraction result is not {@link ExtractOperationResult#OK} is logged and
 * skipped. Any exception raised while opening or iterating the archive is logged with its
 * stack trace and not rethrown.
 *
 * @param resource source of the archive bytes, read via {@code getCopyResourceStream}
 */
private void unpackWith7Zip(Resource resource) {
    try (IInStream inStream = new ByteArrayStream(getCopyResourceStream(resource), false);
         IInArchive inArchive = SevenZip.openInArchive(null, inStream)) {
        for (ISimpleInArchiveItem item : inArchive.getSimpleInterface().getArchiveItems()) {
            log.info("archiveEntry is {}", item.getPath());
            if (item.isFolder()) {
                continue;
            }
            // In-memory buffer: closing it has no effect, so no try-with-resources is needed.
            ByteArrayOutputStream extractedBytes = new ByteArrayOutputStream();
            ExtractOperationResult result = item.extractSlow(data -> {
                // The three-argument write on ByteArrayOutputStream declares no IOException,
                // so no catch block is required around it.
                extractedBytes.write(data, 0, data.length);
                return data.length;
            });
            if (result == ExtractOperationResult.OK) {
                // FilenameUtils.getName splits on both '/' and '\\' and avoids the
                // InvalidPathException that Paths.get can raise for names the host file
                // system rejects, which previously aborted the whole archive.
                doSomeWork(new ByteArrayResource(extractedBytes.toByteArray()), FilenameUtils.getName(item.getPath()));
            } else {
                log.error("Error unpack archive {}, with type {} , status: {}", item.getPath(), inArchive.getArchiveFormat(), result);
            }
        }
    } catch (Exception e) {
        // Pass the exception so the cause and stack trace are not lost.
        log.error("Error extracting archive", e);
    }
}