Alibaba EasyExcel 原理与源码解析
1.文件解析器
2. SAX XML解析标签处理Handler
3. 使用缓存 + SAX 事件机制,避免大数据量 OOM
4. 核心类 ExcelAnalyserImpl
构造函数中会根据文件类型(XLS / XLSX / CSV)选择对应的读取上下文和执行器,源码如下:
/**
 * Selects the read context and read executor that match the workbook's type.
 *
 * <p>XLS input is opened as a POIFS file system first. If that file system contains an
 * "EncryptedPackage" entry, the decrypted stream is handed to the XLSX analyser instead and
 * the method returns early after closing both the stream and the file system.
 *
 * @param readWorkbook the workbook description (file or input stream, optional password)
 * @throws Exception if opening or decrypting the underlying file fails
 */
private void choiceExcelExecutor(ReadWorkbook readWorkbook) throws Exception {
    ExcelTypeEnum detectedType = ExcelTypeEnum.valueOf(readWorkbook);
    switch (detectedType) {
        case XLS: {
            POIFSFileSystem fileSystem = readWorkbook.getFile() != null
                ? new POIFSFileSystem(readWorkbook.getFile())
                : new POIFSFileSystem(readWorkbook.getInputStream());

            if (fileSystem.getRoot().hasEntry("EncryptedPackage")) {
                // The container carries an encrypted package: read it through the XLSX path.
                InputStream decrypted = null;
                try {
                    decrypted = DocumentFactoryHelper.getDecryptedStream(
                        fileSystem.getRoot().getFileSystem(), readWorkbook.getPassword());
                    DefaultXlsxReadContext encryptedContext =
                        new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
                    this.analysisContext = encryptedContext;
                    this.excelReadExecutor = new XlsxSaxAnalyser(encryptedContext, decrypted);
                } finally {
                    IOUtils.closeQuietly(decrypted);
                    fileSystem.close();
                }
                return;
            }

            if (readWorkbook.getPassword() != null) {
                Biff8EncryptionKey.setCurrentUserPassword(readWorkbook.getPassword());
            }
            XlsReadContext xlsContext = new DefaultXlsReadContext(readWorkbook, ExcelTypeEnum.XLS);
            xlsContext.xlsReadWorkbookHolder().setPoifsFileSystem(fileSystem);
            this.analysisContext = xlsContext;
            this.excelReadExecutor = new XlsSaxAnalyser(xlsContext);
            break;
        }
        case XLSX: {
            DefaultXlsxReadContext xlsxContext =
                new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
            this.analysisContext = xlsxContext;
            // No decrypted stream here; the analyser receives null for it.
            this.excelReadExecutor = new XlsxSaxAnalyser(xlsxContext, (InputStream) null);
            break;
        }
        case CSV: {
            CsvReadContext csvContext = new DefaultCsvReadContext(readWorkbook, ExcelTypeEnum.CSV);
            this.analysisContext = csvContext;
            this.excelReadExecutor = new CsvExcelReadExecutor(csvContext);
            break;
        }
        default:
            break;
    }
}
/**
 * Runs the configured read executor over the requested sheets.
 *
 * <p>An {@code ExcelAnalysisStopException} raised by the executor is treated as a deliberate
 * stop and only logged at debug level. Any other failure triggers {@code finish()} before it
 * is propagated; non-runtime throwables are wrapped in an {@code ExcelAnalysisException}.
 *
 * @param readSheetList the sheets to read; must be non-empty unless {@code readAll} is true
 * @param readAll whether every sheet should be read
 */
public void analysis(List<ReadSheet> readSheetList, Boolean readAll) {
    try {
        // Guard: with readAll off, the caller has to name at least one sheet.
        if (!readAll && CollectionUtils.isEmpty(readSheetList)) {
            throw new IllegalArgumentException("Specify at least one read sheet.");
        }

        this.analysisContext.readWorkbookHolder().setParameterSheetDataList(readSheetList);
        this.analysisContext.readWorkbookHolder().setReadAll(readAll);

        try {
            this.excelReadExecutor.execute();
        } catch (ExcelAnalysisStopException stopSignal) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Custom stop!");
            }
        }
    } catch (RuntimeException runtimeFailure) {
        this.finish();
        throw runtimeFailure;
    } catch (Throwable otherFailure) {
        this.finish();
        throw new ExcelAnalysisException(otherFailure);
    }
}
5. 核心类 AnalysisContext 实现类 AnalysisContextImpl
/**
 * Creates the analysis context and the workbook holder that matches the actual file type.
 *
 * @param readWorkbook the workbook description; must not be null
 * @param actualExcelType the detected file type, used to pick the workbook holder
 * @throws IllegalArgumentException if {@code readWorkbook} is null
 */
public AnalysisContextImpl(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
    if (readWorkbook == null) {
        throw new IllegalArgumentException("Workbook argument cannot be null");
    }

    switch (actualExcelType) {
        case XLS:
            this.readWorkbookHolder = new XlsReadWorkbookHolder(readWorkbook);
            break;
        case XLSX:
            this.readWorkbookHolder = new XlsxReadWorkbookHolder(readWorkbook);
            break;
        case CSV:
            this.readWorkbookHolder = new CsvReadWorkbookHolder(readWorkbook);
            break;
        default:
            // Any other type leaves the holder unassigned.
            break;
    }

    // The workbook holder is the initial "current" holder.
    this.currentReadHolder = this.readWorkbookHolder;
    this.analysisEventProcessor = new DefaultAnalysisEventProcessor();

    if (log.isDebugEnabled()) {
        log.debug("Initialization 'AnalysisContextImpl' complete");
    }
}
6.缓存文件
// https://learn.microsoft.com/zh-cn/office/open-xml/working-with-the-shared-string-table
// 使用共享字符串表
/**
 * Opens the OPC package for an XLSX workbook.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>no decrypted stream and a file is available: open the file directly;</li>
 *   <li>the holder mandates stream usage: open the decrypted stream if present, otherwise the
 *       holder's input stream;</li>
 *   <li>otherwise: copy the stream into a newly created temp file and open that read-only.</li>
 * </ol>
 *
 * @param xlsxReadWorkbookHolder holder supplying the file, input stream and read options
 * @param decryptedStream already-decrypted content, or null when there is none
 * @return the opened package
 * @throws Exception if the package cannot be opened or the temp file cannot be written
 */
private OPCPackage readOpcPackage(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, InputStream decryptedStream) throws Exception {
    boolean hasDecryptedStream = decryptedStream != null;

    if (!hasDecryptedStream && xlsxReadWorkbookHolder.getFile() != null) {
        return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
    }

    if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
        if (hasDecryptedStream) {
            return OPCPackage.open(decryptedStream);
        }
        return OPCPackage.open(xlsxReadWorkbookHolder.getInputStream());
    }

    // Spool the content to disk; the cache path is recorded on the holder.
    File cacheDir = FileUtils.createCacheTmpFile();
    xlsxReadWorkbookHolder.setTempFile(cacheDir);
    File spooledWorkbook = new File(cacheDir.getPath(), UUID.randomUUID().toString() + ".xlsx");

    if (hasDecryptedStream) {
        FileUtils.writeToFile(spooledWorkbook, decryptedStream, false);
    } else {
        FileUtils.writeToFile(spooledWorkbook, xlsxReadWorkbookHolder.getInputStream(), xlsxReadWorkbookHolder.getAutoCloseStream());
    }
    return OPCPackage.open(spooledWorkbook, PackageAccess.READ);
}
// 缓存的临时文件
// tempFilePrefix = System.getProperty("java.io.tmpdir") + File.separator + UUID.randomUUID().toString() + File.separator;
// poiFilesPath = tempFilePrefix + "poifiles" + File.separator;
// cachePath = tempFilePrefix + "excache" + File.separator;
调用 read() 方法时,最终会调用 ExcelReadExecutor#execute()。在实例化 ExcelReadExecutor 之前,会先将文件保存起来;以 xlsx 为例,代码如上所示。
7.解析excel类核心源码
/**
 * Parses one XML stream with a SAX parser and feeds its events to {@code handler}.
 *
 * <p>The parser factory is taken from the workbook holder's configured factory name when one
 * is set, otherwise the platform default is used. DOCTYPE declarations and external
 * general/parameter entities are disabled on a best-effort basis before parsing.
 *
 * <p>The stream is always closed exactly once, in the {@code finally} block.
 *
 * @param inputStream the XML content to parse; closed by this method
 * @param handler the SAX content handler receiving the parse events
 * @throws ExcelAnalysisException if the parser cannot be configured, parsing fails, or the
 *     stream cannot be closed (the underlying exception is kept as the cause)
 */
private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
    InputSource inputSource = new InputSource(inputStream);
    try {
        String factoryName = this.xlsxReadContext.xlsxReadWorkbookHolder().getSaxParserFactoryName();
        SAXParserFactory saxFactory;
        if (StringUtils.isEmpty(factoryName)) {
            saxFactory = SAXParserFactory.newInstance();
        } else {
            saxFactory = SAXParserFactory.newInstance(factoryName, (ClassLoader) null);
        }

        // XXE hardening; each feature is optional because not every parser supports it.
        setFeatureQuietly(saxFactory, "http://apache.org/xml/features/disallow-doctype-decl", true);
        setFeatureQuietly(saxFactory, "http://xml.org/sax/features/external-general-entities", false);
        setFeatureQuietly(saxFactory, "http://xml.org/sax/features/external-parameter-entities", false);

        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(inputSource);
    } catch (ParserConfigurationException | SAXException | IOException e) {
        throw new ExcelAnalysisException(e);
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException e) {
                // Keep the I/O failure as the cause so the reason for the close error is not lost.
                throw new ExcelAnalysisException("Can not close 'inputStream'!", e);
            }
        }
    }
}

/**
 * Sets a parser feature, deliberately ignoring any failure so that parsers which do not
 * recognise or support the feature can still be used.
 */
private static void setFeatureQuietly(SAXParserFactory saxFactory, String feature, boolean value) {
    try {
        saxFactory.setFeature(feature, value);
    } catch (Throwable ignored) {
        // Best effort: Throwable (not just Exception) is caught on purpose, matching the
        // original behavior, so that an incompatible parser implementation cannot abort the read.
    }
}
/**
 * Reads every sheet in {@code sheetList} that matches the read request.
 *
 * <p>For each matched sheet: marks it as the current sheet, parses its XML with a fresh
 * {@code XlsxRowHandler}, reads its comments, then signals end-of-sheet to the event processor.
 * Sheets for which {@code SheetUtils.match} returns null are skipped.
 */
public void execute() {
    for (Object candidate : this.sheetList) {
        ReadSheet matchedSheet = SheetUtils.match((ReadSheet) candidate, this.xlsxReadContext);
        if (matchedSheet == null) {
            continue;
        }

        this.xlsxReadContext.currentSheet(matchedSheet);

        InputStream sheetXml = (InputStream) this.sheetMap.get(matchedSheet.getSheetNo());
        this.parseXmlSource(sheetXml, new XlsxRowHandler(this.xlsxReadContext));

        this.readComments(matchedSheet);
        this.xlsxReadContext.analysisEventProcessor().endSheet(this.xlsxReadContext);
    }
}
最终会调用 DefaultAnalysisEventProcessor#endRow() -> this.dealData(analysisContext),其中会调用 ReadListener#invoke() 处理每一行数据。@ExcelProperty 的解析可参照 ExcelHeadProperty;将数据转换为实体类的源码位于 ModelBuildEventListener 中。
/**
 * Hands the current row to every registered read listener.
 *
 * <p>A row whose index is below the sheet's head-row count is a head row and goes to
 * {@code invokeHead}; every other row is data and goes to {@code invoke}. The head is built
 * once, on the last head row. If a listener throws, {@code onException} is called and the
 * remaining listeners are not notified for this row.
 *
 * @param analysisContext the context holding the current row and sheet state
 * @throws ExcelAnalysisStopException if a listener reports that it wants no further rows
 */
private void dealData(AnalysisContext analysisContext) {
    ReadRowHolder rowHolder = analysisContext.readRowHolder();
    Map<Integer, ReadCellData<?>> cells = rowHolder.getCellMap();
    rowHolder.setCurrentRowAnalysisResult(cells);

    int rowIndex = rowHolder.getRowIndex();
    int headRowCount = analysisContext.readSheetHolder().getHeadRowNumber();
    boolean headRow = rowIndex < headRowCount;

    // Only the final head row triggers head construction.
    if (headRow && rowIndex + 1 == headRowCount) {
        this.buildHead(analysisContext, cells);
    }

    for (Object registered : analysisContext.currentReadHolder().readListenerList()) {
        ReadListener listener = (ReadListener) registered;
        try {
            if (headRow) {
                listener.invokeHead(cells, analysisContext);
            } else {
                listener.invoke(rowHolder.getCurrentRowAnalysisResult(), analysisContext);
            }
        } catch (Exception listenerFailure) {
            this.onException(analysisContext, listenerFailure);
            break;
        }

        if (!listener.hasNext(analysisContext)) {
            throw new ExcelAnalysisStopException();
        }
    }
}