Alibaba EasyExcel 原理与源码解析

1.文件解析器

2. SAX XML解析标签处理Handler

3. 使用缓存 + SAX 事件机制,避免大数据量 OOM

4.核心类 ExcelAnalyserImpl

在构造函数中,会根据文件类型选择对应的处理类,源码如下:

/**
 * Selects the read context and read executor that match the workbook's Excel type.
 *
 * <p>An XLS container that holds an "EncryptedPackage" entry is read as XLSX: its decrypted
 * stream is handed to an {@code XlsxSaxAnalyser}, after which the stream and the POIFS file
 * system are closed.
 *
 * @param readWorkbook workbook settings (file or input stream, optional password)
 * @throws Exception propagated from opening or decrypting the workbook
 */
private void choiceExcelExecutor(ReadWorkbook readWorkbook) throws Exception {
        switch (ExcelTypeEnum.valueOf(readWorkbook)) {
        case XLS: {
            // Prefer the file when one was supplied; otherwise fall back to the input stream.
            POIFSFileSystem fileSystem = readWorkbook.getFile() != null
                ? new POIFSFileSystem(readWorkbook.getFile())
                : new POIFSFileSystem(readWorkbook.getInputStream());

            if (!fileSystem.getRoot().hasEntry("EncryptedPackage")) {
                // No encrypted package: register the password (if any) and read as XLS.
                if (readWorkbook.getPassword() != null) {
                    Biff8EncryptionKey.setCurrentUserPassword(readWorkbook.getPassword());
                }

                XlsReadContext xlsContext = new DefaultXlsReadContext(readWorkbook, ExcelTypeEnum.XLS);
                xlsContext.xlsReadWorkbookHolder().setPoifsFileSystem(fileSystem);
                this.analysisContext = xlsContext;
                this.excelReadExecutor = new XlsSaxAnalyser(xlsContext);
                break;
            }

            // The container wraps an encrypted package: decrypt it and read it as XLSX.
            InputStream decrypted = null;
            try {
                decrypted = DocumentFactoryHelper.getDecryptedStream(fileSystem.getRoot().getFileSystem(), readWorkbook.getPassword());
                DefaultXlsxReadContext encryptedContext = new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
                this.analysisContext = encryptedContext;
                this.excelReadExecutor = new XlsxSaxAnalyser(encryptedContext, decrypted);
            } finally {
                // Both resources are released whether or not the analyser was created.
                IOUtils.closeQuietly(decrypted);
                fileSystem.close();
            }
            return;
        }
        case XLSX: {
            DefaultXlsxReadContext xlsxContext = new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
            this.analysisContext = xlsxContext;
            // No decrypted stream in this case, so null is passed in its place.
            this.excelReadExecutor = new XlsxSaxAnalyser(xlsxContext, (InputStream)null);
            break;
        }
        case CSV: {
            CsvReadContext csvContext = new DefaultCsvReadContext(readWorkbook, ExcelTypeEnum.CSV);
            this.analysisContext = csvContext;
            this.excelReadExecutor = new CsvExcelReadExecutor(csvContext);
            break;
        }
        }

    }

/**
 * Records which sheets to read on the workbook holder and runs the read executor.
 *
 * <p>An {@code ExcelAnalysisStopException} raised by the executor is treated as a requested
 * stop and only logged at debug level. Any other failure triggers {@code finish()} before it
 * is propagated.
 *
 * @param readSheetList sheets to read; must not be empty unless {@code readAll} is true
 * @param readAll       whether every sheet should be read
 * @throws IllegalArgumentException if {@code readAll} is false and no sheet is given
 * @throws ExcelAnalysisException   wrapping any non-runtime failure
 */
public void analysis(List<ReadSheet> readSheetList, Boolean readAll) {
        try {
            boolean nothingToRead = !readAll && CollectionUtils.isEmpty(readSheetList);
            if (nothingToRead) {
                throw new IllegalArgumentException("Specify at least one read sheet.");
            }

            this.analysisContext.readWorkbookHolder().setParameterSheetDataList(readSheetList);
            this.analysisContext.readWorkbookHolder().setReadAll(readAll);

            try {
                this.excelReadExecutor.execute();
            } catch (ExcelAnalysisStopException stopRequested) {
                // A stop request ends the read early without being reported as an error.
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Custom stop!");
                }
            }
        } catch (RuntimeException runtimeFailure) {
            this.finish();
            throw runtimeFailure;
        } catch (Throwable otherFailure) {
            this.finish();
            throw new ExcelAnalysisException(otherFailure);
        }
    }

5. 核心类 AnalysisContext 实现类 AnalysisContextImpl

/**
 * Builds the analysis context: creates the workbook holder for the given Excel type, makes it
 * the current read holder, and installs a {@code DefaultAnalysisEventProcessor}.
 *
 * @param readWorkbook    workbook settings; must not be null
 * @param actualExcelType Excel type that decides which workbook holder is created
 * @throws IllegalArgumentException if {@code readWorkbook} is null
 */
public AnalysisContextImpl(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
        if (readWorkbook == null) {
            throw new IllegalArgumentException("Workbook argument cannot be null");
        }

        switch (actualExcelType) {
        case XLS:
            this.readWorkbookHolder = new XlsReadWorkbookHolder(readWorkbook);
            break;
        case XLSX:
            this.readWorkbookHolder = new XlsxReadWorkbookHolder(readWorkbook);
            break;
        case CSV:
            this.readWorkbookHolder = new CsvReadWorkbookHolder(readWorkbook);
            break;
        default:
            // Any other type creates no holder here.
            break;
        }

        this.currentReadHolder = this.readWorkbookHolder;
        this.analysisEventProcessor = new DefaultAnalysisEventProcessor();

        if (log.isDebugEnabled()) {
            log.debug("Initialization 'AnalysisContextImpl' complete");
        }
    }

6.缓存文件

// https://learn.microsoft.com/zh-cn/office/open-xml/working-with-the-shared-string-table
// 使用共享字符串表
/**
 * Opens the workbook as an {@code OPCPackage}.
 *
 * <p>Resolution order: the holder's file when there is no decrypted stream; otherwise, when the
 * holder mandates stream usage, the decrypted stream or the holder's input stream; otherwise
 * the content is first copied to a temporary ".xlsx" file, which is opened read-only.
 *
 * @param xlsxReadWorkbookHolder holder providing the file, input stream and read options
 * @param decryptedStream        decrypted workbook content, or null if there is none
 * @return the opened package
 * @throws Exception propagated from opening the package or writing the temporary file
 */
private OPCPackage readOpcPackage(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, InputStream decryptedStream) throws Exception {
        boolean hasDecryptedStream = decryptedStream != null;

        // A file with no decrypted stream can be opened directly.
        if (!hasDecryptedStream && xlsxReadWorkbookHolder.getFile() != null) {
            return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
        }

        // The holder insists on streams: open from whichever stream is available.
        if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
            if (hasDecryptedStream) {
                return OPCPackage.open(decryptedStream);
            }
            return OPCPackage.open(xlsxReadWorkbookHolder.getInputStream());
        }

        // Otherwise copy the content to a uniquely named temporary file and open that copy.
        File cacheLocation = FileUtils.createCacheTmpFile();
        xlsxReadWorkbookHolder.setTempFile(cacheLocation);
        File workbookCopy = new File(cacheLocation.getPath(), UUID.randomUUID().toString() + ".xlsx");
        if (hasDecryptedStream) {
            FileUtils.writeToFile(workbookCopy, decryptedStream, false);
        } else {
            FileUtils.writeToFile(workbookCopy, xlsxReadWorkbookHolder.getInputStream(), xlsxReadWorkbookHolder.getAutoCloseStream());
        }

        return OPCPackage.open(workbookCopy, PackageAccess.READ);
    }
// 缓存的临时文件
// tempFilePrefix = System.getProperty("java.io.tmpdir") + File.separator + UUID.randomUUID().toString() + File.separator;
        // poiFilesPath = tempFilePrefix + "poifiles" + File.separator;
        // cachePath = tempFilePrefix + "excache" + File.separator;

调用 read() 方法时,最终会调用 ExcelReadExecutor#execute()。在实例化 ExcelReadExecutor 之前会先将文件保存起来,以 xlsx 为例,代码如上所示。

7.解析excel类核心源码

/**
 * Parses one XML stream with SAX, sending the parse events to {@code handler}.
 *
 * <p>The parser factory is created from the factory class name configured on the workbook
 * holder when one is set; otherwise the JAXP default factory is used. DOCTYPE declarations and
 * external general/parameter entities are switched off on a best-effort basis. The stream is
 * closed exactly once, in the {@code finally} block, whether or not parsing succeeds.
 *
 * @param inputStream XML content to parse; closed by this method
 * @param handler     SAX content handler that receives the parse events
 * @throws ExcelAnalysisException if the parser cannot be configured, parsing fails, or the
 *                                stream cannot be closed (the I/O error is kept as the cause)
 */
private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
        InputSource inputSource = new InputSource(inputStream);

        try {
            String xlsxSAXParserFactoryName = this.xlsxReadContext.xlsxReadWorkbookHolder().getSaxParserFactoryName();
            SAXParserFactory saxFactory;
            if (StringUtils.isEmpty(xlsxSAXParserFactoryName)) {
                saxFactory = SAXParserFactory.newInstance();
            } else {
                saxFactory = SAXParserFactory.newInstance(xlsxSAXParserFactoryName, (ClassLoader)null);
            }

            trySetFeature(saxFactory, "http://apache.org/xml/features/disallow-doctype-decl", true);
            trySetFeature(saxFactory, "http://xml.org/sax/features/external-general-entities", false);
            trySetFeature(saxFactory, "http://xml.org/sax/features/external-parameter-entities", false);

            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setContentHandler(handler);
            xmlReader.parse(inputSource);
        } catch (ParserConfigurationException | SAXException | IOException e) {
            throw new ExcelAnalysisException(e);
        } finally {
            // Single close point: the stream is released on both the success and failure paths.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    // Keep the underlying I/O error as the cause so the failure can be diagnosed.
                    throw new ExcelAnalysisException("Can not close 'inputStream'!", e);
                }
            }
        }

    }

    /**
     * Sets a parser feature, deliberately ignoring any failure so that a factory which does not
     * support the feature can still be used.
     */
    private static void trySetFeature(SAXParserFactory saxFactory, String feature, boolean enabled) {
        try {
            saxFactory.setFeature(feature, enabled);
        } catch (Throwable ignored) {
            // Best effort: the feature is optional hardening.
        }
    }

    /**
     * Reads each entry of the sheet list that {@code SheetUtils.match} resolves to a sheet.
     *
     * <p>For every matched sheet: it becomes the current sheet of the read context, its XML
     * stream is parsed with a new {@code XlsxRowHandler}, its comments are read, and the event
     * processor is told that the sheet has ended. Entries that do not match are skipped.
     */
    public void execute() {
        for (Object candidate : this.sheetList) {
            ReadSheet matchedSheet = SheetUtils.match((ReadSheet)candidate, this.xlsxReadContext);
            if (matchedSheet == null) {
                continue;
            }

            this.xlsxReadContext.currentSheet(matchedSheet);

            // The sheet's XML stream is looked up by sheet number.
            InputStream sheetStream = (InputStream)this.sheetMap.get(matchedSheet.getSheetNo());
            this.parseXmlSource(sheetStream, new XlsxRowHandler(this.xlsxReadContext));

            this.readComments(matchedSheet);
            this.xlsxReadContext.analysisEventProcessor().endSheet(this.xlsxReadContext);
        }

    }

最终会调用 DefaultAnalysisEventProcessor#endRow() -> this.dealData(analysisContext),其中会调用 ReadListener#invoke() 处理每一行数据。@ExcelProperty 注解的解析可参照 ExcelHeadProperty;将行数据转换为实体类的源码位于 ModelBuildEventListener 中。

/**
 * Dispatches the current row to every registered read listener.
 *
 * <p>Rows whose index is below the sheet's head row number are head rows and go to
 * {@code invokeHead}; the last of them also triggers {@code buildHead}. All other rows go to
 * {@code invoke}. If a listener throws, {@code onException} is called and the remaining
 * listeners are skipped for this row.
 *
 * @param analysisContext context holding the current row, sheet and listeners
 * @throws ExcelAnalysisStopException if a listener's {@code hasNext} returns false
 */
private void dealData(AnalysisContext analysisContext) {
        ReadRowHolder rowHolder = analysisContext.readRowHolder();
        Map<Integer, ReadCellData<?>> cells = rowHolder.getCellMap();
        rowHolder.setCurrentRowAnalysisResult(cells);

        int rowIndex = rowHolder.getRowIndex();
        int headRowNumber = analysisContext.readSheetHolder().getHeadRowNumber();
        boolean headRow = rowIndex < headRowNumber;

        // Build the head once the last head row has been reached.
        if (headRow && headRowNumber == rowIndex + 1) {
            this.buildHead(analysisContext, cells);
        }

        for (Object registered : analysisContext.currentReadHolder().readListenerList()) {
            ReadListener listener = (ReadListener)registered;

            try {
                if (headRow) {
                    listener.invokeHead(cells, analysisContext);
                } else {
                    listener.invoke(rowHolder.getCurrentRowAnalysisResult(), analysisContext);
                }
            } catch (Exception failure) {
                this.onException(analysisContext, failure);
                break;
            }

            // A listener that reports no more data stops the whole read.
            if (!listener.hasNext(analysisContext)) {
                throw new ExcelAnalysisStopException();
            }
        }

    }

8. 常见问题

参考官方文档: https://easyexcel.opensource.alibaba.com/qa/read