Documents4j 문서 포맷 변환기(excel->html)
Documents4j 문서 포맷 변환기(excel->html)
먼저 이 문서를 보기 전에 아래 사이트의 내용을 직접 실습해 보거나 분석해 보시기 바랍니다.
그래야 이 문서를 이해하기가 수월할 것입니다.
http://idlecomputer.tistory.com/248
먼저 기본 로직은 아래와 같습니다.
아래 로직중
1. temp1은 Documents4j 라이브러리가 만드는 Input 파라미터입니다.
2. temp2는 Documents4j 라이브러리가 만드는 Output 파라미터입니다.
3. 압축에는 7z을 사용하므로 7-Zip이 설치되어 있어야 합니다. (VBScript에 7z.exe의 설치 경로를 적어 줍니다.)
먼저 HTML 을 위해서 오픈 소스 수정이 필요 합니다.
먼저 Maven 프로젝트 소스를 Import 해보면 https://github.com/documents4j/documents4j/releases (소스 다운로드 Site)
먼저 HTML 포맷을 추가해 줘야 합니다.
com.documents4j.api.DocumentType 클래스에 아래와 같이 HTML 부분을 추가해 줍니다.
package com.documents4j.api;

import java.io.Serializable;

/**
 * Represents an immutable document MIME type.
 *
 * <p>Two document types are equal when both their type and subtype are equal. The natural
 * ordering compares the {@code type/subtype} string representations.
 */
public class DocumentType implements Serializable, Comparable<DocumentType> {

    public static final DocumentType MS_WORD = new DocumentType(Value.APPLICATION, Value.WORD_ANY);
    public static final DocumentType RTF = new DocumentType(Value.APPLICATION, Value.RTF);
    public static final DocumentType DOCX = new DocumentType(Value.APPLICATION, Value.DOCX);
    public static final DocumentType DOC = new DocumentType(Value.APPLICATION, Value.DOC);
    public static final DocumentType MS_EXCEL = new DocumentType(Value.APPLICATION, Value.EXCEL_ANY);
    public static final DocumentType XLSX = new DocumentType(Value.APPLICATION, Value.XLSX);
    public static final DocumentType XLS = new DocumentType(Value.APPLICATION, Value.XLS);
    public static final DocumentType ODS = new DocumentType(Value.APPLICATION, Value.ODS);
    public static final DocumentType CSV = new DocumentType(Value.TEXT, Value.CSV);
    public static final DocumentType XML = new DocumentType(Value.APPLICATION, Value.XML);
    public static final DocumentType MHTML = new DocumentType(Value.APPLICATION, Value.MHTML);
    public static final DocumentType PDF = new DocumentType(Value.APPLICATION, Value.PDF);
    // Added for the Excel -> HTML conversion; resolves to "application/htm".
    public static final DocumentType HTML = new DocumentType(Value.APPLICATION, Value.HTML);
    public static final DocumentType PDFA = new DocumentType(Value.APPLICATION, Value.PDFA);
    public static final DocumentType TEXT = new DocumentType(Value.TEXT, Value.PLAIN);

    private final String type;
    private final String subtype;

    /**
     * Creates a new document type.
     *
     * @param type    The MIME type's type name.
     * @param subtype The MIME type's subtype name.
     * @throws NullPointerException If either argument is {@code null}.
     */
    public DocumentType(String type, String subtype) {
        if (type == null || subtype == null) {
            throw new NullPointerException("Type elements must not be null");
        }
        this.type = type;
        this.subtype = subtype;
    }

    /**
     * Creates a new document type.
     *
     * @param fullType The MIME type's type name and subtype name, separated by a {@code /}.
     * @throws NullPointerException     If {@code fullType} is {@code null}.
     * @throws IllegalArgumentException If {@code fullType} contains no {@code /} or nothing follows it.
     */
    public DocumentType(String fullType) {
        if (fullType == null) {
            throw new NullPointerException("Type elements must not be null");
        }
        int separator = fullType.indexOf('/');
        if (separator == -1 || fullType.length() == separator + 1) {
            throw new IllegalArgumentException("Not a legal */* document type: " + fullType);
        }
        type = fullType.substring(0, separator);
        subtype = fullType.substring(separator + 1);
    }

    /**
     * @return The type name, i.e. the part before the {@code /}.
     */
    public String getType() {
        return type;
    }

    /**
     * @return The subtype name, i.e. the part after the first {@code /}.
     */
    public String getSubtype() {
        return subtype;
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) return true;
        if (other == null || getClass() != other.getClass()) return false;
        DocumentType documentType = (DocumentType) other;
        return subtype.equals(documentType.subtype) && type.equals(documentType.type);
    }

    @Override
    public int hashCode() {
        return 31 * type.hashCode() + subtype.hashCode();
    }

    /**
     * Orders document types lexicographically by their {@link #toString()} representation.
     */
    @Override
    public int compareTo(DocumentType other) {
        return toString().compareTo(other.toString());
    }

    @Override
    public String toString() {
        return type + "/" + subtype;
    }

    /**
     * A holder type for type and subtype names of known {@link com.documents4j.api.DocumentType}s.
     */
    public static class Value {

        public static final String APPLICATION = "application";
        public static final String TEXT = "text";

        public static final String DOC = "msword";
        public static final String DOCX = "vnd.openxmlformats-officedocument.wordprocessingml.document";
        public static final String WORD_ANY = "vnd.com.documents4j.any-msword";

        public static final String XLS = "vnd.ms-excel";
        public static final String XLSX = "vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        public static final String EXCEL_ANY = "vnd.com.documents4j.any-msexcel";
        public static final String ODS = "vnd.oasis.opendocument.spreadsheet";

        public static final String PDF = "pdf";
        // NOTE(review): the registered MIME type for HTML is "text/html". This value is kept as-is
        // because MicrosoftExcelBridge builds its conversion list from APPLICATION + "/" + HTML.
        public static final String HTML = "htm";
        public static final String PDFA = "vnd.com.documents4j.pdf-a";

        public static final String RTF = "rtf";

        public static final String XML = "xml";
        public static final String MHTML = "x-mimearchive";

        public static final String CSV = "csv";
        public static final String PLAIN = "plain";

        private Value() {
            throw new UnsupportedOperationException();
        }
    }
}
그리고 MicrosoftExcelFormat 열거형에 아래와 같이 HTML 항목을 추가해 줍니다.
package com.documents4j.conversion.msoffice; import com.documents4j.api.DocumentType; /** * An enumeration of MS Excel file * format encodings. */ enum MicrosoftExcelFormat implements MicrosoftOfficeFormat { PDF("999", "pdf", DocumentType.PDF), XLSX("51", "xlsx", DocumentType.XLSX), XLS("43", "xls", DocumentType.XLS), HTML("44", "htm", DocumentType.HTML), ODS("60", "ods", DocumentType.ODS), CSV("6", "csv", DocumentType.CSV), XML("46", "xml", DocumentType.XML), TEXT("42", "txt", DocumentType.TEXT); private final String value; private final DocumentType documentType; private final String fileExtension; private MicrosoftExcelFormat(String value, String fileExtension, DocumentType documentType) { this.value = value; this.fileExtension = fileExtension; this.documentType = documentType; } public static MicrosoftExcelFormat of(DocumentType documentType) { for (MicrosoftExcelFormat enumeration : MicrosoftExcelFormat.values()) { if (enumeration.documentType.equals(documentType)) { return enumeration; } } throw new IllegalArgumentException("Unknown document type: " + documentType); } @Override public String getValue() { return value; } @Override public String getFileExtension() { return fileExtension; } }
위에서 HTML의 값을 44로 설정한 이유는
Excel VBA에서 HTML 저장 형식을 나타내는 상수(xlHtml)의 값이 44이기 때문입니다.
그리고 나서
package com.documents4j.conversion.msoffice; import com.documents4j.api.DocumentType; import com.documents4j.conversion.ViableConversion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import static com.documents4j.api.DocumentType.Value.*; /** * A converter back-end for MS Excel. */ @ViableConversion( from = {APPLICATION + "/" + XLS, APPLICATION + "/" + XLSX, APPLICATION + "/" + EXCEL_ANY, APPLICATION + "/" + ODS}, to = {APPLICATION + "/" + PDF, APPLICATION + "/" + XLS, APPLICATION + "/" + XLSX, APPLICATION + "/" + HTML, APPLICATION + "/" + ODS, TEXT + "/" + CSV, TEXT + "/" + PLAIN, APPLICATION + "/" + XML}) public class MicrosoftExcelBridge extends AbstractMicrosoftOfficeBridge { private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftExcelBridge.class); private static final Object EXCEL_LOCK = new Object(); /** * Other than MS Word, MS Excel does not behave well under stress. Thus, MS Excel must not be asked to convert * more than one document at a time. 
*/ private static final Semaphore CONVERSION_LOCK = new Semaphore(1, true); public MicrosoftExcelBridge(File baseFolder, long processTimeout, TimeUnit processTimeoutUnit) { super(baseFolder, processTimeout, processTimeoutUnit, MicrosoftExcelScript.CONVERSION); startUp(); } private void startUp() { synchronized (EXCEL_LOCK) { tryStart(MicrosoftExcelScript.STARTUP); LOGGER.info("From-Microsoft-Excel-Converter was started successfully"); } } @Override public void shutDown() { synchronized (EXCEL_LOCK) { tryStop(MicrosoftExcelScript.SHUTDOWN); LOGGER.info("From-Microsoft-Excel-Converter was shut down successfully"); } } @Override protected MicrosoftOfficeTargetNameCorrector targetNameCorrector(File target, String fileExtension) { return new MicrosoftExcelTargetNameCorrectorAndLockManager(target, fileExtension, CONVERSION_LOCK, LOGGER); } @Override protected MicrosoftOfficeFormat formatOf(DocumentType documentType) { return MicrosoftExcelFormat.of(documentType); } @Override protected MicrosoftOfficeScript getAssertionScript() { return MicrosoftExcelScript.ASSERTION; } @Override protected Logger getLogger() { return LOGGER; } }
위와 같이 MicrosoftExcelBridge의 @ViableConversion 변환 대상(to)에 HTML 항목을 추가해 줍니다.
그 다음 해당 스크립트 파일에 실제 변환을 수행하는 VBA 코드를 VBScript로 작성해 줍니다.
' See http://msdn.microsoft.com/en-us/library/bb243311%28v=office.12%29.aspx
Const WdExportFormatPDF = 17
Const MagicFormatPDF = 999
Const HtmFormat = 44

Dim arguments
Set arguments = WScript.Arguments

' Packs "<Outfile>.htm" and the folder "<Outfile>.files" into one archive using 7z.exe,
' removes both originals and stores the archive under the name "<Outfile>.htm".
'
' Outfile - target path without the ".htm" extension.
' Returns 1 on success, or an error message string when 7z.exe cannot be found or the
' final file is missing. Raises a run-time error when 7z did not produce the archive, so
' that a caller running under "On Error Resume Next" sees Err <> 0.
Function Zip(Outfile)
    'This script is provided under the Creative Commons license located
    'at http://creativecommons.org/licenses/by-nc/2.5/ . It may not
    'be used for commercial purposes with out the expressed written consent
    'of NateRice.com

    Dim oFSO
    Dim oShell
    Dim sWorkingDirectory
    Dim s7zExecutable
    Dim outputfilename
    Dim outputfoldername
    Dim outputzipfile

    Set oFSO = WScript.CreateObject("Scripting.FileSystemObject")
    Set oShell = WScript.CreateObject("Wscript.Shell")

    '--------Find Working Directory--------
    ' Folder that contains this script.
    sWorkingDirectory = oFSO.GetParentFolderName(WScript.ScriptFullName)
    '--------------------------------------

    '-------Ensure we can find 7z.exe------
    ' Always run 7z.exe through its full path so that the lookup does not depend on the
    ' current directory or on PATH.
    If oFSO.FileExists(oFSO.BuildPath(sWorkingDirectory, "7z.exe")) Then
        s7zExecutable = oFSO.BuildPath(sWorkingDirectory, "7z.exe")
    ElseIf oFSO.FileExists("C:\Program Files\7-Zip\7z.exe") Then
        s7zExecutable = "C:\Program Files\7-Zip\7z.exe"
    Else
        Zip = "Error: Couldn't find 7z.exe"
        Exit Function
    End If
    '--------------------------------------

    ' Quote every path because it may contain spaces.
    outputfilename = """" & Outfile & ".htm" & """"
    outputfoldername = """" & Outfile & ".files" & """"
    outputzipfile = """" & Outfile & ".zip" & """"

    ' Window style 0 hides the console window; True waits until 7z has finished.
    oShell.Run """" & s7zExecutable & """ a " & outputzipfile & " " _
        & outputfoldername & " " & outputfilename, 0, True

    ' Do not delete the converted output unless the archive really exists.
    If Not oFSO.FileExists(Outfile & ".zip") Then
        Err.Raise vbObjectError + 1, "Zip", "Error: Archive Creation Failed."
    End If

    If oFSO.FileExists(Outfile & ".htm") Then
        oFSO.DeleteFile Outfile & ".htm"
    End If
    If oFSO.FolderExists(Outfile & ".files") Then
        oFSO.DeleteFolder Outfile & ".files"
    End If

    ' The archive takes over the name of the original .htm file.
    oFSO.MoveFile Outfile & ".zip", Outfile & ".htm"

    ' The result is stored as "<Outfile>.htm", so that is the file to check.
    If oFSO.FileExists(Outfile & ".htm") Then
        Zip = 1
    Else
        Zip = "Error: Archive Creation Failed."
    End If
End Function

' Transforms a file using MS Excel into the given format.
' Converts inputFile to outputFile in the format selected by formatEnumeration.
' The function never returns to its caller; it always ends the script with an exit code:
'    2  the conversion section ran without an error
'   -2  the source workbook could not be opened
'   -3  an error was recorded during the conversion section
'   -4  the source file does not exist
'   -6  no running MS Excel instance could be obtained
Function ConvertFile( inputFile, outputFile, formatEnumeration )

  Dim fileSystemObject
  Dim excelApplication
  Dim excelDocument

  ' Get the running instance of MS Excel. If Excel is not running, exit the conversion.
  On Error Resume Next
  Set excelApplication = GetObject(, "Excel.Application")
  If Err <> 0 Then
    WScript.Quit -6
  End If
  On Error GoTo 0

  ' Find the source file on the file system.
  Set fileSystemObject = CreateObject("Scripting.FileSystemObject")
  inputFile = fileSystemObject.GetAbsolutePathName(inputFile)

  ' Convert the source file only if it exists.
  If fileSystemObject.FileExists(inputFile) Then

    ' Attempt to open the source document.
    On Error Resume Next
    Set excelDocument = excelApplication.Workbooks.Open(inputFile)
    If Err <> 0 Then
      WScript.Quit -2
    End If
    On Error GoTo 0

    ' Convert: See http://msdn2.microsoft.com/en-us/library/bb221597.aspx
    ' Encoding is https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.core.msoencoding?view=office-pia,
    ' ScreenSize is https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.core.msoscreensize?view=office-pia
    ' Errors are not handled statement by statement; Err is inspected once after the
    ' whole conversion section.
    On Error Resume Next
    If formatEnumeration = MagicFormatPDF Then
      ' Fit every worksheet to one page in width; 0 leaves the number of pages in height open.
      For Each ws In excelDocument.Worksheets
        excelApplication.Application.PrintCommunication = False
        ws.PageSetup.FitToPagesWide = 1
        ws.PageSetup.FitToPagesTall = 0
        excelApplication.Application.PrintCommunication = True
      Next
      ' NOTE(review): xlTypePDF and xlQualityStandard are not declared in the visible part of
      ' this script - presumably they evaluate to Empty/0 here; confirm that is intended.
      excelDocument.ExportAsFixedFormat xlTypePDF, outputFile, xlQualityStandard, True, True
      excelDocument.Close False
    ElseIf formatEnumeration = HtmFormat Then
      ' Web options of this workbook for the HTML export.
      excelDocument.WebOptions.RelyOnCSS = True
      ' Supporting files go into a separate folder; Zip later packs "<outputFile>.files".
      excelDocument.WebOptions.OrganizeInFolder = True
      excelDocument.WebOptions.UseLongFileNames = True
      excelDocument.WebOptions.DownloadComponents = False
      excelDocument.WebOptions.RelyOnVML = False
      excelDocument.WebOptions.AllowPNG = True
      ' presumably 4 = 1024x768 and 949 = Korean encoding - verify against the links above
      excelDocument.WebOptions.ScreenSize = 4
      excelDocument.WebOptions.PixelsPerInch = 96
      excelDocument.WebOptions.Encoding = 949
      ' Application-wide default web options.
      excelApplication.Application.DefaultWebOptions.SaveHiddenData = True
      excelApplication.Application.DefaultWebOptions.LoadPictures = True
      excelApplication.Application.DefaultWebOptions.UpdateLinksOnSave = True
      excelApplication.Application.DefaultWebOptions.CheckIfOfficeIsHTMLEditor = True
      excelApplication.Application.DefaultWebOptions.AlwaysSaveInDefaultEncoding = False
      excelApplication.Application.DefaultWebOptions.SaveNewWebPagesAsWebArchives = True
      excelDocument.SaveAs outputFile, formatEnumeration
      excelDocument.Close False
      ' Pack the saved page and its supporting folder into a single file.
      ' The value returned by Zip is not inspected here.
      Call Zip(outputFile)
    Else
      excelDocument.SaveAs outputFile, formatEnumeration
      excelDocument.Close False
    End If

    ' Report a failure if an error was recorded during the conversion section.
    If Err <> 0 Then
      WScript.Quit -3
    End If
    On Error GoTo 0

    ' Signal that the conversion was successful.
    WScript.Quit 2

  Else

    ' Files does not exist, could not convert
    WScript.Quit -4

  End If

End Function

' Execute the script.
' Unnamed arguments: 0 = input file, 1 = output file, 2 = numeric format code.
Call ConvertFile( WScript.Arguments.Unnamed.Item(0), WScript.Arguments.Unnamed.Item(1), CInt(WScript.Arguments.Unnamed.Item(2)) )
그리고 나서 Maven Install 을 해주시면
Maven의 기본 로컬 저장소인 C:\Users\username\.m2 폴더에 라이브러리가 설치되어, 수정한 라이브러리가 적용됩니다.
그리고 나서 standalone 서버를 mvn package -P extras 로 빌드해 주시면 수정한 소스가 적용됩니다.
그리고 나서 테스트 해보면
일단 서버 프로그램 실행 해준뒤에
클라이언트에서
public class Main { public static final String SAMPLE_XLSX_FILE_PATH = "./sample-xlsx-file.xlsx"; public static void main(String[] args) throws InterruptedException, ExecutionException, IOException { // TODO Auto-generated method stub File wordFile = new File("TESTDCO.xlsx"), target = new File("test1.zip"); RemoteConverter.Builder builder = RemoteConverter.builder() .baseFolder(new File("test")) .requestTimeout(3600, TimeUnit.SECONDS) .baseUri("http://127.0.0.1:9998"); IConverter converter = builder.build(); Boolean conversion = converter .convert(wordFile).as(DocumentType.MS_EXCEL) .to(target).as(DocumentType.HTML) .execute(); System.out.println("test"); } }
소스로 테스트 해주면
위와 같이 압축 파일을 전송 받게 되며
압축 파일에는 이렇게 2개의 항목(.htm 파일과 .files 폴더)이 들어 있으므로, 압축을 풀어 웹 서버에 올리면
Excel 문서가 HTML 형식으로 보기 좋게 변환되어 사용할 수 있습니다.