1 module xlsx; 2 3 import std.file, std.xml, std.format, std.regex, std.conv, std.algorithm, std.range, std.utf; 4 import archive.core, archive.zip; 5 6 private struct Coord { 7 int row; 8 int column; 9 } 10 11 /// Aliases a Sheet as a two-dimensional array of strings. 12 alias Sheet = string[][]; 13 14 /++ 15 Reads a sheet from an XLSX file. 16 17 Params: 18 fileName = The path of the XLSX file to read 19 sheetNum = The sheet number to read (one-indexed) 20 21 Returns: The contents of the sheet, as a two-dimensional array of strings. 22 +/ 23 Sheet readSheet(string fileName, int sheetNum) { 24 assert(sheetNum > 0); 25 26 auto zip = new ZipArchive(read(fileName)); 27 auto sheet = zip.getFile(format!"xl/worksheets/sheet%d.xml"(sheetNum)); 28 if(sheet is null) throw new Exception("Invalid sheet number"); 29 //std.utf.validate!(ubyte[])(sheet.data); 30 string xml = cast(string) sheet.data; 31 32 validate(xml); 33 34 auto sstFile = zip.getFile("xl/sharedStrings.xml"); 35 string sstXML = cast(string) sstFile.data; 36 string[] sst = parseStringTable(sstXML); 37 38 return parseSheetXML(xml, sst); 39 } 40 41 /++ 42 Reads a sheet from an XLSX file by its name. 43 44 Params: 45 fileName = the path of the XLSX file to read 46 sheetNum = The name of the sheet to read 47 48 Returns: the contents of the sheet, as a two-dimensional array of strings. 49 +/ 50 Sheet readSheetByName(string fileName, string sheetName) { 51 auto zip = new ZipArchive(read(fileName)); 52 auto workbook = zip.getFile("xl/workbook.xml"); 53 if(workbook is null) throw new Exception("Invalid XLSX file"); 54 //std.utf.validate!(ubyte[])(sheet.data); 55 string xml = cast(string) workbook.data; 56 57 const int id = getSheetId(xml, sheetName); 58 59 return readSheet(fileName, id); 60 } 61 62 /++ 63 Parses an XLSX sheet from XML. 64 65 Params: 66 xmlString = The XML to parse. 67 68 Returns: the equivalent sheet, as a two dimensional array of strings. 69 +/ 70 Sheet parseSheetXML(string xmlString, string[] sst) { 71 Sheet temp; 72 73 int cols = 0; 74 auto doc = new DocumentParser(xmlString); 75 doc.onEndTag["dimension"] = (in Element dim) { 76 auto dims = parseDimensions(dim.tag.attr["ref"]); 77 //temp ~= new string[dims.row]; 78 cols = dims.column; 79 }; 80 81 doc.onStartTag["row"] = (ElementParser rowTag) { 82 //int r = parse!int(rowTag.tag.attr["r"])-1; 83 84 auto theRow = new string[cols]; 85 rowTag.onStartTag["c"] = (ElementParser cTag) { 86 Coord loc = parseLocation(cTag.tag.attr["r"]); 87 bool isRef = false; 88 if("t" in cTag.tag.attr) 89 isRef = cTag.tag.attr["t"] == "s"; 90 91 string val; 92 cTag.onEndTag["v"] = (in Element v) { val = v.text; }; 93 cTag.parse(); 94 95 if(isRef) { 96 int index = parse!int(val); 97 theRow[loc.column] = sst[index]; 98 } 99 else { 100 theRow[loc.column] = val; 101 } 102 }; 103 rowTag.parse(); 104 temp ~= theRow; 105 }; 106 doc.parse(); 107 return temp; 108 } 109 unittest { 110 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 111 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C3"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A4" sqref="A4"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1"><v>1</v></c><c r="B1"><v>5</v></c><c r="C1"><v>7</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>2</v></c><c r="B2"><v>4</v></c><c r="C2"><v>3</v></c></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>7</v></c><c r="B3"><v>82</v></c><c r="C3"><v>1</v></c></row></sheetData><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`; 112 Sheet testSheet = [["1","5","7"],["2","4","3"],["7","82","1"]]; 113 Sheet result = parseSheetXML(test, null); 114 assert(result == testSheet); 115 } 116 117 /++ 118 Gets the numeric (> 1) id of a sheet with a given name. 119 120 Params: 121 wbXml = The XML content of the workbook.xml file in the main XLSX zip archive. 122 sheetName = The name of the sheet to find. 123 124 Returns: the id of the given sheet 125 126 Exceptions: 127 Exception if the given sheet name is not in the workbook. 128 +/ 129 private int getSheetId(string wbXml, string sheetName) { 130 int theId; 131 auto doc = new DocumentParser(wbXml); 132 doc.onEndTag["sheet"] = (in Element sheet) { 133 if(sheet.tag.attr["name"] == sheetName) { 134 string wat = sheet.tag.attr["sheetId"].dup; 135 theId = parse!int(wat); 136 } 137 }; 138 doc.parse(); 139 140 if(theId != 0) { 141 return theId; 142 } 143 else throw new Exception("No sheet with that name!"); 144 } 145 unittest { 146 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 147 <workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15 xr2" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main" xmlns:xr2="http://schemas.microsoft.com/office/spreadsheetml/2015/revision2"><fileVersion appName="xl" lastEdited="7" lowestEdited="7" rupBuild="18625"/><workbookPr defaultThemeVersion="166925"/><mc:AlternateContent xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><mc:Choice Requires="x15"><x15ac:absPath url="C:\Users\rraab.ADVILL\Documents\Code\D\dlang-xlsx\" xmlns:x15ac="http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac"/></mc:Choice></mc:AlternateContent><bookViews><workbookView xWindow="0" yWindow="0" windowWidth="28800" windowHeight="12210" activeTab="1" xr2:uid="{045295F2-59E2-495E-BB00-149A1C289780}"/></bookViews><sheets><sheet name="Test1" sheetId="1" r:id="rId1"/><sheet name="Test2" sheetId="2" r:id="rId2"/></sheets><calcPr calcId="171027"/><extLst><ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main"><x15:workbookPr chartTrackingRefBase="1"/></ext></extLst></workbook>`; 148 assert(getSheetId(test, "Test1") == 1); 149 assert(getSheetId(test, "Test2") == 2); 150 } 151 152 /++ 153 Parses a Shared String Table XML string into an array of strings. 154 155 Params: 156 sst = The Shared String Table XML string to parse 157 Returns: 158 A simple string array that can be indexed into from an "s" type cell's value (which is a zero-based index, thankfully) 159 +/ 160 private string[] parseStringTable(string sst) { 161 auto doc = new DocumentParser(sst); 162 string[] table; 163 164 doc.onEndTag["t"] = (in Element t) { 165 table ~= t.text(); 166 }; 167 168 doc.parse(); 169 170 return table; 171 } 172 173 /++ 174 Gets a column from a sheet, with each element parsed as the templated type. 175 176 Params: 177 sheet = The Sheet from which to extract the column. 178 col = The (zero-based) index of the column to extract. 179 180 Returns: the given column, as a newly-allocated array. 181 +/ 182 T[] getColumn(T)(Sheet sheet, size_t col) { 183 return sheet.map!(x => parse!T(x[col])).array; 184 } 185 unittest { 186 assert(getColumn!int([["1","5","7"],["2","4","3"],["7","82","1"]], 1) == [5, 4, 82]); 187 } 188 189 ///Parses an Excel letter-number column-major one-based location string to a pair of row-major zero-based indeces 190 private Coord parseLocation(string location) { 191 auto pat = ctRegex!"([A-Z]+)([0-9]+)"; 192 auto m = location.matchFirst(pat); 193 Coord temp; 194 string rrow = m[2]; 195 temp.row = parse!int(rrow)-1; 196 temp.column = m[1].columnNameToNumber-1; 197 return temp; 198 } 199 unittest { 200 Coord C3 = {2,2}; 201 Coord B15 = {14,1}; 202 assert(parseLocation("C3") == C3); 203 assert(parseLocation("B15") == B15); 204 } 205 206 private Coord parseDimensions(string dims) { 207 auto pat = ctRegex!"([A-Z]+)([0-9]+):([A-Z]+)([0-9]+)"; 208 auto m = dims.matchFirst(pat); 209 Coord temp; 210 string m4 = m[4]; 211 string m2 = m[2]; 212 temp.row = parse!int(m4) - parse!int(m2) + 1; 213 temp.column = m[3].columnNameToNumber - m[1].columnNameToNumber + 1; 214 return temp; 215 } 216 217 private int columnNameToNumber(string col) { 218 reverse(col.dup); 219 int num = 0; 220 foreach(i, c; col) { 221 num += (c - 'A' + 1)*26^^i; 222 } 223 return num; 224 } 225 226 unittest { 227 assert(columnNameToNumber("AA") == 27); 228 }