1 module xlsx; 2 3 import std.file, std.format, std.regex, std.conv, std.algorithm, std.range, std.utf; 4 import archive.core, archive.zip; 5 6 import dxml.parser; 7 8 private struct Coord { 9 int row; 10 int column; 11 } 12 13 /// Aliases a Sheet as a two-dimensional array of strings. 14 alias Sheet = string[][]; 15 16 private enum configSplitYes = makeConfig(SplitEmpty.yes); 17 18 /++ 19 Reads a sheet from an XLSX file. 20 21 Params: 22 fileName = The path of the XLSX file to read 23 sheetNum = The sheet number to read (one-indexed) 24 25 Returns: The contents of the sheet, as a two-dimensional array of strings. 26 +/ 27 Sheet readSheet(string fileName, int sheetNum) { 28 assert(sheetNum > 0); 29 30 auto zip = new ZipArchive(read(fileName)); 31 auto sheet = zip.getFile(format!"xl/worksheets/sheet%d.xml"(sheetNum)); 32 if(sheet is null) throw new Exception("Invalid sheet number"); 33 //std.utf.validate!(ubyte[])(sheet.data); 34 string xml = cast(string) sheet.data; 35 36 validate(xml); 37 38 auto sstFile = zip.getFile("xl/sharedStrings.xml"); 39 string sstXML = cast(string) sstFile.data; 40 string[] sst = parseStringTable(sstXML); 41 42 return parseSheetXML(xml, sst); 43 } 44 45 /++ 46 Reads a sheet from an XLSX file by its name. 47 48 Params: 49 fileName = the path of the XLSX file to read 50 sheetNum = The name of the sheet to read 51 52 Returns: the contents of the sheet, as a two-dimensional array of strings. 53 +/ 54 Sheet readSheetByName(string fileName, string sheetName) { 55 auto zip = new ZipArchive(read(fileName)); 56 auto workbook = zip.getFile("xl/workbook.xml"); 57 if(workbook is null) throw new Exception("Invalid XLSX file"); 58 //std.utf.validate!(ubyte[])(sheet.data); 59 string xml = cast(string) workbook.data; 60 61 const int id = getSheetId(xml, sheetName); 62 63 return readSheet(fileName, id); 64 } 65 66 /++ 67 Parses an XLSX sheet from XML. 68 69 Params: 70 xmlString = The XML to parse. 71 72 Returns: the equivalent sheet, as a two dimensional array of strings. 73 +/ 74 Sheet parseSheetXML(string xmlString, string[] sst) { 75 Sheet temp; 76 77 int cols = 0; 78 79 string[] theRow; 80 81 auto range = parseXML!configSplitYes(xmlString); 82 while(!range.empty) { 83 if(range.front.type == EntityType.elementStart) { 84 if(range.front.name == "dimension") { 85 auto attr = range.front.attributes.front; 86 assert(attr.name == "ref"); 87 auto dims = parseDimensions(attr.value); 88 cols = dims.column; 89 assert(cols > 0); 90 } 91 else if(range.front.name == "row") { 92 assert(cols > 0); 93 theRow = new string[cols]; 94 } 95 else if(range.front.name == "c") { 96 auto attrs = range.front.attributes; 97 Coord loc; 98 bool isref; 99 foreach(attr; attrs) { 100 if(attr.name == "r") { 101 loc = parseLocation(attr.value); 102 } 103 else if(attr.name == "t") { 104 if(attr.value == "s") isref=true; 105 else isref = false; 106 } 107 } 108 assert(loc.row >= 0 && loc.column >= 0); 109 range.popFront; 110 if(range.front.type == EntityType.elementStart) { 111 if(range.front.name == "f") { 112 range.popFront; 113 if(range.front.type == EntityType.text) range.popFront; 114 range.popFront; 115 } 116 range.popFront; 117 assert(range.front.type == EntityType.text); 118 string text = range.front.text; 119 assert(theRow.length > 0); 120 assert(loc.column < theRow.length); 121 if(isref) theRow[loc.column] = sst[parse!int(text)]; 122 else theRow[loc.column] = text; 123 } 124 else { 125 theRow[loc.column] = ""; 126 } 127 } 128 } 129 else if(range.front.type == EntityType.elementEnd) { 130 if(range.front.name == "row") { 131 temp ~= theRow; 132 } 133 } 134 range.popFront; 135 } 136 137 138 return temp; 139 } 140 unittest { 141 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 142 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C3"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A4" sqref="A4"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1"><v>1</v></c><c r="B1"><v>5</v></c><c r="C1"><v>7</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>2</v></c><c r="B2"><v>4</v></c><c r="C2"><f t="shared" si="0" /><v>3</v></c></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>7</v></c><c r="B3"><v>82</v></c><c r="C3"><v>1</v></c></row></sheetData><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`; 143 const Sheet testSheet = [["1","5","7"],["2","4","3"],["7","82","1"]]; 144 const Sheet result = parseSheetXML(test, null); 145 assert(result == testSheet); 146 } 147 unittest { 148 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 149 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C6"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A7" sqref="A7"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1" t="s"><v>0</v></c><c r="B1" t="s"><v>1</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>1</v></c><c r="B2" s="1"><f>ABS(-6.2)</f><v>6.2</v></c><c r="C2" s="1"/></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>2</v></c><c r="B3" s="1"><v>3.4</v></c><c r="C3" s="1"/></row><row r="4" spans="1:3" x14ac:dyDescent="0.25"><c r="A4"><v>3</v></c><c r="B4" s="1"><v>87.1</v></c><c r="C4" s="1"/></row><row r="5" spans="1:3" x14ac:dyDescent="0.25"><c r="A5"><v>4</v></c><c r="B5" s="2"><v>83.2</v></c><c r="C5" s="2"/></row><row r="6" spans="1:3" x14ac:dyDescent="0.25"><c r="A6"><v>5</v></c><c r="B6" s="1"><v>3</v></c><c r="C6" s="1"/></row></sheetData><mergeCells count="1"><mergeCell ref="B5:C5"/></mergeCells><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`; 150 const Sheet expected = [["Param", "Value", ""], ["1", "6.2", ""], ["2", "3.4", ""], ["3", "87.1", ""], ["4", "83.2", ""], ["5", "3", ""]]; 151 const Sheet result = parseSheetXML(test, ["Param", "Value"]); 152 assert(result == expected); 153 } 154 155 /++ 156 Gets the numeric (> 1) id of a sheet with a given name. 157 158 Params: 159 wbXml = The XML content of the workbook.xml file in the main XLSX zip archive. 160 sheetName = The name of the sheet to find. 161 162 Returns: the id of the given sheet 163 164 Exceptions: 165 Exception if the given sheet name is not in the workbook. 166 +/ 167 private int getSheetId(string wbXml, string sheetName) { 168 auto range = parseXML!configSplitYes(wbXml); 169 while(!range.empty) { 170 if(range.front.type == EntityType.elementStart && range.front.name == "sheet") { 171 auto attrs = range.front.attributes; 172 bool nameFound; 173 foreach(attr; attrs) { 174 if(attr.name == "name" && attr.value == sheetName) nameFound = true; 175 else if(nameFound && attr.name == "sheetId") { 176 return parse!int(attr.value); 177 } 178 } 179 } 180 range.popFront; 181 } 182 183 throw new Exception("No sheet with that name!"); 184 } 185 unittest { 186 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 187 <workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15 xr2" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main" xmlns:xr2="http://schemas.microsoft.com/office/spreadsheetml/2015/revision2"><fileVersion appName="xl" lastEdited="7" lowestEdited="7" rupBuild="18625"/><workbookPr defaultThemeVersion="166925"/><mc:AlternateContent xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><mc:Choice Requires="x15"><x15ac:absPath url="C:\Users\rraab.ADVILL\Documents\Code\D\dlang-xlsx\" xmlns:x15ac="http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac"/></mc:Choice></mc:AlternateContent><bookViews><workbookView xWindow="0" yWindow="0" windowWidth="28800" windowHeight="12210" activeTab="1" xr2:uid="{045295F2-59E2-495E-BB00-149A1C289780}"/></bookViews><sheets><sheet name="Test1" sheetId="1" r:id="rId1"/><sheet name="Test2" sheetId="2" r:id="rId2"/></sheets><calcPr calcId="171027"/><extLst><ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main"><x15:workbookPr chartTrackingRefBase="1"/></ext></extLst></workbook>`; 188 assert(getSheetId(test, "Test1") == 1); 189 assert(getSheetId(test, "Test2") == 2); 190 } 191 192 /++ 193 Parses a Shared String Table XML string into an array of strings. 194 195 Params: 196 sst = The Shared String Table XML string to parse 197 Returns: 198 A simple string array that can be indexed into from an "s" type cell's value (which is a zero-based index, thankfully) 199 +/ 200 private string[] parseStringTable(string sst) { 201 //auto doc = new DocumentParser(sst); 202 string[] table; 203 204 auto range = parseXML!configSplitYes(sst); 205 206 while(!range.empty) { 207 if(range.front.type == EntityType.elementStart && range.front.name == "t") { 208 range.popFront; 209 table ~= range.front.text; 210 } 211 range.popFront; 212 } 213 return table; 214 } 215 216 /++ 217 Gets a column from a sheet, with each element parsed as the templated type. 218 219 Params: 220 sheet = The Sheet from which to extract the column. 221 col = The (zero-based) index of the column to extract. 222 223 Returns: the given column, as a newly-allocated array. 224 +/ 225 T[] getColumn(T)(Sheet sheet, size_t col) { 226 return sheet.map!(x => parse!T(x[col])).array; 227 } 228 unittest { 229 assert(getColumn!int([["1","5","7"],["2","4","3"],["7","82","1"]], 1) == [5, 4, 82]); 230 } 231 232 ///Parses an Excel letter-number column-major one-based location string to a pair of row-major zero-based indeces 233 private Coord parseLocation(string location) { 234 auto pat = ctRegex!"([A-Z]+)([0-9]+)"; 235 auto m = location.matchFirst(pat); 236 Coord temp; 237 string rrow = m[2]; 238 temp.row = parse!int(rrow)-1; 239 temp.column = m[1].columnNameToNumber-1; 240 return temp; 241 } 242 unittest { 243 const Coord C3 = {2,2}; 244 const Coord B15 = {14,1}; 245 assert(parseLocation("C3") == C3); 246 assert(parseLocation("B15") == B15); 247 } 248 249 private Coord parseDimensions(string dims) { 250 auto pat = ctRegex!"([A-Z]+)([0-9]+):([A-Z]+)([0-9]+)"; 251 auto m = dims.matchFirst(pat); 252 Coord temp; 253 string m4 = m[4]; 254 string m2 = m[2]; 255 temp.row = parse!int(m4) - parse!int(m2) + 1; 256 temp.column = m[3].columnNameToNumber - m[1].columnNameToNumber + 1; 257 return temp; 258 } 259 260 private int columnNameToNumber(string col) { 261 reverse(col.dup); 262 int num; 263 foreach(i, c; col) { 264 num += (c - 'A' + 1)*26^^i; 265 } 266 return num; 267 } 268 269 unittest { 270 assert(columnNameToNumber("AA") == 27); 271 }