1 module xlsx; 2 3 import std.file, std.format, std.regex, std.conv, std.algorithm, std.range, std.utf; 4 import archive.core, archive.zip; 5 6 import dxml.parser; 7 8 private struct Coord { 9 int row; 10 int column; 11 } 12 13 /// Aliases a Sheet as a two-dimensional array of strings. 14 alias Sheet = string[][]; 15 16 private enum configSplitYes = makeConfig(SplitEmpty.yes); 17 18 /++ 19 Reads a sheet from an XLSX file. 20 21 Params: 22 fileName = The path of the XLSX file to read 23 sheetNum = The sheet number to read (one-indexed) 24 25 Returns: The contents of the sheet, as a two-dimensional array of strings. 26 +/ 27 Sheet readSheet(string fileName, int sheetNum) { 28 assert(sheetNum > 0); 29 30 auto zip = new ZipArchive(read(fileName)); 31 auto sheet = zip.getFile(format!"xl/worksheets/sheet%d.xml"(sheetNum)); 32 if(sheet is null) throw new Exception("Invalid sheet number"); 33 //std.utf.validate!(ubyte[])(sheet.data); 34 string xml = cast(string) sheet.data; 35 36 validate(xml); 37 38 auto sstFile = zip.getFile("xl/sharedStrings.xml"); 39 string sstXML = cast(string) sstFile.data; 40 string[] sst = parseStringTable(sstXML); 41 42 return parseSheetXML(xml, sst); 43 } 44 45 /++ 46 Reads a sheet from an XLSX file by its name. 47 48 Params: 49 fileName = the path of the XLSX file to read 50 sheetNum = The name of the sheet to read 51 52 Returns: the contents of the sheet, as a two-dimensional array of strings. 53 +/ 54 Sheet readSheetByName(string fileName, string sheetName) { 55 auto zip = new ZipArchive(read(fileName)); 56 auto workbook = zip.getFile("xl/workbook.xml"); 57 if(workbook is null) throw new Exception("Invalid XLSX file"); 58 //std.utf.validate!(ubyte[])(sheet.data); 59 string xml = cast(string) workbook.data; 60 61 const int id = getSheetId(xml, sheetName); 62 63 return readSheet(fileName, id); 64 } 65 66 /++ 67 Parses an XLSX sheet from XML. 68 69 Params: 70 xmlString = The XML to parse. 71 72 Returns: the equivalent sheet, as a two dimensional array of strings. 73 +/ 74 Sheet parseSheetXML(string xmlString, string[] sst) { 75 Sheet temp; 76 77 int cols = 0; 78 79 string[] theRow; 80 81 auto range = parseXML!configSplitYes(xmlString); 82 while(!range.empty) { 83 if(range.front.type == EntityType.elementStart) { 84 if(range.front.name == "dimension") { 85 auto attr = range.front.attributes.front; 86 assert(attr.name == "ref"); 87 auto dims = parseDimensions(attr.value); 88 cols = dims.column; 89 assert(cols > 0); 90 } 91 else if(range.front.name == "row") { 92 assert(cols > 0); 93 theRow = new string[cols]; 94 } 95 else if(range.front.name == "c") { 96 auto attrs = range.front.attributes; 97 Coord loc; 98 bool isref; 99 foreach(attr; attrs) { 100 if(attr.name == "r") { 101 loc = parseLocation(attr.value); 102 } 103 else if(attr.name == "t") { 104 if(attr.value == "s") isref=true; 105 else isref = false; 106 } 107 } 108 assert(loc.row >= 0 && loc.column >= 0); 109 range.popFront; 110 if(range.front.type != EntityType.elementEnd) { 111 range.popFront; 112 assert(range.front.type == EntityType.text); 113 string text = range.front.text; 114 assert(theRow.length > 0); 115 assert(loc.column < theRow.length); 116 if(isref) theRow[loc.column] = sst[parse!int(text)]; 117 else theRow[loc.column] = text; 118 } 119 } 120 } 121 else if(range.front.type == EntityType.elementEnd) { 122 if(range.front.name == "row") { 123 temp ~= theRow; 124 } 125 } 126 range.popFront; 127 } 128 129 130 return temp; 131 } 132 unittest { 133 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 134 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C3"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A4" sqref="A4"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1"><v>1</v></c><c r="B1"><v>5</v></c><c r="C1"><v>7</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>2</v></c><c r="B2"><v>4</v></c><c r="C2"><v>3</v></c></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>7</v></c><c r="B3"><v>82</v></c><c r="C3"><v>1</v></c></row></sheetData><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`; 135 const Sheet testSheet = [["1","5","7"],["2","4","3"],["7","82","1"]]; 136 const Sheet result = parseSheetXML(test, null); 137 assert(result == testSheet); 138 } 139 unittest { 140 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 141 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C6"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A7" sqref="A7"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1" t="s"><v>0</v></c><c r="B1" t="s"><v>1</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>1</v></c><c r="B2" s="1"><v>6.2</v></c><c r="C2" s="1"/></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>2</v></c><c r="B3" s="1"><v>3.4</v></c><c r="C3" s="1"/></row><row r="4" spans="1:3" x14ac:dyDescent="0.25"><c r="A4"><v>3</v></c><c r="B4" s="1"><v>87.1</v></c><c r="C4" s="1"/></row><row r="5" spans="1:3" x14ac:dyDescent="0.25"><c r="A5"><v>4</v></c><c r="B5" s="2"><v>83.2</v></c><c r="C5" s="2"/></row><row r="6" spans="1:3" x14ac:dyDescent="0.25"><c r="A6"><v>5</v></c><c r="B6" s="1"><v>3</v></c><c r="C6" s="1"/></row></sheetData><mergeCells count="1"><mergeCell ref="B5:C5"/></mergeCells><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`; 142 const Sheet expected = [["Param", "Value", ""], ["1", "6.2", ""], ["2", "3.4", ""], ["3", "87.1", ""], ["4", "83.2", ""], ["5", "3", ""]]; 143 const Sheet result = parseSheetXML(test, ["Param", "Value"]); 144 assert(result == expected); 145 } 146 147 /++ 148 Gets the numeric (> 1) id of a sheet with a given name. 149 150 Params: 151 wbXml = The XML content of the workbook.xml file in the main XLSX zip archive. 152 sheetName = The name of the sheet to find. 153 154 Returns: the id of the given sheet 155 156 Exceptions: 157 Exception if the given sheet name is not in the workbook. 158 +/ 159 private int getSheetId(string wbXml, string sheetName) { 160 auto range = parseXML!configSplitYes(wbXml); 161 while(!range.empty) { 162 if(range.front.type == EntityType.elementStart && range.front.name == "sheet") { 163 auto attrs = range.front.attributes; 164 bool nameFound; 165 foreach(attr; attrs) { 166 if(attr.name == "name" && attr.value == sheetName) nameFound = true; 167 else if(nameFound && attr.name == "sheetId") { 168 return parse!int(attr.value); 169 } 170 } 171 } 172 range.popFront; 173 } 174 175 throw new Exception("No sheet with that name!"); 176 } 177 unittest { 178 const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 179 <workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15 xr2" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main" xmlns:xr2="http://schemas.microsoft.com/office/spreadsheetml/2015/revision2"><fileVersion appName="xl" lastEdited="7" lowestEdited="7" rupBuild="18625"/><workbookPr defaultThemeVersion="166925"/><mc:AlternateContent xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><mc:Choice Requires="x15"><x15ac:absPath url="C:\Users\rraab.ADVILL\Documents\Code\D\dlang-xlsx\" xmlns:x15ac="http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac"/></mc:Choice></mc:AlternateContent><bookViews><workbookView xWindow="0" yWindow="0" windowWidth="28800" windowHeight="12210" activeTab="1" xr2:uid="{045295F2-59E2-495E-BB00-149A1C289780}"/></bookViews><sheets><sheet name="Test1" sheetId="1" r:id="rId1"/><sheet name="Test2" sheetId="2" r:id="rId2"/></sheets><calcPr calcId="171027"/><extLst><ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main"><x15:workbookPr chartTrackingRefBase="1"/></ext></extLst></workbook>`; 180 assert(getSheetId(test, "Test1") == 1); 181 assert(getSheetId(test, "Test2") == 2); 182 } 183 184 /++ 185 Parses a Shared String Table XML string into an array of strings. 186 187 Params: 188 sst = The Shared String Table XML string to parse 189 Returns: 190 A simple string array that can be indexed into from an "s" type cell's value (which is a zero-based index, thankfully) 191 +/ 192 private string[] parseStringTable(string sst) { 193 //auto doc = new DocumentParser(sst); 194 string[] table; 195 196 auto range = parseXML!configSplitYes(sst); 197 198 while(!range.empty) { 199 if(range.front.type == EntityType.elementStart && range.front.name == "t") { 200 range.popFront; 201 table ~= range.front.text; 202 } 203 range.popFront; 204 } 205 return table; 206 } 207 208 /++ 209 Gets a column from a sheet, with each element parsed as the templated type. 210 211 Params: 212 sheet = The Sheet from which to extract the column. 213 col = The (zero-based) index of the column to extract. 214 215 Returns: the given column, as a newly-allocated array. 216 +/ 217 T[] getColumn(T)(Sheet sheet, size_t col) { 218 return sheet.map!(x => parse!T(x[col])).array; 219 } 220 unittest { 221 assert(getColumn!int([["1","5","7"],["2","4","3"],["7","82","1"]], 1) == [5, 4, 82]); 222 } 223 224 ///Parses an Excel letter-number column-major one-based location string to a pair of row-major zero-based indeces 225 private Coord parseLocation(string location) { 226 auto pat = ctRegex!"([A-Z]+)([0-9]+)"; 227 auto m = location.matchFirst(pat); 228 Coord temp; 229 string rrow = m[2]; 230 temp.row = parse!int(rrow)-1; 231 temp.column = m[1].columnNameToNumber-1; 232 return temp; 233 } 234 unittest { 235 const Coord C3 = {2,2}; 236 const Coord B15 = {14,1}; 237 assert(parseLocation("C3") == C3); 238 assert(parseLocation("B15") == B15); 239 } 240 241 private Coord parseDimensions(string dims) { 242 auto pat = ctRegex!"([A-Z]+)([0-9]+):([A-Z]+)([0-9]+)"; 243 auto m = dims.matchFirst(pat); 244 Coord temp; 245 string m4 = m[4]; 246 string m2 = m[2]; 247 temp.row = parse!int(m4) - parse!int(m2) + 1; 248 temp.column = m[3].columnNameToNumber - m[1].columnNameToNumber + 1; 249 return temp; 250 } 251 252 private int columnNameToNumber(string col) { 253 reverse(col.dup); 254 int num; 255 foreach(i, c; col) { 256 num += (c - 'A' + 1)*26^^i; 257 } 258 return num; 259 } 260 261 unittest { 262 assert(columnNameToNumber("AA") == 27); 263 }