1 module xlsx;
2 
3 import std.file, std.xml, std.format, std.regex, std.conv, std.algorithm, std.range, std.utf;
4 import archive.core, archive.zip;
5 
6 private struct Coord {
7     int row;
8     int column;
9 }
10 
11 /// Aliases a Sheet as a two-dimensional array of strings.
12 alias Sheet = string[][];
13 
14 /++
15 Reads a sheet from an XLSX file. 
16 
17 Params:
18 fileName = The path of the XLSX file to read
19 sheetNum = The sheet number to read (one-indexed)
20 
21 Returns: The contents of the sheet, as a two-dimensional array of strings.
22 +/
23 Sheet readSheet(string fileName, int sheetNum) {
24     assert(sheetNum > 0);
25     
26     auto zip = new ZipArchive(read(fileName));
27     auto sheet = zip.getFile(format!"xl/worksheets/sheet%d.xml"(sheetNum));
28     if(sheet is null) throw new Exception("Invalid sheet number");
29     //std.utf.validate!(ubyte[])(sheet.data);
30     string xml = cast(string) sheet.data;
31     
32     validate(xml);
33 
34     auto sstFile = zip.getFile("xl/sharedStrings.xml");
35     string sstXML = cast(string) sstFile.data;
36     string[] sst = parseStringTable(sstXML);
37     
38     return parseSheetXML(xml, sst);
39 }
40 
41 /++
42 Reads a sheet from an XLSX file by its name.
43 
44 Params:
45 fileName = the path of the XLSX file to read
46 sheetNum = The name of the sheet to read
47 
48 Returns: the contents of the sheet, as a two-dimensional array of strings.
49 +/
50 Sheet readSheetByName(string fileName, string sheetName) {
51     auto zip = new ZipArchive(read(fileName));
52     auto workbook = zip.getFile("xl/workbook.xml");
53     if(workbook is null) throw new Exception("Invalid XLSX file");
54     //std.utf.validate!(ubyte[])(sheet.data);
55     string xml = cast(string) workbook.data;
56 
57     const int id = getSheetId(xml, sheetName);
58 
59     return readSheet(fileName, id);
60 }
61 
62 /++
63 Parses an XLSX sheet from XML.
64 
65 Params:
66 xmlString = The XML to parse.
67 
68 Returns: the equivalent sheet, as a two dimensional array of strings.
69 +/
70 Sheet parseSheetXML(string xmlString, string[] sst) {
71     Sheet temp;
72     
73     int cols = 0;
74     auto doc = new DocumentParser(xmlString);
75     doc.onEndTag["dimension"] = (in Element dim) {
76         auto dims = parseDimensions(dim.tag.attr["ref"]);
77         //temp ~= new string[dims.row];
78         cols = dims.column;
79     };
80     
81     doc.onStartTag["row"] = (ElementParser rowTag) {
82         //int r = parse!int(rowTag.tag.attr["r"])-1;
83         
84         auto theRow = new string[cols];
85         rowTag.onStartTag["c"] = (ElementParser cTag) {
86             Coord loc = parseLocation(cTag.tag.attr["r"]);
87             bool isRef = false;
88             if("t" in cTag.tag.attr)
89                 isRef = cTag.tag.attr["t"] == "s";
90             
91             string val;
92             cTag.onEndTag["v"] = (in Element v) { val = v.text; };
93             cTag.parse();
94 
95             if(isRef) {
96                 int index = parse!int(val);
97                 theRow[loc.column] = sst[index];
98             }
99             else {
100                 theRow[loc.column] = val;
101             }
102         };
103         rowTag.parse();
104         temp ~= theRow;
105     };
106     doc.parse();
107     return temp;
108 }
109 unittest {
110     const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
111 <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x14ac" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"><dimension ref="A1:C3"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="A4" sqref="A4"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15" x14ac:dyDescent="0.25"/><sheetData><row r="1" spans="1:3" x14ac:dyDescent="0.25"><c r="A1"><v>1</v></c><c r="B1"><v>5</v></c><c r="C1"><v>7</v></c></row><row r="2" spans="1:3" x14ac:dyDescent="0.25"><c r="A2"><v>2</v></c><c r="B2"><v>4</v></c><c r="C2"><v>3</v></c></row><row r="3" spans="1:3" x14ac:dyDescent="0.25"><c r="A3"><v>7</v></c><c r="B3"><v>82</v></c><c r="C3"><v>1</v></c></row></sheetData><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/></worksheet>`;
112     Sheet testSheet = [["1","5","7"],["2","4","3"],["7","82","1"]];
113     Sheet result = parseSheetXML(test, null);
114     assert(result == testSheet);
115 }
116 
117 /++
118 Gets the numeric (> 1) id of a sheet with a given name.
119 
120 Params: 
121 wbXml = The XML content of the workbook.xml file in the main XLSX zip archive.
122 sheetName = The name of the sheet to find.
123 
124 Returns: the id of the given sheet
125 
126 Exceptions:
127 Exception if the given sheet name is not in the workbook.
128 +/
129 private int getSheetId(string wbXml, string sheetName) {
130     int theId;
131     auto doc = new DocumentParser(wbXml);
132     doc.onEndTag["sheet"] = (in Element sheet) {
133         if(sheet.tag.attr["name"] == sheetName) {
134             string wat = sheet.tag.attr["sheetId"].dup;
135             theId = parse!int(wat);
136         }
137     };
138     doc.parse();
139 
140     if(theId != 0) {
141         return theId;
142     }
143     else throw new Exception("No sheet with that name!");
144 }
145 unittest {
146     const string test = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
147 <workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15 xr2" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main" xmlns:xr2="http://schemas.microsoft.com/office/spreadsheetml/2015/revision2"><fileVersion appName="xl" lastEdited="7" lowestEdited="7" rupBuild="18625"/><workbookPr defaultThemeVersion="166925"/><mc:AlternateContent xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><mc:Choice Requires="x15"><x15ac:absPath url="C:\Users\rraab.ADVILL\Documents\Code\D\dlang-xlsx\" xmlns:x15ac="http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac"/></mc:Choice></mc:AlternateContent><bookViews><workbookView xWindow="0" yWindow="0" windowWidth="28800" windowHeight="12210" activeTab="1" xr2:uid="{045295F2-59E2-495E-BB00-149A1C289780}"/></bookViews><sheets><sheet name="Test1" sheetId="1" r:id="rId1"/><sheet name="Test2" sheetId="2" r:id="rId2"/></sheets><calcPr calcId="171027"/><extLst><ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main"><x15:workbookPr chartTrackingRefBase="1"/></ext></extLst></workbook>`;
148     assert(getSheetId(test, "Test1") == 1);
149     assert(getSheetId(test, "Test2") == 2);
150 }
151 
152 /++
153 Parses a Shared String Table XML string into an array of strings. 
154 
155 Params:
156 sst = The Shared String Table XML string to parse
157 Returns: 
158 A simple string array that can be indexed into from an "s" type cell's value (which is a zero-based index, thankfully)
159 +/
160 private string[] parseStringTable(string sst) {
161     auto doc = new DocumentParser(sst);
162     string[] table;
163 
164     doc.onEndTag["t"] = (in Element t) {
165 		table ~= t.text();
166     };
167 
168     doc.parse();
169 
170     return table;
171 }
172 
173 /++
174 Gets a column from a sheet, with each element parsed as the templated type.
175 
176 Params:
177 sheet = The Sheet from which to extract the column.
178 col = The (zero-based) index of the column to extract.
179 
180 Returns: the given column, as a newly-allocated array.
181 +/
182 T[] getColumn(T)(Sheet sheet, size_t col) {
183     return sheet.map!(x => parse!T(x[col])).array;
184 }
185 unittest {
186     assert(getColumn!int([["1","5","7"],["2","4","3"],["7","82","1"]], 1) == [5, 4, 82]);
187 }
188 
189 ///Parses an Excel letter-number column-major one-based location string to a pair of row-major zero-based indeces
190 private Coord parseLocation(string location) {
191     auto pat = ctRegex!"([A-Z]+)([0-9]+)";
192     auto m = location.matchFirst(pat);
193     Coord temp;
194     string rrow = m[2];
195     temp.row = parse!int(rrow)-1;
196     temp.column = m[1].columnNameToNumber-1;
197     return temp;
198 }
199 unittest {
200     Coord C3 = {2,2};
201     Coord B15 = {14,1};
202     assert(parseLocation("C3") == C3);
203     assert(parseLocation("B15") == B15);
204 }
205 
206 private Coord parseDimensions(string dims) {
207     auto pat = ctRegex!"([A-Z]+)([0-9]+):([A-Z]+)([0-9]+)";
208     auto m = dims.matchFirst(pat);
209     Coord temp;
210     string m4 = m[4];
211     string m2 = m[2];
212     temp.row = parse!int(m4) - parse!int(m2) + 1;
213     temp.column = m[3].columnNameToNumber - m[1].columnNameToNumber + 1;
214     return temp;
215 }
216 
217 private int columnNameToNumber(string col) {
218     reverse(col.dup);
219     int num = 0;
220     foreach(i, c; col) {
221         num += (c - 'A' + 1)*26^^i;
222     }
223     return num;
224 }
225 
226 unittest {
227     assert(columnNameToNumber("AA") == 27);
228 }