1 /*
2 *             Copyright Lodovico Giaretta 2016 - .
3 *  Distributed under the Boost Software License, Version 1.0.
4 *      (See accompanying file LICENSE_1_0.txt or copy at
5 *            http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 module csvplot;
9 
10 import std.algorithm: canFind, filter, sort, startsWith, splitter, map;
11 import std.array: appender, array;
12 import std.conv;
13 import std.format;
14 import std.getopt;
15 import std.stdio;
16 import std.typecons: Nullable, Tuple;
17 
/**
 * Columns of a record that can be used to group (`--join`), to lay out along
 * the x axis (`--compare`) or to restrict (`--where`) the plotted data.
 *
 * The member names are significant: they are parsed from the command line and
 * converted back to strings to look up the matching `Entry` field.
 */
enum Key : int
{
    /// Default value, used when an option such as `--join` is not given.
    unspecified = 0,
    /// The `timestamp` field of a record.
    timestamp = 1,
    /// The `configuration` field of a record.
    configuration = 2,
    /// The `component` field of a record.
    component = 3,
    /// The `file` field of a record.
    file = 4,
}
26 
/**
 * Measurements that can be requested with `--show`.
 *
 * The member names are significant: they are parsed from the command line and
 * converted back to strings to look up output column indexes.
 */
enum Value : int
{
    /// Minimum measured value.
    min = 0,
    /// Maximum measured value.
    max = 1,
    /// Average measured value.
    average = 2,
    /// Median measured value.
    median = 3,
    /// Deviation, emitted as the pair (average - deviation, average + deviation).
    deviation = 4,
    /// Deviation, emitted as the raw number.
    sigma = 5,
}
36 
/**
 * One parsed input record: four textual key fields followed by five numeric
 * measurements. Fields that do not apply to a record kind keep their default
 * value.
 */
alias Entry = Tuple!(
    // key fields
    string, "timestamp", string, "component", string, "configuration", string, "file",
    // measurements
    double, "min", double, "average", double, "max", double, "median", double, "deviation");
46 
/**
 * Looks up one of the textual fields of `entry` by its name.
 *
 * Params:
 *     entry = the record to read from
 *     name  = "timestamp", "component", "configuration", "file" or "unspecified"
 *
 * Returns: the content of the named field; the empty string for "unspecified".
 *
 * Any other name is a programming error and halts the program.
 */
string getString(Entry entry, string name)
{
    if (name == "timestamp")
        return entry.timestamp;
    if (name == "component")
        return entry.component;
    if (name == "configuration")
        return entry.configuration;
    if (name == "file")
        return entry.file;
    if (name == "unspecified")
        return "";

    // Unknown field name.
    assert(0);
}
/**
 * Looks up one of the numeric fields of `entry` by its name.
 *
 * Params:
 *     entry = the record to read from
 *     name  = "min", "average", "max", "median" or "deviation"
 *
 * Returns: the content of the named field.
 *
 * Any other name is a programming error and halts the program.
 */
double getDouble(Entry entry, string name)
{
    if (name == "min")
        return entry.min;
    if (name == "average")
        return entry.average;
    if (name == "max")
        return entry.max;
    if (name == "median")
        return entry.median;
    if (name == "deviation")
        return entry.deviation;

    // Unknown field name.
    assert(0);
}
71                      
/**
 * Program entry point: dispatches on the command given as first argument.
 *
 * Recognized commands are "plot" (handled by `plot`) and "show" (currently a
 * no-op). A missing or unknown command is reported on stderr.
 */
void main(string[] args)
{
    // Without this guard, running the program with no command would fail
    // with a range violation on args[1].
    if (args.length < 2)
    {
        stderr.writeln("Missing command; expected plot or show");
        return;
    }

    switch (args[1])
    {
        case "plot":
            plot(args);
            break;
            
        case "show":
            // Accepted, but does nothing yet.
            break;
            
        default:
            stderr.writeln("Unrecognized command ", args[1]);
    }
}
87 
/**
 * Implements the `plot` command: reads the records of the input file and
 * writes to stdout a gnuplot script made of an inline `$data` block followed
 * by the plot commands.
 *
 * Command line (after the "plot" command itself):
 *     --compare KEY   (required) key whose distinct values become the x tics
 *     --show VALUES   (required) comma separated list of `Value` names
 *     --join KEY      key whose distinct values become the data blocks
 *                     (selected in the script through column(-2))
 *     --where KEY=RE  only keep records whose KEY field matches RE
 * followed by the input file name.
 *
 * Params:
 *     args = the program arguments; parsed options are removed by getopt
 *
 * Throws: an Exception on an invalid combination of options or when the input
 *         file name is missing, besides whatever getopt and the file reading
 *         code throw.
 */
void plot(ref string[] args)
{
    import std.exception : enforce;

    Key compare;
    Key join;
    Value[] show;
    string[Key] where;
    
    // GET THE PARAMETERS
    arraySep = ",";
    getopt(args, config.caseSensitive, config.bundling,
        config.required, "compare", &compare,
        config.required, "show", &show,
        "join", &join,
        "where", &where,
        );

    // args is now [program, "plot", file, ...]
    enforce(args.length > 2, "Missing input file name");
    
    // CHECK CONSISTENCY OF OPTIONS
    // User input is validated with enforce and not assert, so that the checks
    // are still performed in release builds.
    string kind = "P";
    {
        auto keys = where.keys ~ join ~ compare;
        enforce(keys.containsAll([Key.timestamp, Key.component]), "Invalid combination of options");
        if (keys.canFind(Key.configuration))
            kind = "C";
        if (keys.canFind(Key.file))
        {
            enforce(keys.canFind(Key.configuration), "Invalid combination of options");
            kind = "F";
        }
    }
    
    // FILTER INPUT DATA
    auto data = filter(args[2], where, kind);
    
    // EXTRACT NEEDED COLUMNS
    // Keys are remembered in order of first appearance.
    string[] sortedJoinKeys = [];
    string[] sortedCompareKeys = [];
    double[][string][string] output;
    foreach (entry; data)
    {
        auto joinKey = entry.getString(to!string(join));
        auto compareKey = entry.getString(to!string(compare));
        
        if (!sortedJoinKeys.canFind(joinKey))
            sortedJoinKeys ~= joinKey;
        if (!sortedCompareKeys.canFind(compareKey))
            sortedCompareKeys ~= compareKey;
            
        // The order of the values here must agree with the column indexes
        // calculated below.
        double[] line = [];
        if (show.canFind(Value.min))
            line ~= entry.min;
        if (show.canFind(Value.average))
            line ~= entry.average;
        if (show.canFind(Value.max))
            line ~= entry.max;
        if (show.canFind(Value.median))
            line ~= entry.median;
        if (show.canFind(Value.deviation))
        {
            line ~= entry.average - entry.deviation;
            line ~= entry.average + entry.deviation;
        }
        if (show.canFind(Value.sigma))
            line ~= entry.deviation;
        output[joinKey][compareKey] = line;
    }
    
    // CALCULATE COLUMN INDEXES
    // 1-based, as gnuplot columns are.
    int[string] column;
    {
        int i = 1;
        if (show.canFind(Value.min))
            column["min"] = i++;
        if (show.canFind(Value.average))
            column["average"] = i++;
        if (show.canFind(Value.max))
            column["max"] = i++;
        if (show.canFind(Value.median))
            column["median"] = i++;
        if (show.canFind(Value.deviation))
        {
            column["deviation_low"] = i++;
            column["deviation_high"] = i++;
        }
        if (show.canFind(Value.sigma))
            column["sigma"] = i++;
    }
    
    // OUTPUT THE DATA
    // One block per join key (blocks are separated by two empty lines), one
    // row per compare key; "?" marks a missing combination.
    writeln("$data << EOD");
    foreach (joinKey; sortedJoinKeys)
    {
        foreach (compareKey; sortedCompareKeys)
        {
            if (compareKey in output[joinKey])
                foreach (value; output[joinKey][compareKey])
                    write("\t", value);
            else
                write("?");
                
            writeln();
        }
        writeln();
        writeln();
    }
    writeln("EOD");
    
    // SOME USEFUL VARIABLES
    auto joinCount = sortedJoinKeys.length;
    double boxWidth = (joinCount <= 6)? 0.15 : 0.1;
    // joinCount is unsigned: guard the subtraction so that an empty data set
    // does not wrap around to a huge shift.
    double xShift = (joinCount > 0)? (joinCount-1)*boxWidth/2 : 0.0;
    string xpos = "($0 - " ~ to!string(xShift) ~ " + column(-2)*" ~ to!string(boxWidth) ~ ")";
    
    // OUTPUT THE CORRECT SCRIPT BASED ON THE PARAMETERS
    writeln();
    writeln("set datafile missing \"?\"");
    writeln("set term svg noenhanced");
    writeln("set xtic rotate by -60");
    writeln("set ylabel 'Speed [MB/s]'");
    writeln("set offsets graph 0.1, graph 0.1, graph 0.1, graph 0.1");
    write("set xtic (");
    foreach (i, compareKey; sortedCompareKeys) write((i > 0)?", \"":"\"", compareKey, "\" ", i);
    writeln(")");
    writeln("set key outside");
    writeln("set boxwidth %f".format(8*boxWidth/10));
    
    // Each branch below handles one combination of requested values and
    // removes them from remainShow, so that richer combinations take
    // precedence over the single-value fallbacks at the end. The
    // (NaN) entries draw nothing and only add a line to the key.
    writeln("plot \\");
    auto remainShow = show.dup;
    if (remainShow.length == 1 && remainShow[0] != Value.deviation)
    {
        "$data u %s:%d:(column(-2)) notitle w boxes lc variable, \\".format(xpos, column[to!string(remainShow[0])]).writeln;
        remainShow = [];
    }
    if (remainShow.containsAll([Value.min, Value.max, Value.deviation]))
    {
        "$data u %s:%d:%d:%d:%d:(column(-2)) notitle with candlesticks lc variable, \\".format(xpos, column["deviation_low"], column["min"], column["max"], column["deviation_high"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(NaN):(column(-2)) title \"deviation\" w candlesticks lc \"black\", \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.min, Value.max, Value.deviation].canFind(a))).array;
        if (remainShow.canFind(Value.average))
        {
            "$data u %s:%d:%d:%d:%d:(column(-2)) notitle w candlesticks lc variable, \\".format(xpos, column["average"], column["average"], column["average"], column["average"]).writeln;
            remainShow = remainShow.filter!(a => a != Value.average).array;
        }
    }
    if (remainShow.containsAll([Value.min, Value.max, Value.average]))
    {
        "$data u %s:%d:%d:%d:(column(-2)) notitle w errorbars lc variable, \\".format(xpos, column["average"], column["min"], column["max"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(column(-2)) title \"min-avg-max\" w errorbars lc variable lt 1, \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.min, Value.max, Value.average].canFind(a))).array;
    }
    if (remainShow.containsAll([Value.min, Value.max, Value.median]))
    {
        "$data u %s:%d:%d:%d:(column(-2)) notitle w errorbars lc variable, \\".format(xpos, column["median"], column["min"], column["max"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(column(-2)) title \"min-median-max\" w errorbars lc variable lt 1, \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.min, Value.max, Value.median].canFind(a))).array;
    }
    if (remainShow.containsAll([Value.deviation, Value.average]))
    {
        "$data u %s:%d:%d:%d:(column(-2)) notitle w errorbars lc variable, \\".format(xpos, column["average"], column["deviation_low"], column["deviation_high"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(column(-2)) title \"average and deviation\" w errorbars lc variable lt 1, \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.average, Value.deviation].canFind(a))).array;
    }
    if (remainShow.containsAll([Value.deviation, Value.median]))
    {
        // The point is the median: the average column may not even exist
        // here, since the average was not (or is no longer) requested.
        "$data u %s:%d:%d:%d:(column(-2)) notitle w errorbars lc variable, \\".format(xpos, column["median"], column["deviation_low"], column["deviation_high"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(column(-2)) title \"median and deviation\" w errorbars lc variable lt 1, \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.median, Value.deviation].canFind(a))).array;
    }
    if (remainShow.containsAll([Value.min, Value.max]))
    {
        "$data u %s:(($%d + $%d)/2):%d:%d:(column(-2)) notitle w errorbars lc variable pt -1, \\".format(xpos, column["min"], column["max"], column["min"], column["max"]).writeln;
        "$data u (NaN):(NaN):(NaN):(NaN):(column(-2)) title \"min-max\" w errorbars lc variable pt -1 lt 1, \\".writeln;
        remainShow = remainShow.filter!(a => !([Value.min, Value.max].canFind(a))).array;
    }
    if (remainShow.canFind(Value.median))
    {
        "$data u %s:%d:(column(-2)) notitle lc variable pt 2 ps 0.75, \\".format(xpos, column["median"]).writeln;
        "$data u (NaN):(NaN):(column(-2)) title \"median\" lc \"black\" pt 2, \\".writeln;
        remainShow = remainShow.filter!(a => a != Value.median).array;
    }
    if (remainShow.canFind(Value.average))
    {
        "$data u %s:%d:(column(-2)) notitle lc variable pt 7 ps 0.75, \\".format(xpos, column["average"]).writeln;
        "$data u (NaN):(NaN):(column(-2)) title \"average\" lc \"black\" pt 7, \\".writeln;
        remainShow = remainShow.filter!(a => a != Value.average).array;
    }
    if (remainShow.canFind(Value.sigma))
    {
        "$data u %s:%d:(column(-2)) notitle lc variable pt 4 ps 0.75, \\".format(xpos, column["sigma"]).writeln;
        "$data u (NaN):(NaN):(column(-2)) title \"sigma\" lc \"black\" pt 4, \\".writeln;
        remainShow = remainShow.filter!(a => a != Value.sigma).array;
    }
    if (remainShow.canFind(Value.min))
    {
        "$data u %s:%d:(column(-2)) notitle lc variable pt 10 ps 0.75, \\".format(xpos, column["min"]).writeln;
        "$data u (NaN):(NaN):(column(-2)) title \"min\" lc \"black\" pt 10, \\".writeln;
        remainShow = remainShow.filter!(a => a != Value.min).array;
    }
    if (remainShow.canFind(Value.max))
    {
        "$data u %s:%d:(column(-2)) notitle lc variable pt 8 ps 0.75, \\".format(xpos, column["max"]).writeln;
        "$data u (NaN):(NaN):(column(-2)) title \"max\" lc \"black\" pt 8, \\".writeln;
        remainShow = remainShow.filter!(a => a != Value.max).array;
    }
    // Blank key line separating the symbols from the join keys.
    "\"+\" u 1:(NaN) title \" \" w dots lc \"white\", \\".writeln;
    foreach (i; 0..joinCount)
        "$data u (NaN):(NaN):(%d) title \"%s\" w boxes lc variable fs solid, \\".format(i, sortedJoinKeys[i]).writeln;
    // Every plot element above ends with ", \": this last element closes the
    // plot command.
    writeln("dummy = 0");
}
295 
/**
 * Reads the records of the given kind from a comma separated file, keeping
 * only those that satisfy the `where` constraints.
 *
 * Only lines starting with `kind` are considered. After the first field,
 * which is skipped, a line holds the timestamp and the component, then the
 * configuration (unless `kind` is "P"), then the file (only if `kind` is "F"),
 * then min, average, max, median and deviation. Fields are stripped of
 * surrounding whitespace.
 *
 * Params:
 *     filename = path of the file to read
 *     where    = for each constrained key, the pattern its field must match
 *     kind     = prefix selecting the lines to read: "P", "C" or "F"
 *
 * Returns: the accepted records, in file order.
 */
Entry[] filter(string filename, string[Key] where, string kind)
{
    auto accepted = appender!(Entry[])();
    auto input = File(filename, "r");

    // True when `key` is not constrained or `field` satisfies its constraint.
    bool passes(Key key, string field)
    {
        auto pattern = key in where;
        return pattern is null || field.matches(*pattern);
    }

    foreach (record; input.byLineCopy)
    {
        if (!record.startsWith(kind))
            continue;

        auto fields = record.splitter(',').map!"a.strip";
        // The first field only tells the kind of record.
        fields.popFront;

        // Consumes and returns the next field of the record.
        string next()
        {
            auto field = fields.front;
            fields.popFront;
            return field;
        }

        Entry entry;

        entry.timestamp = next();
        entry.component = next();
        if (!passes(Key.timestamp, entry.timestamp))
            continue;
        if (!passes(Key.component, entry.component))
            continue;

        if (kind != "P")
        {
            entry.configuration = next();
            if (!passes(Key.configuration, entry.configuration))
                continue;
        }
        if (kind == "F")
        {
            entry.file = next();
            if (!passes(Key.file, entry.file))
                continue;
        }

        // The numbers are converted only for records that are kept.
        entry.min = to!double(next());
        entry.average = to!double(next());
        entry.max = to!double(next());
        entry.median = to!double(next());
        entry.deviation = to!double(next());

        accepted.put(entry);
    }
    return accepted.data;
}
339 
/**
 * Tells whether every element of `needles` occurs in `haystack`.
 *
 * Params:
 *     haystack = the array to search in
 *     needles  = the elements to look for
 *
 * Returns: `true` if no needle is missing (in particular, if there are no
 *          needles at all), `false` otherwise.
 */
bool containsAll(T)(T[] haystack, T[] needles)
{
    bool allFound = true;
    // Stops at the first missing needle.
    for (size_t i = 0; allFound && i < needles.length; ++i)
        allFound = haystack.canFind(needles[i]);
    return allFound;
}
347 
/**
 * Tells whether `target` is equal to `re` or is matched, in its entirety, by
 * `re` taken as a regular expression.
 *
 * Params:
 *     target = the string to test
 *     re     = a literal string or a regular expression
 *
 * Returns: `true` if `target` equals `re` or the whole of `target` matches it.
 */
bool matches(string target, string re)
{
    // Literal equality first: this also accepts strings that contain regex
    // metacharacters and would not match themselves as patterns.
    if (target == re)
        return true;
    
    import std.regex;
    // The pattern is wrapped in a non-capturing group so that the anchors
    // apply to the whole expression and not only to the first and last
    // alternative of something like "a|ab".
    auto captures = matchFirst(target, "^(?:" ~ re ~ ")$");
    // `hit` must not be read when nothing matched.
    return !captures.empty && captures.hit == target;
}