My previous post introduced the DOT language and showed how it can be used to create flowcharts. For part two, I set out to partially reproduce a more demanding visualization to highlight what Graphviz can do. The original graphic was taken from the website for the Python scikit-learn library and provides a quick reference guide on working with estimation procedures. It can be found here.
// Partial reproduction of an estimator-selection flowchart ("cheat sheet").
// Top-level nodes A1..A9 ask the routing questions; each cluster_N subgraph
// groups the questions and estimators of one problem family.
//
// Edge colour convention used throughout:
//   green  = "YES" branch
//   red    = "NO" branch
//   yellow = unconditional step or "NOT WORKING" fallback
digraph Cheat_Sheet {
    // compound=true allows edges to be clipped at cluster borders via
    // lhead/ltail; no edge uses those attributes at present.
    graph [fontsize=10 fontname="Verdana" compound=true];

    // Shared look for every node. shape=box (rather than record) is used as
    // the default because record labels treat '<' and '>' as port syntax,
    // which would clash with labels such as "<100k\nsamples".
    node [shape=box fontsize=10 fontname="Verdana" fontcolor=black color=cadetblue2 style=filled];

    // Shared label size for every edge.
    edge [fontsize=10];

    // ---- Top-level routing questions ----
    A1 [label="START", color=Yellow];  // entry point, highlighted in yellow
    A2 [label=">50\nsamples"];
    A3 [label="get\nmore\ndata"];
    A4 [label="predicting a\ncategory"];
    A5 [label="predicting a\nquantity"];
    A6 [label="do you have\nlabeled\ndata"];
    A7 [label="just\nlooking"];
    A8 [label="predicting\nstructure"];
    A9 [label="tough\nluck"];

    A1 -> A2 [color=yellow];
    A2 -> A3 [color=red, fontcolor=red, label="NO"];
    A2 -> A4 [color=green, fontcolor=green, label="YES"];
    A4 -> A5 [color=red, fontcolor=red, label="NO"];
    A4 -> A6 [color=green, fontcolor=green, label="YES"];
    A5 -> A7 [color=red, fontcolor=red, label="NO"];
    A7 -> A8 [color=red, fontcolor=red, label="NO"];
    A8 -> A9 [color=yellow];

    // Edges that cross between the top level and the clusters. The target
    // nodes (H, P, B, R, X) receive their labels inside their clusters below.
    A6 -> H [color=green, fontcolor=green, label="YES"];
    A6 -> P [color=red, fontcolor=red, label="NO"];
    A5 -> B [color=green, fontcolor=green, label="YES"];
    R -> A9 [color=red, fontcolor=red, label="NO"];
    A7 -> X [color=green, fontcolor=green, label="YES"];

    // ---- Regression ----
    subgraph cluster_1 {
        label=<<B>regression</B>>;
        color=lightgrey;
        style=filled;

        B [label="<100k\nsamples"];
        C [label="SGD\nRegressor"];
        D [label="few features\nshould be\nimportant"];
        E [label="ElasticNet\nLasso"];
        F [label="Ridge Regression\nSVR\n(kernel='linear')"];
        G [label="SVR(kernel='rbf')\nEnsembleRegressors"];

        B -> C [color=red, fontcolor=red, label="NO"];
        B -> D [color=green, fontcolor=green, label="YES"];
        D -> E [color=green, fontcolor=green, label="YES"];
        D -> F [color=red, fontcolor=red, label="NO"];
        F -> G [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Classification ----
    subgraph cluster_2 {
        label=<<B>classification</B>>;
        color=lightgrey;
        style=filled;

        H [label="<100k\nsamples"];
        I [label="SGD\nClassifier"];
        J [label="Linear\nSVC"];
        K [label="Text\nData"];
        L [label="KNeighbors\nClassifier"];
        M [label="Naive\nBayes"];
        N [label="kernel\napproximation"];  // spelling fixed (was "kernal")
        O [label="SVC\nEnsemble\nClassifiers"];

        H -> I [color=red, fontcolor=red, label="NO"];
        H -> J [color=green, fontcolor=green, label="YES"];
        J -> K [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        I -> N [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        K -> M [color=green, fontcolor=green, label="YES"];
        K -> L [color=red, fontcolor=red, label="NO"];
        L -> O [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Clustering ----
    subgraph cluster_3 {
        label=<<B>clustering</B>>;
        color=lightgrey;
        style=filled;

        P [label="number of\ncategories\nknown"];
        Q [label="<10k\nsamples"];
        R [label="<10k\nsamples"];
        S [label="MeanShift\nVBGMM"];
        T [label="MiniBatch\nKMeans"];
        U [label="KMeans"];
        V [label="Spectral\nClustering\nGMM"];

        P -> Q [color=green, fontcolor=green, label="YES"];
        P -> R [color=red, fontcolor=red, label="NO"];
        R -> S [color=green, fontcolor=green, label="YES"];
        Q -> U [color=green, fontcolor=green, label="YES"];
        Q -> T [color=red, fontcolor=red, label="NO"];
        U -> V [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Dimensionality reduction ----
    subgraph cluster_4 {
        label=<<B>dimensionality reduction</B>>;
        color=lightgrey;
        style=filled;

        W [label="<10k\nsamples"];
        X [label="Randomized\nPCA"];
        Y [label="Isomap\nSpectral\nEmbedding"];
        Z [label="kernel\napproximation"];
        AA [label="LLE"];

        X -> W [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        W -> Y [color=green, fontcolor=green, label="YES"];
        W -> Z [color=red, fontcolor=red, label="NO"];
        Y -> AA [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }
}
