My previous post introduced the DOT language and how it can be used to create flowcharts. For part two, I set out to partially reproduce a more demanding visualization to highlight what Graphviz can do. The original graphic was taken from the website of the Python scikit-learn library and provides a quick reference guide to working with its estimation procedures. It can be found here.
// Flowchart that partially reproduces an estimator-selection cheat sheet.
//
// Edge conventions used throughout the graph:
//   green,  label "YES"           - the question in the source box is answered yes
//   red,    label "NO"            - the question in the source box is answered no
//   yellow, label "NOT\nWORKING"  - fall back to the next suggestion
//   yellow, no label              - unconditional step
digraph Cheat_Sheet {
    graph [fontsize=10, fontname="Verdana", compound=true];

    // Shared appearance of every box. The default shape is "box" rather than
    // "record": several labels contain '<' or '>', which record-shaped nodes
    // would interpret as port/field syntax.
    node [shape=box, fontsize=10, fontname="Verdana", fontcolor=black, color=cadetblue2, style=filled];

    // Only the label size is shared by edges; colour is set per edge.
    edge [fontsize=10];

    // ---- Top-level decision path ------------------------------------------
    A1 [label="START", color=yellow];
    A2 [label=">50\nsamples"];
    A3 [label="get\nmore\ndata"];
    A4 [label="predicting a\ncategory"];
    A5 [label="predicting a\nquantity"];
    A6 [label="do you have\nlabeled\ndata"];
    A7 [label="just\nlooking"];
    A8 [label="predicting\nstructure"];
    A9 [label="tough\nluck"];

    A1 -> A2 [color=yellow];
    A2 -> A3 [color=red, fontcolor=red, label="NO"];
    A2 -> A4 [color=green, fontcolor=green, label="YES"];
    A4 -> A5 [color=red, fontcolor=red, label="NO"];
    A4 -> A6 [color=green, fontcolor=green, label="YES"];
    A5 -> A7 [color=red, fontcolor=red, label="NO"];
    A7 -> A8 [color=red, fontcolor=red, label="NO"];
    A8 -> A9 [color=yellow];

    // Edges that enter or leave a cluster are declared here, at root level.
    // Declaring them inside a cluster would pull the A* endpoint into that
    // cluster as well.
    A6 -> H [color=green, fontcolor=green, label="YES"];
    A6 -> P [color=red, fontcolor=red, label="NO"];
    A5 -> B [color=green, fontcolor=green, label="YES"];
    R -> A9 [color=red, fontcolor=red, label="NO"];
    A7 -> X [color=green, fontcolor=green, label="YES"];

    // ---- Regression ---------------------------------------------------------
    subgraph cluster_1 {
        label=<<B>regression</B>>;
        color=lightgrey;
        style=filled;

        B [label="<100k\nsamples"];
        C [label="SGD\nRegressor"];
        D [label="few features\nshould be\nimportant"];
        E [label="ElasticNet\nLasso"];
        F [label="Ridge Regression\nSVR\n(kernel='linear')"];
        G [label="SVR(kernel='rbf')\nEnsembleRegressors"];

        B -> C [color=red, fontcolor=red, label="NO"];
        B -> D [color=green, fontcolor=green, label="YES"];
        D -> E [color=green, fontcolor=green, label="YES"];
        D -> F [color=red, fontcolor=red, label="NO"];
        F -> G [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Classification -----------------------------------------------------
    subgraph cluster_2 {
        label=<<B>classification</B>>;
        color=lightgrey;
        style=filled;

        H [label="<100k\nsamples"];
        I [label="SGD\nClassifier"];
        J [label="Linear\nSVC"];
        K [label="Text\nData"];
        L [label="KNeighbors\nClassifier"];
        M [label="Naive\nBayes"];
        // Spelling corrected from "kernal" to match node Z.
        N [label="kernel\napproximation"];
        O [label="SVC\nEnsemble\nClassifiers"];

        H -> I [color=red, fontcolor=red, label="NO"];
        H -> J [color=green, fontcolor=green, label="YES"];
        J -> K [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        I -> N [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        K -> M [color=green, fontcolor=green, label="YES"];
        K -> L [color=red, fontcolor=red, label="NO"];
        L -> O [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Clustering ---------------------------------------------------------
    subgraph cluster_3 {
        label=<<B>clustering</B>>;
        color=lightgrey;
        style=filled;

        P [label="number of\ncategories\nknown"];
        // Q and R carry the same question; Q is reached on the YES branch
        // of P and R on the NO branch.
        Q [label="<10k\nsamples"];
        R [label="<10k\nsamples"];
        S [label="MeanShift\nVBGMM"];
        T [label="MiniBatch\nKMeans"];
        U [label="KMeans"];
        V [label="Spectral\nClustering\nGMM"];

        P -> Q [color=green, fontcolor=green, label="YES"];
        P -> R [color=red, fontcolor=red, label="NO"];
        R -> S [color=green, fontcolor=green, label="YES"];
        Q -> U [color=green, fontcolor=green, label="YES"];
        Q -> T [color=red, fontcolor=red, label="NO"];
        U -> V [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }

    // ---- Dimensionality reduction -------------------------------------------
    subgraph cluster_4 {
        label=<<B>dimensionality reduction</B>>;
        color=lightgrey;
        style=filled;

        W [label="<10k\nsamples"];
        X [label="Randomized\nPCA"];
        Y [label="Isomap\nSpectral\nEmbedding"];
        Z [label="kernel\napproximation"];
        AA [label="LLE"];

        X -> W [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
        W -> Y [color=green, fontcolor=green, label="YES"];
        W -> Z [color=red, fontcolor=red, label="NO"];
        Y -> AA [color=yellow, fontcolor=yellow, label="NOT\nWORKING"];
    }
}