Polygon methods


param.df <- data.frame(
 mean=c(0, 0, 2),
 sd=c(1, 2, 1))
density.df.list <- list()
for(param.i in 1:nrow(param.df)){
 one.param <- param.df[param.i,]
 observation <- seq(-4, 4, by=0.1)
 density.df.list[[param.i]] <- data.frame(
 param.i,
 param.fac=factor(param.i),
 one.param,
 observation,
 density=dnorm(observation, one.param$mean, one.param$sd),
 row.names=NULL)
}
density.df <- do.call(rbind, density.df.list)
if(require(ggplot2)){
 gg <- ggplot()+
 geom_line(aes(
 observation, density, color=param.fac),
 data=density.df)
 directlabels::direct.label(gg, "top.polygons")
}
#> Le chargement a nécessité le package : ggplot2

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 density.df$mean.lab <- paste0("mean=", density.df$mean)
 gg <- ggplot()+
 geom_line(aes(
 observation, density, color=param.fac),
 data=density.df)+
 directlabels::geom_dl(aes(
 observation, density,
 color=param.fac,
 label.group=param.fac,
 label=mean.lab),
 method="top.polygons",
 data=density.df)
 gg
}

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 gg <- ggplot()+
 geom_line(aes(
 observation, density, color=mean.lab, group=param.fac),
 data=density.df)
 directlabels::direct.label(gg, "top.polygons")
}

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 data(BodyWeight, package="nlme")
 gg <- ggplot()+
 geom_line(aes(
 Time, weight, color=Rat),
 data=BodyWeight)+
 facet_grid(. ~ Diet)
 gg
}

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 directlabels::direct.label(gg, "right.polygons")
}

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 gg.wider <- gg+xlim(-10, 70)
 directlabels::direct.label(gg.wider, "right.polygons")
}

plot of chunk unnamed-chunk-1


if(require(ggplot2)){
 directlabels::direct.label(gg.wider, "left.polygons")
}

plot of chunk unnamed-chunk-1

SO post about stats

https://github.com/tdhock/directlabels/issues/24


if(require(ggplot2)){
 set.seed(124234345)
 # Generate data
 df.2 <- data.frame(
 "n_gram" = c("word1"),
 "year" = rep(100:199),
 "match_count" = runif(100 ,min = 1000 , max = 2000))
 df.2 <- rbind(df.2, data.frame(
 "n_gram" = c("word2"),
 "year" = rep(100:199),
 "match_count" = runif(100 ,min = 1000 , max = 2000)) )
 # use stat smooth with geom_dl to get matching direct labels.
 span <- 0.3
 ggplot(df.2, aes(year, match_count, group=n_gram, color=n_gram)) +
 geom_line(alpha = I(7/10), color="grey") +
 stat_smooth(linewidth=2, span=span, se=F) +
 directlabels::geom_dl(aes(
 label=n_gram),
 method = "last.qp",
 stat="smooth",
 span=span) +
 xlim(c(100,220))+
 guides(colour="none")
}
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning in grid.Call.graphics(C_lines, x$x, x$y, index, x$arrow): la
#> semi-transparence n'est pas supportée sur ce périphérique : signalé seulement
#> une fois par page

plot of chunk unnamed-chunk-2

serialize issue

https://github.com/tdhock/directlabels/issues/6


if(require(ggplot2) && require(dplyr) && require(ggthemes)){
 ## create data
 aaa <- structure(
 list(x = c(28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 
 18, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17),
 count = c(2344L, 
 4088L, 3247L, 2808L, 2046L, 1669L, 1315L, 951L, 610L, 543L, 469L, 
 370L, 937L, 1116L, 550L, 379L, 282L, 204L, 174L, 160L, 136L, 
 132L, 128L, 122L),
 term = c("aaa", "aaa", "aaa", "aaa", "aaa", 
 "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "bbb", "bbb", 
 "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", 
 "bbb")),
 class = c("tbl_df", "tbl", "data.frame"),
 row.names = c(NA, 
 -24L),
 .Names = c("x", "count", "term"))
 ## have a look
 print(aaa)
 ## initial plot
 p2 <- aaa %>% ggplot(aes(x = x, y = count, group = term, colour = term)) + geom_line()
 ## have a look
 print(p2)
 ## works
 print(directlabels::direct.label(p2))
 ## plot with theme
 p2 <- aaa %>% ggplot(aes(x = x, y = count, group = term, colour = term)) + geom_line() + theme_fivethirtyeight()
 ## have a look
 print(p2)
 ## used to fail but should be OK as of 19 June 2020.
 print(directlabels::direct.label(p2))
}
#> Le chargement a nécessité le package : dplyr
#> 
#> Attachement du package : 'dplyr'
#> Les objets suivants sont masqués depuis 'package:stats':
#> 
#> filter, lag
#> Les objets suivants sont masqués depuis 'package:base':
#> 
#> intersect, setdiff, setequal, union
#> Le chargement a nécessité le package : ggthemes
#> # A tibble: 24 ×ばつ 3
#> x count term 
#> <dbl> <int> <chr>
#> 1 28 2344 aaa 
#> 2 27 4088 aaa 
#> 3 26 3247 aaa 
#> 4 25 2808 aaa 
#> 5 24 2046 aaa 
#> 6 23 1669 aaa 
#> 7 22 1315 aaa 
#> 8 21 951 aaa 
#> 9 20 610 aaa 
#> 10 19 543 aaa 
#> # i 14 more rows

plot of chunk unnamed-chunk-3plot of chunk unnamed-chunk-3plot of chunk unnamed-chunk-3plot of chunk unnamed-chunk-3

changepoint cost minima

This is a test for polygon.method with only one unaligned point per group as input, in particular the new bottom.polygons method.


data(LOPART100, package="directlabels")
abbrev.vec <- c(
 data="data and models",
 cost="cost of last change")
yfac <- function(l){
 factor(abbrev.vec[[l]], abbrev.vec)
}
COST <- function(dt){
 data.frame(y.var=yfac("cost"), dt)
}
DATA <- function(dt){
 data.frame(y.var=yfac("data"), dt)
}
sig.color <- "grey50"
tau <- 99
up.to.t <- 100
change.dt <- data.frame(tau, change=tau+0.5)
t.dt <- data.frame(up.to.t)
my.hjust <- function(x)ifelse(x < nrow(LOPART100$signal)/2, 0, 1)
min.dt <- do.call(rbind, by(
 LOPART100$cost,
 LOPART100$cost$Algorithm,
 function(df)df[which.min(df$cost_candidates),]))
cost.range <- range(LOPART100$cost$cost_candidates)
cost.h <- cost.range[2]-cost.range[1]
blank.dt <- data.frame(
 position=1, cost=cost.range[1]-cost.h/4)
label.colors <- c(
 "1"="#ff7d7d",
 "0"="#f6c48f")
if(require(ggplot2)){
 gg <- ggplot()+
 geom_blank(aes(
 position, cost),
 data=COST(blank.dt))+
 geom_vline(aes(
 xintercept=up.to.t),
 color=sig.color,
 data=t.dt)+
 geom_text(aes(
 up.to.t, 13,
 hjust=my.hjust(up.to.t),
 label=sprintf(
 "$t=%s$", up.to.t)),
 color=sig.color,
 data=DATA(t.dt))+
 geom_rect(aes(
 xmin=start, xmax=end,
 fill=paste(changes),
 ymin=-Inf, ymax=Inf),
 alpha=0.5,
 data=LOPART100$labels)+
 scale_fill_manual("label", values=label.colors)+
 theme_bw()+
 theme(panel.spacing=grid::unit(0, "lines"))+
 facet_grid(y.var ~ ., scales="free")+
 geom_text(aes(
 change, 1,
 hjust=my.hjust(change),
 label=sprintf(
 "$\\tau = %d$", tau)),
 vjust=0,
 data=DATA(change.dt))+
 geom_vline(aes(
 xintercept=change),
 data=change.dt)+
 geom_segment(aes(
 start-0.5, mean,
 size=Algorithm,
 color=Algorithm,
 xend=end+0.5, yend=mean),
 data=DATA(LOPART100$segments))+
 geom_point(aes(
 position, signal),
 color=sig.color,
 shape=1,
 data=DATA(LOPART100$signal))+
 scale_size_manual(values=c(
 OPART=1.5,
 LOPART=0.5),
 drop=FALSE)+
 scale_shape_manual(values=c(
 OPART=1,
 LOPART=2),
 drop=FALSE)+
 scale_color_manual(values=c(
 OPART="deepskyblue",
 LOPART="black"),
 drop=FALSE)+
 ylab("")+
 scale_x_continuous(
 "position $t,\\tau$",
 breaks=seq(0, 100, by=10))+
 geom_point(aes(
 change, cost_candidates,
 color=Algorithm, shape=Algorithm),
 data=COST(LOPART100$cost))+
 geom_point(aes(
 change, cost_candidates,
 color=Algorithm),
 data=COST(min.dt))
 print(gg)
 label.cost <- function(df){ 
 gg+
 directlabels::geom_dl(aes(
 change, cost_candidates,
 color=Algorithm,
 label.group=Algorithm,
 label=sprintf("$\\tau^*_{%d} = %d$", up.to.t, tau)),
 method="bottom.polygons",
 data=COST(df))
 }
 print(label.cost(LOPART100$cost))
 ## to make sure it works when there is only one point to label.
 print(label.cost(min.dt))
}
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> i Please use `linewidth` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : la semi-transparence n'est pas supportée sur ce
#> périphérique : signalé seulement une fois par page

plot of chunk unnamed-chunk-4

#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : la semi-transparence n'est pas supportée sur ce
#> périphérique : signalé seulement une fois par page

plot of chunk unnamed-chunk-4

#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : la semi-transparence n'est pas supportée sur ce
#> périphérique : signalé seulement une fois par page

plot of chunk unnamed-chunk-4

LOPART ROC curve

This is a test for polygon.method with only one unaligned point per group as input, in particular with right.polygons.

data(LOPART.ROC, package="directlabels")
algo.colors <- c(
 OPART="#0077CC",
 LOPART="black",
 SegAnnot="#22CC22")
if(require(ggplot2)){
 ggplot()+
 theme_bw()+
 scale_color_manual(values=algo.colors)+
 scale_size_manual(values=c(
 LOPART=1.5,
 OPART=1))+
 directlabels::geom_dl(aes(
 FPR, TPR,
 color=model.name,
 label=paste0(model.name, ifelse(is.na(auc), "", sprintf(
 " AUC=%.3f", auc
 )))),
 method=list(
 cex=0.8,
 directlabels::polygon.method(
 "right",
 offset.cm=0.5,
 padding.cm=0.05)),
 data=LOPART.ROC$points)+
 geom_path(aes(
 FPR, TPR,
 color=model.name,
 size=model.name,
 group=paste(model.name, test.fold)),
 data=LOPART.ROC$roc)+
 geom_point(aes(
 FPR, TPR,
 color=model.name),
 size=3,
 shape=21,
 fill="white",
 data=LOPART.ROC$points)+
 theme(
 panel.spacing=grid::unit(0, "lines"),
 legend.position="none"
 )+
 facet_grid(test.fold ~ Penalty + Parameters, labeller=label_both)+
 coord_equal()+
 scale_x_continuous(
 "False Positive Rate (test set labels)",
 breaks=c(0, 0.5, 1),
 labels=c("0", "0.5", "1"))+
 scale_y_continuous(
 "True Positive Rate (test set labels)",
 breaks=c(0, 0.5, 1),
 labels=c("0", "0.5", "1"))
}

plot of chunk unnamed-chunk-5

white or black text on colored background

The weighted method for rgb to grayscale conversion is used for the default text.color in polygon.method, and explained here https://www.tutorialspoint.com/dip/grayscale_to_rgb_conversion.htm

if(require(RColorBrewer) && require(ggplot2)){
 m <- RColorBrewer::brewer.pal.info
 brewer.dt.list <- list()
 for(brewer.row in 1:nrow(m)){
 brewer.name <- rownames(m)[[brewer.row]]
 brewer.info <- m[brewer.name, ]
 col.vec <- RColorBrewer::brewer.pal(brewer.info[, "maxcolors"], brewer.name)
 rgb.mat <- col2rgb(col.vec)
 hsv.mat <- rgb2hsv(rgb.mat)
 brewer.dt.list[[brewer.name]] <- data.frame(
 brewer.name,
 brewer.fac=factor(brewer.name, rownames(m)),
 brewer.row,
 category=factor(brewer.info[, "category"], c("seq", "qual", "div")),
 column=seq_along(col.vec),
 color=col.vec,
 t(rgb.mat),
 t(hsv.mat))
 }
 brewer.dt <- do.call(rbind, brewer.dt.list)
 ggplot()+
 theme_bw()+
 theme(panel.spacing=grid::unit(0, "lines"))+
 facet_grid(category ~ ., scales="free", space="free")+
 geom_tile(aes(
 factor(column), brewer.fac, fill=color),
 data=brewer.dt)+
 geom_text(aes(
 factor(column), brewer.fac, label=brewer.fac, color=ifelse(
 ((0.3 * red) + (0.59 * green) + (0.11 * blue))/255 < 0.5, "white", "black")),
 data=brewer.dt)+
 scale_fill_identity()+
 scale_color_identity()
}
#> Le chargement a nécessité le package : RColorBrewer

plot of chunk unnamed-chunk-6

odd qp labels for timings figure

In the image below the strange thing in the labels is that the end of the pointer of nc::capture_melt_single is inside of the pointer for cdata::unpivot_to_blocks – this is ok, but we could probably avoid this by switching the order. we should be able to detect/avoid this using a linear inequality constraint: bottom of label box must be greater than next target down, etc. But if targets are too close together this could lead to no feasible solution.

data(odd_timings, package="directlabels")
odd4 <- subset(odd_timings, captures==4)
if(require(ggplot2)){
 gg <- ggplot()+
 geom_line(aes(
 N.col, median.seconds, color=fun),
 data=odd4)+
 scale_x_log10(limits=c(10, 1e6))+
 scale_y_log10()
 directlabels::direct.label(gg, "right.polygons")
}

plot of chunk unnamed-chunk-7

TODO edit polygon.method so that the right panel labels do not cross – can this be added as a constraint in the qp, or do we just need to re-order?

two dlgrobs

This example has two geom_dl with the same method, but the grobs need different names to render correctly https://github.com/tdhock/directlabels/issues/30

data(odd_timings, package="directlabels")
zero <- subset(odd_timings, captures==0)
on.right <- with(zero, N.col==max(N.col))
funs.right <- unique(zero[on.right, "fun"])
is.right <- zero$fun %in% funs.right
timings.right <- zero[is.right,]
timings.left <- zero[!is.right,]
if(require(ggplot2)){
 gg <- ggplot()+
 geom_line(aes(
 N.col, median.seconds, color=fun),
 data=zero)+
 directlabels::geom_dl(aes(
 N.col, median.seconds, color=fun, label=fun),
 method="right.polygons",
 data=timings.left)+
 directlabels::geom_dl(aes(
 N.col, median.seconds, color=fun, label=fun),
 method="right.polygons",
 data=timings.right)+
 scale_x_log10(limits=c(10, 1e6))+
 scale_y_log10()
 gg
}

plot of chunk unnamed-chunk-8

AltStyle によって変換されたページ (->オリジナル) /