Deferencia de edad entre parejas en películas de Hollywood.
Script
Código
# paquetes ----------------------------------------------------------------library(tidyverse)library(glue)library(here)library(ggtext)library(showtext)# fuentes -----------------------------------------------------------------font_add_google(name ="Libre Bodoni", family ="libre", db_cache =FALSE) # títulofont_add_google(name ="Nunito Sans", family ="nunito") # resto del textofont_add_google(name ="Share Tech Mono", family ="share") # númerosshowtext_auto()showtext_opts(dpi =300)# íconosfont_add("fa-reg", here("icon/Font Awesome 5 Free-Regular-400.otf"))font_add("fa-brands", here("icon/Font Awesome 5 Brands-Regular-400.otf"))font_add("fa-solid", here("icon/Font Awesome 5 Free-Solid-900.otf"))# datos -------------------------------------------------------------------age_gaps <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-02-14/age_gaps.csv')# actores ----# top 100 male, actor 1top_100_m_1 <- age_gaps |>filter(character_1_gender =="man") |># agrupo por película, por si hay múltiples parejasgroup_by(movie_name) |>distinct(actor_1_name) |>ungroup() |># ordeno por cantidad y tomo los primeros 100count(actor_1_name, sort =TRUE) |>slice(1:100) |>pull(actor_1_name)# con los actores, verifico actor 1&2top_m <-bind_rows( age_gaps |>filter(actor_1_name %in% top_100_m_1), age_gaps |>filter(actor_2_name %in% top_100_m_1)) |># selecciono columnas de interés y renombroselect(movie_name, release_year, age_difference, actor_1_name, actor_2_name, actor_1_age, actor_2_age)# si el actor es mayor, la diferencia es positiva# si el actor es menor, la diferencia es negativadatos_m <- top_m |>mutate(age_difference =case_when(actor_1_name %in% top_100_m_1 ~ age_difference,TRUE~-age_difference)) |>mutate(ac =if_else(age_difference >=0, actor_1_name, actor_2_name)) |># elijo solamente el top 100 mfilter(ac %in% top_100_m_1) |>group_by(ac) |># obtengo la diferencia de edad media (con signo)summarise(delta =mean(age_difference), .groups ="drop") |># ordeno los actores por deltamutate(ac =fct_reorder(ac, delta)) |>arrange(ac) |>rename(ac1 = ac, delta1 = delta)# actrices ----# top 5 female 1top_100_f_1 <- age_gaps |>filter(character_2_gender =="woman") |># agrupo por película, por si hay múltiples parejasgroup_by(movie_name) |>distinct(actor_2_name) |>ungroup() |># ordeno por cantidad y tomo los primeros 100count(actor_2_name, sort =TRUE) |>slice(1:100) |>pull(actor_2_name)# con las actrices, verifico actriz 1&2top_f <-bind_rows( age_gaps |>filter(actor_1_name %in% top_100_f_1), age_gaps |>filter(actor_2_name %in% top_100_f_1)) |># selecciono columnas de interés y renombroselect(movie_name, release_year, age_difference, actor_1_name, actor_2_name, actor_1_age, actor_2_age)# si la actriz es mayor, la diferencia es positiva# si la actriz es menor, la diferencia es negativadatos_f <- top_f |>mutate(age_difference =case_when(actor_1_name %in% top_100_f_1 ~ age_difference,TRUE~-age_difference)) |>mutate(ac =if_else(age_difference >=0, actor_1_name, actor_2_name)) |># elijo solamente el top 100 ffilter(ac %in% top_100_f_1) |>group_by(ac) |># obtengo la diferencia de edad media (con signo)summarise(delta =mean(age_difference), .groups ="drop") |># ordeno los actores por deltamutate(ac =fct_reorder(ac, delta)) |>arrange(ac) |>rename(ac2 = ac, delta2 = delta)# combino ambos datosf_m <-bind_cols(datos_f, datos_m)# figura ------------------------------------------------------------------# colores ppalesc_m <-"#fb9e4f"c_f <-"#9dd893"# captionicon_twitter <-"<span style='font-family:fa-brands; color:white;'></span>"icon_github <-"<span style='font-family:fa-brands; color:white;'></span>"fuente <-"<span style='color:grey90;'>Datos:</span> <span style='color:gold;'><span style='font-family:mono;'>{**tidytuesdayR**}</span> semana 7</span>"autor <-"<span style='color:grey90;'>Autor:</span> <span style='color:gold;'>**Víctor Gauto**</span>"sep <-"<span style = 'color:#a4cac8;'>**|**</span>"usuario <-"<span style = 'color:gold;'>**vhgauto**</span>"mi_caption <-glue("{fuente} {sep} {autor} {sep} {icon_github} {icon_twitter} {usuario}")# plotg1 <-ggplot(data = f_m, aes(y =as.numeric(ac2))) +# vertical en age_gap == 0geom_vline(xintercept =0, color ="gold", linewidth =1, alpha =1,linetype =2) +# actoresgeom_point(aes(x = delta1), color = c_m, shape =16) +# actricesgeom_point(aes(x = delta2), color = c_f, shape =18) +# manualscale_x_continuous(labels = scales::label_number(style_positive ="plus",style_negative ="hyphen"),limits =c(-30, 30),expand =c(0, 0),# segundo eje horizontal arribasec.axis =dup_axis(name =NULL)) +scale_y_continuous(breaks =1:nrow(f_m),labels = f_m$ac2,expand =c(0, 0),# segundo eje vertical a la derechasec.axis =sec_axis(~ .,breaks =1:nrow(f_m),labels = f_m$ac1)) +coord_cartesian(clip ="off") +# ejeslabs(y =NULL, x =glue("Diferencia de edad (en años) entre la <span style='color:{c_f}'>**actriz**</span>/<span style='color:{c_m}'>**actor**</span> y su pareja en pantalla"),title ="En <span style='color:gold;'>Hollywood </span> ellos las<br> prefieren jóvenes",subtitle =glue("Se calculó la <span style='color:gold;'>**diferencia de edad**</span> promedio entre las 100 <span style='color:{c_f};'>**actrices**</span> y <span style='color:{c_m};'>**actores**</span> más populares de <span style='color:gold;'>**Hollywood**</span>. Dicha diferencia es negativa cuando la persona (eje vertical) es menor que su interés amoroso en pantalla. En su inmensa mayoría, <span style='color:{c_m};'>**ellos**</span> tienen exclusivamente parejas menores, causando que <span style='color:{c_f};'>**ellas**</span> tengan parejas de mayor edad."),caption = mi_caption,) +theme_minimal() +theme(aspect.ratio =1.8,axis.text.x =element_text(color ="gold", family ="share", size =10, face ="bold"),axis.text.y.left =element_text(color = c_f, family ="nunito", size =8),axis.text.y.right =element_text(color = c_m, family ="nunito", size =8),axis.title.x.bottom =element_markdown(color ="grey90", family ="nunito"),panel.grid.minor.y =element_blank(),panel.grid.major.y =element_line(linewidth = .05, color ="grey30"),panel.grid.major.x =element_line(linewidth = .05, color ="grey30",linetype ="ff"),panel.grid.minor.x =element_line(linewidth = .03, color ="grey30"),panel.background =element_rect(fill ="grey10", color =NA),plot.background =element_rect(fill ="grey10", color =NA),plot.title =element_markdown(size =45, family ="libre", color ="grey90"),plot.title.position ="plot",plot.subtitle =element_textbox_simple(color ="grey90", family ="nunito",margin =margin(0, 0, 10, 0)),plot.caption =element_markdown(hjust = .5, family ="nunito",margin =margin(10, 0, 0, 0)),plot.caption.position ="plot")# guardoggsave(plot = g1,filename =here("2023/semana_07/viz.png"),width =2300,height =3600,units ="px",dpi =300)# abrobrowseURL(here("2023/semana_07/viz.png"))