library(tfarima)      # librería de José Luis Gallego para Time Series
library(latticeExtra) # para gráficos con doble eje vertical (doubleYScale)
library(readr)        # para leer ficheros CSV
library(ggplot2)      # para el scatterplot (alternativamente library(tidyverse))
library(jtools)       # para representación resultados estimación
library(zoo)          # para generar objetos ts (time series)


# Set the default size and resolution of the figures generated in the notebook.
options(
  repr.plot.width = 12,
  repr.plot.height = 4,
  repr.plot.res = 200
)


# Load the GNP / melanoma data from CSV (column-type message switched off)
# and preview the first three rows.
data_frame <- read_csv("datos/GNPvsMelanoma.csv", show_col_types = FALSE)
head(data_frame, n = 3)

|   obs |   GNP | Melanoma |
|-------+-------+----------|
| <dbl> | <dbl> |    <dbl> |
|  1936 | 193.0 |      1.0 |
|  1937 | 203.2 |      0.8 |
|  1938 | 192.9 |      0.8 |
#+caption: A tibble: 3 × 3


# Two columns of data_frame can be drawn in a single call; the result is only
# stored in `kk`, so it is not displayed by this statement.
kk <- xyplot(GNP + Melanoma ~ obs, data = data_frame, type = "l")
# For a figure with two Y axes, build one lattice plot per series ...
obj1 <- xyplot(
  GNP ~ obs,
  data = data_frame,
  type = "l",
  lwd = 2,
  xlab = "Years",
  ylab = "GNP (miles de millones de $)"
)
obj2 <- xyplot(
  Melanoma ~ obs,
  data = data_frame,
  type = "l",
  lwd = 2,
  ylab = "Casos de melanoma"
)
# ... and combine them, adding the label of the second Y axis.
doubleYScale(obj1, obj2, add.ylab2 = TRUE)


# Temporarily change the figure size for the scatter plot below.
# options() returns the previous values of the settings it changes, so they
# are restored from that saved list instead of repeating hard-coded numbers
# that would have to be kept in sync with the notebook-wide defaults.
old_opts <- options(
  repr.plot.width = 4,
  repr.plot.height = 4,
  repr.plot.res = 100
)
# Scatter plot of GNP (y) against Melanoma (x).
ggplot(data_frame, aes(x = Melanoma, y = GNP)) +
  geom_point()
# Put the previous plot settings back.
options(old_opts)


# OLS regression of GNP on Melanoma, both taken from data_frame.
# summ() (from jtools) changes how the estimation results are presented.
summ(
  lm(formula = GNP ~ Melanoma, data = data_frame)
)

#+begin_example
MODEL INFO:
Observations: 37
Dependent Variable: GNP
Type: OLS linear regression 

MODEL FIT:
F(1,35) = 231.84, p = 0.00
R² = 0.87
Adj. R² = 0.87 

Standard errors: OLS
--------------------------------------------------
                      Est.    S.E.   t val.      p
----------------- -------- ------- -------- ------
(Intercept)         118.57   23.73     5.00   0.00
Melanoma            118.98    7.81    15.23   0.00
--------------------------------------------------
#+end_example


# Build a ts ("time series") object: read.zoo() turns data_frame into a zoo
# series, which as.ts() then converts.
datos_ts <- as.ts(read.zoo(data_frame))
# First differences of the "GNP" and "Melanoma" columns of datos_ts, kept as
# two separate series.
d_GNP <- diff(datos_ts[, "GNP"])
d_Melanoma <- diff(datos_ts[, "Melanoma"])


# Collect the first differences in a new data frame, with the time index of
# d_GNP converted to dates.
DF.diferencias <- data.frame(
  date = zoo::as.Date(time(d_GNP)),
  d_GNP = as.matrix(d_GNP),
  d_Melanoma = as.matrix(d_Melanoma)
)
head(DF.diferencias, n = 2)

|   |       date | d_GNP | d_Melanoma |
|---+------------+-------+------------|
|   |     <date> | <dbl> |      <dbl> |
| 1 | 1937-01-01 |  10.2 |       -0.2 |
| 2 | 1938-01-01 | -10.3 |        0.0 |
#+caption: A data.frame: 2 × 3


# Joint plot of two columns of DF.diferencias; the result is only stored in
# `kk`, so it is not displayed by this statement.
kk <- xyplot(d_GNP + d_Melanoma ~ date, data = DF.diferencias, type = "l")
# One lattice plot per differenced series ...
obj1 <- xyplot(
  d_GNP ~ date,
  data = DF.diferencias,
  type = "l",
  lwd = 2,
  xlab = "Years",
  ylab = "Incremento GNP"
)
obj2 <- xyplot(
  d_Melanoma ~ date,
  data = DF.diferencias,
  type = "l",
  lwd = 2,
  ylab = "Incremento casos de melanoma"
)
# ... combined into a single figure with a second Y axis.
doubleYScale(obj1, obj2, add.ylab2 = TRUE)


# OLS fit of d_GNP on d_Melanoma.
# The variables are read from DF.diferencias through `data =` instead of from
# free-standing objects in the global environment, so the model is tied to
# the data frame that holds both differenced columns.
summ(lm(d_GNP ~ d_Melanoma, data = DF.diferencias))

#+begin_example
MODEL INFO:
Observations: 36
Dependent Variable: d_GNP
Type: OLS linear regression 

MODEL FIT:
F(1,34) = 0.01, p = 0.92
R² = 0.00
Adj. R² = -0.03 

Standard errors: OLS
------------------------------------------------
                     Est.   S.E.   t val.      p
----------------- ------- ------ -------- ------
(Intercept)         16.57   3.18     5.21   0.00
d_Melanoma           0.71   6.59     0.11   0.92
------------------------------------------------
#+end_example


# Read the quarterly data as a zoo object; the index is parsed with
# as.yearqtr using the "%YQ%q" format.
UK_zoo <- read.csv.zoo(
  "datos/UK_Interest_rates.csv",
  FUN = as.yearqtr,
  format = "%YQ%q",
  strip.white = TRUE
)
head(UK_zoo, n = 3)

Long Short
1952 Q2 4.23  2.32
1952 Q3 4.36  2.47
1952 Q4 4.19  2.42


# Turn UK_zoo into a data frame (the data frame used for the double-axis
# figure).
UK_df <- fortify.zoo(UK_zoo)
head(UK_df, n = 3)

|   | Index     |  Long | Short |
|---+-----------+-------+-------|
|   | <yearqtr> | <dbl> | <dbl> |
| 1 | 1952 Q2   |  4.23 |  2.32 |
| 2 | 1952 Q3   |  4.36 |  2.47 |
| 3 | 1952 Q4   |  4.19 |  2.42 |
#+caption: A data.frame: 3 × 3


# Also keep a ts (time series) version of UK_zoo, for convenience.
UK_ts <- as.ts(UK_zoo)
head(UK_ts, n = 3)

| Long | Short |
|------+-------|
| 4.23 |  2.32 |
| 4.36 |  2.47 |
| 4.19 |  2.42 |
#+caption: A matrix: 3 × 2 of type dbl


# Both columns of UK_df in one lattice call; the result is only stored in
# `kk`, so it is not displayed by this statement.
kk <- xyplot(Long + Short ~ Index, data = UK_df, type = "l")
# One lattice plot per series ...
obj1 <- xyplot(
  Long ~ Index,
  data = UK_df,
  type = "l",
  lwd = 2,
  xlab = "Years",
  ylab = "Long"
)
obj2 <- xyplot(
  Short ~ Index,
  data = UK_df,
  type = "l",
  lwd = 2,
  ylab = "Short"
)
# ... combined into a single figure with a second Y axis.
doubleYScale(obj1, obj2, add.ylab2 = TRUE)


# Correlation between the Long and Short columns of UK_df.
with(UK_df, cor(Long, Short))

CorrelacionUKinterestRates

0.89764827721203


# First differences of the "Long" and "Short" columns of UK_ts, kept as two
# separate series.
d_Long <- diff(UK_ts[, "Long"])
d_Short <- diff(UK_ts[, "Short"])


# Collect the first differences in a new data frame, with the time index of
# d_Long converted to dates.
UK_df.diferencias <- data.frame(
  date = zoo::as.Date(time(d_Long)),
  d_Long = as.matrix(d_Long),
  d_Short = as.matrix(d_Short)
)
head(UK_df.diferencias, n = 2)

|   |       date | d_Long | d_Short |
|---+------------+--------+---------|
|   |     <date> |  <dbl> |   <dbl> |
| 1 | 1952-07-01 |   0.13 |    0.15 |
| 2 | 1952-10-01 |  -0.17 |   -0.05 |
#+caption: A data.frame: 2 × 3


# Plot of the first differences of the interest rates, taken from the data
# frame UK_df.diferencias. The joint plot is only stored in `kk`, so it is
# not displayed by this statement.
kk <- xyplot(d_Long + d_Short ~ date, data = UK_df.diferencias, type = "l")
# One lattice plot per differenced series ...
obj1 <- xyplot(
  d_Long ~ date,
  data = UK_df.diferencias,
  type = "l",
  lwd = 2,
  xlab = "Quarters",
  ylab = "First diff. Long"
)
obj2 <- xyplot(
  d_Short ~ date,
  data = UK_df.diferencias,
  type = "l",
  lwd = 2,
  ylab = "First diff. Short"
)
# ... combined into a single figure with a second Y axis.
doubleYScale(obj1, obj2, add.ylab2 = TRUE)


# OLS fit of d_Short on d_Long.
# The variables are read from UK_df.diferencias through `data =` instead of
# from free-standing objects in the global environment, so the model is tied
# to the data frame that holds both differenced columns.
summ(lm(d_Short ~ d_Long, data = UK_df.diferencias))

#+begin_example
MODEL INFO:
Observations: 74
Dependent Variable: d_Short
Type: OLS linear regression 

MODEL FIT:
F(1,72) = 20.11, p = 0.00
R² = 0.22
Adj. R² = 0.21 

Standard errors: OLS
------------------------------------------------
                     Est.   S.E.   t val.      p
----------------- ------- ------ -------- ------
(Intercept)         -0.03   0.08    -0.35   0.72
d_Long               1.26   0.28     4.48   0.00
------------------------------------------------
#+end_example


# OLS fit of Short on Long (levels), with both columns taken from UK_df.
# The formula uses bare column names plus `data =` rather than `UK_df$...`
# inside the formula: the terms are then labelled `Short` / `Long` instead of
# `UK_df$Short` / `UK_df$Long`, and predict(modelo, newdata = ...) can match
# the variables by name. The estimates themselves are unchanged.
modelo <- lm(Short ~ Long, data = UK_df)
# Print the estimation results.
summ(modelo)

#+begin_example
MODEL INFO:
Observations: 75
Dependent Variable: UK_df$Short
Type: OLS linear regression 

MODEL FIT:
F(1,73) = 302.85, p = 0.00
R² = 0.81
Adj. R² = 0.80 

Standard errors: OLS
------------------------------------------------
                     Est.   S.E.   t val.      p
----------------- ------- ------ -------- ------
(Intercept)         -1.17   0.35    -3.34   0.00
UK_df$Long           1.00   0.06    17.40   0.00
------------------------------------------------
#+end_example


# Plot the residuals of `modelo` as a series.
plot(as.ts(resid(modelo)))
# Horizontal reference line at zero.
abline(h = 0)

Econometría Aplicada. Lección 3

Correlación

La causalidad y correlación

Correlación espuria

Las series con tendencia suelen presentar elevadas correlaciones.

Ejemplo de correlación espuria: PNB vs incidencia de melanoma

Explorando si la correlación es probablemente espuria (no causalidad)

Cointegración

Ejemplo de cointegración: tipos de interés en UK a corto y largo plazo