Assignment 6

Code

# Make sure the haven package (source of read_stata) is installed, then
# attach it. requireNamespace() only checks availability without attaching,
# so the install step runs solely when haven is missing.
if (!requireNamespace("haven", quietly = TRUE)) {
  # An explicit CRAN mirror is needed when no repository is configured,
  # e.g. when this document is rendered non-interactively.
  install.packages("haven", repos = "https://cloud.r-project.org")
}
# library() stops with an error if the package still cannot be loaded.
library(haven)

Loading required package: haven

Warning: package 'haven' was built under R version 4.2.3

Code

# Remote location of the TEDS_2016 Stata (.dta) file
url <- "https://github.com/datageneration/home/blob/master/DataProgramming/data/TEDS_2016.dta?raw=true"

# Attempt the download and parse. try() reports any error (silent = FALSE)
# and lets the script continue, so TEDS_2016 is only created on success.
try(
  expr = {
    TEDS_2016 <- read_stata(file = url)
    print("Dataset loaded successfully.")
  },
  silent = FALSE
)

[1] "Dataset loaded successfully."

Code

# Report a failed load first; otherwise show the structure of the dataset
if (!exists("TEDS_2016")) {
  print("Failed to load the dataset.")
} else {
  print("Checking the dataset structure...")
  str(TEDS_2016)
}

[1] "Checking the dataset structure..."
tibble [1,690 × 54] (S3: tbl_df/tbl/data.frame)
 $ District       : dbl+lbl [1:1690] 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201...
   ..@ label       : chr "District"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:73] 201 401 501 502 701 702 703 704 801 802 ...
   .. ..- attr(*, "names")= chr [1:73] "Yi Lan County Single District" "Hsinchu County Single District" "Miaoli County 1st District" "Miaoli County 2nd District" ...
 $ Sex            : dbl+lbl [1:1690] 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1,...
   ..@ label       : chr "Sex"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:2] 1 2
   .. ..- attr(*, "names")= chr [1:2] "Male" "Female"
 $ Age            : dbl+lbl [1:1690] 4, 2, 5, 4, 5, 5, 5, 4, 5, 4, 5, 1, 5, 3, 4, 5, 4, 5,...
   ..@ label       : chr "Age"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "20-29" "30-39" "40-49" "50-59" ...
 $ Edu            : dbl+lbl [1:1690] 4, 5, 5, 2, 1, 2, 1, 5, 1, 1, 1, 2, 1, 5, 5, 1, 3, 4,...
   ..@ label       : chr "Education"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:6] 1 2 3 4 5 9
   .. ..- attr(*, "names")= chr [1:6] "Below elementary school" "Junior high school" "Senior high school" "College" ...
 $ Arear          : dbl+lbl [1:1690] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
   ..@ label       : chr "Area"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:6] 1 2 3 4 5 6
   .. ..- attr(*, "names")= chr [1:6] "Taipei, New Taipei, Keelung and Yi Lan" "Taoyuan, Hsinchu and Miaoli" "Taichung, Changhua and Nantou" "Yunlin, Chiayi and Tainan" ...
 $ Career         : dbl+lbl [1:1690] 1, 2, 1, 4, 3, 2, 4, 1, 4, 3, 3, 5, 5, 4, 1, 5, 2, 2,...
   ..@ label       : chr "Occupations5"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "Hight-class WHITE COLLAR" "Low-class WHITE COLLAR" "FARMER" "WORKER" ...
 $ Career8        : dbl+lbl [1:1690] 1, 3, 1, 4, 5, 7, 4, 2, 4, 5, 5, 7, 7, 7, 2, 7, 3, 1,...
   ..@ label       : chr "Occupation8"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:8] 1 2 3 4 5 6 7 8
   .. ..- attr(*, "names")= chr [1:8] "Civil servants" "Managers and  Professionals (priv.)" "CLERKS (priv.)" "Labor (priv.)" ...
 $ Ethnic         : dbl+lbl [1:1690] 1, 2, 2, 1, 9, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 9, 2, 2,...
   ..@ label       : chr "Ethnic"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:4] 1 2 3 9
   .. ..- attr(*, "names")= chr [1:4] "Taiwanese" "Both" "Chinese" "Noresponse"
 $ Party          : dbl+lbl [1:1690] 25, 25,  3, 25, 25,  6, 25, 24, 25, 25,  6,  5, 25,  ...
   ..@ label       : chr "Party Preference"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:26] 1 2 3 4 5 6 7 8 9 10 ...
   .. ..- attr(*, "names")= chr [1:26] "Strongly support KMT" "Somewhat support KMT" "Lean to KMT" "Somewhat lean to KMT" ...
 $ PartyID        : dbl+lbl [1:1690] 9, 9, 1, 9, 9, 2, 9, 6, 9, 9, 2, 2, 9, 1, 1, 9, 9, 9,...
   ..@ label       : chr "Party Identification"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:7] 1 2 3 4 5 6 9
   .. ..- attr(*, "names")= chr [1:7] "KMT" "DPP" "NP" "PFP" ...
 $ Tondu          : dbl+lbl [1:1690] 3, 5, 3, 5, 9, 4, 9, 6, 9, 9, 5, 5, 9, 5, 4, 9, 9, 4,...
   ..@ label       : chr "Position on unification and independence"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:7] 1 2 3 4 5 6 9
   .. ..- attr(*, "names")= chr [1:7] "Immediate unification" "Maintain the status quo,move toward unification" "Maintain the status quo, decide either unification or independence" "Maintain the status quo forever" ...
 $ Tondu3         : dbl+lbl [1:1690] 2, 3, 2, 3, 9, 2, 9, 3, 9, 9, 3, 3, 9, 3, 2, 9, 9, 2,...
   ..@ label       : chr "3 categories of TONDU"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:4] 1 2 3 9
   .. ..- attr(*, "names")= chr [1:4] "Unification" "Maintain the status quo" "Independence" "Nonresponse"
 $ nI2            : dbl+lbl [1:1690]  3, 98, 98,  3, 98, 98, 98,  3, 98,  1,  2, 98, 98,  ...
   ..@ label       : chr "Who is the current the premier of our country?"
   ..@ format.stata: chr "%10.0g"
   ..@ labels      : Named num [1:5] 1 2 3 95 98
   .. ..- attr(*, "names")= chr [1:5] "Correct" "Incorrect" "I know but can't remember the name" "Refuse to answer" ...
 $ votetsai       : num [1:1690] NA 1 0 NA NA 1 1 1 1 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ green          : num [1:1690] 0 0 0 0 0 1 0 1 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votetsai_nm    : num [1:1690] NA 1 0 NA NA 1 1 1 1 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votetsai_all   : num [1:1690] 0 1 0 0 0 1 1 1 1 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Independence   : num [1:1690] 0 1 0 1 0 0 0 1 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Unification    : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ sq             : num [1:1690] 1 0 1 0 0 1 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Taiwanese      : num [1:1690] 1 0 0 1 0 1 0 1 1 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ edu            : num [1:1690] 4 5 5 2 1 2 1 5 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ female         : num [1:1690] 1 1 0 0 1 1 0 1 1 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ whitecollar    : num [1:1690] 1 1 1 0 0 1 0 1 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ lowincome      : num [1:1690] 4 4 5 4 3 5 2 5 5 5 ...
  ..- attr(*, "label")= chr "How serious do you think low income of salaryman?"
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ income         : num [1:1690] 8 7 8 5 5.5 9 1 10 2 5.5 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ income_nm      : num [1:1690] 8 7 8 5 NA 9 1 10 2 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ age            : num [1:1690] 59 39 63 55 76 64 75 54 64 59 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ KMT            : num [1:1690] 0 0 1 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ DPP            : num [1:1690] 0 0 0 0 0 1 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ npp            : num [1:1690] 0 0 0 0 0 0 0 1 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ noparty        : num [1:1690] 1 1 0 1 1 0 1 0 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ pfp            : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ South          : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ north          : num [1:1690] 1 1 1 1 1 1 1 1 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Minnan_father  : num [1:1690] 1 1 1 1 1 1 1 1 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Mainland_father: num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Econ_worse     : num [1:1690] 0 0 1 1 0 1 1 1 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Inequality     : num [1:1690] 1 1 1 1 0 1 0 1 1 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ inequality5    : num [1:1690] 4 5 5 5 3 5 3 5 5 5 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ econworse5     : num [1:1690] 3 3 4 5 3 4 4 5 5 5 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Govt_for_public: num [1:1690] 1 1 1 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ pubwelf5       : num [1:1690] 5 5 4 1 3 2 2 1 3 2 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Govt_dont_care : num [1:1690] 0 0 1 1 0 1 1 1 0 1 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ highincome     : num [1:1690] 1 1 1 1 NA 1 0 1 0 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votekmt        : num [1:1690] 0 0 1 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votekmt_nm     : num [1:1690] NA 0 1 NA NA 0 0 0 0 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Blue           : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ Green          : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ No_Party       : num [1:1690] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ voteblue       : num [1:1690] 0 0 1 0 0 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ voteblue_nm    : num [1:1690] NA 0 1 NA NA 0 0 0 0 NA ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votedpp_1      : num [1:1690] NA 1 0 NA NA 1 1 1 1 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"
 $ votekmt_1      : num [1:1690] NA 0 1 NA NA 0 0 0 0 0 ...
  ..- attr(*, "format.stata")= chr "%9.0g"

Logistic Regression Model

Code

# Baseline binomial GLM: vote for Tsai (votetsai) on the female indicator
glm.vt <- glm(
  formula = votetsai ~ female,
  family = binomial,
  data = TEDS_2016
)

# Coefficient table, deviances and AIC of the baseline model
summary(object = glm.vt)


Call:
glm(formula = votetsai ~ female, family = binomial, data = TEDS_2016)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4180  -1.3889   0.9546   0.9797   0.9797  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.54971    0.08245   6.667 2.61e-11 ***
female      -0.06517    0.11644  -0.560    0.576    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1666.5  on 1260  degrees of freedom
Residual deviance: 1666.2  on 1259  degrees of freedom
  (429 observations deleted due to missingness)
AIC: 1670.2

Number of Fisher Scoring iterations: 4

Code

# Draw the diagnostic plots for the baseline model
plot(x = glm.vt)

Improve the model

Code

# Expanded binomial GLM: adds party indicators (KMT, DPP), Age, Edu and
# income to the baseline specification.
# NOTE(review): per the str() listing, Age and Edu are labelled category
# codes (Edu also carries a code 9) that enter here as plain numbers; the
# numeric columns age and edu may be the intended predictors - confirm.
glm.expanded <- glm(
  formula = votetsai ~ female + KMT + DPP + Age + Edu + income,
  family = binomial,
  data = TEDS_2016
)

# Coefficient table, deviances and AIC of the expanded model
summary(object = glm.expanded)


Call:
glm(formula = votetsai ~ female + KMT + DPP + Age + Edu + income, 
    family = binomial, data = TEDS_2016)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7453  -0.3661   0.2352   0.3057   2.5659  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.71385    0.49327   3.474 0.000512 ***
female       0.03838    0.17704   0.217 0.828358    
KMT         -3.14415    0.25039 -12.557  < 2e-16 ***
DPP          2.92894    0.26909  10.885  < 2e-16 ***
Age         -0.18226    0.07949  -2.293 0.021863 *  
Edu         -0.21371    0.07695  -2.777 0.005485 ** 
income       0.01735    0.03413   0.508 0.611210    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1666.53  on 1260  degrees of freedom
Residual deviance:  835.27  on 1254  degrees of freedom
  (429 observations deleted due to missingness)
AIC: 849.27

Number of Fisher Scoring iterations: 6

Summarize the model

Code

# Print the expanded model's coefficient estimates and fit statistics
summary(object = glm.expanded)


Call:
glm(formula = votetsai ~ female + KMT + DPP + Age + Edu + income, 
    family = binomial, data = TEDS_2016)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7453  -0.3661   0.2352   0.3057   2.5659  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.71385    0.49327   3.474 0.000512 ***
female       0.03838    0.17704   0.217 0.828358    
KMT         -3.14415    0.25039 -12.557  < 2e-16 ***
DPP          2.92894    0.26909  10.885  < 2e-16 ***
Age         -0.18226    0.07949  -2.293 0.021863 *  
Edu         -0.21371    0.07695  -2.777 0.005485 ** 
income       0.01735    0.03413   0.508 0.611210    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1666.53  on 1260  degrees of freedom
Residual deviance:  835.27  on 1254  degrees of freedom
  (429 observations deleted due to missingness)
AIC: 849.27

Number of Fisher Scoring iterations: 6

Code

# Draw the diagnostic plots for the expanded model
plot(x = glm.expanded)

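The group of variables that works best in explaining or predicting votes for Tsai Ing-wen is party affiliation: KMT identifiers are far less likely to vote for Tsai (estimate -3.14, p < 2e-16) and DPP identifiers far more likely (estimate 2.93, p < 2e-16). Adding these predictors cuts the residual deviance from 1666.2 in the gender-only model to 835.27. This is followed by demographic variables like age and education, which also play a significant role (Age: -0.18, p = 0.022; Edu: -0.21, p = 0.005) but are less predictive compared to party loyalty. Gender (p = 0.83) and income (p = 0.61), in this analysis, do not appear to have significant impacts on voting for Tsai.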

Code

# Full specification: party indicators and demographics plus attitude and
# identity indicators, estimated as a binomial GLM
glm.full <- glm(
  formula = votetsai ~ female + KMT + DPP + age + edu + income +
    Independence + Econ_worse + Govt_dont_care +
    Minnan_father + Mainland_father + Taiwanese,
  family = binomial,
  data = TEDS_2016
)

# Print coefficient estimates, deviances and AIC for the fitted model
summary(object = glm.full)


Call:
glm(formula = votetsai ~ female + KMT + DPP + age + edu + income + 
    Independence + Econ_worse + Govt_dont_care + Minnan_father + 
    Mainland_father + Taiwanese, family = binomial, data = TEDS_2016)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.1060  -0.3143   0.1744   0.3975   2.7917  

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -0.015976   0.679780  -0.024  0.98125    
female          -0.097996   0.189840  -0.516  0.60571    
KMT             -2.922246   0.259333 -11.268  < 2e-16 ***
DPP              2.468855   0.275350   8.966  < 2e-16 ***
age              0.003287   0.007884   0.417  0.67672    
edu             -0.092110   0.090119  -1.022  0.30674    
income           0.021771   0.036406   0.598  0.54984    
Independence     1.020953   0.251776   4.055 5.01e-05 ***
Econ_worse       0.310462   0.189100   1.642  0.10063    
Govt_dont_care  -0.014295   0.188765  -0.076  0.93964    
Minnan_father   -0.247650   0.253921  -0.975  0.32941    
Mainland_father -1.089332   0.396822  -2.745  0.00605 ** 
Taiwanese        0.909019   0.198930   4.570 4.89e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1661.76  on 1256  degrees of freedom
Residual deviance:  767.13  on 1244  degrees of freedom
  (433 observations deleted due to missingness)
AIC: 793.13

Number of Fisher Scoring iterations: 6

Partisan Loyalty and Political Ideology: The model strongly indicates that political alignment (KMT: -2.92; DPP: 2.47; both p < 2e-16) and views on independence (Independence: 1.02, p < 0.001) are the most influential factors in predicting voter support for Tsai Ing-wen. These factors outweigh demographic variables like age, education, and income, none of which is statistically significant in this analysis.
Ethnic and National Identity: The significant effects of variables like Taiwanese (0.91, p < 0.001) and Mainland_father (-1.09, p = 0.006) underscore the importance of ethnic and national identity in Taiwan’s political landscape, particularly in the context of cross-strait relations.
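Potential for Further Analysis: Gender, age, education, and income are not significant predictors in this model, and neither are Econ_worse, Govt_dont_care, or Minnan_father. Note that this model uses the numeric age and edu variables rather than the labelled Age and Edu categories used in the previous model, and adds identity and attitude controls; either change may explain why age and education lose significance here. Interactions between the demographic variables and other predictors (like party affiliation or independence) might provide additional insights. Additionally, checking for multicollinearity could further refine the understanding of these relationships.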