# Load necessary libraries.
# warn.conflicts = FALSE and quietly = TRUE only silence the start-up and
# name-conflict messages printed while attaching packages; they do not hide
# errors. plyr and dplyr are not really meant to be used together; when both
# are needed, it is recommended that plyr be loaded first, as done here.
library(formatR, warn.conflicts = FALSE, quietly = TRUE)
library(plyr, warn.conflicts = FALSE, quietly = TRUE)
library(dplyr, warn.conflicts = FALSE, quietly = TRUE)
library(tidyr, warn.conflicts = FALSE, quietly = TRUE)
library(tibble, warn.conflicts = FALSE, quietly = TRUE)
library(knitr, warn.conflicts = FALSE, quietly = TRUE)
library(DataExplorer, warn.conflicts = FALSE, quietly = TRUE)
library(SmartEDA, warn.conflicts = FALSE, quietly = TRUE)
library(ThemePark, warn.conflicts = FALSE, quietly = TRUE)
library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
library(easystats, warn.conflicts = FALSE, quietly = TRUE)

# Default chunk options for every figure/code chunk in the document.
# NOTE(review): the trailing unnamed `60` is kept exactly as found; it looks
# like a leftover from a `tidy.opts`/`width.cutoff` setting -- confirm the
# intended option and name it explicitly.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.asp = 0.8,
  out.width = "100%",
  width.cutoff = I(50),
  60
)
Code
# Read the survey export. `d` is the data frame (effectively the dataset)
# used by all of the code that follows.
data_path <- "/Users/Matt/Library/CloudStorage/Box-Box/Work Laptop Documents/Job Control Article/final_dataset_20230903.csv"
d <- read.csv(data_path)

# Columns that are converted to factors as-is (blank values are not recoded).
factor_only_cols <- c("Gender", "Type.of.institution")

# Categorical columns whose blank ("") values are marked as NA before the
# column is converted to a factor.
blank_to_na_cols <- c(
  "Gender.Modality",
  "Sexuality",
  "sexuality.asexual",
  "sexuality.bisexual",
  "sexuality.gay",
  "sexuality.lesbian",
  "sexuality.pansexual",
  "sexuality.queer",
  "sexuality.straight",
  "sexuality.nondisclose",
  "Disability",
  "Disclosure",
  "disclosure.gender",
  "disclosure.gm",
  "disclosure.sexuality",
  "disclosure.disability",
  "disclosure.none",
  "Race.ethnicity",
  "re.african",
  "re.aablack",
  "re.eastasian",
  "re.hispaniclx",
  "re.indigenous",
  "re.mena",
  "re.seasian",
  "re.white",
  "re.nondisclose",
  "time.institution",
  "time.since.degree",
  "time.in.libraries",
  "Public.Private..For.profit.non.profit",
  "Current.position",
  "Income",
  "Faculty.status",
  "Tenure..Institution.",
  "Tenure..individual.",
  "Union",
  "Training",
  "Training.Preparation",
  "Teaching.Workload",
  "Follow.up.interview"
)

d[factor_only_cols] <- lapply(d[factor_only_cols], factor)
d[blank_to_na_cols] <- lapply(d[blank_to_na_cols], function(responses) {
  responses[responses == ""] <- NA
  factor(responses)
})

# Give the ordinal questions an explicit level order (instead of the default
# alphabetical order) so tables and plots list them in a meaningful sequence.
d$time.institution <- factor(
  d$time.institution,
  levels = c("Less than 1", "1 to 5", "6 to 10", "11 to 15", "16 or more")
)
d$Teaching.Workload <- factor(
  d$Teaching.Workload,
  levels = c(
    "Far too light", "Slightly light", "Just right",
    "Slightly excessive", "Far too excessive"
  )
)

# The revalue function from the plyr package renames levels of a factor (i.e.,
# changes a response value to a different value). recode in the dplyr package
# effectively replaces revalue, but the code has not been rewritten, which is
# why both plyr and dplyr are still loaded even though it isn't ideal.
# The long free-text level names below are individual write-in responses and
# must match the data character-for-character, so they are kept on one line.

# Institution control: public vs. private, and for-profit vs. non-profit.
d$public.private <- revalue(
  d$Public.Private..For.profit.non.profit,
  c(
    "Private, for-profit" = "Private",
    "Private, non-profit" = "Private",
    "Public" = "Public"
  )
)
d$profit <- revalue(
  d$Public.Private..For.profit.non.profit,
  c(
    "Private, for-profit" = "for-profit",
    "Private, non-profit" = "non-profit",
    "Public" = "non-profit"
  )
)

# Current position split into permanence and percent time.
d$permtemp <- revalue(
  d$Current.position,
  c(
    "Permanent, full-time" = "Permanent",
    "Permanent, part-time" = "Permanent",
    "Probationary, full time" = "Other",
    "Temporary, full-time" = "Temporary",
    "Temporary, part-time" = "Temporary"
  )
)
d$percenttime <- revalue(
  d$Current.position,
  c(
    "Permanent, full-time" = "Full-time",
    "Permanent, part-time" = "Part-time",
    "Probationary, full time" = "Full-time",
    "Temporary, full-time" = "Full-time",
    "Temporary, part-time" = "Part-time"
  )
)

# Faculty status collapsed to Academic staff / Faculty / Staff.
d$stafforfaculty <- revalue(
  d$Faculty.status,
  c(
    "Academic staff" = "Academic staff",
    "Faculty, non-tenure-track" = "Faculty",
    "Faculty, tenure-track" = "Faculty",
    "Faculty, tenured" = "Faculty",
    "Staff" = "Staff",
    "we do have a promotion process and our own bylaws, with some faculty benefits, but we are still classified as staff" = "Staff"
  )
)

# Tenure availability at the institution: write-in answers are folded into
# "No" or "Other" (tenure.institution2), then both "Yes" variants are merged
# into a single "Yes" (tenure.institution3).
d$tenure.institution2 <- revalue(
  d$Tenure..Institution.,
  c(
    "I am the first librarian to go through a new tenure similar status. My colleagues do not technically have tenure." = "Other",
    "No" = "No",
    "No faculty have tenure at my institution" = "No",
    "tenure for tenure-track faculty; promotion for non-tenure-track faculty" = "Other",
    "There is no tenure track at our institution." = "No",
    "We have TT/T and NTT faculty lines and I was hired under a NTT faculty line." = "Other",
    "We're eligible, but standards are still being developed. So, if we asked to go up for tenure right now I think we'd be told to wait. So, \"kind of\"?" = "Other",
    "Yes, similar status" = "Yes, similar status",
    "Yes, tenure" = "Yes, tenure",
    "Some are and some aren't, it depends on the history of the line" = "Other"
  )
)
d$tenure.institution3 <- revalue(
  d$tenure.institution2,
  c("Yes, similar status" = "Yes", "Yes, tenure" = "Yes")
)

# Individual tenure status: write-in answers become "Other"
# (tenure.individual2), then both "Yes" variants are merged
# (tenure.individual3).
d$tenure.individual2 <- revalue(
  d$Tenure..individual.,
  c(
    "I am in the second month of that track." = "Other",
    "I am not able to as a staff librarian however there are faculty librarian positions and these employees can earn tenure. There is no equivalent for staff librarians" = "Other"
  )
)
d$tenure.individual3 <- revalue(
  d$tenure.individual2,
  c(
    "Yes, I am tenured" = "Yes",
    "Yes, I have attained an equivalent status" = "Yes"
  )
)

# Training to teach: write-in answers become "Other" (Training2), then the
# three "Yes, ..." variants are merged into "Yes" (Training3).
d$Training2 <- revalue(
  d$Training,
  c(
    "Background in education before getting MLIS degree" = "Other",
    "I completed my bachelors degreee in teaching so I already had some skills when I came into my position." = "Other",
    "I did not go to library school but instead spent time in a Ph.D. program. I received formal training to teach during that program. (But not on the job.)" = "Other",
    "I have a BA in secondary ed that I rely on heavily" = "Other",
    "I have a bachelor’s degree in education and worked as a public school teacher where I had training, as well" = "Other",
    "I have a second Masters in educational technology (instructional design); it wasn't entirely about teaching, but did cover some aspects of it." = "Other",
    "I learned to teach as an MA student (not library science) when I had to teach for credit courses." = "Other",
    "I received some training in a separate graduate program" = "Other",
    "I received training when I was in grad school for linguistics. I was a GTA in English and then I taught English Comp and other classes for twelve years before becoming a librarian. In library school we had one week on instruction and I glanced over the readings but didn't really read them thoroughly so I'm not sure how much they would help someone without my experience." = "Other",
    "On the job and former training via Bachelor of Education degree" = "Other",
    "Previous work as English grad student prepared me to teach; no formal training to teach in lib school or on the job" = "Other",
    "Professional development institute for digital pedagogy, not general instruction" = "Other",
    "second masters in Education" = "Other",
    "Undergraduate degree in education" = "Other"
  )
)
d$Training3 <- revalue(
  d$Training2,
  c(
    "Yes, in library school and on the job" = "Yes",
    "Yes, only in library school" = "Yes",
    "Yes, only on the job" = "Yes"
  )
)
# Fix the display order of the collapsed training variable.
d$Training3 <- factor(d$Training3, levels = c("Yes", "No", "Other"))
Caution
Please keep in mind that I do not know what I am doing, and I am not a statistician. xoxo.
Results
Descriptive Statistics
Within the sample of academic librarians who have some degree of instructional responsibility, the mean job control perceived overall (job control general) was 3.33 and the mean job control when completing instructional responsibilities (job control instruction) was 3.13. A comparison of the characteristics of these data is included in Table 1. A box plot showing the distribution of these data is included in Figure 1.
Note
Descriptive statistics summarize the data set (representing the study sample), identifying some key features such as central tendency and variability or spread.
Skewness and kurtosis are numerical measures used to assess the normality of data (there are arguments for assessing normality graphically, usually through quantile-quantile plots, histograms, box plots, violin plots, or rainclouds). The closer the skewness and kurtosis are to zero, the more closely the data approximate a normal distribution.
Figure 1 presents a box plot as another check for normality. A violin plot or raincloud plot would be a more useful visual, but both are more difficult to read and interpret.
Code
# Table 1: descriptive statistics for the two job control scores.
# report_table() would achieve the same result as as.data.frame(report()).
score_reports <- list(
  as.data.frame(report(d$jobcontrol.general.score.1989)),
  as.data.frame(report(d$jobcontrol.instruction.score.1989))
)

# Stack the two summaries, then transpose so that each score becomes a column
# and each statistic becomes a row.
table1 <- as.data.frame(t(do.call(rbind, score_reports)))
colnames(table1)[1:2] <- c("Job Control (General)", "Job Control (Instruction)")

# The tenth statistic returned by report() is not shown in the table; the
# remaining nine rows get readable labels.
table1 <- table1[-10, ]
rownames(table1) <- c(
  "Mean", "Std. Dev.", "Median", "MAD", "Min", "Max",
  "N", "Skewness", "Kurtosis"
)

# Round for display and move the statistic names into their own column so
# kable() prints them under an "Attribute" heading.
table1 <- rownames_to_column(round(table1, 2), var = "Attribute")
kable(table1)
Table 1: A summary of job control data.
Attribute
Job Control (General)
Job Control (Instruction)
Mean
3.33
3.13
Std. Dev.
0.52
0.60
Median
3.33
3.14
MAD
0.49
0.64
Min
1.86
1.62
Max
5.00
5.00
N
245.00
245.00
Skewness
-0.09
0.26
Kurtosis
0.13
0.30
Code
# Figure 1: side-by-side box plots of the two job control scores.
# Both plots share the same geometry, y-axis window, and theme, so those
# layers are defined once and added to each plot.
job_control_layers <- list(
  geom_boxplot(outlier.color = "red"),
  coord_cartesian(ylim = c(1.5, 5)),
  theme_barbie()
)

p7 <- ggplot(d, aes(y = jobcontrol.general.score.1989)) +
  labs(y = "Job Control General") +
  job_control_layers

p8 <- ggplot(d, aes(y = jobcontrol.instruction.score.1989)) +
  labs(y = "Job Control Instruction") +
  job_control_layers

# Combine the two plots into a single figure with a shared title.
plots(p7, p8, title = "Distribution of Job Control Data")