library(dplyr)
library(tidyr)
library(ggplot2)
library(languageR)
library(lme4)
library(emmeans)

library(lmerTest)

loading the data

# Read both CSV files with their headers kept verbatim (check.names = FALSE)
# and drop an "X" column whenever one is present.
all.select <- read.csv("all.select.csv", check.names = FALSE) %>%
  select(-any_of("X"))

# For the coded data, columns with an empty header are removed first.
coded.data <- read.csv("coded.data.csv", check.names = FALSE)
coded.data <- coded.data[, names(coded.data) != ""] %>%
  select(-any_of("X"))

Proportion of target responses

We coded a production target = 1 when

    - the language is an obligatory wh-ex-situ language, the wh-phrase is produced clause-initially
    - the language is a wh-in-situ language, and the wh-phrase is produced in-situ
    - the correct wh-phrase (in nominative or accusative, when applicable) is produced
    - the verb is in the active rather than the passive voice
    - the extraction site was correct (judged by the position of the gap)
    - full DP was used instead of a pronoun, or a null argument
    - there is a gap
    

We coded a production correct (adult-like alternative structure) = 1 when

    - the produced form retains the meaning of the target question and
    - the produced form contains one of the following structures:
    - passive
    - cleft
    - topicalization
    - use of pronoun for the non-wh-phrase argument, both overt and covert
    - dislocation (e.g., topicalization)
            
            

Proportion Target/adult-like alternative

Overall number and percentage of target and adult-like productions, separated by wh-type and argument type, per language

# Per-language summary of the what-questions: number of responses (n) plus the
# count and proportion of target, correct and grammatical responses.
# Rows with a missing argument.type are left out.
what_all <- all.select %>%
  filter(wh.type == "whatQ", !is.na(argument.type)) %>%
  group_by(lang) %>%
  summarise(
    n = n(),
    sum.target = sum(target),
    pertarget = sum.target / n,
    sum.correct = sum(correct),
    percorrect = sum.correct / n,
    gram = sum(grammatical),
    pergram = gram / n
  )

Creating different categories of produced forms for responses that are not ungrammatical. Some are non-adult-like forms and others are adult-like forms, even when they are not target forms.

# Assign every coded response to exactly one category. case_when() uses the
# first rule whose condition is TRUE, so the order of the rules below sets the
# priority whenever a response carries more than one code.
w <- coded.data %>%
        mutate(category = case_when(
                # error categories are checked before anything else
                grammatical == 0 ~ "ungrammatical",
                role.inversion == 1 ~ "role reversal",
                (correct == 0 & wrong.wh == 1) ~ "wrong wh (incorrect)",
                # any remaining response that is not coded as correct
                correct == 0 ~ "other errors",
                # responses coded as correct, labelled by the structure used
                correct == 1 & passives == 1 ~ "passive",
                correct == 1 & covert.pro == 1 ~ "null pro",
                correct == 1 & overt.pro == 1 ~ "overt pro",
                correct == 1 & cleft == 1 ~ "cleft",
                correct == 1 & drop.after.which == 1 ~ "bare which",
                correct == 1 & wrong.wh == 1 ~ "wrong wh",
                # correct responses flagged extra.demo are labelled as target
                correct == 1 & extra.demo == 1 ~ "target",
                others == 1 ~ "others",
                # fallback for rows that matched none of the rules above
                TRUE ~ "target"
                )) 


# Fix the level order of the three factors used for plotting and grouping.
w <- w %>%
  mutate(
    category = factor(
      category,
      levels = c(
        "ungrammatical", "role reversal", "wrong wh (incorrect)",
        "other errors", "others", "passive", "null pro", "overt pro",
        "cleft", "bare which", "wrong wh", "target"
      )
    ),
    argument.type = factor(argument.type, levels = c("subject", "object")),
    item.type = factor(
      item.type,
      levels = c("whoS", "whoO", "whichS", "whichO", "whatO")
    )
  )

In order to get the percentage of use of each type of construction, we identify the number of items per language. Because we needed to take out some items from the German data (the produced forms were expected to be ambiguous), the number of data points in German was not the same across the five conditions, which is reflected below.

# Number of data points for every language x item type x argument type cell;
# n = 25 sets how many rows of the table are printed.
print(
  summarise(
    group_by(all.select, lang, item.type, argument.type),
    n()
  ),
  n = 25
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by lang, item.type, and argument.type.
## ℹ Output is grouped by lang and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(lang, item.type, argument.type))` for per-operation
##   grouping (`?dplyr::dplyr_by`) instead.
# One item each from the which-subject and which-object questions was ambiguous
# in German, so the German which-questions are handled separately from the
# who- and what-questions.
# Share of each response category per German which-condition. 105 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
german.itemized1 <- w %>%
  filter(lang == "german", item.type %in% c("whichO", "whichS")) %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 105, ll = "german")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
# Share of each response category per German who-/what-condition. 126 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
german.itemized2 <- w %>%
  filter(lang == "german", item.type %in% c("whoS", "whoO", "whatO")) %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 126, ll = "german")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
# Share of each response category per Hungarian condition. 120 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
hungarian.itemized <- w %>%
  filter(lang == "hungarian") %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 120, ll = "hungarian")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
# Share of each response category per Malayalam condition. 96 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
malayalam.itemized <- w %>%
  filter(lang == "malayalam") %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 96, ll = "malayalam")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
# Share of each response category per Mandarin condition. 132 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
mandarin.itemized <- w %>%
  filter(lang == "mandarin") %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 132, ll = "mandarin")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
# Share of each response category per Yoruba condition. 72 is the
# hard-coded number of data points per condition -- confirm it against the
# count table printed above.
yoruba.itemized <- w %>%
  filter(lang == "yoruba") %>%
  group_by(argument.type, item.type, category) %>%
  summarise(number = n() / 72, ll = "yoruba")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by argument.type, item.type, and category.
## ℹ Output is grouped by argument.type and item.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(argument.type, item.type, category))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.

putting all the languages together

# Stack the per-language category tables (German comes in two parts) into a
# single table; the `ll` column records which language a row belongs to.
w.all <- rbind(german.itemized1,german.itemized2, hungarian.itemized, malayalam.itemized, mandarin.itemized, yoruba.itemized)

separating target + adult-like alternatives (blue/green) and non-adult-like (red)

We create a variable `not.adult.like` in order to separate the errors that are not adult-like from the adult-like produced forms.

# Flag the error categories (1) as opposed to target responses and the other
# response categories (0).
w.all <- w.all %>%
  mutate(
    not.adult.like = if_else(
      category %in% c(
        "ungrammatical", "wrong wh (incorrect)", "other errors", "role reversal"
      ),
      1,
      0
    )
  )

proportion of adult-like forms (categorized), comparing only object wh-questions (who vs. which vs. what)

# Stacked bars of the adult-like response categories for the three object
# question types (who / which / what), one panel per language.
w.all %>%
  filter(
    item.type %in% c("whoO", "whichO", "whatO"),
    not.adult.like == 0
  ) %>%
  ggplot(aes(x = item.type, y = number, fill = category)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    values = c(
      "darkolivegreen", "chartreuse3", "darkolivegreen2", "lightgreen",
      "darkslategray3", "deepskyblue", "cornflowerblue", "darkblue"
    )
  ) +
  # scale_fill_manual(values = gray.colors(8)) +
  facet_wrap(~ll) +
  labs(
    x = "type of wh-question",
    y = "proportion of occurrences",
    title = "Proportion of types of forms produced per language"
  ) +
  guides(fill = guide_legend(title = "type of responses")) +
  theme(
    plot.title = element_text(size = 18),   # plot title
    axis.title = element_text(size = 16),   # x and y axis titles
    axis.text = element_text(size = 12),    # tick labels on both axes
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12),
    legend.title = element_text(size = 14), # legend title
    legend.text = element_text(size = 12),  # legend entries
    # facet (language) labels: size 14, bold
    strip.text = element_text(size = 14, face = "bold")
  )

object Q: proportion of errors (categorized) for three types of object questions (who vs. which vs. what)

# Stacked bars of the error categories for the three object question types
# (who / which / what), one panel per language, on a fixed 0-1 y axis.
w.all %>%
  filter(
    item.type %in% c("whoO", "whichO", "whatO"),
    not.adult.like == 1
  ) %>%
  ggplot(aes(x = item.type, y = number, fill = category)) +
  geom_bar(stat = "identity", position = "stack") +
  facet_wrap(~ll) +
  scale_fill_manual(values = c("red", "orange", "brown3", "pink")) +
  # scale_fill_manual(values = gray.colors(4)) +
  labs(
    x = "type of wh-question",
    y = "proportion of occurrences",
    title = "Proportion of types of errors per language"
  ) +
  ylim(0, 1) +
  guides(fill = guide_legend(title = "type of responses")) +
  theme(
    plot.title = element_text(size = 18),   # plot title
    axis.title = element_text(size = 16),   # x and y axis titles
    axis.text = element_text(size = 12),    # tick labels on both axes
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12),
    legend.title = element_text(size = 14), # legend title
    legend.text = element_text(size = 12),  # legend entries
    # facet (language) labels: size 14, bold
    strip.text = element_text(size = 14, face = "bold")
  )

comparing what, who O and which O

# Object questions only, reduced to the columns used by the models, with
# `lang` as a five-level factor.
objects.df <- all.select %>%
  filter(argument.type == "object") %>%
  mutate(
    lang = factor(
      lang,
      levels = c("german", "hungarian", "malayalam", "mandarin", "yoruba")
    )
  ) %>%
  select(lang, wh.type, item.no, participant, target, correct)

# Sum-to-zero contrasts for the five languages.
contrasts(objects.df$lang) <- contr.sum(5)

# Target responses are modelled without Malayalam. After filtering, the unused
# "malayalam" level is dropped and sum contrasts are re-applied to the
# remaining levels. Without this step model.frame() discards the 5-level
# contrast matrix ("contrasts dropped from factor lang due to missing levels")
# and the model is silently fitted with the session's default contrasts
# instead of the intended sum coding.
objects.target.df <- objects.df %>%
  filter(lang != "malayalam")
objects.target.df$lang <- droplevels(objects.target.df$lang)
contrasts(objects.target.df$lang) <-
  contr.sum(nlevels(objects.target.df$lang))

# Logistic regression of target responses on language, wh-type and their
# interaction.
m1a <- glm(
  target ~ lang * wh.type,
  family = binomial(),
  data = objects.target.df
)
summary(m1a)


# Logistic regression of correct responses on language, wh-type and their
# interaction, fitted on objects.df.
m1b <- glm(
  correct ~ lang * wh.type,
  data = objects.df,
  family = binomial()
)
summary(m1b)

## maximal model that converged

# Mixed-effects logistic regression of target responses: language plus
# wh-type nested within language, with a random intercept per item.
m2a <- glmer(
  target ~ lang / wh.type + (1 | item.no),
  data = objects.target.df,
  family = binomial()
)
## Warning (recorded console output, translated from German; issued twice):
## contrasts dropped from factor lang due to missing levels
summary(m2a)
# alpha: 0.05 divided by 8
0.05 / 8

# Mixed-effects logistic regression of correct responses: language plus
# wh-type nested within language, with a random intercept per item.
m2b <- glmer(
  correct ~ lang / wh.type + (1 | item.no),
  data = objects.df,
  family = binomial()
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00241256 (tol = 0.002, component 1)
##   See ?lme4::convergence and ?lme4::troubleshooting.
summary(m2b)
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
# alpha: 0.05 divided by 10
0.05 / 10

Detecting the expected asymmetries

# Data for the asymmetry analyses: who- and which-questions from the four
# languages other than Malayalam, reduced to the modelling columns and
# extended with +/- 1/2 coded predictors.
target.df <- all.select %>%
  filter(lang != "malayalam", wh.type != "whatQ") %>%
  mutate(
    lang = factor(lang, levels = c("german", "hungarian", "mandarin", "yoruba"))
  ) %>%
  select(
    lang, wh.type, argument.type, item.order,
    item.no, participant, target, correct
  ) %>%
  mutate(
    # who = +1/2, everything else (which) = -1/2
    c_wh = case_when(wh.type == "whoQ" ~ 1/2, TRUE ~ -1/2),
    # subject = +1/2, everything else (object) = -1/2
    c_arg = case_when(argument.type == "subject" ~ 1/2, TRUE ~ -1/2),
    # item order centred on the midpoint of 1..30
    c_item.order = item.order - mean(1:30)
  )

# Sum-to-zero contrasts for the four languages.
contrasts(target.df$lang) <- contr.sum(4)

Analyses to test the effect of:

- H1: case marking (1 vs. 0) – Mandarin/Yoruba vs. German and Hungarian
- H2: word order disambiguation (1 vs. 0) – Mandarin/Yoruba vs. German and Hungarian
- H3: in-situ: non-obligatory ex-situ languages (1 vs. 0) – Mandarin vs. German, Hungarian, and Yoruba

Testing the hypotheses, H1 and H2, that two groups (German/Hungarian vs. Mandarin/Yoruba) differ

# hyp1 codes the language grouping: Mandarin and Yoruba = -1/2, all other
# languages in target.df = +1/2.
target.df$hyp1 <- if_else(
  target.df$lang %in% c("mandarin", "yoruba"),
  -1/2,
  1/2
)
str(target.df)


# Logistic regression of target responses on the language grouping, wh-type,
# argument type and all their interactions.
mhyp1 <- glm(
  target ~ hyp1 * c_wh * c_arg,
  data = target.df,
  family = binomial()
)
summary(mhyp1)

Testing H3

# hyp2 codes the language grouping: Mandarin = -1/2, all other languages in
# target.df = +1/2.
target.df$hyp2 <- if_else(target.df$lang == "mandarin", -1/2, 1/2)
str(target.df)

# Logistic regression of target responses on the language grouping, wh-type,
# argument type and all their interactions.
mhyp2 <- glm(
  target ~ hyp2 * c_wh * c_arg,
  data = target.df,
  family = binomial()
)
summary(mhyp2)

Half of the who- and which-questions had a mismatch in number between the wh-phrase and the in-situ argument.

# Who- and which-questions outside Malayalam, leaving out rows coded cleft == 1.
temp2 <- all.select %>%
  filter(lang != "malayalam", wh.type != "whatQ", cleft != 1)

# Counts and proportions of target and correct responses for every
# language x wh-type x argument type x feature-match cell.
pr.temp <- temp2 %>%
  group_by(lang, wh.type, argument.type, feature.match) %>%
  summarise(
    n = n(),
    tar = sum(target),
    pr.tar = tar / n,
    corr = sum(correct),
    pr.cor = corr / n
  )
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by lang, wh.type, argument.type, and
##   feature.match.
## ℹ Output is grouped by lang, wh.type, and argument.type.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(lang, wh.type, argument.type, feature.match))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
## +/- 1/2 coding of the two predictors
temp2 <- temp2 %>%
  mutate(
    # who = +1/2, everything else (which) = -1/2
    c_wh = case_when(wh.type == "whoQ" ~ 1/2, TRUE ~ -1/2),
    # subject = +1/2, everything else (object) = -1/2
    c_arg = case_when(argument.type == "subject" ~ 1/2, TRUE ~ -1/2)
  )


# Mixed-effects logistic regression of target responses: language plus the
# feature.match-by-c_arg term nested within language, with a random intercept
# per item.
model.match0a <- glmer(
  target ~ lang / feature.match:c_arg + (1 | item.no),
  data = temp2,
  family = binomial()
)
summary(model.match0a)

# Estimated marginal means for feature.match x c_arg with lang set to Mandarin.
em_mismatch <- emmeans(
  model.match0a,
  ~ feature.match * c_arg,
  at = list(lang = "mandarin")
)
## NOTE: Results may be misleading due to involvement in interactions
# View the means
print(em_mismatch)


# 1. Data frame for the interaction plot, with the factor levels ordered
#    match < mismatch and subject < object.
# NOTE(review): the estimates and interval bounds are typed in by hand,
# presumably copied from print(em_mismatch) -- confirm after any refit.
results <- data.frame(
  feature_match = factor(
    c("mismatch", "match", "mismatch", "match"),
    levels = c("match", "mismatch")
  ),
  c_arg = factor(
    c("object", "object", "subject", "subject"),
    levels = c("subject", "object")
  ),
  emmean = c(-0.152, 0.213, -0.660, -0.982),
  LCL = c(-0.497, -0.133, -1.022, -1.377),
  UCL = c(0.193, 0.559, -0.297, -0.596)
)

# 2. Plot the estimated marginal means with their confidence limits.
# Line thickness is set with `linewidth`: using `size` for lines has been
# deprecated since ggplot2 3.4.0 and produced a deprecation warning here.
ggplot(
  results,
  aes(x = c_arg, y = emmean, color = feature_match, group = feature_match)
) +
  # Lines showing the interaction
  geom_line(linewidth = 1) +
  # Points for the means
  geom_point(size = 3) +
  # Error bars from the lower (LCL) to the upper (UCL) confidence limit
  geom_errorbar(aes(ymin = LCL, ymax = UCL), width = 0.1, linewidth = 0.8) +
  # Dashed reference line at 0
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  # Aesthetics
  theme_classic() +
  labs(
    title = "Interaction of Feature Match and Argument Type",
    subtitle = "Estimated Marginal Means with 95% Confidence Intervals",
    x = "c_arg Condition",
    y = "Estimated Marginal Mean",
    color = "Feature Match"
  )

# alpha: 0.05 divided by 4
# NOTE(review): presumably a correction for four comparisons -- confirm.
0.05/4