1) Create a very simple, fake data set
2) Fit a multistate model (states: successful breeder, failed breeder)
3) Fit a multistate model with 1 unobservable state* (states: successful breeder, failed breeder, non-breeder*)
4) Fit a multistate model with 2 unobservable states* (states: successful breeder, failed breeder, post-successful-nonbreeder*, post-failed-nonbreeder*)
5) Fit a multistate HMM model with 2 unobservable states* and uncertainty in state assignment (states: successful breeder, failed breeder, post-successful-nonbreeder*, post-failed-nonbreeder*)
I provide basic R code for these models below, should this be useful to anyone. When fitting models 2, 3 and 4 I obtain the same results as in ESurge. I have difficulty to fit the HMM model in Rmark.
Some questions:
1) What should be labelled as an 'event' in RMark code? I think it is only the number (or letter) that represents uncertain observations in the encounter history matrix.
2) I have some difficulty to understand the 'pi' parameter as implemented in Mark. According to the Mark help file, "The pi parameter is the probability of the EVENT on the first capture." In contrast, in ESurge, pi is "‘initial state probabilities’, which describe the probability that an individual is in one or another state when it is first encountered". So, in ESurge, I would set pi to zero for the post-successful-nonbreeder and post-failed-nonbreeder states of my model, because these are unobservable states, and an animal cannot enter the population in this way. In Mark, I understand that I will not do this?
3) In Mark, the Delta parameters are the probability of determining the state given detection. I am not sure how to create an indicator variables for Delta in the ddl (design data list). For example, in my example (below), I have a single uncertain event (coded '5') which may refer to successful or failed breeders (though with different probabilities for each state). I'd be very grateful for any hints.
Any specific or general advice on fitting HMMs using RMark will be much appreciated!
Chris
- Code: Select all
#---------------------------
# Make some multistate data:
# 1 = succesful breeder
# 2 = failed breeder
# 0 = not seen
#---------------------------
df = c('1010001', '1010000', '1010000', '1010000', '1010100',
'1000000', '1000001', '1000001', '1000001', '1010101', '1010101', '1010101',
'1010101', '1010101', '1010101', '1010101', '1010101', '1010001', '1000001',
'1000001', '1000001', '1010101', '1010101', '1010101', '1010101', '1010101',
'1010101', '1010201', '1010201', '1010201', '1010201', '1010201', '1010201',
'1010201', '1010201', '1010201', '1010201', '1010201', '1010201', '1010201',
'1010201', '1010201', '1010201', '2010201', '2010000', '2000000', '2000000',
'2000000', '2000000', '2000000', '2010220', '2010221', '2010221', '2010221',
'2010221', '2010221', '2010221', '2010221', '2010221', '2010221', '2012221',
'2012222', '2012222', '2012222', '2012221', '2012221', '2012020', '2022010',
'2022010', '2022010', '2022010', '2022010', '2022010', '2022010', '2022010',
'2022010', '2021010', '2021010', '2021010', '2021010', '2021010', '2021010',
'2001000', '2021010', '2021010', '2021010')
df=as.data.frame(df); names(df) = "ch"; df$freq = 1; df$ch = as.character(df$ch)
head(df)
#------------------------------------------------------
# Fit multi-state model
# 3 states (successful breeder / failed breeder / dead)
# 3 events (successful breeder / failed breeder / not seen)
#------------------------------------------------------
library(RMark)
dp = process.data(df, model = "Multistrata")
## create design data
ddl = make.design.data(dp)
# define model to fit
Phi.ct = list(formula=~1)
p.ct = list(formula=~1)
Psi.class = list(formula=~stratum)
# constant survival, constant recapture
Model.1 = mark(dp, ddl, model.parameters=list(S = Phi.ct,
p = p.ct,
Psi = Psi.class),
output = FALSE,delete=T, model ="Multistrata")
summary(Model.1) # parameters matches E-surge output
#------------------------------------------------------
# Fit multi-state model with 1 unobservable state
# 4 states (successful breeder / failed breeder / nonbreeder(= unobservable) / dead)
# 3 events (successful breeder / failed breeder / not seen)
#------------------------------------------------------
# state 3 = unobservable
dp = process.data(df, model = "Multistrata", strata.labels = c("1", "2", "3"))
## create design data
ddl = make.design.data(dp)
# Set p = 0 for state 3 because non-breeders cannot be observed
ddl$p$fix[ddl$p$stratum=="3"]=0
# define model to fit
Phi.ct = list(formula=~1)
p.ct = list(formula=~1)
Psi.class =list(formula =~ -1+stratum:tostratum) # see page 918 C.17. Multi-strata example in Mark book
table(ddl$Psi[,c("stratum","tostratum")])
# constant survival, constant recapture
Model.2 = mark(dp, ddl, model.parameters=list(S = Phi.ct,
p = p.ct,
Psi = Psi.class),
output = FALSE,delete=T, model ="Multistrata")
summary(Model.2) # matches E-surge output
#----------------------------------------------------------------------------
# Fit multi-state model with 2 unobservable states
# 5 states (successful breeder / failed breeder / post-successful (unobservable) / post-failed (unobservable) / dead)
# 3 events (seen as successful breeder / failed breeder / not seen)
#----------------------------------------------------------------------------
# state 3 & 4 = unobservable
dp = process.data(df, model = "Multistrata", strata.labels = c("1", "2", "3", "4"))
## create design data
# Set the transition that is computed by subtraction with the subtract.stratum argument
# Important! The ψ that you want to set so zero cannot be computed by
# subtraction, so you need to set the subtract.stratum appropriately.
# See page 919 in the Mark book
# Here I compute the same states as for GEPAT in ESurge:
ddl = make.design.data(dp, parameters=
list(Psi=list(subtract.stratum=c("3","4","3","4"))))
# Set movement parameters (PSI) to 0 (cannot move from successful to post-failed, for example)
ddl$Psi$fix[ddl$Psi$stratum=="1" & ddl$Psi$tostratum=="4"]=0
ddl$Psi$fix[ddl$Psi$stratum=="2" & ddl$Psi$tostratum=="3"]=0
ddl$Psi$fix[ddl$Psi$stratum=="3" & ddl$Psi$tostratum=="4"]=0
ddl$Psi$fix[ddl$Psi$stratum=="4" & ddl$Psi$tostratum=="3"]=0
ddl$p$fix[ddl$p$stratum=="3"]=0
ddl$p$fix[ddl$p$stratum=="4"]=0
# define model to fit
Phi.ct = list(formula=~1)
p.ct = list(formula=~1)
Psi.class =list(formula =~ -1+stratum:tostratum) # see page 918 C.17. Multi-strata example in Mark book
table(ddl$Psi[,c("stratum","tostratum")])
# constant survival, constant recapture
Model.3 = mark(dp, ddl, model.parameters=list(S = Phi.ct,
p = p.ct,
Psi = Psi.class),
output = FALSE,delete=T, model ="Multistrata")
# match E-surge output!
summary(Model.3)
Psilist=get.real(Model.3,"Psi",vcv=T)
Psivalues=Psilist$estimates
TransitionMatrix(Psivalues[Psivalues$time==1,],vcv.real=Psilist$vcv.real)
# ----------------------------------------------------------------------------------
# Fit HMM model with 2 unobservable states
# 5 states (successful breeder / failed breeder / post-successful (unobservable) / post-failed (unobservable) / dead)
# 4 events (seen as successful breeder / failed breeder / not seen / seen with uncertain breeding state)
# event 1 = Successful breeder
# event 2 = Failed breeder
# event 5 = uncertain breeder
# Warning: this analysis is not correct and should not be used as a template
#-----------------------------------------------------------------------------------
# Add uncertainty (event = '5') to the input data
df = c('1010001',
'1050000', '1010000', '1010000', '1010100', '1000000', '1000001', '1000001',
'1000001', '1010105', '1050101', '1010101', '1050101', '1010101', '1010101',
'1010101', '1010101', '1010001', '1000001', '1000001', '1000001', '1010101',
'1010101', '1010101', '1050101', '1010101', '1010101', '1010201', '1010201',
'1010201', '1010201', '1010201', '1010201', '1010201', '1010201', '1010201',
'1010201', '1010201', '1010201', '1010201', '1010201', '1050201', '1010205',
'2010201', '2010000', '2000000', '2000000', '2000000', '2000000', '2000000',
'2050220', '2010221', '2010221', '2050221', '2010221', '2010221', '2010221',
'2010221', '2010521', '2010251', '2012221', '2012222', '2012222', '2012222',
'2012251', '5012221', '2012020', '2022010', '2025010', '2022010', '2022010',
'2052010', '2022010', '2022010', '2022010', '2022010', '2021010', '2021010',
'2021010', '2051010', '2051010', '5021010', '2001000', '2025010', '2021010',
'2021010')
df=as.data.frame(df); names(df) = "ch"; df$freq = 1; df$ch = as.character(df$ch)
head(df)
# state 3 & 4 = unobservable
# seems like 'event' is only for the uncertain observation - not the 'state' observations!
# Error in process.data: unused argument (events) if package marked is loaded in session
dp = process.data(df, model = "HidMarkov", strata.labels = c("1", "2", "3", "4"),
events = c("5"))
## create design data
ddl = make.design.data(dp)
# Set movement parameters (PSI) to 0
ddl$Psi$fix[ddl$Psi$stratum=="1" & ddl$Psi$tostratum=="4"]=0
ddl$Psi$fix[ddl$Psi$stratum=="2" & ddl$Psi$tostratum=="3"]=0
ddl$Psi$fix[ddl$Psi$stratum=="3" & ddl$Psi$tostratum=="4"]=0
ddl$Psi$fix[ddl$Psi$stratum=="4" & ddl$Psi$tostratum=="3"]=0
ddl$p$fix[ddl$p$stratum=="3"]=0
ddl$p$fix[ddl$p$stratum=="4"]=0
# animals can only enter as breeders, not as post-breeders (unobservable)
# In HMMs, the pi parameter is the probability of the event on the first capture.
# Note: P(event)! not as in ESurge, where it is P(state on first capture)! (?)
# Should this be fixed to zero?
ddl$pi$fix[ddl$pi$stratum=="3"]=0
ddl$pi$fix[ddl$pi$stratum=="4"]=0
# The Delta parameters are the probability of determining the state given detection
# with probability p.
ddl$Delta
ddl$Delta$fix[ddl$Delta$stratum=="1"]=1
#ddl$Delta$fix[ddl$Delta$stratum=="2"]=1
# define model to fit
Phi.ct = list(formula=~1)
p.ct = list(formula=~1)
Psi.class =list(formula =~ -1+stratum:tostratum) # see page 918 C.17. Multi-strata example in Mark book
pi.dot = list(formula=~1)
Delta.class = list(formula=~stratum)
#The delta parameters are the probability of determining the state given detection
# with probability p.
# Delta model
# create indicator variables for
#ddl$Delta$fix = ifelse(ddl$Delta$p =="5", 1, 0)
table(ddl$Psi[,c("stratum","tostratum")])
table(ddl$pi[,c("stratum")])
table(ddl$Delta[,c("event")])
# Aim: run a HMM where uncertain events (=5) could be successful breeders or unsuccesful breeders
Model.4 = mark(dp, ddl, model.parameters=list(S = Phi.ct,
p = p.ct,
Psi = Psi.class,
pi = pi.dot,
Delta = Delta.class),
output = FALSE,delete=T, model ="HidMarkov", mlogit0 = T)
summary(Model.4)