The data are organised as long format data. 4 individuals are observed during 4 or 5 days (BCSID
is the name of the unique key). Basically, the data describe activities performed during these 4-5 days. START
describe the start time of activities and MAINACT
the activities.
The data:
data = structure(list(BCSID = c("B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10001N", "B10001N", "B10001N",
"B10001N", "B10001N", "B10001N", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10004R", "B10004R", "B10004R",
"B10004R", "B10004R", "B10004R", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10010P", "B10010P", "B10010P",
"B10010P", "B10010P", "B10010P", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q",
"B10011Q", "B10011Q", "B10011Q", "B10011Q", "B10011Q"), DAY = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "5", "5", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "5", "5"), START = c("00:00:00",
"00:30:00", "10:00:00", "10:05:00", "10:35:00", "11:00:00", "11:03:00",
"11:15:00", "14:00:00", "14:30:00", "14:40:00", "14:45:00", "15:05:00",
"16:15:00", "17:00:00", "20:00:00", "00:00:00", "09:30:00", "10:00:00",
"10:30:00", "10:50:00", "11:15:00", "12:50:00", "14:00:00", "14:20:00",
"14:40:00", "15:00:00", "16:00:00", "16:45:00", "00:00:00", "00:30:00",
"01:00:00", "10:30:00", "12:00:00", "12:30:00", "12:45:00", "12:50:00",
"13:30:00", "14:30:00", "18:30:00", "19:30:00", "20:00:00", "00:00:00",
"10:00:00", "10:15:00", "11:00:00", "11:15:00", "12:30:00", "13:00:00",
"15:30:00", "16:30:00", "17:00:00", "18:00:00", "21:30:00", "22:30:00",
"00:00:00", "07:30:00", "08:10:00", "08:15:00", "08:30:00", "09:30:00",
"09:45:00", "11:45:00", "12:45:00", "13:45:00", "14:15:00", "15:15:00",
"15:30:00", "15:45:00", "16:30:00", "17:15:00", "17:45:00", "18:00:00",
"18:30:00", "20:30:00", "23:20:00", "23:30:00", "00:00:00", "08:00:00",
"08:30:00", "09:15:00", "09:30:00", "11:10:00", "13:00:00", "13:30:00",
"15:00:00", "16:00:00", "16:30:00", "17:00:00", "17:30:00", "18:00:00",
"18:15:00", "18:30:00", "19:00:00", "21:00:00", "21:30:00", "22:00:00",
"23:00:00", "23:02:00", "23:15:00", "00:00:00", "07:00:00", "07:30:00",
"08:30:00", "08:45:00", "09:45:00", "12:00:00", "13:00:00", "15:00:00",
"16:30:00", "17:00:00", "20:45:00", "22:00:00", "22:15:00", "22:45:00",
"00:00:00", "07:45:00", "08:00:00", "08:30:00", "09:00:00", "09:10:00",
"09:30:00", "11:41:00", "12:15:00", "12:45:00", "13:00:00", "13:30:00",
"14:00:00", "17:00:00", "18:00:00", "19:00:00", "20:00:00", "22:30:00",
"00:00:00", "06:45:00", "07:50:00", "08:05:00", "08:55:00", "09:00:00",
"12:00:00", "13:00:00", "17:00:00", "17:20:00", "18:00:00", "19:00:00",
"19:25:00", "23:00:00", "23:25:00", "00:00:00", "00:15:00", "10:00:00",
"12:15:00", "18:00:00", "18:10:00", "18:45:00", "19:20:00", "19:45:00",
"22:00:00", "22:35:00", "00:00:00", "04:00:00", "10:00:00", "13:00:00",
"13:25:00", "16:35:00", "17:15:00", "18:00:00", "19:00:00", "20:00:00",
"20:40:00", "21:00:00", "22:50:00", "23:00:00", "23:50:00", "00:00:00",
"01:45:00", "06:45:00", "08:00:00", "09:00:00", "12:00:00", "13:00:00",
"17:00:00", "18:50:00", "23:15:00", "23:45:00", "00:00:00", "00:15:00",
"00:00:00", "07:00:00", "08:00:00", "08:30:00", "08:50:00", "13:30:00",
"14:30:00", "17:30:00", "18:00:00", "18:30:00", "19:30:00", "00:00:00",
"08:00:00", "09:00:00", "10:30:00", "13:00:00", "18:30:00", "20:00:00",
"21:30:00", "00:00:00", "01:00:00", "01:30:00", "09:00:00", "09:30:00",
"09:35:00", "10:00:00", "10:30:00", "13:30:00", "14:00:00", "15:30:00",
"18:00:00", "22:00:00", "00:00:00", "07:00:00", "07:30:00", "08:00:00",
"08:40:00", "12:30:00", "13:30:00", "18:00:00", "18:30:00", "20:00:00",
"00:00:00", "00:20:00"), MAINACT = c("-11", "1704", "1302", "1301",
"1507", "603", "1301", "101", "502", "1704", "1507", "1404",
"8888", "603", "1507", "101", "-11", "1302", "1301", "1507",
"704", "101", "1704", "1704", "3102", "1002", "1704", "3101",
"101", "-11", "1704", "1302", "1302", "1507", "603", "2902",
"3201", "812", "1704", "1704", "3701", "101", "-11", "1302",
"1301", "3101", "1001", "1507", "1006", "2101", "2902", "1704",
"8888", "1704", "1302", "-11", "1302", "1302", "1507", "1301",
"1702", "1001", "2902", "2901", "2101", "2502", "3801", "3001",
"3504", "1301", "3503", "3101", "1507", "1301", "2601", "1702",
"1302", "1606", "1302", "1301", "502", "101", "101", "1001",
"101", "101", "502", "3504", "3101", "2902", "1507", "3702",
"1408", "3801", "2902", "2101", "1408", "1302", "1302", "1302",
"1606", "1302", "1408", "1503", "2902", "1705", "2701", "2803",
"2805", "2902", "3701", "1408", "1301", "1302", "1302", "1606",
"1302", "1301", "1507", "603", "4010", "3701", "2101", "4008",
"1507", "603", "1301", "2101", "1702", "1507", "3504", "2701",
"1302", "-11", "1302", "502", "2502", "102", "101", "103", "101",
"502", "502", "1704", "1702", "2902", "1702", "1704", "-11",
"1302", "1302", "1702", "2502", "1702", "2902", "1702", "1704",
"1702", "2507", "-11", "1702", "1302", "1702", "3601", "1704",
"2801", "1702", "2902", "1702", "1704", "3801", "3601", "603",
"1302", "1606", "1601", "1302", "502", "101", "103", "101", "502",
"2901", "1702", "1704", "-11", "1302", "-11", "1302", "1507",
"502", "101", "103", "101", "502", "1704", "701", "1301", "1606",
"1302", "502", "301", "301", "1704", "2901", "1702", "-11", "1702",
"1302", "1302", "1507", "603", "601", "601", "1507", "603", "1006",
"1507", "3201", "-11", "1302", "1507", "1301", "502", "103",
"101", "1704", "1301", "1704", "1702", "1302"), eorder2 = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 2, 3, 4, 5, 6,
7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 1, 2)), .Names = c("BCSID", "DAY", "START",
"MAINACT", "eorder2"), row.names = c(NA, 231L), class = "data.frame")
The data look like this:
BCSID DAY START MAINACT eorder2 28 B10001N 2 16:00:00 3101 12 29 B10001N 2 16:45:00 101 13 30 B10001N 3 00:00:00 -11 1 31 B10001N 3 00:30:00 1704 2 32 B10001N 3 01:00:00 1302 3 33 B10001N 3 10:30:00 1302 4
The variable eorder2
denotes the order of activities per day. The first observation (eorder2 == 1
) beginning at midnight (00:00:00) is or should be sleeping.
The missing sleep is denoted -11. What I need to do is to fill this missing sleep with a proxy sleep denoted 1606.
However, I need to specify several conditions for this proxy sleep:
- I need to exclude DAY 1.
- I need to "target" the first episode of each day.
- Finally, I want that the previous last episode is 1302 or 3201 and that the first next episode after -11 is 1302.
Here, for example, Row 220 is a missing that fulfill the conditions. It is the first episode of the day and it is preceded by 3201 and followed by 1302.
BCSID DAY START MAINACT eorder2 218 B10011Q 3 18:00:00 1507 12 219 B10011Q 3 22:00:00 3201 13 220 B10011Q 4 00:00:00 -11 1 221 B10011Q 4 07:00:00 1302 2 222 B10011Q 4 07:30:00 1507 3
I was thinking doing something like this:
First, specifying some Row conditions:
ep2 = which( data$eorder2 == 1 & # first episode
data$DAY != 1 & # not day 1
data$MAINACT == '-11') # missing activities
Then fill the missing -11 with a line of code like this:
data[ep2, 'MAINACT'] [ data[ep2-1,'MAINACT'] == '1302' | data[ep2-1,'MAINACT'] == '3201' & data[ep2+1,'MAINACT'] == '1302' ] <- '1606'
Using ep2-1
for targeting the previous episode and ep2+1
the following episode.
This code seems to work. Alternative ideas or corrections are very welcome.
1 Answer 1
I would first write (or find) a function for shifting a vector x
by a given number of observations k
. The stat
package has a lag
function but it only allows to shift in one direction (k
has to be >= 0
)... Here is such a function that will work both ways, with positive or negative k
:
LAG <- function(x, k) {
if (k == 0) {
x
} else if (k > 0) {
c(rep(NA, k), head(x, -k))
} else {
c(tail(x, k), rep(NA, -k))
}
}
Then, you can create a boolean vector telling if each row meets all the conditions or not:
need_replace <- with(data, eorder2 == 1 &
DAY != 1 &
MAINACT == '-11' &
LAG(MAINACT, +1) %in% c('1301', '1302') &
LAG(MAINACT, -1) == '1302')
And finally, do the substitution:
data$MAINACT[need_replace] <- '1606'
A few more comments:
- I created a vector of TRUE/FALSE rather than a vector of indices like you did with
which
. Both work but it is less typing withoutwhich
. - See that I used
with(data, ...)
so I did not have to typedata$
over and over. This also makes your code shorter and easier to read. - I used
%in%
instead of two==
statements separated by|
. That's another good function to know (imagine having many more than two allowed values...) - Be careful that
&
has higher priority than|
so what you had written was equivalent tostatement1 | (statement2 & statement 3)
which is not the same as what I think you had in mind:(statement1 | statement2) & statement3
. Priority rules are documented under?Syntax
. - As it stands, none of the rows in your example data match all the conditions you have specified so please let me know if I misunderstood something, I am sure it will be a simple fix.