根据现有列的条件创建新列
我有一个包含以下值的 data.table
structure(list(Item = c("10040", "10040", "10110", "10190", "10190",
"12020", "12970", "7010040"), Customer = c("CLUB RETAILER - 106",
"WHOLESALER - 112", "NATIONAL RETAILER - 102", "MID WEST 1 - 120",
"WHOLESALER - 112", "WHOLESALER - 112", "WHOLESALER - 112", "DIAMLER BENZ - 108"
), DemandID = c("NetBaseForecast", "NetBaseForecast", "NetBaseForecast",
"NetBaseForecast", "NetBaseForecast", "NetBaseForecast", "NetBaseForecast",
"NetBaseForecast"), Forecasts = c(5, 158, 212, 12, 3, 3, 3, 2776
), RTF = c(1, 98, 139, 8, 0, 0, 0, 2356)), row.names = c(NA,
-8L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x00000000025c1ef0>, sorted = c("Item", "Customer"))
我需要根据预测和 RTF 列的每行的 2 个条件,基于预测和 rtf 列创建 2 个新列。我正在使用 data.table
- 如果预测 > RTF
- New_F = 预测 - RTF
- Com_F = RTF
- 如果预测 < RTF
- 新_F = 0
- Com_F = 预测
我目前正在使用这样的 for 循环
for(i in 1:nrow(temp1NoOrders)){
if (temp1NoOrders$Forecasts[i] >= temp1NoOrders$RTF[i]){
temp1NoOrders$Com_F[i] <- temp1NoOrders$RTF[i]
temp1NoOrders$New_F[i] <- temp1NoOrders$Forecasts[i] - temp1NoOrders$RTF[i]
}
else if (temp1NoOrders$Forecasts[i] < temp1NoOrders$RTF[i]){
temp1NoOrders$Com_F[i] <- temp1NoOrders$Forecast[i]
temp1NoOrders$New_F[i] <- 0
}
}
是否有使用 data.table 执行此操作的更快方法?我假设如果我使用 100k 行数据集,由于循环和 if-else 语句,这可能会变慢。
回答
您不需要逐行执行此操作。使用fifelse
的data.table
检查条件。
library(data.table)
temp1NoOrders[, c("New_F","Com_F") :=
.(fifelse(Forecasts > RTF, Forecasts - RTF, 0),
fifelse(Forecasts > RTF, RTF, Forecasts))]
temp1NoOrders
# Item Customer DemandID Forecasts RTF New_F Com_F
#1: 10040 CLUB RETAILER - 106 NetBaseForecast 5 1 4 1
#2: 10040 WHOLESALER - 112 NetBaseForecast 158 98 60 98
#3: 10110 NATIONAL RETAILER - 102 NetBaseForecast 212 139 73 139
#4: 10190 MID WEST 1 - 120 NetBaseForecast 12 8 4 8
#5: 10190 WHOLESALER - 112 NetBaseForecast 3 0 3 0
#6: 12020 WHOLESALER - 112 NetBaseForecast 3 0 3 0
#7: 12970 WHOLESALER - 112 NetBaseForecast 3 0 3 0
#8: 7010040 DIAMLER BENZ - 108 NetBaseForecast 2776 2356 420 2356
fifelse
很容易理解,但更有效的方法是使用pmax
和pmin
:
temp1NoOrders[, c("New_F","Com_F") := .(pmax(Forecasts - RTF, 0),
pmin(Forecasts, RTF))]