|
每天活跃人数统计
# Daily active-user count: see how the number of active users changes from
# day to day (the original note refers to this input as the "flat.day" data).
# NOTE(review): flat.mat is summed column by column and its column names are
# parsed as dates by ymd(), so each column is presumably one day -- confirm.
# numday: number of users who posted on each day.
numday <- apply(flat.mat, 2, sum)
tempdf <- data.frame(time = ymd(names(numday)), numday)
# geom "linerange" needs both ends of every segment, and qplot() has to be
# told which data frame holds `time` and `numday`; anchor the segments at 0.
qplot(x = time, y = numday, ymin = 0, ymax = numday, data = tempdf,
      geom = "linerange")
# Next, look at the hour-of-day pattern of the top-ten users.
# Count messages per user and hour. The aggregation is spelled out because
# dcast() otherwise only falls back to length (with a message) when needed.
flat.hour <- dcast(newdata, id ~ hour, fun.aggregate = length,
                   value.var = "hour", subset = .(id %in% topuser))
# Parallel-coordinates plot: one line per user across the hours of the day.
# id.vars is given explicitly so that a numeric id column is not melted as
# if it were a measurement.
hour.melt <- melt(flat.hour, id.vars = "id")
p <- ggplot(data = hour.melt, aes(x = variable, y = value))
p + geom_line(aes(group = id, color = id)) +
  theme_bw() +
  theme(legend.position = "none")
# Count runs of continuous conversation: a silence of 30 minutes or more
# between two consecutive messages starts a new conversation.
# Stored as POSIXct, because POSIXlt is a list and is awkward as a data
# frame column.
newdata$realtime <- as.POSIXct(strptime(newdata$time, "%Y-%m-%d %H:%M"))
# The rows are not reliably in time order, so re-sort them by timestamp.
newdata2 <- newdata[order(newdata$realtime), ]
# Label every message with the running index of its conversation: the first
# message opens conversation 1 and each gap >= 30 minutes opens the next one.
# Timestamps that failed to parse sort last and get an NA label.
gap_mins <- as.numeric(diff(newdata2$realtime), units = "mins")
group <- cumsum(head(c(TRUE, gap_mins >= 30), nrow(newdata2)))
# Number of messages in each conversation.
barplot(table(group))
# The author's data produced 93-odd conversations.
newdata2$group <- group

# Network analysis among the top-ten users (drawn with igraph further down).
# Build a relationship matrix: whenever two users both appear in the same
# group conversation, the count for that pair goes up by one.
newdata3 <- dcast(newdata2, id ~ group, sum, value.var = "group",
                  subset = .(id %in% user[1:10, ]$Var1))
# Reduce the per-conversation sums to a 0/1 "took part" indicator.
newdata4 <- ifelse(as.matrix(newdata3[, -1, drop = FALSE]) > 0, 1, 0)
rownames(newdata4) <- newdata3[, 1]
# user x user matrix: entry (a, b) = number of conversations shared by a and b.
relmatrix <- newdata4 %*% t(newdata4)
# Zero the diagonal (a user's own conversation count); the largest remaining
# entry then shows which two users talk to each other the most.
deldiag <- relmatrix
diag(deldiag) <- 0
which(deldiag == max(deldiag), arr.ind = TRUE)
# Draw the social network from the relationship matrix.
# Undirected, weighted graph whose edge weights are the shared-conversation
# counts in deldiag.
# NOTE(review): graph.adjacency() and layout.fruchterman.reingold() are the
# legacy igraph names; recent releases prefer graph_from_adjacency_matrix()
# and layout_with_fr().
g <- graph.adjacency(deldiag, weighted = TRUE, mode = "undirected")
g <- simplify(g)
V(g)$label <- rownames(relmatrix)
V(g)$degree <- degree(g)
layout1 <- layout.fruchterman.reingold(g)
# Alternative linear scaling, kept for reference:
# egam <- 10 * E(g)$weight / max(E(g)$weight)
# Log-scale the edge weights and normalise so the strongest edge gets 1; the
# result drives both the line width and the alpha of each edge.
egam <- (log(E(g)$weight) + 1) / max(log(E(g)$weight) + 1)
# Optional: scale label size by degree.
# V(g)$label.cex <- V(g)$degree / max(V(g)$degree) + .2
V(g)$label.color <- rgb(0, .2, .8)
V(g)$frame.color <- NA
E(g)$width <- egam
E(g)$color <- rgb(0, 0, 1, egam)
plot(g, layout = layout1)
# Find each user's best partner: for every row of deldiag, the name of the
# first column that holds the row maximum.
pairlist <- data.frame(
  pair = vapply(
    seq_len(nrow(deldiag)),
    function(i) colnames(deldiag)[which.max(deldiag[i, ])[1]],
    character(1)
  ),
  row.names = rownames(deldiag),
  stringsAsFactors = FALSE
)
pairlist

# Placeholder frame with one row per user (not filled in within this section).
pairmatrix <- data.frame(pair = seq_len(nrow(deldiag)))

# Turn deldiag into a 0/1 indicator matrix: 1 where an entry equals the
# maximum of its row, 0 elsewhere. row_max has one value per row, so it
# recycles down the columns and lines up with the row index.
row_max <- apply(deldiag, 1, max)
deldiag[] <- as.numeric(deldiag == row_max)
deldiag
(编辑:网站开发网_盐城站长网 )
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
|