ncovr: Read and process nCoV data 新型冠状病毒数据获取和可视化
2020-03-16
这是一个 R 语言包,使用教程详见 https://openr.pzhao.org/zh/tags/ncovr/。这里是个简介。
ncovr 包是方便 R 用户获取新型冠状病毒(2019-nCoV)数据而开发的,后续增添了数据处理、建模、可视化等功能。
数据获取途径
ncovr 包获取数据的主要途径是 BlankerL/DXY-2019-nCoV-Crawler。这个项目提供了 api 接口和 csv 文件。为了减轻 api 的流量压力, ncovr 每天自动从这个 api 读一次数据,保存成 R 语言可直接读取的 .RDS 格式,方便 R 语言用户调用。详见下面的示例。
安装
-
安装 R。在 CRAN 上选择适合你操作系统的安装包来安装。
-
安装 remotes 包:
install.packages('remotes')
-
安装 ncovr 包:
remotes::install_github('pzhaonet/ncovr')
获取数据
# Sys.setlocale('LC_CTYPE', 'Chinese') # Windows users: switch to a Chinese locale
# library() stops with an error when a package is missing; require() would only
# return FALSE and let the calls below fail with a less helpful message.
library("ncovr")
library("leafletCN")
ncov <- get_ncov() # read the RDS data (recommended); reading from GitHub is unstable from within China
# get_ncov(method = 'csv') # read from the csv file (recommended)
# get_ncov(method = 'api') # read from the api
另外提供了下载数据的函数 get_ncov2(),定义新的 ncov 类(实际也是 data frame),提供了 subset() 方法,用于提取 ncov 子集,参数可以是省份、china 或者 world。
ncov2 <- get_ncov2()
ncov2
## All COVID 2019 Data
## Updated at 2020-03-16 15:44:16
## From https://github.com/yiluheihei/nCoV-2019-Data
# 湖北 ncov, 按市统计
hubei_ncov <- ncov2["湖北"]
hubei_ncov
## Hubei COVID 2019 Data
## Updated at 2020-03-16 13:54:01
## From https://github.com/yiluheihei/nCoV-2019-Data
head(data.frame(hubei_ncov), 5)
## cityEnglishName cityName provinceName provinceShortName provinceEnglishName
## 1 Wuhan 武汉 湖北省 湖北 Hubei
## 2 Xiaogan 孝感 湖北省 湖北 Hubei
## 3 Ezhou 鄂州 湖北省 湖北 Hubei
## 4 Suizhou 随州 湖北省 湖北 Hubei
## 5 Jingzhou 荆州 湖北省 湖北 Hubei
## currentConfirmedCount confirmedCount suspectedCount curedCount deadCount
## 1 9102 50003 0 38432 2469
## 2 125 3518 0 3266 127
## 3 62 1394 0 1275 57
## 4 41 1307 0 1221 45
## 5 38 1580 0 1492 50
## updateTime
## 1 2020-03-16 13:54:01
## 2 2020-03-16 13:54:01
## 3 2020-03-16 13:54:01
## 4 2020-03-16 13:54:01
## 5 2020-03-16 13:54:01
# china,按省统计
china_ncov <- ncov2["china"]
head(data.frame(china_ncov), 5)
## provinceName provinceShortName provinceEnglishName currentConfirmedCount
## 1 中国 中国 China 9951
## 2 湖北省 湖北 Hubei 9557
## 3 辽宁省 辽宁 Liaoning 9
## 4 广西壮族自治区 广西 Guangxi 2
## 5 海南省 海南 Hainan 1
## confirmedCount suspectedCount curedCount deadCount updateTime
## 1 81099 0 67930 3218 2020-03-16 13:54:38
## 2 67798 0 55142 3099 2020-03-16 13:54:01
## 3 125 0 115 1 2020-03-16 13:54:01
## 4 252 0 248 2 2020-03-16 13:49:01
## 5 168 0 161 6 2020-03-16 13:49:01
# world, 按国家统计
world_ncov <- ncov2["world"]
## Parsed with column specification:
## cols(
## countryName = col_character(),
## countryEnglishName = col_character()
## )
head(data.frame(world_ncov), 5)
## countryEnglishName countryName currentConfirmedCount confirmedCount
## 1 Afghanistan 阿富汗 16 16
## 2 Albania 阿尔巴尼亚 41 42
## 3 Algeria 阿尔及利亚 41 54
## 4 Andorra 安道尔 1 2
## 5 Antigua and Barbuda 安提瓜和巴布达 1 1
## suspectedCount curedCount deadCount updateTime
## 1 0 0 0 2020-03-16 06:53:45
## 2 0 0 1 2020-03-16 06:53:45
## 3 0 10 3 2020-03-16 06:53:45
## 4 0 1 0 2020-03-16 06:53:45
## 5 0 0 0 2020-03-16 06:53:45
国家地图
按省级显示
plot_map(ncov$area)
# log scale
plot_map(ncov$area, scale = "log")
更进一步使用 plot_china_map(),可通过设置 bins 参数控制如何分组以填充不同的颜色,自动把 ncov 为 0 的地区(包括南海驻岛)填充为白色。
plot_china_map(
china_ncov,
bins = c(1, 100, 500, 1000, 10000),
legend_position = "bottomleft"
)
按城市显示
plot_map(ncov$area, method = "city", scale = "log")
ggplot
library(ggplot2)
# Add a Date column converted from updateTime; conv_time is reached through
# `:::`, i.e. taken directly from the ncovr namespace.
ncov$area$date <- as.Date(ncovr:::conv_time(ncov$area$updateTime))
# Snapshot date: only records dated on or before it are kept.
choose_date <- "2020-02-10"
x <- ncov$area[ncov$area$date <= as.Date(choose_date), ]
# Keep the first remaining row of each provinceName.
# NOTE(review): presumably ncov$area is ordered newest-first, which would make
# this the latest record per province -- confirm.
x <- x[!duplicated(x$provinceName), ]
plot_ggmap(x)
## Parsed with column specification:
## cols(
## province = col_character(),
## city = col_character(),
## long = col_double(),
## lat = col_double()
## )
省疫情图
湖北省疫情图
# plot_province_map(ncov2, "湖北省"),或
plot_province_map(hubei_ncov, "湖北省", bins = c(1, 100, 200, 500, 1000, 10000))
或直接基于 ncov2 作图,无需提前提取各省的 ncov 数据。例如天津疫情图:
plot_province_map(ncov2, "天津市", bins = c(1, 10, 20, 50))
世界地图:各国疫情图
# Add a Date column converted from updateTime and order the records so that
# the latest dates come first.
ncov$area$date <- as.Date(ncovr:::conv_time(ncov$area$updateTime))
ncov$area <- ncov$area[rev(order(ncov$area$date)), ]

# Keep records dated on or before ncov_date, then the first row of each
# provinceName, which after the sort above is one with the latest date.
ncov_date <- as.character(Sys.Date())
y <- ncov$area[ncov$area$date <= as.Date(ncov_date), ]
y <- y[!duplicated(y$provinceName), ]

# Lookup table: `ncovr` holds names as they are matched against
# countryEnglishName below, `leafletNC` holds their replacements.
# The `ncovr` strings are lookup keys, so they must stay exactly as written.
countryname <- data.frame(
  ncovr = c(
    "United Kiongdom", "United States of America",
    "New Zealand", "Kampuchea (Cambodia )"
  ),
  leafletNC = c(
    "UnitedKingdom", "UnitedStates",
    "NewZealand", "Cambodia"
  ),
  stringsAsFactors = FALSE
)

x <- data.frame(
  countryEnglishName = y$countryEnglishName,
  countryName = y$countryName,
  confirmedCount = y$confirmedCount,
  stringsAsFactors = FALSE
)

# Replace the country names that appear in the lookup table.
loc <- which(x$countryEnglishName %in% countryname$ncovr)
x$countryEnglishName[loc] <- countryname$leafletNC[
  match(x$countryEnglishName[loc], countryname$ncovr)
]
x$countryEnglishName2 <- x$countryEnglishName # for taiwan

# Rows with a known country name, split into China and everything else.
has_name <- !is.na(x$countryEnglishName)
is_china <- has_name & x$countryEnglishName == "China"
x_other <- x[has_name & !is_china, ]

# Collapse all China rows into a single row carrying the summed count.
# stringsAsFactors = FALSE keeps the columns character on R < 4.0, matching
# the other data frames built above.
x_china <- data.frame(
  countryEnglishName = "China",
  countryName = unique(x[is_china, "countryName"]),
  confirmedCount = sum(x[is_china, "confirmedCount"]),
  countryEnglishName2 = "China",
  stringsAsFactors = FALSE
)

# Copy of the China row whose countryEnglishName2 is "Taiwan".
x_taiwan <- x_china
x_taiwan$countryEnglishName2 <- "Taiwan"

x <- rbind(x_other, x_china, x_taiwan)
plot_map(
  x = x,
  key = "confirmedCount",
  scale = "log",
  method = "country",
  legend_title = "Cnfrm 确诊",
  filter = "待明确地区"
)
## New names:
## * `` -> ...76
更进一步
plot_world_map(world_ncov)
## Parsed with column specification:
## cols(
## name = col_character(),
## name_zh = col_character()
## )
ts
# Per country and date, take the maximum of the confirmed / cured / dead
# counts, then drop rows without an English country name and the China rows.
x_ts <- ncov$area[
  ,
  c(
    "countryEnglishName", "countryName", "date",
    "confirmedCount", "curedCount", "deadCount"
  )
] %>%
  dplyr::group_by(countryEnglishName, date) %>%
  dplyr::summarise(
    confirmed = max(confirmedCount),
    cured = max(curedCount),
    dead = max(deadCount)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::filter(!is.na(countryEnglishName), countryEnglishName != "China") %>%
  as.data.frame()

# Replace the country names found in the countryname lookup table
# (column ncovr) with their counterparts from column leafletNC.
hit <- match(x_ts$countryEnglishName, countryname$ncovr)
loc <- which(!is.na(hit))
x_ts$countryEnglishName[loc] <- countryname$leafletNC[hit[loc]]

plot_ts(
  x_ts,
  area = "Italy",
  area_col = "countryEnglishName",
  date_col = "date",
  ts_col = c("confirmed", "cured", "dead")
)
国外疫情图
韩国疫情图
korea_ncov <- get_foreign_ncov("韩国")
plot_foreign_map(korea_ncov, "korea")
## Parsed with column specification:
## cols(
## name = col_character(),
## name_zh = col_character(),
## provinceName = col_character(),
## provinceEnglishName = col_character()
## )
日本疫情图
jp_ncov <- get_foreign_ncov("日本")
plot_foreign_map(jp_ncov, "japan")
## Parsed with column specification:
## cols(
## name = col_character(),
## name_zh = col_character(),
## provinceName = col_character(),
## provinceEnglishName = col_character()
## )
伊朗疫情图
iran_ncov <- get_foreign_ncov("伊朗")
plot_foreign_map(iran_ncov, "iran")
## Parsed with column specification:
## cols(
## name = col_character(),
## name_zh = col_character(),
## provinceName = col_character(),
## provinceEnglishName = col_character()
## )
意大利疫情图
italy_ncov <- get_foreign_ncov("意大利")
plot_foreign_map(italy_ncov, "italy")
## Parsed with column specification:
## cols(
## name = col_character(),
## name_zh = col_character(),
## provinceName = col_character(),
## provinceEnglishName = col_character()
## )
直接画这四个国家的疫情图
# Values are the country names passed to get_foreign_ncov(); the element
# names are passed as the second argument of plot_foreign_map().
foreign_countries <- c(
  korea = "韩国",
  iran = "伊朗",
  japan = "日本",
  italy = "意大利"
)
# Fetch and plot each country, collecting the results in one tag list.
htmltools::tagList(purrr::imap(
  foreign_countries,
  function(country, map_name) {
    plot_foreign_map(get_foreign_ncov(country), map_name)
  }
))
更多功能请参看函数的帮助信息
License
Copyright Peng Zhao.
Released under the GPL-3 license.