R 字符串处理

使用R包stringr进行R语言的字符串处理。

str_count()

计算每个字符串中匹配指定模式（pattern）的次数。

> library(stringr)
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
#> [1] 1 3 1 1

str_detect()

检测每个字符串中是否存在匹配指定模式的内容，返回逻辑向量。

> fruit <- c("apple", "banana", "pear", "pinapple")
> str_detect(fruit, "a")
#> [1] TRUE TRUE TRUE TRUE
> str_detect(fruit, "^a")
#> [1]  TRUE FALSE FALSE FALSE

str_extract()

提取字符串中匹配模式的内容：str_extract() 只返回第一个匹配，str_extract_all() 返回所有匹配。

> shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract_all(shopping_list, "[a-z]+\\b", simplify = TRUE)  #simplify = TRUE 以矩阵的方式展示结果
#>      [,1]     [,2] [,3]   
#> [1,] "apples" ""   ""     
#> [2,] "bag"    "of" "flour"
#> [3,] "bag"    "of" "sugar"
#> [4,] "milk"   ""   ""     
> str_extract(shopping_list, "[a-z]+\\b")
[1] "apples" "bag"    "bag"    "milk"

str_match()

按模式提取匹配内容，并同时返回正则表达式中各捕获组（分组）的匹配结果。

> fruit <- c("apple12345679!123")
> str_match_all(fruit,"([a-z]+).*?(!)")   #分2个组，结果第一列为完整匹配，其后各列依次为各分组的匹配
[[1]]
     [,1]             [,2]    [,3]
[1,] "apple12345679!" "apple" "!"

str_locate()

返回模式匹配的起止位置：str_locate() 返回第一个匹配的位置，str_locate_all() 返回所有匹配的位置。

> fruit <- c("apple", "banana", "pear", "pineapple") 
> str_locate(fruit, "a")
#>      start end
#> [1,]     1   1
#> [2,]     2   2
#> [3,]     3   3
#> [4,]     5   5
> str_locate(fruit, c("a", "b", "p", "p"))
#>      start end
#> [1,]     1   1
#> [2,]     1   1
#> [3,]     1   1
#> [4,]     1   1
> str_locate_all(fruit, "a")
[[1]]
     start end
[1,]     1   1

[[2]]
     start end
[1,]     2   2
[2,]     4   4
[3,]     6   6

[[3]]
     start end
[1,]     3   3

[[4]]
     start end
[1,]     5   5

str_subset()

返回包含匹配模式的字符串，功能类似 Linux 下的 grep。

> fruit <- c("apple", "banana", "pear", "pinapple")
> str_subset(fruit, "a")
#> [1] "apple"    "banana"   "pear"     "pinapple"
> str_subset(fruit, "^a")
#> [1] "apple"
> str_subset(fruit, "[aeiou]")
#> [1] "apple"    "banana"   "pear"     "pinapple"

str_replace()

替换匹配模式的内容：str_replace() 只替换第一个匹配，str_replace_all() 替换所有匹配。

> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
#> [1] "-ne apple"     "tw- pears"     "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
#> [1] "-n- -ppl-"     "tw- p--rs"     "thr-- b-n-n-s"

str_split()

按指定的分隔模式分割字符串；参数 n 限制最多分割成的片段数，simplify = TRUE 以矩阵形式返回结果。

> fruits <- c(
  "apples and oranges and pears and bananas",
  "pineapples and mangos and guavas"
)

> str_split(fruits, " and ")
#> [[1]]
#> [1] "apples"  "oranges" "pears"   "bananas" 
#> [[2]]
#> [1] "pineapples" "mangos"     "guavas"    
> str_split(fruits, " and ", simplify = TRUE)
#>      [,1]         [,2]      [,3]     [,4]     
#> [1,] "apples"     "oranges" "pears"  "bananas"
#> [2,] "pineapples" "mangos"  "guavas" ""     
> str_split(fruits, " and ", n = 3)
#> [[1]]
#> [1] "apples"            "oranges"           "pears and bananas" 
#> [[2]]
#> [1] "pineapples" "mangos"     "guavas"

str_sort()

字符串排序。

> letter<-c('an apple','two oranges','three bananas','four tomatoes')
> str_sort(letter)
[1] "an apple"      "four tomatoes" "three bananas" "two oranges"
> str_sort(letter,decreasing = TRUE)
[1] "two oranges"   "three bananas" "four tomatoes" "an apple"
> x <- c("100a10", "100a5", "2b", "2a")
> str_sort(x)
#> [1] "100a10" "100a5"  "2a"     "2b"    
> str_sort(x, numeric = TRUE)
#> [1] "2a"     "2b"     "100a5"  "100a10"

str_to_upper() str_to_lower() str_to_title()

改变字符大小写。

str_length()

字符串长度。

str_c()

拼接字符串。

> str_c('I have',letter,sep = ' ')  #将单个字符串与向量中的每个元素分别拼接
[1] "I have an apple"      "I have two oranges"   "I have three bananas" "I have four tomatoes"
> str_c(letter,collapse = ',')  #用 collapse 将所有字符串拼接为一个字符串
[1] "an apple,two oranges,three bananas,four tomatoes"