R 字符串处理

使用R包stringr进行R语言的字符串处理。

str_count()

统计每个字符串中匹配指定模式(pattern)的次数。注意它统计的是匹配次数,而不是字符串的字符个数(字符串长度请用 str_length())。

> library(stringr)
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
#> [1] 1 3 1 1

str_detect()

检测每个字符串中是否存在与模式匹配的内容,返回逻辑向量(TRUE/FALSE)。

> fruit <- c("apple", "banana", "pear", "pinapple")
> str_detect(fruit, "a")
#> [1] TRUE TRUE TRUE TRUE
> str_detect(fruit, "^a")
#> [1] TRUE FALSE FALSE FALSE

str_extract()

提取与模式匹配的内容:str_extract() 只返回每个字符串中的第一个匹配,str_extract_all() 返回全部匹配。

> shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract_all(shopping_list, "[a-z]+\\b", simplify = TRUE) #simplify = TRUE 以矩阵的方式展示结果
#> [,1] [,2] [,3]
#> [1,] "apples" "" ""
#> [2,] "bag" "of" "flour"
#> [3,] "bag" "of" "sugar"
#> [4,] "milk" "" ""
> str_extract(shopping_list, "[a-z]+\\b")
[1] "apples" "bag" "bag" "milk"

str_match()

提取匹配结果及各捕获分组的内容:返回矩阵的第一列是完整匹配,其后各列依次对应正则表达式中的每个分组。str_match() 只返回第一个匹配,str_match_all() 返回全部匹配。

> fruit <- c("apple12345679!123")
> str_match_all(fruit,"([a-z]+).*?(!)") #分2个组,结果第一个为全部匹配的结果
[[1]]
[,1] [,2] [,3]
[1,] "apple12345679!" "apple" "!"

str_locate()

返回模式匹配的起止位置:str_locate() 给出每个字符串中第一个匹配的位置,str_locate_all() 给出全部匹配的位置。

> fruit <- c("apple", "banana", "pear", "pineapple") 
> str_locate(fruit, "a")
#> start end
#> [1,] 1 1
#> [2,] 2 2
#> [3,] 3 3
#> [4,] 5 5
> str_locate(fruit, c("a", "b", "p", "p"))
#> start end
#> [1,] 1 1
#> [2,] 1 1
#> [3,] 1 1
#> [4,] 1 1
> str_locate_all(fruit, "a")
[[1]]
start end
[1,] 1 1

[[2]]
start end
[1,] 2 2
[2,] 4 4
[3,] 6 6

[[3]]
start end
[1,] 3 3

[[4]]
start end
[1,] 5 5

str_subset()

返回向量中含有匹配模式的元素,功能类似 Linux 下的 grep。

> fruit <- c("apple", "banana", "pear", "pinapple")
> str_subset(fruit, "a")
#> [1] "apple" "banana" "pear" "pinapple"
> str_subset(fruit, "^a")
#> [1] "apple"
> str_subset(fruit, "[aeiou]")
#> [1] "apple" "banana" "pear" "pinapple"

str_replace()

替换与模式匹配的内容:str_replace() 只替换每个字符串中的第一个匹配,str_replace_all() 替换全部匹配。

> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
#> [1] "-ne apple" "tw- pears" "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
#> [1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"

str_split()

按模式分割字符串,默认返回列表;simplify = TRUE 时返回字符矩阵(不足的位置以空字符串填充),n 用于限制每个字符串分割后的最大片段数。

> fruits <- c(
"apples and oranges and pears and bananas",
"pineapples and mangos and guavas"
)

> str_split(fruits, " and ")
#> [[1]]
#> [1] "apples" "oranges" "pears" "bananas"
#> [[2]]
#> [1] "pineapples" "mangos" "guavas"
> str_split(fruits, " and ", simplify = TRUE)
#> [,1] [,2] [,3] [,4]
#> [1,] "apples" "oranges" "pears" "bananas"
#> [2,] "pineapples" "mangos" "guavas" ""
> str_split(fruits, " and ", n = 3)
#> [[1]]
#> [1] "apples" "oranges" "pears and bananas"
#> [[2]]
#> [1] "pineapples" "mangos" "guavas"

str_sort()

字符串排序。decreasing = TRUE 表示降序;numeric = TRUE 时,字符串中的数字按数值大小比较,而不是逐字符比较。

> letter<-c('an apple','two oranges','three bananas','four tomatoes')
> str_sort(letter)
[1] "an apple" "four tomatoes" "three bananas" "two oranges"
> str_sort(letter,decreasing = TRUE)
[1] "two oranges" "three bananas" "four tomatoes" "an apple"
> x <- c("100a10", "100a5", "2b", "2a")
> str_sort(x)
#> [1] "100a10" "100a5" "2a" "2b"
> str_sort(x, numeric = TRUE)
#> [1] "2a" "2b" "100a5" "100a10"

str_to_upper() str_to_lower() str_to_title()

改变字符大小写。

str_length()

字符串长度。

str_c()

拼接字符串:sep 指定各参数之间的分隔符,collapse 将结果向量的所有元素合并为一个字符串。下面的例子沿用 str_sort() 一节中定义的向量 letter。

> str_c('I have',letter,sep = ' ')  #单个字符串与向量的每个元素逐一拼接
[1] "I have an apple" "I have two oranges" "I have three bananas" "I have four tomatoes"
> str_c(letter,collapse = ',')  #将向量的所有元素拼接成一个字符串
[1] "an apple,two oranges,three bananas,four tomatoes"