# 利用 Python 去除停用词——在 R Markdown(RStudio)内运行时报错,在 Python 命令行下运行正常
```{python}
# jieba.load_userdict('userdict.txt')
# 创建停用词list
def stopwordslist(filepath):
    """Load a stop-word list from a UTF-8 text file.

    Args:
        filepath: Path of a text file read line by line.

    Returns:
        A list with one entry per line of the file, each stripped of
        leading and trailing whitespace. Blank lines yield empty strings.
    """
    # The context manager closes the file even if reading or decoding raises.
    with open(filepath, 'r', encoding='utf-8') as stopword_file:
        return [line.strip() for line in stopword_file]
# 对句子进行分词
def seg_sentence(sentence, stopwords=None):
    """Remove stop words and tab characters from a sentence.

    The sentence is stripped of surrounding whitespace and then iterated
    directly, so the units that are filtered are single characters.
    NOTE(review): no tokenizer is applied here, so only single-character
    stop words can ever match -- confirm this is intended.

    Args:
        sentence: The text to filter.
        stopwords: Optional collection of stop words. When omitted, the
            stop words are loaded from the default stop-word file on every
            call; callers that process many sentences should load the
            collection once and pass it in.

    Returns:
        A string in which every kept character is followed by one space
        (so a non-empty result ends with a trailing space).
    """
    if stopwords is None:
        # Path of the stop-word file to load.
        stopwords = stopwordslist('C:/Users/czliu/Documents/R/tingci.txt')
    # A set gives O(1) membership tests instead of scanning a list per unit.
    stopword_set = set(stopwords)
    return ''.join(
        word + " "
        for word in sentence.strip()
        if word not in stopword_set and word != '\t'
    )
# Filter every line of the input file and write the result to the output
# file, one output line per input line. The with-statement closes both files
# even if reading, filtering or writing raises part-way through.
with open('C:/Users/czliu/Documents/R/a.txt', 'r', encoding='utf-8') as inputs, \
        open('C:/Users/czliu/Documents/R/yuliao.txt', 'w', encoding='utf-8') as outputs:
    for line in inputs:
        line_seg = seg_sentence(line)  # the return value is a string
        outputs.write(line_seg + '\n')
```
以上代码,原是放在rstudio-Rmarkdown内运行的。