txt mining 1-白红宇

txt mining 1

阅读量：7065 次

发布时间：2019-06-28

本文共 1625 字，大约阅读时间需要 5 分钟。

全模式

import jieba

# Sample sentence that every segmentation example in this script operates on.
sentence = "我是只长着大长腿的小白兔"

# Full mode: cut_all=True makes jieba emit every dictionary word it can find
# in the sentence, so the resulting words may overlap one another.
word1 = jieba.cut(sentence, cut_all=True)
for item in word1:
    print(item)

# Precise mode ("jingzhunmoshi"): cut_all=False yields a single,
# non-overlapping segmentation of the sentence.
word2 = jieba.cut(sentence, cut_all=False)
for item in word2:
    print(item)

# Search-engine mode: cut_for_search() produces the segmentation intended for
# building search indexes.
word3 = jieba.cut_for_search(sentence)
for item in word3:
    print(item)
print("")

# Precise mode again: cut() called without cut_all.
word4 = jieba.cut(sentence)
for item in word4:
    print(item)

# Part-of-speech tagging.
import jieba.posseg

word5 = jieba.posseg.cut(sentence)
# Each yielded item exposes the token as .word and its POS tag as .flag.
for item in word5:
    print(f"{item.word},{item.flag}")

# POS tag legend:
#   a: adjective      c: conjunction       d: adverb
#   e: interjection   f: direction word    i: idiom
#   m: numeral        n: noun              nr: person name
#   ns: place name    nt: institution name nz: other proper noun
#   p: preposition    r: pronoun           t: time word
#   u: auxiliary      v: verb              vn: verbal noun
#   w: punctuation    un: unknown word
#
# Output recorded by the author for the sample sentence:
#   我,r;是,v;只长,v;着,uz;大,a;长,a;腿,n;的,uj;小白兔,nr

# Load a user dictionary, then tag the sample sentence again so the effect of
# the extra dictionary entries can be compared with the earlier output.
print("")
# NOTE(review): absolute, machine-specific path; adjust it to wherever the
# dictionary file lives before running this script.
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/galaxy_macau_dict.txt")
word6 = jieba.posseg.cut(sentence)
for item in word6:
    print(f"{item.word},{item.flag}")

# Adjust word frequency so that a phrase is kept together as one token.
# NOTE(review): the phrase registered here ends in "小兔子" while the sample
# sentence ends in "小白兔", so it may never match -- confirm this is intended.
jieba.suggest_freq("大长腿的小兔子", True)
# No explicit frequency is passed: a positional True would be read as the
# frequency argument (i.e. 1) and overwrite the value tuned just above.
jieba.add_word("大长腿的小兔子")
word7 = jieba.cut(sentence)
# cut() returns a generator; materialise it so the tokens are printed rather
# than the generator object's repr.
print(list(word7))
print("")

# Keyword (topic) extraction: ask for the top 2 keywords of the sentence.
import jieba.analyse

tag = jieba.analyse.extract_tags(sentence, 2)
print(tag)

# tokenize() reports each word together with its position in the sentence.
word8 = jieba.tokenize(sentence)
for item in word8:
    print(item)

# Encoding workaround: fetch the text over HTTP and decode it as UTF-8,
# ignoring any bytes that cannot be decoded.
import urllib.request

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen("http://127.0.0.1/txt1.txt") as response:
    data = response.read().decode("utf-8", "ignore")

# Extract the top 200 keywords from the downloaded text.
word10 = jieba.analyse.extract_tags(data, 200)
print(word10)

# The same approach applies to an HTML page, e.g.:
# data=urllib.request.urlopen("http://127.0.0.1/txt1.html").read().decode("utf-8","ignore")

转载于:https://www.cnblogs.com/rabbittail/p/8059862.html

你可能感兴趣的文章

自定义序列化技术2：变长数组的实现

查看>>

[Python3网络爬虫开发实战] 5.3-非关系型数据库存储