"""Walkthrough of the jieba Chinese word-segmentation API.

Runs, in order: the three segmentation modes, part-of-speech tagging,
loading a user dictionary, tuning a word's frequency, keyword extraction,
tokenization with offsets, and keyword extraction from text fetched over
HTTP.  Everything is printed to stdout.
"""
import urllib.request

import jieba
import jieba.analyse
import jieba.posseg

sentence = "我是只长着大长腿的小白兔"

# --- Full mode (cut_all=True) ---
word1 = jieba.cut(sentence, cut_all=True)
for item in word1:
    print(item)

# --- Precise mode (cut_all=False) ---
word2 = jieba.cut(sentence, cut_all=False)
for item in word2:
    print(item)

# --- Search-engine mode ---
word3 = jieba.cut_for_search(sentence)
for item in word3:
    print(item)
print("")

# Calling cut() without cut_all also segments in precise mode.
word4 = jieba.cut(sentence)
for item in word4:
    print(item)

# --- Part-of-speech tagging ---
# Each result exposes the token as `.word` and its POS tag as `.flag`.
word5 = jieba.posseg.cut(sentence)
for item in word5:
    print(item.word + "," + item.flag)
# Tag legend:
#   a: adjective; c: conjunction; d: adverb; e: exclamation; f: direction word;
#   i: idiom; m: number; n: noun; nr: person name; ns: location;
#   nt: institution name; nz: other proper noun; p: preposition; r: pronoun;
#   t: time; u: auxiliary; v: verb; vn: verbal noun; w: punctuation;
#   un: unknown word
# Example output for the sentence above:
#   我,r;是,v;只长,v;着,uz;大,a;长,a;腿,n;的,uj;小白兔,nr

# --- Loading a user dictionary ---
print("")
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/galaxy_macau_dict.txt")
word6 = jieba.posseg.cut(sentence)
for item in word6:
    print(item.word + "," + item.flag)

# --- Adjusting word frequency ---
# suggest_freq(..., True) tunes the frequency so the phrase is kept as one
# token.  add_word() is called without an explicit frequency on purpose: its
# second positional parameter is `freq`, so passing True there would set the
# frequency to 1 and undo the tuning done on the previous line.
jieba.suggest_freq("大长腿的小兔子", True)
jieba.add_word("大长腿的小兔子")
word7 = jieba.cut(sentence)
# cut() returns a generator; iterate it so the tokens (not the generator
# object's repr) are printed.
for item in word7:
    print(item)
print("")

# --- Keyword (topic) extraction ---
tag = jieba.analyse.extract_tags(sentence, 2)
print(tag)

# --- Tokenization with offsets ---
word8 = jieba.tokenize(sentence)
for item in word8:
    print(item)

# --- Handling encoding problems ---
# Decode with errors="ignore" so undecodable bytes in the fetched text are
# dropped instead of raising UnicodeDecodeError.
with urllib.request.urlopen("http://127.0.0.1/txt1.txt") as response:
    data = response.read().decode("utf-8", "ignore")
word10 = jieba.analyse.extract_tags(data, 200)
print(word10)
# The HTML variant at "http://127.0.0.1/txt1.html" can be fetched and decoded
# the same way.