阅读背景:

贝叶斯,SVM分类_我想了很多事情的博客

来源:互联网 
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
import jieba
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
import os
def getListFiles(path):
    """Recursively collect files under *path* whose names end with "txt".

    The directory tree is traversed with ``os.walk``; every matching file
    name is joined with the directory it was found in.

    Note that the suffix test is ``endswith("txt")`` with no leading dot, so
    a file called ``notestxt`` matches as well as ``notes.txt``.

    Args:
        path: Root directory to search.

    Returns:
        A list of file paths in ``os.walk`` traversal order. The list is
        empty when nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
        if name.endswith("txt")
    ]
def load_data(path):
    """Load tab-separated samples from every file ``getListFiles`` finds.

    Each file is opened as GBK-encoded text and read line by line. A line is
    split on tab characters; the first field is stored in ``data_y`` and the
    second field in ``data_x`` (``func`` in this file uses them as labels and
    raw texts respectively). Fields after the second are ignored.

    Lines that contain no tab (for example blank lines) have no second field
    and are skipped instead of raising ``IndexError``.

    Args:
        path: Root directory passed on to ``getListFiles``.

    Returns:
        A ``(data_x, data_y)`` tuple of two parallel lists of strings.

    Raises:
        UnicodeDecodeError: If a file is not valid GBK.
    """
    data_x = []
    data_y = []
    for file_path in getListFiles(path):
        # The context manager guarantees the handle is closed, even when
        # decoding fails part-way through the file.
        with open(file_path, 'r', encoding='gbk') as f:
            for line in f:
                fields = line.split('\t')
                if len(fields) < 2:
                    # No tab separator on this line: nothing to pair up.
                    continue
                data_x.append(fields[1])
                data_y.append(fields[0])
    return data_x, data_y
def func(data_x,data_y):
    """Train a TF-IDF + multinomial naive Bayes classifier and print results.

    The samples are split into train and test parts with
    ``train_test_split(random_state=1)``. Every text is segmented with
    ``jieba.cut`` and re-joined with spaces before being vectorized. The
    vectorizer is fitted on the training texts only and then applied to the
    test texts.

    Printed output, in order: the learned vocabulary, the training TF-IDF
    matrix, the predictions for the test texts, and the score returned by
    ``classifier.score`` on the test split.

    Args:
        data_x: Sequence of raw text strings.
        data_y: Sequence of labels, parallel to ``data_x``.

    Returns:
        None.
    """
    x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, random_state=1)

    # Segment each text with jieba and join the pieces with spaces so the
    # vectorizer receives whitespace-delimited tokens.
    train_docs = [' '.join(jieba.cut(text)) for text in x_train]
    test_docs = [' '.join(jieba.cut(text)) for text in x_test]

    # Fit once: fit_transform both learns the vocabulary/idf weights and
    # returns the training matrix, so no separate fit() or transform() of the
    # training texts is needed.
    vec = TfidfVectorizer()
    train_matrix = vec.fit_transform(train_docs)
    print(vec.vocabulary_)
    print(train_matrix)

    classifier = MultinomialNB()  # naive Bayes classifier
    # classifier = svm.SVC()  # alternative: SVM classifier
    classifier.fit(train_matrix, y_train)

    # Vectorize the test texts a single time and reuse the matrix for both
    # scoring and prediction.
    test_matrix = vec.transform(test_docs)
    score = classifier.score(test_matrix, y_test)
    result = classifier.predict(test_matrix)
    print(result)
    # The score used to be computed and silently discarded; report it.
    print(score)
if __name__ == '__main__':
    # Script entry point: load the corpus from a hard-coded local directory
    # and run the train/evaluate routine on it.
    corpus_dir = r'C:\Users\Administrator\Desktop\语料\result\会议'
    data_x, data_y = load_data(corpus_dir)
    func(data_x, data_y)
from sklearn.model_selection import train_test_



你的当前访问异常,请进行认证后继续阅读剩余内容。

分享到: