0
0 комментариев
# Print every new document next to the name of the category predicted for it.
for doc, category in zip(docs_new, predicted):
    category_name = twenty_train.target_names[category]
    print('%r => %s' % (doc, category_name))

Как узнать, с какой вероятностью алгоритм определил, что текст относится к данной группе?

Вот полный код:

# Train a Multinomial Naive Bayes text classifier on TF-IDF features and
# print the predicted category name for each new document.
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

# Load the training documents from 'db', restricted to the listed categories.
categories = ['first', 'second', 'third']
twenty_train = load_files(
    'db',
    categories=categories,
    shuffle=False,
    encoding='utf-8',
)

# Step 1: turn the raw training texts into a count matrix.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(twenty_train.data)

# Step 2: convert the counts into TF-IDF features and report the matrix shape.
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
print(X_train_tfidf.shape)

# Step 3: fit the classifier on the TF-IDF features and the training targets.
clf = MultinomialNB()
clf.fit(X_train_tfidf, twenty_train.target)

# NOTE(review): str1 and str2 are not defined in the visible code -- they are
# presumably the new texts to classify; confirm where they come from.
docs_new = [str1, str2]

# The new documents go through the already-fitted vectorizer and transformer
# (transform, not fit_transform).
X_new_counts = count_vect.transform(docs_new)
X_new_tfidf = tfidf_transformer.transform(X_new_counts)

predicted = clf.predict(X_new_tfidf)

# Show each document together with the name of its predicted category.
for doc, category in zip(docs_new, predicted):
    category_name = twenty_train.target_names[category]
    print('%r => %s' % (doc, category_name))


Добавить комментарий