Pythonでitertoolsを使ってパラメータの総当り

2016-06-28
このエントリーをはてなブックマークに追加
import itertools

# Candidate values for the two parameters being combined.
_x = list('012')
_y = list('abc')

# itertools.product yields every (x, y) pairing of the two lists; each pair
# is printed on its own line, separated by a space.
for _i, _j in itertools.product(_x, _y):
    print(_i, _j)

パラメータAにもパラメータBにも複数のパターンがあり、それらの全組み合わせを総当りで出力したいときに itertools.product が便利。

0 a
0 b
0 c
1 a
1 b
1 c
2 a
2 b
2 c
Tags:

scikit-learnのTfidfVectorizerで単語ごとのスコアを取得する

2016-06-17
このエントリーをはてなブックマークに追加
from sklearn.feature_extraction.text import TfidfVectorizer


def main():
    """Fit a TF-IDF model on five English proverbs and print the results.

    Prints the TF-IDF score matrix converted to a dense array, followed by
    the list of feature names (tokens) learned by the vectorizer.
    """
    _docs = [
        'A friend to all is a friend to none.',
        'One good turn deserves another.',
        'He who runs after two hares will catch neither.',
        'It’s no use crying over spilt milk.',
        'Time and tide wait for no man.',
    ]

    # Passing a callable as the analyzer makes the vectorizer use the tokens
    # returned by text_split as its features.
    _v = TfidfVectorizer(analyzer=text_split)
    _score = _v.fit_transform(_docs)

    print(_score.toarray())

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer get_feature_names_out() and fall back only on old releases.
    # tolist() turns the returned array back into a plain list of str so the
    # printed form matches what get_feature_names() used to produce.
    if hasattr(_v, 'get_feature_names_out'):
        _names = _v.get_feature_names_out().tolist()
    else:
        _names = _v.get_feature_names()
    print(_names)

def text_split(val):
    """Cut *val* at every single space character and return the pieces.

    Punctuation stays attached to its word, and consecutive spaces produce
    empty-string tokens because the separator is exactly one space.
    """
    tokens = val.split(sep=' ')
    return tokens

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

$ python tf.py
[[ 0.2773501   0.          0.          0.          0.          0.2773501
   0.          0.2773501   0.          0.          0.          0.          0.
   0.          0.5547002   0.          0.          0.2773501   0.          0.
   0.          0.          0.2773501   0.          0.          0.          0.
   0.5547002   0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.4472136   0.          0.          0.
   0.          0.          0.4472136   0.          0.          0.4472136
   0.          0.          0.4472136   0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.          0.
   0.          0.4472136   0.          0.          0.          0.          0.        ]
 [ 0.          0.33333333  0.          0.          0.          0.
   0.33333333  0.          0.          0.          0.33333333  0.          0.
   0.          0.          0.          0.33333333  0.          0.          0.
   0.33333333  0.          0.          0.          0.33333333  0.          0.
   0.          0.          0.33333333  0.          0.          0.33333333
   0.33333333]
 [ 0.          0.          0.38775666  0.          0.          0.          0.
   0.          0.          0.          0.          0.38775666  0.          0.
   0.          0.          0.          0.          0.          0.38775666
   0.          0.31283963  0.          0.38775666  0.          0.38775666
   0.          0.          0.          0.          0.38775666  0.          0.
   0.        ]
 [ 0.          0.          0.          0.          0.38775666  0.          0.
   0.          0.38775666  0.          0.          0.          0.
   0.38775666  0.          0.          0.          0.          0.38775666
   0.          0.          0.31283963  0.          0.          0.          0.
   0.38775666  0.          0.          0.          0.          0.38775666
   0.          0.        ]]

['A', 'He', 'It’s', 'One', 'Time', 'a', 'after', 'all', 'and', 'another.', 'catch', 'crying', 'deserves', 'for', 'friend', 'good', 'hares', 'is', 'man.', 'milk.', 'neither.', 'no', 'none.', 'over', 'runs', 'spilt', 'tide', 'to', 'turn', 'two', 'use', 'wait', 'who', 'will']