추천시스템 함수
import pandas as pd
# Load the two UTF-8 CSV inputs that are later passed to fm_model(user_df, songDB).
# NOTE(review): the notebook output following this cell shows a DtypeWarning
# ("Columns (11) have mixed types"); which of the two files triggers it cannot
# be told from here -- consider passing an explicit dtype= once confirmed.
user_df= pd.read_csv("./선곡표종합.csv", encoding = 'utf-8')
songDB= pd.read_csv("./preprocessed_songDB.csv", encoding= 'utf-8')
C:\Users\user\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3165: DtypeWarning: Columns (11) have mixed types.Specify dtype option on import or set low_memory=False.
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
import numpy as np
import pandas as pd
import re
import datetime
from sklearn.preprocessing import MinMaxScaler
from dateutil.parser import parse
from collections import OrderedDict
from sklearn.metrics import roc_auc_score
from collections import Counter
from sklearn.preprocessing import LabelEncoder
! pip install xlearn
! pip install gensim
# Broadcast-date preprocessing
def parse_(x):
    """Parse *x* with ``dateutil.parser.parse`` and return the parsed value."""
    parsed = parse(x)
    return parsed
# Genre preprocessing
def sp(x):
    """Return the text of *x* before its first comma (all of *x* if it has none)."""
    head, _sep, _rest = x.partition(',')
    return head
# Lookup table from Korean genre labels to the English labels used downstream.
change_value = {
    "포크": 'Folk',
    '랩/힙합': 'Hip Hop',
    '댄스': 'Dance',
    '록/메탈': 'Rock',
    'R&B/Soul': 'R&B',
    '성인가요': 'adult',
    '인디음악': 'Indie',
    '일렉트로니카': 'Electronica',
    '재즈': 'Jazz',
    '포크/블루스': 'Folk',
    '뉴에이지': 'Newage',
    '월드뮤직': 'Worldmusic',
    '국내영화': 'Domestic movie',
    '국외영화': 'Foreign movie',
    '국내드라마': 'Domestic drama',
    '컨트리': 'Country',
    '클래식': 'Classic',
    '블루스': 'Blues',
    '국악': 'Korean classical',
    '애니메이션/웹툰': 'Animation',
    '키즈': 'Kids',
    "국내뮤지컬": 'Domestic musicals',
    '국외뮤지컬': 'Foreign musicals',
    '게임': 'Game',
    '국외드라마': 'Foreign drama',
    '불교': 'Buddhism',
    '뮤직테라피': 'Music Therapy',
    '발라드': 'Ballade',
}
# Frequency preprocessing
def gb_rating(x):
    """Convert a scaled play frequency *x* into a score from 1 to 5.

    Upper bounds (inclusive) of scores 1-4 are 0.001, 0.05, 0.3 and 0.6;
    anything that matches none of them (including NaN) scores 5.
    """
    if x <= 0.001:
        return 1
    elif x <= 0.05:
        return 2
    elif x <= 0.3:
        return 3
    elif x <= 0.6:
        return 4
    else:
        return 5
# Likes preprocessing
def gb_likes(x):
    """Convert a scaled like count *x* into a score from 1 to 5.

    Scores 1-4 have the inclusive upper bounds 0.02, 0.05, 0.1 and 0.3;
    a value matching none of them (including NaN) scores 5.
    """
    for score, upper_bound in enumerate((0.02, 0.05, 0.1, 0.3), start=1):
        if x <= upper_bound:
            return score
    return 5
# Using genre and release as meta: the top-rated songs within one meta group
def get_meta_popular_list(x, k):
    """Return up to *k* distinct ``song_id`` values of *x*, highest ``mean`` first.

    A song appearing on several rows is kept once, at its best-ranked position.
    """
    ranked = x.sort_values(by=['mean'], ascending=False)
    distinct_ids = list(dict.fromkeys(ranked['song_id'].tolist()))
    return distinct_ids[:k]
# Turn a temperature reading into a bucket label
def temp_bucketize(x):
    """Return the string label of the 5-unit-wide bucket that *x* falls into.

    Each bucket is labelled with the multiple of five just below its range:
    ``x <= -5`` gives '-10', ``-5 < x <= 0`` gives '-5', and so on up to
    '15' for ``15 < x <= 20``. Everything else (including NaN) gives '20'.
    """
    upper_bounds = (
        (-5, '-10'),
        (0, '-5'),
        (5, '0'),
        (10, '5'),
        (15, '10'),
        (20, '15'),
    )
    for upper, label in upper_bounds:
        if x <= upper:
            return label
    return '20'
# Turn a sunshine-duration reading into a bucket label
def sunshine_bucketize(x):
    """Return the string label of the 5-unit-wide bucket that *x* falls into.

    The thresholds and labels are the same as the temperature buckets:
    '-10' for ``x <= -5``, then '-5', '0', '5', '10', '15' for each further
    step of five, and '20' for everything else (including NaN).
    """
    if x <= -5:
        return '-10'
    elif x <= 0:
        return '-5'
    elif x <= 5:
        return '0'
    elif x <= 10:
        return '5'
    elif x <= 15:
        return '10'
    elif x <= 20:
        return '15'
    return '20'
class FactorizationMachine():
    """
    Second-order Factorization Machine for binary classification.

    This class is an implementation of this paper :
    https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
    Just an example of the FM algorithm, not for production.
    -----
    Only simple methods are available.
    e.g 1 : batch training, adagrad optimizer, parallel training are not supported.
    e.g 2 : simple optimizer Stochastic Gradient Descent with L2 Regularization.
    e.g 3 : the whole dataset is held in local memory.

    Input files are libsvm-style text: one sample per line,
    ``<label> <index>:<value> <index>:<value> ...`` separated by whitespace.
    """

    def __init__(self, k, lr, l2_reg, l2_lambda, epoch, early_stop_window, train_data, valid_data):
        """
        :param k: number of latent factors per feature
        :param lr: learning rate
        :param l2_reg: bool parameter for L2 regularization
        :param l2_lambda: lambda of L2 regularization
        :param epoch: number of training epochs
        :param early_stop_window: how many recorded validation losses back the
            current loss is compared with when checking for early stopping
        :param train_data: path of train data
        :param valid_data: path of valid data
        """
        self._k = k
        self._lr = lr
        self._l2_reg = l2_reg
        self._l2_lambda = l2_lambda
        self._epoch = epoch
        self._early_stop_window = early_stop_window
        self._train_file_path = train_data
        self._valid_file_path = valid_data
        self._valid_loss_list = []

    def _load_dataset(self):
        """
        1. load dataset to memory from train/valid path
        2. find max index in dataset for w's vector size
        3. initialize the model vectors and the numpy samples
        """
        # `with` guarantees both handles are closed even if a read fails.
        with open(self._train_file_path, 'r') as train_file:
            self._train_data = train_file.read().split('\n')
        with open(self._valid_file_path, 'r') as valid_file:
            self._valid_data = valid_file.read().split('\n')

        # find max index
        self.feature_max_index = 0
        print("Start to init FM vectors.")
        for rows in (self._train_data, self._valid_data):
            for row in rows:
                # split() without argument tolerates repeated/trailing blanks
                for element in row.split()[1:]:
                    index = int(element.split(":")[0])
                    if self.feature_max_index < index:
                        self.feature_max_index = index

        # init FM vectors
        self._init_vectors()
        print("Finish init FM vectors.")

    @staticmethod
    def _parse_rows(rows):
        """
        Convert libsvm text rows into ``(x_data, y_data)``.

        ``x_data`` is a list of ``[index_array, value_array]`` pairs and
        ``y_data`` a float array of labels. Rows without at least one feature
        (e.g. the empty string after a trailing newline) are skipped, and a
        label is recorded only for rows that are kept, so both outputs always
        have the same length and order.
        """
        x_data = []
        y_data = []
        for row in rows:
            element = row.split()
            if len(element) > 1:
                pairs = [pair.split(":") for pair in element[1:]]
                y_data.append(float(element[0]))
                x_data.append([np.array([int(pair[0]) for pair in pairs]),
                               np.array([float(pair[1]) for pair in pairs])])
        return x_data, np.array(y_data, dtype=float)

    def _init_vectors(self):
        """
        1. initialize FM vectors (standard-normal random values)
        2. Conduct naive transformation libsvm format txt data to numpy training sample.
        """
        self.w = np.random.randn(self.feature_max_index + 1)
        self.v = np.random.randn(self.feature_max_index + 1, self._k)
        # make numpy dataset
        self.train_x_data, self.train_y_data = self._parse_rows(self._train_data)
        self.valid_x_data, self.valid_y_data = self._parse_rows(self._valid_data)

    def train(self):
        """
        Train FM model by Gradient Descent with L2 regularization.

        Each epoch first evaluates and prints loss/AUC on both datasets with
        the current parameters, then checks early stopping, then runs one SGD
        pass over the training data. Exactly ``epoch`` epochs are run unless
        early stopping triggers.

        :return: 0 when stopped early, otherwise None
        """
        self._load_dataset()
        for epoch_num in range(1, self._epoch + 1):
            train_y_hat = self.predict(data=self.train_x_data)
            valid_y_hat = self.predict(data=self.valid_x_data)
            train_loss = self._get_loss(y_data=self.train_y_data, y_hat=train_y_hat)
            valid_loss = self._get_loss(y_data=self.valid_y_data, y_hat=valid_y_hat)
            train_auc = roc_auc_score(self.train_y_data, train_y_hat)
            valid_auc = roc_auc_score(self.valid_y_data, valid_y_hat)
            self._print_learning_info(epoch=epoch_num, train_loss=train_loss, valid_loss=valid_loss,
                                      train_auc=train_auc, valid_auc=valid_auc)
            if self._check_early_stop(valid_loss=valid_loss):
                print("Early stop at epoch:", epoch_num)
                return 0
            self._stochastic_gradient_descent(self.train_x_data, self.train_y_data)

    def _raw_score(self, feat_idx, val):
        """
        FM model equation for one sample, before the sigmoid.

        :param feat_idx: array of non-zero feature indices
        :param val: array of the matching feature values
        :return: tuple ``(score, vx, cross_sum)`` where ``vx`` holds
            ``v[i] * x_i`` per non-zero feature and ``cross_sum`` is its
            column-wise sum (both are reused by the gradient step)
        """
        # linear feature score
        linear_feature_score = np.sum(self.w[feat_idx] * val)
        # factorized feature score
        vx = self.v[feat_idx] * (val.reshape(-1, 1))
        cross_sum = np.sum(vx, axis=0)
        square_sum = np.sum(vx * vx, axis=0)
        cross_feature_score = 0.5 * np.sum(np.square(cross_sum) - square_sum)
        return linear_feature_score + cross_feature_score, vx, cross_sum

    def predict(self, data):
        """
        Implementation of FM model's equation on O(kmd)
        -----
        :param data: list of samples, each ``[index_array, value_array]``
            holding only the non-zero features (md)
        :return: numpy array with one sigmoid probability per sample
        """
        num_data = len(data)
        scores = np.zeros(num_data)
        for n in range(num_data):
            scores[n] = self._raw_score(data[n][0], data[n][1])[0]
        # Sigmoid transformation for binary classification
        scores = 1.0 / (1.0 + np.exp(-scores))
        return scores

    def _get_loss(self, y_data, y_hat):
        """
        Calculate the summed log loss plus the L2 penalty (two types of
        coefficient - w, v) when regularization is enabled.
        """
        l2_norm = 0
        if self._l2_reg:
            w_norm = np.sqrt(np.sum(np.square(self.w)))
            v_norm = np.sqrt(np.sum(np.square(self.v)))
            l2_norm = self._l2_lambda * (w_norm + v_norm)
        # Keep predictions strictly inside (0, 1): a saturated sigmoid would
        # otherwise produce log(0) = -inf and a nan/inf loss.
        eps = 1e-15
        y_hat = np.clip(y_hat, eps, 1 - eps)
        return -1 * np.sum((y_data * np.log(y_hat)) + ((1 - y_data) * np.log(1 - y_hat))) + l2_norm

    def _check_early_stop(self, valid_loss):
        """
        Record ``valid_loss`` and report whether training should stop.

        Stops when more than five losses have been recorded and the loss
        ``early_stop_window`` entries back (counting the current one) is lower
        than the current loss. A window that reaches beyond the recorded
        history never triggers a stop.
        """
        self._valid_loss_list.append(valid_loss)
        history = self._valid_loss_list
        window = self._early_stop_window
        # The window bound prevents a negative/out-of-range index from
        # silently comparing against the wrong epoch.
        if len(history) > 5 and 1 <= window <= len(history):
            prev_loss = history[len(history) - window]
            if prev_loss < valid_loss:
                return True
        return False

    def _print_learning_info(self, epoch, train_loss, valid_loss, train_auc, valid_auc):
        """Print one line with the losses and AUCs of the given epoch."""
        print("epoch:", epoch, "||", "train_loss:", train_loss, "||", "valid_loss:", valid_loss,
              "||", "Train AUC:", train_auc, "||", "Test AUC:", valid_auc)

    def _stochastic_gradient_descent(self, x_data, y_data):
        """
        Update each coefs (w, v) by Gradient Descent, one sample at a time.

        For every sample the log-loss gradient is ``cost * d(score)/d(param)``
        with ``cost = y_hat - y``. With L2 regularization enabled the penalty
        gradient ``l2_lambda * param`` is added to it; the penalty is not
        scaled by ``cost``, so it always shrinks the parameters towards zero.
        """
        for data, y in zip(x_data, y_data):
            feat_idx = data[0]
            val = data[1]
            val_col = val.reshape(-1, 1)
            score, vx, cross_sum = self._raw_score(feat_idx, val)
            y_hat = 1.0 / (1.0 + np.exp(-score))
            cost = y_hat - y
            grad_w = cost * val
            # d(score)/d(v_if) = x_i * sum_j(v_jf * x_j) - v_if * x_i^2
            grad_v = cost * (cross_sum * val_col - vx * val_col)
            if self._l2_reg:
                grad_w = grad_w + self._l2_lambda * self.w[feat_idx]
                grad_v = grad_v + self._l2_lambda * self.v[feat_idx]
            self.w[feat_idx] = self.w[feat_idx] - self._lr * grad_w
            self.v[feat_idx] = self.v[feat_idx] - self._lr * grad_v
# Songs of one program, ordered by their frequency score
def get_music_list_sort_by_time(x):
    """Return the ``song_id`` values of *x* as a list, sorted by ascending ``rating``."""
    by_rating = x.sort_values(['rating'])
    song_ids = by_rating['song_id']
    return song_ids.tolist()
# Most recently played songs
def get_recent_song_list_sort_by_time(x, k):
    """Return the ``song_id`` values of the *k* latest rows of *x*.

    Rows are ordered by ascending ``air_date`` and the last *k* are returned,
    oldest first. ``k == 0`` yields an empty list (a plain ``[-k:]`` slice
    would return every song instead); a *k* larger than the number of rows
    returns all of them.
    """
    ordered_ids = x.sort_values(['air_date'])['song_id']
    return ordered_ids.tail(k).tolist()
## Popular songs
def get_meta_popular_list(x, k):
    """Return up to *k* distinct ``song_id`` values of *x*, highest ``mean`` first.

    *x* may contain several rows for the same song; each song is reported
    once, at its best-ranked position, so the *k* slots are not used up by
    repeats of a single song.
    """
    ranked_ids = x.sort_values(by=['mean'], ascending=False)['song_id'].tolist()
    # dict.fromkeys keeps first-seen order while dropping repeated ids
    song_id_list = list(dict.fromkeys(ranked_ids))
    return song_id_list[:k]
def most_frequent(x):
    """Return the element that occurs most often in *x*.

    Raises IndexError when *x* is empty.
    """
    top_entries = Counter(x).most_common(1)
    winner, _count = top_entries[0]
    return winner
def _clean_name_column(column):
    """Normalise a title/artist column so it can be joined against the song DB.

    Per value: drop parenthesised parts, drop apostrophes, lower-case, replace
    every character that is not a Latin letter, digit or Hangul syllable with
    a blank, collapse runs of blanks and strip the ends.
    """
    def clean(value):
        text = re.sub(r'\([^)]*\)', "", str(value))
        text = text.replace("'", "").lower()
        text = re.sub('[^A-Za-z0-9가-힣]', ' ', text)
        text = re.sub(" +", " ", text)
        return text.strip()

    return column.apply(clean)


def _index_from_one(values):
    """Map every value to a 1-based id, in order of first appearance."""
    return {value: idx + 1 for idx, value in enumerate(values)}


def fm_model(df, songDB):
    """Build per-program song recommendations and write them to a CSV file.

    Pipeline:
      1. clean song/artist names of ``df`` and inner-join it with ``songDB``;
      2. derive per-program play-frequency scores (``rating``) and like scores;
      3. split each program's rows into train/test at its 70% air-date
         quantile, write both as libsvm text and train a FactorizationMachine;
      4. build recommendation candidates (Word2Vec neighbours of recently
         played songs, globally popular songs, songs popular for the
         program's preferred year/genre/weather);
      5. score the candidates with the FM and save them.

    Side effects: overwrites ``df['songName']`` and ``df['artistName']`` of
    the passed frame in place, writes ``train.txt``, ``test.txt`` and
    ``./tbn_recommendation_list.csv`` in the working directory, and prints
    progress information.

    :param df: playlist log; must provide the columns songName, artistName,
        ProgramName, year, month and date
    :param songDB: song metadata joined on songName/artistName; the joined
        frame must provide song_id, genre, release, likes, weather, region,
        '평균기온' and '일조시간(Hr)'
    :return: a copy of ``df`` taken before any preprocessing

    NOTE(review): the recommendation table is only written to disk; the return
    value is the untouched input copy -- confirm that this is intended.
    NOTE(review): libsvm indices are ``offset + value - 1`` using raw column
    values, so ProgramName 0 maps to index -1 and raw song ids are used as
    offsets (very large, sparse index space). Left unchanged because fixing it
    changes the whole feature map.
    """
    tbn_program = df.copy()

    # --- Title / artist preprocessing -------------------------------------
    df['songName'] = _clean_name_column(df['songName'])
    df['artistName'] = _clean_name_column(df['artistName'])
    df = df.dropna(how='any', axis=0)

    # --- Merge with the song DB -------------------------------------------
    result = pd.merge(df, songDB, on=['songName', 'artistName'])

    # --- Broadcast date ----------------------------------------------------
    for part in ('year', 'month', 'date'):
        result[part] = list(map(str, result[part]))
    result['air_date'] = result['year'] + "-" + result['month'] + '-' + result['date']
    result['air_date'] = list(map(str, result['air_date']))
    result['air_date'] = result['air_date'].apply(parse_)

    # --- Genre: keep the first listed genre, drop rows whose genre contains
    # '-', translate to English labels -------------------------------------
    result['genre'] = result['genre'].apply(sp)
    # The merge result has a default 0..n-1 index, so positions equal labels.
    idx_ = [i for i, genre in enumerate(result['genre']) if '-' in genre]
    result.drop(idx_, inplace=True)
    result = result.replace({'genre': change_value})
    result['genre'] = result['genre'].apply(lambda x: x.strip())

    # --- Play frequency per program, min-max scaled within each program ----
    # groupby().size() names the count column explicitly, which does not
    # depend on how the installed pandas names value_counts() results.
    play_counts = (result.groupby(['ProgramName', 'song_id']).size()
                   .rename('rating').reset_index())
    dat_ = pd.merge(result, play_counts, on=['ProgramName', 'song_id'], how='inner')
    # Scaled values are floats; cast first so the assignment below is exact.
    dat_['rating'] = dat_['rating'].astype(float)
    for program in dat_['ProgramName'].unique():
        in_program = dat_['ProgramName'] == program
        scaled = MinMaxScaler().fit_transform(dat_.loc[in_program, ['rating']])
        dat_.loc[in_program, ['rating']] = scaled

    # --- Likes, min-max scaled over all rows --------------------------------
    likes = np.reshape([dat_['likes']], (-1, 1))
    dat_['likes'] = MinMaxScaler().fit_transform(likes).ravel()

    # --- Program label encoding ---------------------------------------------
    le = LabelEncoder()
    dat_['ProgramName'] = le.fit_transform(dat_['ProgramName'])
    # Encoded ids of the programs that occur in the caller's frame
    encoded_ids = list(dat_['ProgramName'].unique())
    decoded_names = le.inverse_transform(encoded_ids)
    known_programs = set(tbn_program['ProgramName'].unique())
    programs_id = [pid for pid, name in zip(encoded_ids, decoded_names)
                   if name in known_programs]

    # --- Turn scaled frequency and likes into 1-5 scores --------------------
    dat_['rating'] = round(dat_['rating'], 4)
    dat_['rating'] = dat_['rating'].apply(gb_rating)
    dat_['likes'] = dat_['likes'].apply(gb_likes)

    # --- Meta features -------------------------------------------------------
    mean_rating = dat_.groupby('song_id')['rating'].agg(['mean', 'count'])
    merge_df = pd.merge(mean_rating, dat_, on='song_id')
    dat_['avg_temp'] = dat_['평균기온'].apply(temp_bucketize)
    dat_['sunshine'] = dat_['일조시간(Hr)'].apply(sunshine_bucketize)

    # Categorical value -> 1-based id, per feature
    weather_index = _index_from_one(dat_['weather'].unique())
    avg_temp_index = _index_from_one(dat_['avg_temp'].unique())
    sunshine_index = _index_from_one(dat_['sunshine'].unique())
    genre_index = _index_from_one(merge_df['genre'].unique())
    year_index = _index_from_one(merge_df['release'].unique())

    # --- Train / test split: per program at the 70% air-date quantile ------
    cutoff = dat_.groupby('ProgramName')['air_date'].quantile(q=0.7, interpolation='nearest')
    train_parts = []
    test_parts = []
    for pid in range(len(dat_['ProgramName'].unique())):
        program_rows = dat_[dat_['ProgramName'] == pid]
        train_parts.append(program_rows[program_rows['air_date'] <= cutoff.loc[pid]])
        test_parts.append(program_rows[program_rows['air_date'] > cutoff.loc[pid]])
    train_df = pd.concat(train_parts)
    test_df = pd.concat(test_parts)
    train_final = train_df.copy()

    # --- Feature mapping ------------------------------------------------------
    feature_cols = ['ProgramName', 'rating', 'likes', 'weather', 'avg_temp',
                    'genre', 'release', 'sunshine', 'song_id']

    def encode_features(frame):
        """Select the model columns, replace categories by ids, add label y."""
        frame = frame.reset_index(drop=True)[feature_cols].copy()
        frame['genre'] = frame['genre'].apply(lambda x: genre_index[x])
        frame['release'] = frame['release'].apply(lambda x: year_index[x])
        frame['weather'] = frame['weather'].apply(lambda x: weather_index[x])
        frame['avg_temp'] = frame['avg_temp'].apply(lambda x: avg_temp_index[x])
        frame['sunshine'] = frame['sunshine'].apply(lambda x: sunshine_index[x])
        # Positive label: frequency score of at least 2
        frame['y'] = frame['rating'].apply(lambda x: 1 if x >= 2 else 0)
        return frame

    train_df = encode_features(train_df)
    test_df = encode_features(test_df)

    # Start offset of every feature block = number of distinct training
    # values of all blocks before it (in this fixed order).
    col_accum_index_dict = {}
    cumulative = 0
    for key in ('ProgramName', 'rating', 'likes', 'genre', 'weather',
                'avg_temp', 'sunshine', 'release', 'song_id'):
        col_accum_index_dict[key] = cumulative
        cumulative = cumulative + len(train_df[key].unique())

    def to_libsvm_line(row):
        """Format one encoded row as '<y> <index>:1 ...' ('rating' excluded)."""
        vec = [str(row['y'])]
        for key, value in row.drop(labels=['rating', 'y']).items():
            col_idx = col_accum_index_dict[key] + value - 1
            vec.append(str(col_idx) + ":" + str(1))
        return " ".join(vec)

    ### Ranking model (Factorization Machine)
    # Show the first training sample as a sanity check
    for _, row in train_df.iterrows():
        print("%s\n" % to_libsvm_line(row))
        break

    for path, frame in (('train.txt', train_df), ('test.txt', test_df)):
        with open(path, 'w') as txt_file:
            for _, row in frame.iterrows():
                txt_file.write("%s\n" % to_libsvm_line(row))

    fm = FactorizationMachine(k=4,  # dimension of the latent factors
                              lr=0.005,  # learning rate
                              l2_reg=True,  # use regularization
                              l2_lambda=0.0002,
                              epoch=40,  # number of passes over the data
                              early_stop_window=3,  # look-back used by the early-stop check
                              train_data='./train.txt',
                              valid_data='./test.txt')
    fm.train()

    ### Candidate generation (train split only)
    train_rate_list = train_final.groupby('ProgramName')[['song_id', 'rating']].apply(
        lambda x: get_music_list_sort_by_time(x))

    ## Word2Vec training: one "sentence" per program made of song/year/genre tokens
    song_meta_dict = train_final.set_index('song_id')[['release', 'genre']].to_dict()
    song2vec_dataset = []
    for song_list in train_rate_list:
        meta_list = []
        for music_id in song_list:
            meta_list.append("song_id:" + str(music_id))
            meta_list.append("year:" + str(song_meta_dict['release'][music_id]))
            meta_list.append("genre:" + str(song_meta_dict['genre'][music_id]))
        song2vec_dataset.append(meta_list)

    from gensim.models import Word2Vec
    model = Word2Vec(song2vec_dataset,
                     window=6,  # context window size
                     sg=1,  # skip-gram OR cbow
                     hs=0,  # hierarchical softmax OR negative sampling
                     negative=20,  # negative sampling parameter
                     min_count=1  # minimum word frequency
                     )

    # Most recently played songs per program
    recent_program_song_list = train_final.groupby('ProgramName')[['song_id', 'air_date']].apply(
        lambda x: get_recent_song_list_sort_by_time(x, 10))

    # Up to k related songs for every song. Each distinct song is queried
    # once; songs that only occur in the test split are not in the vocabulary.
    k = 20
    item2item = {}
    for song_id in dat_['song_id'].unique():
        item2item[song_id] = []
        try:
            sim_list = model.wv.most_similar("song_id:" + str(song_id), topn=k + 20)
        except KeyError:
            print("word", str(song_id), "not in vocabulary")
            continue
        for song_tup in sim_list:
            tup_info = song_tup[0].split(":")
            if (tup_info[0] == "song_id") and (len(item2item[song_id]) < 20):
                item2item[song_id].append(tup_info[1])

    # Songs similar to the recently played ones
    def get_similar_items(x, k):
        similar_items = []
        for song_id in x:
            if song_id in item2item:
                similar_items.append(item2item[song_id][:k])
        return [item for items in similar_items for item in items]

    recent_song_similar_items = recent_program_song_list.apply(lambda x: get_similar_items(x, 30))

    ### Candidates based on the training data
    mean_rating = train_final.groupby('song_id')['rating'].agg(['mean', 'count'])
    ### Top 30 songs by mean frequency score among songs seen more than 5 times
    popular_song_list = mean_rating[mean_rating['count'] > 5]['mean'].sort_values(
        ascending=False).index[:30].tolist()

    ## Top 30 songs per genre, release year and weather
    merge_df = pd.merge(mean_rating, dat_, on='song_id')
    genre_popular = merge_df.groupby('genre').apply(lambda x: get_meta_popular_list(x, k=30))
    year_popular = merge_df.groupby('release').apply(lambda x: get_meta_popular_list(x, k=30))
    weather_popular = merge_df.groupby('weather').apply(lambda x: get_meta_popular_list(x, k=30))

    #### Merge the candidate groups
    recent_song_similar_items = recent_song_similar_items.apply(lambda x: x + popular_song_list)

    # Key the lookup by song_id (not by row number) so that the membership
    # tests below really look up songs; for songs with several rows the last
    # row wins.
    song_dict = (merge_df.drop_duplicates('song_id', keep='last')
                 .set_index('song_id')[['release', 'genre', 'weather']].to_dict())

    def get_items_by_prefer_meta(x):
        """Popular songs of the most frequent year/genre/weather among songs *x*.

        Returns None when a preference cannot be determined or has no popular
        list; such programs are dropped from the candidates below.
        """
        year_list = [song_dict['release'][s] for s in x if s in song_dict['release']]
        genre_list = [song_dict['genre'][s] for s in x if s in song_dict['genre']]
        weather_list = [song_dict['weather'][s] for s in x if s in song_dict['weather']]
        if not (year_list and genre_list and weather_list):
            return None
        try:
            return (year_popular[most_frequent(year_list)]
                    + genre_popular[most_frequent(genre_list)]
                    + weather_popular[most_frequent(weather_list)])
        except KeyError:
            return None

    items_by_prefer_meta = train_rate_list.apply(lambda x: get_items_by_prefer_meta(x))
    merged_candidates = recent_song_similar_items + items_by_prefer_meta
    merged_candidates = merged_candidates.dropna(how='any', axis=0)
    recommendations = merged_candidates.apply(lambda x: set(x))

    ## Score the candidates
    program_df = dat_[['ProgramName', 'weather', 'avg_temp', 'sunshine']].copy()
    program_df['weather'] = dat_['weather'].apply(lambda x: weather_index[x])
    program_df['avg_temp'] = dat_['avg_temp'].apply(lambda x: avg_temp_index[x])
    program_df['sunshine'] = dat_['sunshine'].apply(lambda x: sunshine_index[x])
    song_df = dat_[['song_id', 'release', 'genre']].copy()
    song_df['release'] = song_df['release'].apply(lambda x: year_index[x])
    song_df['genre'] = song_df['genre'].apply(lambda x: genre_index[x])
    song_index_dict = song_df.set_index('song_id')[['release', 'genre']].to_dict()
    program_index_dict = program_df.set_index('ProgramName')[['weather', 'avg_temp', 'sunshine']].to_dict()

    def make_libsvm_row(uid, mid):
        """Feature string (without label) for program *uid* and song *mid*."""
        user_id = str(col_accum_index_dict['ProgramName'] + uid - 1) + ":" + str(1)
        weather = str(col_accum_index_dict['weather'] + program_index_dict['weather'][uid] - 1) + ":" + str(1)
        avg_temp = str(col_accum_index_dict['avg_temp'] + program_index_dict['avg_temp'][uid] - 1) + ":" + str(1)
        sunshine = str(col_accum_index_dict['sunshine'] + program_index_dict['sunshine'][uid] - 1) + ":" + str(1)
        song = str(col_accum_index_dict['release'] + song_index_dict['release'][int(mid)] - 1) + ":" + str(1)
        singer = str(col_accum_index_dict['genre'] + song_index_dict['genre'][int(mid)] - 1) + ":" + str(1)
        return " ".join([user_id, weather, song, singer, avg_temp, sunshine])

    # make vector list
    vector_list = []
    uid_mid_list = []
    # Series.items() replaces iteritems(), which pandas 2.0 removed
    for uid, song_set in recommendations.items():
        for mid in song_set:
            element = make_libsvm_row(uid, mid).split(" ")
            vector_list.append([np.array([int(pair.split(":")[0]) for pair in element]),
                                np.array([int(pair.split(":")[1]) for pair in element])])
            uid_mid_list.append((uid, mid))
    predict_results = fm.predict(vector_list)

    #### Collect scores per program; the first score seen for a song wins
    user_recommendation_dict = {}
    for song_prob, uid_mid in zip(predict_results, uid_mid_list):
        uid, mid = int(uid_mid[0]), int(uid_mid[1])
        user_recommendation_dict.setdefault(uid, {}).setdefault(mid, song_prob)

    def recomend_list(program_id):
        recomend = list(map(int, list(recommendations[program_id])))
        reco = dat_[dat_['song_id'].isin(recomend)][
            ['songName', 'artistName', 'genre', 'release', 'weather', 'likes', 'region', 'song_id']
        ].drop_duplicates()
        return reco

    def scored_recommendations(program_id, default_predict):
        """Recommendation rows of one program with name and FM score attached.

        Raises KeyError when the program has no recommendations.
        """
        frame = recomend_list(program_id)
        frame['program'] = "".join(le.inverse_transform([program_id])[0].split())
        scores = user_recommendation_dict[program_id]
        # Match on the song_id values; `x in series` would test index labels.
        frame['predict'] = [round(scores[song_id], 4) if song_id in scores else default_predict
                            for song_id in frame['song_id']]
        return frame

    ## Extract the recommendation lists
    print(programs_id)
    frames = [scored_recommendations(programs_id[0], 1)]
    for program_id in programs_id[1:]:
        try:
            frames.append(scored_recommendations(program_id, 0))
        except KeyError:
            # Program without candidates (dropped above): nothing to report
            continue
    tbn_program_final = pd.concat(frames)
    tbn_program_final.to_csv('./tbn_recommendation_list.csv', encoding='euc-kr', index=False)
    return tbn_program
Requirement already satisfied: xlearn in c:\users\user\anaconda3\lib\site-packages (0.40a1)
Requirement already satisfied: gensim in c:\users\user\anaconda3\lib\site-packages (4.1.2)
Requirement already satisfied: smart-open>=1.8.1 in c:\users\user\anaconda3\lib\site-packages (from gensim) (5.2.1)
Requirement already satisfied: scipy>=0.18.1 in c:\users\user\anaconda3\lib\site-packages (from gensim) (1.6.2)
Requirement already satisfied: Cython==0.29.23 in c:\users\user\anaconda3\lib\site-packages (from gensim) (0.29.23)
Requirement already satisfied: numpy>=1.17.0 in c:\users\user\anaconda3\lib\site-packages (from gensim) (1.19.5)
# Run the whole pipeline on the loaded frames. fm_model returns a copy of
# its first argument taken before preprocessing; the recommendation table
# itself is written to ./tbn_recommendation_list.csv.
추천목록 = fm_model(user_df, songDB)
0 -1:1 149:1 214:1 282:1 154:1 288:1 284:1 1766167:1
Start to init FM vectors.
Finish init FM vectors.
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1935522 not in vocabulary
word 1569506 not in vocabulary
word 1569506 not in vocabulary
word 1569506 not in vocabulary
word 1569506 not in vocabulary
word 1569506 not in vocabulary
word 1569506 not in vocabulary
word 3827 not in vocabulary
word 3827 not in vocabulary
word 5473462 not in vocabulary
word 5473462 not in vocabulary
word 5610143 not in vocabulary
word 2627950 not in vocabulary
word 2627950 not in vocabulary
word 30260502 not in vocabulary
word 30776382 not in vocabulary
word 30776382 not in vocabulary
word 32457754 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 33746565 not in vocabulary
word 621287 not in vocabulary
word 4828190 not in vocabulary
word 4828190 not in vocabulary
word 4670129 not in vocabulary
word 66120 not in vocabulary
word 664261 not in vocabulary
word 664261 not in vocabulary
word 664261 not in vocabulary
word 664261 not in vocabulary
word 664261 not in vocabulary
word 3753310 not in vocabulary
word 1121191 not in vocabulary
word 1121191 not in vocabulary
word 33642931 not in vocabulary
word 33642931 not in vocabulary
word 1778230 not in vocabulary
word 32012528 not in vocabulary
word 3080890 not in vocabulary
word 519279 not in vocabulary
word 32203109 not in vocabulary
word 4743561 not in vocabulary
word 4743574 not in vocabulary
word 2346661 not in vocabulary
word 2346661 not in vocabulary
word 2346661 not in vocabulary
word 2346661 not in vocabulary
word 2346661 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 33043504 not in vocabulary
word 9624346 not in vocabulary
word 33038706 not in vocabulary
word 1099594 not in vocabulary
word 411066 not in vocabulary
word 1459449 not in vocabulary
word 1459449 not in vocabulary
word 1459449 not in vocabulary
word 1459449 not in vocabulary
word 1459449 not in vocabulary
word 31655897 not in vocabulary
word 31655897 not in vocabulary
word 4324341 not in vocabulary
word 106726416 not in vocabulary
word 30537309 not in vocabulary
word 338652 not in vocabulary
word 5224735 not in vocabulary
word 1068643 not in vocabulary
word 1068643 not in vocabulary
word 33423495 not in vocabulary
word 2531788 not in vocabulary
word 33970958 not in vocabulary
word 33628328 not in vocabulary
word 38385 not in vocabulary
word 1917233 not in vocabulary
word 4779294 not in vocabulary
word 860577 not in vocabulary
word 38585 not in vocabulary
word 38585 not in vocabulary
word 2271648 not in vocabulary
word 2552851 not in vocabulary
word 33453960 not in vocabulary
word 33453960 not in vocabulary
word 33453960 not in vocabulary
word 32691297 not in vocabulary
word 32691297 not in vocabulary
word 1755551 not in vocabulary
word 5047959 not in vocabulary
word 33861841 not in vocabulary
word 33516954 not in vocabulary
word 82912 not in vocabulary
word 82912 not in vocabulary
word 30781649 not in vocabulary
word 184436 not in vocabulary
word 184436 not in vocabulary
word 55327 not in vocabulary
word 55327 not in vocabulary
word 55327 not in vocabulary
word 33946475 not in vocabulary
word 31988773 not in vocabulary
word 32445339 not in vocabulary
word 247269 not in vocabulary
word 31737198 not in vocabulary
word 5472103 not in vocabulary
word 8199190 not in vocabulary
word 317254 not in vocabulary
word 32122539 not in vocabulary
word 32122539 not in vocabulary
word 1197688 not in vocabulary
word 1708676 not in vocabulary
word 1708676 not in vocabulary
word 1708676 not in vocabulary
word 31030574 not in vocabulary
word 31030574 not in vocabulary
word 31030574 not in vocabulary
word 31030574 not in vocabulary
word 31030574 not in vocabulary
word 31030574 not in vocabulary
word 4658849 not in vocabulary
word 47283 not in vocabulary
word 5225331 not in vocabulary
word 419999 not in vocabulary
word 797095 not in vocabulary
word 33806217 not in vocabulary
word 33806217 not in vocabulary
word 442442 not in vocabulary
word 32995890 not in vocabulary
word 32143689 not in vocabulary
word 3060824 not in vocabulary
word 454477 not in vocabulary
word 3060824 not in vocabulary
word 454477 not in vocabulary
word 32664127 not in vocabulary
word 32664127 not in vocabulary
word 32664127 not in vocabulary
word 106857875 not in vocabulary
word 106857875 not in vocabulary
word 31703498 not in vocabulary
word 32012356 not in vocabulary
word 32012356 not in vocabulary
word 5711462 not in vocabulary
word 52214 not in vocabulary
word 51052 not in vocabulary
word 51052 not in vocabulary
word 51052 not in vocabulary
word 51052 not in vocabulary
word 33899466 not in vocabulary
word 33899466 not in vocabulary
word 31657937 not in vocabulary
word 31083322 not in vocabulary
word 31083322 not in vocabulary
word 33868791 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 49543 not in vocabulary
word 33628310 not in vocabulary
word 2951796 not in vocabulary
word 33955533 not in vocabulary
word 33955533 not in vocabulary
word 2192666 not in vocabulary
word 3040707 not in vocabulary
word 2352749 not in vocabulary
word 30844118 not in vocabulary
word 1125392 not in vocabulary
word 33666746 not in vocabulary
word 3018899 not in vocabulary
word 4824874 not in vocabulary
word 230251 not in vocabulary
word 5711158 not in vocabulary
word 4099280 not in vocabulary
word 3789644 not in vocabulary
word 33825315 not in vocabulary
word 2604502 not in vocabulary
word 31015120 not in vocabulary
word 3833290 not in vocabulary
word 1798749 not in vocabulary
word 510941 not in vocabulary
word 30403590 not in vocabulary
word 2663168 not in vocabulary
word 1629574 not in vocabulary
word 3692955 not in vocabulary
word 33669074 not in vocabulary
word 33978819 not in vocabulary
word 33669074 not in vocabulary
word 33978819 not in vocabulary
word 33669074 not in vocabulary
word 33978819 not in vocabulary
word 33669074 not in vocabulary
word 33978819 not in vocabulary
word 988003 not in vocabulary
word 2590048 not in vocabulary
word 418647 not in vocabulary
word 418647 not in vocabulary
word 418647 not in vocabulary
word 418647 not in vocabulary
word 536317 not in vocabulary
word 2249747 not in vocabulary
word 2249747 not in vocabulary
word 2249747 not in vocabulary
word 2249747 not in vocabulary
word 2249747 not in vocabulary
word 1783841 not in vocabulary
word 33502331 not in vocabulary
word 1719739 not in vocabulary
word 1719739 not in vocabulary
word 1719739 not in vocabulary
word 1719739 not in vocabulary
word 1719739 not in vocabulary
word 33229138 not in vocabulary
word 508137 not in vocabulary
word 8243986 not in vocabulary
word 33800786 not in vocabulary
word 33800786 not in vocabulary
word 32457747 not in vocabulary
word 32457747 not in vocabulary
word 46389 not in vocabulary
word 3421128 not in vocabulary
word 625711 not in vocabulary
word 30377669 not in vocabulary
word 3786914 not in vocabulary
word 3786914 not in vocabulary
word 1837078 not in vocabulary
word 33388379 not in vocabulary
word 31864628 not in vocabulary
word 1419572 not in vocabulary
word 2797558 not in vocabulary
word 2797558 not in vocabulary
word 31341517 not in vocabulary
word 2979880 not in vocabulary
word 2979880 not in vocabulary
word 431377 not in vocabulary
word 30188113 not in vocabulary
word 30188113 not in vocabulary
word 2565279 not in vocabulary
word 2565279 not in vocabulary
word 2565279 not in vocabulary
word 4549610 not in vocabulary
word 3592800 not in vocabulary
word 1714358 not in vocabulary
word 3427926 not in vocabulary
word 8016327 not in vocabulary
word 8170232 not in vocabulary
word 88119 not in vocabulary
word 315862 not in vocabulary
word 3773440 not in vocabulary
word 2425271 not in vocabulary
word 2425271 not in vocabulary
word 685096 not in vocabulary
word 685096 not in vocabulary
word 1516440 not in vocabulary
word 1516440 not in vocabulary
word 7873962 not in vocabulary
word 3652053 not in vocabulary
word 68162 not in vocabulary
word 68162 not in vocabulary
word 31360459 not in vocabulary
word 33892157 not in vocabulary
word 33999118 not in vocabulary
word 2335672 not in vocabulary
word 2335672 not in vocabulary
word 2335672 not in vocabulary
word 32579561 not in vocabulary
word 2984491 not in vocabulary
word 1883568 not in vocabulary
word 1883568 not in vocabulary
word 1123008 not in vocabulary
word 32096088 not in vocabulary
word 4412196 not in vocabulary
word 4475936 not in vocabulary
word 4412196 not in vocabulary
word 4475936 not in vocabulary
word 1792798 not in vocabulary
word 31314144 not in vocabulary
word 31314144 not in vocabulary
word 55301 not in vocabulary
word 55301 not in vocabulary
word 715752 not in vocabulary
word 3607249 not in vocabulary
word 601615 not in vocabulary
word 31417871 not in vocabulary
word 4016085 not in vocabulary
word 4016085 not in vocabulary
word 33793429 not in vocabulary
word 33018542 not in vocabulary
word 32071687 not in vocabulary
word 12062126 not in vocabulary
word 430065 not in vocabulary
word 430065 not in vocabulary
word 31345333 not in vocabulary
word 2234915 not in vocabulary
word 3618858 not in vocabulary
word 3618858 not in vocabulary
word 491270 not in vocabulary
word 491270 not in vocabulary
word 33172151 not in vocabulary
word 610559 not in vocabulary
word 23184883 not in vocabulary
word 1722471 not in vocabulary
word 1722471 not in vocabulary
word 520096 not in vocabulary
word 1512807 not in vocabulary
word 33699116 not in vocabulary
word 33699116 not in vocabulary
word 33759183 not in vocabulary
word 33759183 not in vocabulary
word 1204838 not in vocabulary
word 1204838 not in vocabulary
word 8007081 not in vocabulary
word 834231 not in vocabulary
word 33905050 not in vocabulary
word 4230002 not in vocabulary
word 3961271 not in vocabulary
word 4378208 not in vocabulary
word 4378217 not in vocabulary
word 34018319 not in vocabulary
word 4024367 not in vocabulary
word 317805 not in vocabulary
word 317805 not in vocabulary
word 8153410 not in vocabulary
word 5381341 not in vocabulary
word 473438 not in vocabulary
word 5381341 not in vocabulary
word 473438 not in vocabulary
word 2218823 not in vocabulary
word 2523870 not in vocabulary
word 1177472 not in vocabulary
word 1177472 not in vocabulary
word 5408677 not in vocabulary
word 32475388 not in vocabulary
word 632323 not in vocabulary
word 8261974 not in vocabulary
word 3739057 not in vocabulary
word 8244881 not in vocabulary
word 52834 not in vocabulary
word 3170749 not in vocabulary
word 1274796 not in vocabulary
word 3602038 not in vocabulary
word 20240 not in vocabulary
word 3410685 not in vocabulary
word 33939317 not in vocabulary
word 2229259 not in vocabulary
word 2229259 not in vocabulary
word 14711444 not in vocabulary
word 1109252 not in vocabulary
word 2960592 not in vocabulary
word 33722941 not in vocabulary
word 33722941 not in vocabulary
word 5617211 not in vocabulary
word 31861247 not in vocabulary
word 812266 not in vocabulary
word 4268996 not in vocabulary
word 503935 not in vocabulary
word 71608 not in vocabulary
word 5741526 not in vocabulary
word 2951900 not in vocabulary
word 2951900 not in vocabulary
word 4027674 not in vocabulary
word 4027674 not in vocabulary
word 5803353 not in vocabulary
word 3898222 not in vocabulary
word 1005227 not in vocabulary
word 1174715 not in vocabulary
word 31341518 not in vocabulary
word 33899428 not in vocabulary
word 1005220 not in vocabulary
word 1005220 not in vocabulary
word 2930968 not in vocabulary
word 1476272 not in vocabulary
word 850689 not in vocabulary
word 1476272 not in vocabulary
word 850689 not in vocabulary
word 2012251 not in vocabulary
word 358942 not in vocabulary
word 2323552 not in vocabulary
word 33976672 not in vocabulary
word 33999130 not in vocabulary
word 33818184 not in vocabulary
word 33818184 not in vocabulary
word 31396067 not in vocabulary
word 7846034 not in vocabulary
word 33265984 not in vocabulary
word 33265984 not in vocabulary
word 30063261 not in vocabulary
word 91399 not in vocabulary
word 4363425 not in vocabulary
word 4363425 not in vocabulary
word 33898789 not in vocabulary
word 33898789 not in vocabulary
word 33898789 not in vocabulary
word 33898789 not in vocabulary
word 2209385 not in vocabulary
word 30454037 not in vocabulary
word 1890408 not in vocabulary
word 396801 not in vocabulary
word 4219017 not in vocabulary
word 4322575 not in vocabulary
word 439123 not in vocabulary
word 439123 not in vocabulary
word 7979987 not in vocabulary
word 33701695 not in vocabulary
word 623512 not in vocabulary
word 3849464 not in vocabulary
word 33442288 not in vocabulary
word 33473109 not in vocabulary
word 4745698 not in vocabulary
word 2312253 not in vocabulary
word 2312253 not in vocabulary
word 8071102 not in vocabulary
word 4583482 not in vocabulary
word 4583482 not in vocabulary
word 33949450 not in vocabulary
word 91299 not in vocabulary
word 91299 not in vocabulary
word 91299 not in vocabulary
word 1768561 not in vocabulary
word 1265429 not in vocabulary
word 390914 not in vocabulary
word 32833391 not in vocabulary
word 309028 not in vocabulary
word 33279139 not in vocabulary
word 16937 not in vocabulary
word 7865985 not in vocabulary
word 1137829 not in vocabulary
word 1137829 not in vocabulary
word 5615441 not in vocabulary
word 1606418 not in vocabulary
word 31524320 not in vocabulary
word 4362701 not in vocabulary
word 30475790 not in vocabulary
word 3890454 not in vocabulary
word 3764877 not in vocabulary
word 3618890 not in vocabulary
word 3618890 not in vocabulary
word 1666161 not in vocabulary
word 1140848 not in vocabulary
word 33144500 not in vocabulary
word 32054267 not in vocabulary
word 8021660 not in vocabulary
word 8282912 not in vocabulary
word 65582 not in vocabulary
word 65582 not in vocabulary
word 1415458 not in vocabulary
word 1415458 not in vocabulary
word 1030601 not in vocabulary
word 32493083 not in vocabulary
word 3123105 not in vocabulary
word 1542823 not in vocabulary
word 32691501 not in vocabulary
word 1108997 not in vocabulary
word 91207 not in vocabulary
word 2552849 not in vocabulary
word 2997924 not in vocabulary
word 33187 not in vocabulary
word 5681156 not in vocabulary
word 31341509 not in vocabulary
word 32254662 not in vocabulary
word 32254662 not in vocabulary
word 2213095 not in vocabulary
word 2338549 not in vocabulary
word 33766987 not in vocabulary
word 69003 not in vocabulary
word 1016351 not in vocabulary
word 32609679 not in vocabulary
word 30072072 not in vocabulary
word 30072072 not in vocabulary
word 31252819 not in vocabulary
word 31252819 not in vocabulary
word 5569745 not in vocabulary
word 2680562 not in vocabulary
word 1977448 not in vocabulary
word 2119819 not in vocabulary
word 5725927 not in vocabulary
word 847126 not in vocabulary
word 536962 not in vocabulary
word 620236 not in vocabulary
word 5622053 not in vocabulary
word 5718697 not in vocabulary
word 30223433 not in vocabulary
word 266325 not in vocabulary
word 1873739 not in vocabulary
word 1923229 not in vocabulary
word 4400534 not in vocabulary
word 33867016 not in vocabulary
word 480711 not in vocabulary
word 825283 not in vocabulary
word 4060141 not in vocabulary
word 4105654 not in vocabulary
word 32211414 not in vocabulary
word 14776 not in vocabulary
word 32817923 not in vocabulary
word 1907915 not in vocabulary
word 466094 not in vocabulary
word 466094 not in vocabulary
word 4140591 not in vocabulary
word 32183384 not in vocabulary
word 1839145 not in vocabulary
word 30669593 not in vocabulary
word 2994012 not in vocabulary
word 5511122 not in vocabulary
word 33812065 not in vocabulary
word 65062 not in vocabulary
word 1416017 not in vocabulary
word 105894 not in vocabulary
word 59780 not in vocabulary
word 3736265 not in vocabulary
word 1283717 not in vocabulary
word 1283717 not in vocabulary
word 30101802 not in vocabulary
word 770655 not in vocabulary
word 649821 not in vocabulary
word 421759 not in vocabulary
word 1648091 not in vocabulary
word 33071492 not in vocabulary
word 1033231 not in vocabulary
word 1534836 not in vocabulary
word 4242935 not in vocabulary
word 32779548 not in vocabulary
word 3731617 not in vocabulary
word 5803352 not in vocabulary
word 2338552 not in vocabulary
word 4188566 not in vocabulary
word 33829275 not in vocabulary
word 1500192 not in vocabulary
word 33766371 not in vocabulary
word 4054519 not in vocabulary
word 2101171 not in vocabulary
word 2101171 not in vocabulary
word 33808001 not in vocabulary
word 21622048 not in vocabulary
word 31337775 not in vocabulary
word 21346339 not in vocabulary
word 3586702 not in vocabulary
word 3586702 not in vocabulary
word 3586702 not in vocabulary
word 537750 not in vocabulary
word 1177614 not in vocabulary
word 33658484 not in vocabulary
word 20057563 not in vocabulary
word 3835594 not in vocabulary
word 7956304 not in vocabulary
word 3037252 not in vocabulary
word 30444663 not in vocabulary
word 33967162 not in vocabulary
word 33967162 not in vocabulary
word 261691 not in vocabulary
word 261691 not in vocabulary
word 32097689 not in vocabulary
word 54543 not in vocabulary
word 8021585 not in vocabulary
word 854966 not in vocabulary
word 1335351 not in vocabulary
word 2796367 not in vocabulary
word 2796367 not in vocabulary
word 419103 not in vocabulary
word 309292 not in vocabulary
word 5702554 not in vocabulary
word 3378302 not in vocabulary
word 5381590 not in vocabulary
word 5384845 not in vocabulary
word 32397385 not in vocabulary
word 9648959 not in vocabulary
word 1274333 not in vocabulary
word 1829223 not in vocabulary
word 531748 not in vocabulary
word 32362663 not in vocabulary
word 33668958 not in vocabulary
word 1786108 not in vocabulary
word 3780562 not in vocabulary
word 3642699 not in vocabulary
word 2049250 not in vocabulary
word 8142659 not in vocabulary
word 3090056 not in vocabulary
word 4433490 not in vocabulary
word 31745022 not in vocabulary
word 33872789 not in vocabulary
word 50151 not in vocabulary
word 987652 not in vocabulary
word 987652 not in vocabulary
word 8233373 not in vocabulary
word 480310 not in vocabulary
word 1003080 not in vocabulary
word 5699589 not in vocabulary
word 530768 not in vocabulary
word 1948501 not in vocabulary
word 31606729 not in vocabulary
word 31955742 not in vocabulary
word 331233 not in vocabulary
word 1634719 not in vocabulary
word 5471248 not in vocabulary
word 8179409 not in vocabulary
word 59701 not in vocabulary
word 3965021 not in vocabulary
word 1469335 not in vocabulary
word 33347024 not in vocabulary
word 1928224 not in vocabulary
word 5458983 not in vocabulary
word 33706543 not in vocabulary
word 5389475 not in vocabulary
word 3448854 not in vocabulary
word 3859894 not in vocabulary
word 3718614 not in vocabulary
word 4054930 not in vocabulary
word 32553649 not in vocabulary
[44, 121, 132, 123, 11, 142, 48, 124, 22, 70, 1, 100, 141, 59, 75, 24, 26, 25, 46, 30, 29, 28, 16, 52, 68, 14, 126, 15, 79, 69, 113, 62, 36, 137, 88, 139, 93, 99, 60, 17, 35, 85, 66, 7, 97, 78, 39, 134, 37, 112, 83, 105, 51, 110, 64, 27, 34, 76, 40, 2, 6, 42, 122, 33, 18, 109, 9, 102, 54, 131, 21, 114, 31, 0, 5, 135, 4, 89, 3, 82, 95, 57, 125, 55, 138, 67, 74, 91, 86, 111, 117, 45, 98, 73, 49, 90, 58, 129, 133, 50, 56, 130, 13, 115, 107, 92, 53, 65, 116, 63, 128, 136, 96, 108, 140, 103, 80, 43, 47, 143, 12, 127, 23, 71, 61, 72, 106, 32, 104, 87, 84, 118, 10, 77, 81, 120, 119, 20, 38, 41, 94, 101, 8, 19]
추천목록.shape
(100000, 18)
끝
Comments