Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 45fecd3

Browse files
add word generator function
1 parent fb66cbd commit 45fecd3

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

‎transform_data_w2v.py‎

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,32 @@
66

77

88
class TransformDataW2V(TransformData):
9-
def __init__(self, batch_size, num_skips, skip_window,file,corpus):
9+
def __init__(self, batch_size, num_skips, skip_window):
1010
TransformData.__init__(self, 'corpus/dict.utf8', ['pku'])
1111
self.batch_size = batch_size
1212
self.num_skips = num_skips
1313
self.skip_window = skip_window
1414
self.data_index = 0
1515
self.span = 2 * self.skip_window + 1
16-
self.words = [item for sublist in self.words_index for item in sublist]
16+
self.words = self.generate_words('sogou')
1717
self.word_count = len(self.words)
1818

19+
def generate_words(self, name):
    """Return the flat word sequence for the corpus called *name*.

    Supported names:
      * 'pku'   -- flattens ``self.words_index`` (an iterable of iterables)
                   into a single list.
      * 'sogou' -- reads the whole of ``corpus/sogou.txt`` as UTF-8 and
                   converts it with ``self.sentence2index``.

    Raises:
        ValueError: if *name* is not one of the supported corpus names.
            Previously this fell through and returned ``None``, which only
            surfaced later when the caller took ``len()`` of the result.
    """
    if name == 'pku':
        return [item for sublist in self.words_index for item in sublist]
    if name == 'sogou':
        with open('corpus/sogou.txt', 'r', encoding='utf8') as file:
            return self.sentence2index(file.read())
    raise ValueError('unknown corpus name: %r' % (name,))
25+
26+
def sentence2index(self, sentence):
    """Map every character of *sentence* through ``self.dictionary``.

    Args:
        sentence: an iterable of characters (typically a ``str``).

    Returns:
        A list with one entry per character: the value stored in
        ``self.dictionary`` for that character, or 0 when the character
        is not a key of the dictionary.
    """
    # Assumes self.dictionary is a dict-like mapping exposing .get();
    # a single lookup per character replaces the `in` + `[]` pair.
    lookup = self.dictionary.get
    return [lookup(ch, 0) for ch in sentence]
34+
1935
def generate_batch(self):
2036
batch = np.ndarray(shape=(self.batch_size), dtype=np.int32)
2137
labels = np.ndarray(shape=(self.batch_size, 1), dtype=np.int32)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /