Skip to content

Commit bcc45a5

Browse files
committed
add final eval
1 parent e2ebe07 commit bcc45a5

File tree

1 file changed

+91
-14
lines changed

1 file changed

+91
-14
lines changed

week10_textconv/Seminar-pytorch.ipynb

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
"source": [
77
"# Natural Language Processing with Deep Learning\n",
88
"\n",
9+
"__This is exactly the same notebook as in ../week10_textconv/. Feel free to submit the seminar notebook, just make sure you read the assignments at the end.\n",
10+
"\n",
911
"Today we're gonna apply the newly learned DL tools for sequence processing to the task of predicting job salary.\n",
1012
"\n",
1113
"Special thanks to [Oleg Vasilev](https://github.com/Omrigan/) for the assignment core (originally written for theano/tensorflow)."
@@ -50,7 +52,9 @@
5052
{
5153
"cell_type": "code",
5254
"execution_count": null,
53-
"metadata": {},
55+
"metadata": {
56+
"collapsed": true
57+
},
5458
"outputs": [],
5559
"source": [
5660
"data = pd.read_csv(\"./Train_rev1.csv\", index_col=None)\n",
@@ -78,7 +82,9 @@
7882
{
7983
"cell_type": "code",
8084
"execution_count": null,
81-
"metadata": {},
85+
"metadata": {
86+
"collapsed": true
87+
},
8288
"outputs": [],
8389
"source": [
8490
"print(\"Before\")\n",
@@ -110,7 +116,9 @@
110116
{
111117
"cell_type": "code",
112118
"execution_count": null,
113-
"metadata": {},
119+
"metadata": {
120+
"collapsed": true
121+
},
114122
"outputs": [],
115123
"source": [
116124
"print(\"After\")\n",
@@ -144,7 +152,9 @@
144152
{
145153
"cell_type": "code",
146154
"execution_count": null,
147-
"metadata": {},
155+
"metadata": {
156+
"collapsed": true
157+
},
148158
"outputs": [],
149159
"source": [
150160
"print(\"Total unique tokens :\", len(token_counts))\n",
@@ -160,7 +170,9 @@
160170
{
161171
"cell_type": "code",
162172
"execution_count": null,
163-
"metadata": {},
173+
"metadata": {
174+
"collapsed": true
175+
},
164176
"outputs": [],
165177
"source": [
166178
"# Let's see how many words are there for each count\n",
@@ -197,7 +209,9 @@
197209
{
198210
"cell_type": "code",
199211
"execution_count": null,
200-
"metadata": {},
212+
"metadata": {
213+
"collapsed": true
214+
},
201215
"outputs": [],
202216
"source": [
203217
"print(\"Tokens left:\", len(tokens))\n",
@@ -229,7 +243,9 @@
229243
{
230244
"cell_type": "code",
231245
"execution_count": null,
232-
"metadata": {},
246+
"metadata": {
247+
"collapsed": true
248+
},
233249
"outputs": [],
234250
"source": [
235251
"assert isinstance(token_to_id, dict)\n",
@@ -275,7 +291,9 @@
275291
{
276292
"cell_type": "code",
277293
"execution_count": null,
278-
"metadata": {},
294+
"metadata": {
295+
"collapsed": true
296+
},
279297
"outputs": [],
280298
"source": [
281299
"#### print(\"Lines:\")\n",
@@ -296,7 +314,9 @@
296314
{
297315
"cell_type": "code",
298316
"execution_count": null,
299-
"metadata": {},
317+
"metadata": {
318+
"collapsed": true
319+
},
300320
"outputs": [],
301321
"source": [
302322
"from sklearn.feature_extraction import DictVectorizer\n",
@@ -326,7 +346,9 @@
326346
{
327347
"cell_type": "code",
328348
"execution_count": null,
329-
"metadata": {},
349+
"metadata": {
350+
"collapsed": true
351+
},
330352
"outputs": [],
331353
"source": [
332354
"from sklearn.model_selection import train_test_split\n",
@@ -368,7 +390,9 @@
368390
{
369391
"cell_type": "code",
370392
"execution_count": null,
371-
"metadata": {},
393+
"metadata": {
394+
"collapsed": true
395+
},
372396
"outputs": [],
373397
"source": [
374398
"generate_batch(data_train, 3, max_len=10)"
@@ -457,7 +481,9 @@
457481
{
458482
"cell_type": "code",
459483
"execution_count": null,
460-
"metadata": {},
484+
"metadata": {
485+
"collapsed": true
486+
},
461487
"outputs": [],
462488
"source": [
463489
"title_encoder = TitleEncoder(out_size=64)\n",
@@ -495,7 +521,9 @@
495521
{
496522
"cell_type": "code",
497523
"execution_count": null,
498-
"metadata": {},
524+
"metadata": {
525+
"collapsed": true
526+
},
499527
"outputs": [],
500528
"source": [
501529
"desc_encoder = <Create description encoder>\n",
@@ -686,7 +714,9 @@
686714
{
687715
"cell_type": "code",
688716
"execution_count": null,
689-
"metadata": {},
717+
"metadata": {
718+
"collapsed": true
719+
},
690720
"outputs": [],
691721
"source": [
692722
"for epoch_i in range(num_epochs):\n",
@@ -739,6 +769,33 @@
739769
" print('\\n\\n')"
740770
]
741771
},
772+
{
773+
"cell_type": "code",
774+
"execution_count": null,
775+
"metadata": {
776+
"collapsed": true
777+
},
778+
"outputs": [],
779+
"source": [
780+
"print(\"Final eval:\")\n",
781+
"for batch in iterate_minibatches(data_val, shuffle=False):\n",
782+
" title_ix = Variable(torch.LongTensor(batch[\"Title\"]), volatile=True)\n",
783+
" desc_ix = Variable(torch.LongTensor(batch[\"FullDescription\"]), volatile=True)\n",
784+
" cat_features = Variable(torch.FloatTensor(batch[\"Categorical\"]), volatile=True)\n",
785+
" reference = Variable(torch.FloatTensor(batch[target_column]), volatile=True)\n",
786+
"\n",
787+
" prediction = model(title_ix, desc_ix, cat_features)\n",
788+
" loss = compute_loss(reference, prediction)\n",
789+
"\n",
790+
" val_loss += loss.data.numpy()[0]\n",
791+
" val_mae += compute_mae(reference, prediction).data.numpy()[0]\n",
792+
" val_batches += 1\n",
793+
"\n",
794+
"print(\"\\tLoss:\\t%.5f\" % (val_loss / val_batches))\n",
795+
"print(\"\\tMAE:\\t%.5f\" % (val_mae / val_batches))\n",
796+
"print('\\n\\n')"
797+
]
798+
},
742799
{
743800
"cell_type": "markdown",
744801
"metadata": {},
@@ -806,6 +863,26 @@
806863
" * Maintain the best-on-validation snapshot via `model.state_dict`\n",
807864
" * Plotting learning curves is usually a good idea"
808865
]
866+
},
867+
{
868+
"cell_type": "markdown",
869+
"metadata": {},
870+
"source": [
871+
"### A short report\n",
872+
"\n",
873+
"Please tell us what you did and how it worked.\n",
874+
"\n",
875+
"`<YOUR_TEXT_HERE>`, i guess..."
876+
]
877+
},
878+
{
879+
"cell_type": "code",
880+
"execution_count": null,
881+
"metadata": {
882+
"collapsed": true
883+
},
884+
"outputs": [],
885+
"source": []
809886
}
810887
],
811888
"metadata": {

0 commit comments

Comments
 (0)