forked from lbechberger/MLinPractice
-
Notifications
You must be signed in to change notification settings - Fork 2
/
application.py
60 lines (47 loc) · 1.97 KB
/
application.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Console-based application for tweet classification.
Created on Wed Sep 29 14:49:25 2021
@author: lbechberger
"""
import argparse, pickle
import pandas as pd
from sklearn.pipeline import make_pipeline
from code.util import COLUMN_TWEET
# Command-line interface: four positional arguments, each the path to one
# pickled stage of the classification pipeline (order matters on the CLI).
parser = argparse.ArgumentParser(description="Application")
for _arg_name, _arg_help in (
    ("preprocessing_file", "path to the pickle file containing the preprocessing"),
    ("feature_file", "path to the pickle file containing the feature extraction"),
    ("dim_red_file", "path to the pickle file containing the dimensionality reduction"),
    ("classifier_file", "path to the pickle file containing the classifier"),
):
    parser.add_argument(_arg_name, help=_arg_help)
args = parser.parse_args()
# Restore every pipeline stage from disk, in the order the stages are applied.
# NOTE: unpickling can execute arbitrary code, so the files passed on the
# command line must come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as f_in:
        return pickle.load(f_in)


preprocessing = _load_pickle(args.preprocessing_file)
feature_extraction = _load_pickle(args.feature_file)
dimensionality_reduction = _load_pickle(args.dim_red_file)
# the classifier pickle holds a dictionary; the estimator sits under "classifier"
classifier = _load_pickle(args.classifier_file)["classifier"]

# glue the individual stages together so one call runs the whole chain
pipeline = make_pipeline(
    preprocessing,
    feature_extraction,
    dimensionality_reduction,
    classifier,
)
# Interactive console loop: print a banner, then classify every tweet the user
# types until they enter 'quit' or standard input is exhausted.
print("Welcome to ViralTweeter v0.1!")
print("-----------------------------")
print("")

while True:
    # ask user for input
    try:
        tweet = input("Please type in your tweet (type 'quit' to quit the program): ")
    except EOFError:
        # input() raises EOFError once stdin is closed (Ctrl-D, or piped input
        # that ends without 'quit'). Finish the dangling prompt line and leave
        # through the regular 'quit' path instead of dying with a traceback.
        print("")
        tweet = "quit"

    # terminate if necessary
    if tweet == "quit":
        print("Okay, goodbye!")
        break

    # if not terminated: wrap the tweet in a one-row DataFrame (column name
    # COLUMN_TWEET) and put it through the pipeline
    df = pd.DataFrame({COLUMN_TWEET: [tweet]})
    prediction = pipeline.predict(df)
    confidence = pipeline.predict_proba(df)
    print("Prediction: {0}, Confidence: {1}".format(prediction, confidence))
    print("")