The following code connects to a Google Sheet that contains my training dataset.
You can copy my Google Sheet and change the sheet_id to your own training dataset.
# ID of the Google Sheet holding the training data (the long token in the
# sheet's URL). Replace it with the ID of your own copy of the sheet.
sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8"
1# 1) Install dependencies2!pip install --quiet gspread pandas scikit-learn oauth2client3
4# 2) Authenticate with Google5from google.colab import auth6auth.authenticate_user()7
8# 3) Connect to Google Sheets API via gspread9import gspread10from google.auth import default11
12creds, _ = default()13gc = gspread.authorize(creds)14
15# 4) Read Sheet1 from your public Google Sheet16import pandas as pd17
18sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8"19sh = gc.open_by_key(sheet_id)20ws = sh.worksheet("Sheet1")21records = ws.get_all_records()22df = pd.DataFrame(records)23
24# Rename column F to 'category'25df = df.rename(columns={"Result": "category"})26df = df[["company_name", "category"]].dropna()27
28# 4b) Read from the "Recruiters" sheet29ws_recruiters = sh.worksheet("Recruiters")30recruiters_list = ws_recruiters.col_values(1) # first column only31
32# Remove header and blanks33recruiters_cleaned = [name for name in recruiters_list if name.strip().lower() not in ["", "company_name", "name"]]34
35# Create a DataFrame with category = "Recruitment"36df_recruiters = pd.DataFrame({37 "company_name": recruiters_cleaned,38 "category": ["Recruitment"] * len(recruiters_cleaned)39})40
41# 4c) Merge with Sheet1 training data42df_combined = pd.concat([df, df_recruiters], ignore_index=True).drop_duplicates()43
44# 5) Preview the final training data45print("Combined training data preview:")46print(df_combined.head())47
48# 6) Train model49from sklearn.feature_extraction.text import CountVectorizer50from sklearn.naive_bayes import MultinomialNB51from sklearn.pipeline import Pipeline52
53pipeline = Pipeline([54 ("vectorizer", CountVectorizer(lowercase=True)),55 ("classifier", MultinomialNB())56])57pipeline.fit(df_combined["company_name"], df_combined["category"])58
# 7) Classifier function
def classify_company(name: str) -> str:
    """Return the category the trained pipeline predicts for *name*.

    Args:
        name: Company name to classify.

    Returns:
        The predicted category label for that single name.
    """
    # predict() works on a batch, so wrap the name in a one-element list
    # and unwrap the single prediction.
    return pipeline.predict([name])[0]
# 8) Test: print the predicted category for a few sample company names
print("\nExample predictions:")
for ex in ["FDM Group", "MinterEllison", "Next Apex", "PERSOLKELLY", "Visy Industries"]:
    print(f"{ex} → {classify_company(ex)}")
Now that your training is done, let's run the following code to test-drive the classifier.
# Interactive classification loop: keep prompting for company names and
# print the predicted category until the user types 'exit'.
while True:
    try:
        user_input = input("Enter a company name (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop the loop
        # cleanly instead of surfacing a traceback.
        print("\nGoodbye!")
        break

    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    if not user_input:
        print("Please enter a valid company name.\n")
        continue

    try:
        result = classify_company(user_input)
    except Exception as e:
        # Top-level boundary of an interactive session: report the problem
        # and keep the loop alive for the next name.
        print(f"Error: {e}\n")
    else:
        print(f"→ {user_input} is classified as: {result}\n")
You've completed this chapter! 🎉