AusBiz Consulting

The following code connects to a Google Sheet that contains my training dataset.

You can copy my Google Sheet and change the sheet_id to your own training dataset.

Python

# Key of the Google Sheet that holds the training data; it is the value passed
# to gc.open_by_key(). Replace it with the key of your own copy of the sheet
# to train on your own dataset.
sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8"

Python

1# 1) Install dependencies
2!pip install --quiet gspread pandas scikit-learn oauth2client
3
4# 2) Authenticate with Google
5from google.colab import auth
6auth.authenticate_user()
7
8# 3) Connect to Google Sheets API via gspread
9import gspread
10from google.auth import default
11
12creds, _ = default()
13gc = gspread.authorize(creds)
14
15# 4) Read Sheet1 from your public Google Sheet
16import pandas as pd
17
18sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8"
19sh = gc.open_by_key(sheet_id)
20ws = sh.worksheet("Sheet1")
21records = ws.get_all_records()
22df = pd.DataFrame(records)
23
24# Rename column F to 'category'
25df = df.rename(columns={"Result": "category"})
26df = df[["company_name", "category"]].dropna()
27
28# 4b) Read from the "Recruiters" sheet
29ws_recruiters = sh.worksheet("Recruiters")
30recruiters_list = ws_recruiters.col_values(1)  # first column only
31
32# Remove header and blanks
33recruiters_cleaned = [name for name in recruiters_list if name.strip().lower() not in ["", "company_name", "name"]]
34
35# Create a DataFrame with category = "Recruitment"
36df_recruiters = pd.DataFrame({
37    "company_name": recruiters_cleaned,
38    "category": ["Recruitment"] * len(recruiters_cleaned)
39})
40
41# 4c) Merge with Sheet1 training data
42df_combined = pd.concat([df, df_recruiters], ignore_index=True).drop_duplicates()
43
44# 5) Preview the final training data
45print("Combined training data preview:")
46print(df_combined.head())
47
48# 6) Train model
49from sklearn.feature_extraction.text import CountVectorizer
50from sklearn.naive_bayes import MultinomialNB
51from sklearn.pipeline import Pipeline
52
53pipeline = Pipeline([
54    ("vectorizer", CountVectorizer(lowercase=True)),
55    ("classifier", MultinomialNB())
56])
57pipeline.fit(df_combined["company_name"], df_combined["category"])
58
59# 7) Classifier function
60def classify_company(name: str) -> str:
61    return pipeline.predict([name])[0]
62
63# 8) Test
64print("\nExample predictions:")
65for ex in ["FDM Group", "MinterEllison", "Next Apex", "PERSOLKELLY", "Visy Industries"]:
66    print(f"{ex} → {classify_company(ex)}")

Now that your training is done, let's run the following code to test-drive the classifier.

Python

# Interactive classification loop
# Keeps prompting for company names and prints the predicted category until
# the user types 'exit'. Requires classify_company() from the training cell.
while True:
    try:
        user_input = input("Enter a company name (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # input() raises these when stdin is closed or the cell is interrupted;
        # treat both as a request to quit instead of showing a traceback.
        print("\nGoodbye!")
        break

    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    if not user_input:
        print("Please enter a valid company name.\n")
        continue

    try:
        result = classify_company(user_input)
    except Exception as e:
        # Top-level boundary of an interactive session: report the problem and
        # keep the loop alive rather than ending the session.
        print(f"Error: {e}\n")
    else:
        print(f"→ {user_input} is classified as: {result}\n")

1# 1) Install dependencies 2!pip install --quiet gspread pandas scikit-learn oauth2client 3 4# 2) Authenticate with Google 5from google.colab import auth 6auth.authenticate_user() 7 8# 3) Connect to Google Sheets API via gspread 9import gspread 10from google.auth import default 11 12creds, _ = default() 13gc = gspread.authorize(creds) 14 15# 4) Read Sheet1 from your public Google Sheet 16import pandas as pd 17 18sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8" 19sh = gc.open_by_key(sheet_id) 20ws = sh.worksheet("Sheet1") 21records = ws.get_all_records() 22df = pd.DataFrame(records) 23 24# Rename column F to 'category' 25df = df.rename(columns={"Result": "category"}) 26df = df[["company_name", "category"]].dropna() 27 28# 4b) Read from the "Recruiters" sheet 29ws_recruiters = sh.worksheet("Recruiters") 30recruiters_list = ws_recruiters.col_values(1) # first column only 31 32# Remove header and blanks 33recruiters_cleaned = [name for name in recruiters_list if name.strip().lower() not in ["", "company_name", "name"]] 34 35# Create a DataFrame with category = "Recruitment" 36df_recruiters = pd.DataFrame({ 37 "company_name": recruiters_cleaned, 38 "category": ["Recruitment"] * len(recruiters_cleaned) 39}) 40 41# 4c) Merge with Sheet1 training data 42df_combined = pd.concat([df, df_recruiters], ignore_index=True).drop_duplicates() 43 44# 5) Preview the final training data 45print("Combined training data preview:") 46print(df_combined.head()) 47 48# 6) Train model 49from sklearn.feature_extraction.text import CountVectorizer 50from sklearn.naive_bayes import MultinomialNB 51from sklearn.pipeline import Pipeline 52 53pipeline = Pipeline([ 54 ("vectorizer", CountVectorizer(lowercase=True)), 55 ("classifier", MultinomialNB()) 56]) 57pipeline.fit(df_combined["company_name"], df_combined["category"]) 58 59# 7) Classifier function 60def classify_company(name: str) -> str: 61 return pipeline.predict([name])[0] 62 63# 8) Test 64print("\nExample predictions:") 65for ex 
in ["FDM Group", "MinterEllison", "Next Apex", "PERSOLKELLY", "Visy Industries"]: 66 print(f"{ex} → {classify_company(ex)}")

1# Interactive classification loop 2while True: 3 user_input = input("Enter a company name (or type 'exit' to quit): ").strip() 4 if user_input.lower() == "exit": 5 print("Goodbye!") 6 break 7 if user_input == "": 8 print("Please enter a valid company name.\n") 9 continue 10 11 try: 12 result = classify_company(user_input) 13 print(f"→ {user_input} is classified as: {result}\n") 14 except Exception as e: 15 print(f"Error: {e}\n")

Recruitment or Industry using Machine Learning (Source Code)

Next Up

Lesson 19: Install LLM on your laptop

Next Up

Lesson 19: Install LLM on your laptop