1import json
2import pandas as pd
3import requests
4import time
5
# Skills, tools and credentials that the model sometimes returns even though
# they are not methodologies; compared against lower-cased candidates.
_NON_METHODS = frozenset({
    "python", "java", "excel", "sql", "leadership", "communication",
    "teamwork", "degree", "bachelor", "master", "certification",
})

# Canonical (lower-case) methodology name -> alternative spellings/phrases
# that count as a mention of that methodology.
_METHOD_SYNONYMS = {
    "agile": ["agile methodology", "agile environment", "agile principles", "agile practices"],
    "scrum": ["scrum master", "scrum team", "scrum methodology", "scrum framework"],
    "kanban": ["kanban board", "kanban method", "kanban methodology"],
    "waterfall": ["waterfall model", "waterfall methodology", "waterfall approach"],
    "lean": ["lean methodology", "lean principles", "lean thinking", "lean management"],
    "six sigma": ["6 sigma", "six-sigma", "6-sigma", "lean six sigma"],
    "devops": ["dev ops", "dev-ops", "devops practices", "devops culture"],
    "safe": ["scaled agile framework", "safe framework", "safe methodology"],
    "prince2": ["prince 2", "prince-2", "projects in controlled environments"],
    "itil": ["information technology infrastructure library", "itil framework", "itil processes"],
    "extreme programming": ["xp", "xp methodology", "extreme programming practices"],
    "design thinking": ["design-thinking", "design thinking methodology"],
    "okrs": ["objectives and key results", "okr framework", "okr methodology"],
}


def _build_prompt(job_description):
    """Return the extraction prompt with ``job_description`` embedded."""
    return f"""
    You are a specialized methods and frameworks extractor for job descriptions. Your task is to identify ONLY project management methodologies, development frameworks, and working approaches mentioned in the job description.

    STRICT RULES:
    1. Extract ONLY methods, frameworks, and methodologies
    2. Include ONLY: project methodologies, development frameworks, process frameworks, working approaches
    3. Examples include: Scrum, Agile, Kanban, Waterfall, Lean, Six Sigma, DevOps, SAFe, PRINCE2, ITIL, XP (Extreme Programming)
    4. Do NOT include: programming languages, job titles, soft skills, or technical tools (unless they are actual methodologies)
    5. IMPORTANT: ONLY extract methods/frameworks that are EXPLICITLY mentioned in the text
    6. If no methods or frameworks are mentioned, return an empty array []

    CORRECT EXAMPLES:
    - INCLUDE: Scrum, Agile, Kanban, Waterfall, Lean, Design Thinking, OKRs, PRINCE2
    - DO NOT INCLUDE: Python, Excel, leadership, communication, teamwork, SQL

    Job Description:
    {job_description}

    Respond ONLY with a JSON array containing the list of methods and frameworks found.
    Example response format: ["Agile", "Scrum", "Kanban"]
    """


def _mentions(term, text_lower):
    """Return True if ``term`` occurs in ``text_lower`` as a whole word/phrase.

    A plain substring test would accept "lean" inside "clean", "safe" inside
    "safety" or "xp" inside "experience", so the term must not be flanked by
    word characters. A trailing plural "s" is tolerated ("okr" ~ "okrs").
    """
    import re  # function-scope import keeps this block self-contained

    pattern = r"(?<!\w)" + re.escape(term) + r"s?(?!\w)"
    return re.search(pattern, text_lower) is not None


def _parse_model_list(raw):
    """Decode the JSON array embedded in the model's raw text.

    Returns the decoded value, or None when ``raw`` has no ``[ ... ]`` span.
    Raises json.JSONDecodeError when the span is not valid JSON.
    """
    start = raw.find("[")
    end = raw.rfind("]") + 1
    if start < 0 or end <= start:
        return None
    return json.loads(raw[start:end])


def _resolve_synonym(key, description_lower):
    """Map ``key`` to a canonical name whose main form or synonym is in the text.

    Returns the title-cased canonical name, or None when ``key`` is unknown or
    none of the related terms is mentioned in ``description_lower``.
    """
    for canonical, synonyms in _METHOD_SYNONYMS.items():
        if key != canonical and key not in synonyms:
            continue
        if any(_mentions(term, description_lower) for term in (canonical, *synonyms)):
            return canonical.title()
    return None


def _verify_methods(candidates, description_lower):
    """Keep only candidates that are actually mentioned in the description.

    Non-string items, single-character strings and deny-listed terms are
    dropped. A candidate found verbatim keeps the model's spelling; otherwise
    it is accepted under its canonical name if a known synonym is mentioned.
    The result preserves order and contains no case-insensitive duplicates.
    """
    verified = []
    seen = set()
    for candidate in candidates:
        # The model is free-form: the array may hold numbers, nulls or objects.
        if not isinstance(candidate, str):
            continue
        name = candidate.strip()
        key = name.lower()
        if len(name) <= 1 or key in _NON_METHODS:
            continue

        if _mentions(key, description_lower):
            accepted = name
        else:
            accepted = _resolve_synonym(key, description_lower)
            if accepted is None:
                continue

        dedup_key = accepted.lower()
        if dedup_key not in seen:
            seen.add(dedup_key)
            verified.append(accepted)
    return verified


def extract_methods_frameworks_with_ollama(
    job_description,
    *,
    model="gemma3:1b",
    api_url="http://localhost:11434/api/generate",
    timeout=30,
):
    """Extract methods and frameworks from a job description using Ollama.

    The description is sent to the Ollama generate endpoint, the JSON array in
    the reply is decoded, and each returned item is kept only if it (or a
    known synonym) is mentioned in the description as a whole word/phrase.

    Args:
        job_description: Text of the job posting. Anything that is not a
            non-blank string (None, NaN, numbers, "") yields an empty list.
        model: Ollama model name sent in the request.
        api_url: URL of the Ollama generate endpoint.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        A list of verified method/framework names; empty when nothing is
        found or when the request or response parsing fails. Failures are
        reported with ``print`` rather than raised.
    """
    if not isinstance(job_description, str) or not job_description.strip():
        return []

    try:
        response = requests.post(
            api_url,
            json={
                "model": model,
                "prompt": _build_prompt(job_description),
                "stream": False,
            },
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Error from Ollama API: {response.status_code} - {response.text}")
            return []
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"Exception when calling Ollama API: {str(e)}")
        return []

    result = payload.get("response") if isinstance(payload, dict) else None
    if not isinstance(result, str):
        print(f"Couldn't find JSON array in: {payload}")
        return []

    try:
        methods_frameworks = _parse_model_list(result)
    except json.JSONDecodeError:
        print(f"Failed to parse JSON from: {result}")
        return []
    if not isinstance(methods_frameworks, list):
        print(f"Couldn't find JSON array in: {result}")
        return []

    verified_methods = _verify_methods(methods_frameworks, job_description.lower())
    print(f"Initial extraction: {methods_frameworks}")
    print(f"After verification: {verified_methods}")
    return verified_methods
125
def _extract_from_column(row, column, empty_label):
    """Run the extractor on one text cell of a job row and report progress.

    Args:
        row: A row of the jobs DataFrame, indexable by column name.
        column: Name of the text column to process.
        empty_label: Human-readable column label used in the "is empty" message.

    Returns:
        The list returned by the extractor, or an empty list when the cell is
        missing or blank.
    """
    value = row[column]
    if pd.isna(value) or value == "":
        print(f"{empty_label} column is empty for this job")
        return []

    print(f"Processing from {column} column:")
    methods = extract_methods_frameworks_with_ollama(value)
    if methods:
        print(f"Extracted methods/frameworks from {column}: {methods}")
    else:
        print(f"No methods/frameworks extracted from {column}")
    return methods


def main():
    """Extract methods/frameworks for every job in ``seek_jobs.csv``.

    Reads the CSV, runs the extractor on the ``description`` and
    ``job_details`` columns of each row (when those columns exist), and adds
    two columns to the frame: ``extracted_methods_frameworks`` (list per job)
    and ``methods_frameworks_str`` (the same list joined with ", ").

    Returns:
        The enriched DataFrame. Nothing is written to disk here; the frame is
        returned so a caller can persist it.
    """
    file_path = "seek_jobs.csv"

    title_column = "job_title"
    # (column name, label used in the "column is empty" message)
    text_columns = [
        ("description", "Description"),
        ("job_details", "Job details"),
    ]

    print(f"Reading dataset from {file_path}...")
    df = pd.read_csv(file_path)

    total = len(df)
    print(f"Total records: {total}")
    print("Starting methods and frameworks extraction...\n")

    # Column presence does not change per row, so check it once.
    has_title = title_column in df.columns
    present_columns = [
        (column, label) for column, label in text_columns if column in df.columns
    ]

    all_methods_frameworks = []

    for position, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"Job {position}/{total}:")

        if not has_title:
            print("Job Title column not found in dataset")
        else:
            job_title = row[title_column]
            if pd.isna(job_title) or job_title == "":
                print("Job Title: [Not specified]")
            else:
                print(f"Job Title: {job_title}")

        job_methods_frameworks = []
        for column, empty_label in present_columns:
            job_methods_frameworks.extend(
                _extract_from_column(row, column, empty_label)
            )

        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the order vary between runs (string hash randomisation).
        unique_methods = list(dict.fromkeys(job_methods_frameworks))
        if unique_methods:
            print(f"Combined unique methods/frameworks: {unique_methods}")
        all_methods_frameworks.append(unique_methods)

        print("-" * 50)

        # Brief pause between jobs so the local model server is not hammered.
        time.sleep(0.5)

    df['extracted_methods_frameworks'] = all_methods_frameworks

    df['methods_frameworks_str'] = df['extracted_methods_frameworks'].apply(
        lambda x: ', '.join(x) if x else ''
    )

    return df
207
208
# Run the extraction pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()