% Journal article: Partin et al., Family Medicine 57(6), 2025.
% The publisher's PDF link is stored in the non-standard `pdf` field (ignored by
% bibliography styles) rather than `eprint`, which styles interpret as an
% archive identifier.
@article{10.22454/FamMed.2025.363712,
  author   = {Partin, Michael and Dambro, Anthony B. and Newman, Roland and Shang, Yimeng and Kong, Lan and Clebak, Karl T.},
  title    = {Evaluating the Agreement Between {ChatGPT} and the Clinical Competency Committee in Assigning {ACGME} Milestones for Family Medicine Residents},
  journal  = {Family Medicine},
  volume   = {57},
  number   = {6},
  pages    = {424--429},
  year     = {2025},
  month    = jun,
  doi      = {10.22454/FamMed.2025.363712},
  url      = {https://journals.stfm.org/familymedicine/2025/june/partin-0368/},
  pdf      = {https://journals.stfm.org/media/ez2jnx5t/partin20240368docx-2025-06-06-17-53.pdf},
  abstract = {Background and Objectives: Although artificial intelligence models have existed for decades, the demand for application of these tools within health care and especially medical education are exponentially expanding. Pressure is mounting to increase direct observation and faculty feedback for resident learners, which can create administrative burdens for a Clinical Competency Committee (CCC). This study aimed to assess the feasibility of utilizing a large language model (ChatGPT) in family medicine residency evaluation by comparing the agreement between ChatGPT and the CCC for the Accreditation Council for Graduate Medical Education (ACGME) family medicine milestone levels and examining potential biases in milestone assignment.
Methods: Written faculty feedback for 24 residents from July 2022 to December 2022 at our institution was collated and de-identified. Using standardized prompts for each query, we used ChatGPT to assign milestone levels based on faculty feedback for 11 ACGME subcompetencies. We analyzed these levels for correlation and agreement between actual levels assigned by the CCC.
Results: Using Pearson's correlation coefficient, we found an overall positive and strong correlation between ChatGPT and the CCC for competencies of patient care, medical knowledge, communication, and professionalism. We found no significant difference in correlation or mean difference in milestone level between male and female residents. No significant difference existed between residents with a high faculty feedback word count versus a low word count.
Conclusions: This study demonstrates the feasibility for tools like ChatGPT to assist in the evaluation process of family medicine residents without apparent bias based on gender or word count.},
}