Generate Full Resume Text File

import pandas as pd
from pathlib import Path
import re
from typing import List
# Directory that read_csv() resolves CSV file names against; the generated
# resume is written into the same directory.
DATA_DIR = Path('files/cv')
# Default destination used by write_full_resume().
OUT_PATH = DATA_DIR / 'full_resume.txt'
# NOTE(review): not referenced by any function in this section.
MY_NAME = 'Kirkpatrick, LR'  # matches cv.qmd setting

def read_csv(name: str) -> pd.DataFrame:
    """Load ``DATA_DIR / name`` as a DataFrame.

    Args:
        name: File name of the CSV, relative to ``DATA_DIR``.

    Returns:
        The parsed table, or an empty ``DataFrame`` when the file is missing,
        is not a regular file, or contains no data at all. Callers treat an
        empty frame as "no entries for this section".
    """
    path = DATA_DIR / name
    if not path.is_file():
        return pd.DataFrame()
    try:
        return pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte placeholder file has no header to parse; treat it the
        # same as a missing file rather than aborting the whole resume build.
        return pd.DataFrame()

def normalize_whitespace(s) -> str:
    """Return *s* as a single-spaced, trimmed string.

    Missing values (anything ``pd.isna`` reports as missing) and the literal
    text ``nan`` in any letter case both become the empty string. Every run
    of whitespace in the remaining text is collapsed to one space.
    """
    if pd.isna(s):
        return ''
    text = str(s)
    looks_like_nan = text.lower() == 'nan'
    return '' if looks_like_nan else re.sub(r'\s+', ' ', text).strip()

def join_bits(bits: List[str], sep=', '):
    """Join the usable entries of *bits* with *sep*.

    Entries that are falsy, or whose string form is ``nan`` in any letter
    case, are left out of the result.
    """
    kept = []
    for piece in bits:
        if not piece:
            continue
        if str(piece).lower() == 'nan':
            continue
        kept.append(piece)
    return sep.join(kept)
def fmt_doi(doi: str) -> str:
    """Return the ``https://doi.org/`` URL for *doi*, or ``''`` if it is blank.

    The value may be a bare DOI (``10.1000/xyz``), a ``doi:``-prefixed DOI, or
    an ``http(s)://doi.org/`` / ``http(s)://dx.doi.org/`` URL. Any such prefix
    is removed first so the result never carries the resolver prefix twice.
    """
    doi = normalize_whitespace(doi)
    if not doi:
        return ''
    bare = re.sub(
        r'^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)',
        '',
        doi,
        count=1,
        flags=re.IGNORECASE,
    )
    if not bare:
        # The cell held only a prefix, so there is no identifier to link to.
        return ''
    return f'https://doi.org/{bare}'

def fmt_num_str(s: str) -> str:
    """Render integer-valued numeric text without a trailing ``.0``.

    The value is first passed through ``normalize_whitespace``. Text that
    parses as a float with an integral value (``'71.0'``) is returned as that
    integer (``'71'``); other parseable floats are returned as ``str(float)``;
    text that does not parse as a number is returned as normalized. Blank or
    missing input yields ``''``.
    """
    s = normalize_whitespace(s)
    if not s:
        return ''
    try:
        f = float(s)
    except ValueError:
        # float() raises only ValueError for a string it cannot parse, so
        # there is no need to swallow unrelated exceptions here.
        return s
    if f.is_integer():
        return str(int(f))
    return str(f)

def format_publication(row) -> str:
    """Format one publication row as a single citation line.

    The row is read through ``row.get`` for the keys ``authors``, ``year``,
    ``title``, ``venue``, ``volume``, ``issue``, ``pages`` and ``doi``. The
    result has the shape::

        Authors (YEAR). Title. Venue, VOL(ISSUE), PAGES. https://doi.org/DOI

    Any missing field is skipped together with its punctuation, so a row
    without authors does not start with a stray space or period. The issue
    is only shown when a volume is present.

    Returns:
        The citation text; ``''`` when every field is blank.
    """
    def closed(text: str) -> str:
        # Add the sentence-ending period unless the text already ends a
        # sentence, which avoids output such as "Why?." or "J..".
        return text if text.endswith(('.', '?', '!')) else f'{text}.'

    authors = normalize_whitespace(row.get('authors', ''))
    # Year goes through fmt_num_str like volume/issue: a numeric column with
    # gaps is read as floats, which would otherwise print as "2020.0".
    year = fmt_num_str(row.get('year', ''))
    title = normalize_whitespace(row.get('title', ''))
    venue = normalize_whitespace(row.get('venue', ''))
    volume = fmt_num_str(row.get('volume', ''))
    issue = fmt_num_str(row.get('issue', ''))
    pages = normalize_whitespace(row.get('pages', ''))
    doi_url = fmt_doi(row.get('doi', ''))

    # Build venue string like 'Journal Name, 12(3), 455–472'
    vol_issue = f'{volume}({issue})' if volume and issue else volume
    venue_line = ', '.join(bit for bit in (venue, vol_issue, pages) if bit)

    # Lead-in: 'Authors (YEAR)', tolerating either piece being absent.
    year_bit = f'({year})' if year else ''
    lead = ' '.join(bit for bit in (authors, year_bit) if bit)

    parts = []
    if lead:
        parts.append(closed(lead))
    if title:
        parts.append(closed(title))
    if venue_line:
        parts.append(closed(venue_line))
    if doi_url:
        parts.append(doi_url)
    return ' '.join(parts)

def _format_presentation(row, venue_prefix: str = '') -> str:
    """Shared formatter behind ``format_poster`` and ``format_talk``.

    Reads ``authors``, ``year``, ``title``, ``event`` and ``location`` from
    *row* via ``row.get`` and emits
    ``Authors (YEAR). Title. <venue_prefix>Event, Location.`` Missing fields
    are skipped so the output has no double spaces or trailing whitespace.
    """
    def closed(text: str) -> str:
        # Add the final period unless the text already ends a sentence.
        return text if text.endswith(('.', '?', '!')) else f'{text}.'

    authors = normalize_whitespace(row.get('authors', ''))
    # fmt_num_str keeps a float-typed year column from printing as "2020.0".
    year = fmt_num_str(row.get('year', ''))
    title = normalize_whitespace(row.get('title', ''))
    event = normalize_whitespace(row.get('event', ''))
    location = normalize_whitespace(row.get('location', ''))

    year_bit = f'({year})' if year else ''
    lead = ' '.join(bit for bit in (authors, year_bit) if bit)
    where = ', '.join(bit for bit in (event, location) if bit)

    segments = []
    if lead:
        segments.append(closed(lead))
    if title:
        segments.append(closed(title))
    if where:
        segments.append(closed(f'{venue_prefix}{where}'))
    return ' '.join(segments)


def format_poster(row) -> str:
    """Format a poster row as ``Authors (YEAR). Title. Poster presented at Event, Location.``

    Fields that are blank are omitted; returns ``''`` if all are blank.
    """
    return _format_presentation(row, venue_prefix='Poster presented at ')


def format_talk(row) -> str:
    """Format a talk row as ``Authors (YEAR). Title. Event, Location.``

    Fields that are blank are omitted; returns ``''`` if all are blank.
    """
    return _format_presentation(row)

def format_education(row) -> str:
    """Format one education row as a multi-line text entry.

    Reads ``school``, ``degree1``, ``thesis1_title``, ``thesis1_type``,
    ``advisor1``, ``degree2``, ``start_year`` and ``grad_year`` from *row*
    via ``row.get``. Each non-blank item goes on its own line, in that
    reading order, with the year range (``START - GRAD``) last.

    Returns:
        The lines joined with newlines; ``''`` when every field is blank.
    """
    school = normalize_whitespace(row.get('school', ''))
    deg1 = normalize_whitespace(row.get('degree1', ''))
    thesis1 = normalize_whitespace(row.get('thesis1_title', ''))
    thesis1_type = normalize_whitespace(row.get('thesis1_type', ''))
    advisor1 = normalize_whitespace(row.get('advisor1', ''))
    deg2 = normalize_whitespace(row.get('degree2', ''))
    # Years go through fmt_num_str because a year column containing blanks
    # is read as floats and would otherwise print as "2015.0".
    start = fmt_num_str(row.get('start_year', ''))
    grad = fmt_num_str(row.get('grad_year', ''))

    lines = []
    if school:
        lines.append(school)
    if deg1:
        lines.append(deg1)
    if thesis1:
        # Without a thesis type, show the bare title instead of ": Title".
        lines.append(f"{thesis1_type}: {thesis1}" if thesis1_type else thesis1)
    if advisor1:
        lines.append(f'Advisor: {advisor1}')
    if deg2:
        lines.append(deg2)
    years = ' - '.join(y for y in (start, grad) if y)
    if years:
        lines.append(years)
    return '\n'.join(lines)

def format_generic(row, title_field='item') -> str:
    """Format a simple row as up to three lines: heading, details, URL.

    The heading is read from *title_field*; when the row has no such key,
    its ``title`` value is used instead. ``details`` and ``url`` follow.
    Every value is whitespace-normalized and blank values are skipped.
    """
    fallback_heading = row.get('title', '')
    raw_values = (
        row.get(title_field, fallback_heading),
        row.get('details', ''),
        row.get('url', ''),
    )
    cleaned = [normalize_whitespace(value) for value in raw_values]
    return '\n'.join(text for text in cleaned if text)
def section_header(title: str) -> str:
    """Return *title* followed by a line of ``=`` of the same length."""
    underline = '=' * len(title)
    return '\n'.join((title, underline))

def _section_parts(title, frame, formatter, leading_blank=True):
    """Return the text fragments for one section: header, then entries.

    Each entry is followed by an empty fragment; an empty *frame* yields the
    ``_No entries yet._`` placeholder instead. *leading_blank* prefixes the
    header with a newline, which is done for every section but the first.
    """
    header = section_header(title)
    fragments = ['\n' + header if leading_blank else header]
    if frame.empty:
        fragments.append('_No entries yet._')
        return fragments
    for _, record in frame.iterrows():
        fragments.append(formatter(record))
        fragments.append('')
    return fragments


def _split_presentations(pres):
    """Split the presentations table into ``(posters, talks)``.

    Rows whose ``type`` contains ``poster`` (case-insensitive) are posters;
    everything else is a talk. If the table has no ``type`` column, every
    row is treated as a talk.
    """
    if pres.empty:
        return pd.DataFrame(), pd.DataFrame()
    if 'type' not in pres.columns:
        # Nothing to classify on; an empty slice keeps the column layout.
        return pres.iloc[0:0], pres
    is_poster = pres['type'].astype(str).str.contains('poster', case=False, na=False)
    return pres[is_poster], pres[~is_poster]


def build_resume_text() -> str:
    """Assemble the full plain-text resume from the CSV files in ``DATA_DIR``.

    Sections appear in a fixed order: Education, Publications, Posters,
    Conference Talks, Mentorship, Awards, Professional Activities, Grants.
    Posters and talks both come from ``presentations.csv``. A section whose
    CSV is missing or empty shows ``_No entries yet._``.

    Returns:
        All section fragments joined with blank lines.
    """
    posters, talks = _split_presentations(read_csv('presentations.csv'))
    sections = [
        ('Education', read_csv('education.csv'), format_education),
        ('Publications', read_csv('publications.csv'), format_publication),
        ('Posters', posters, format_poster),
        ('Conference Talks', talks, format_talk),
        ('Mentorship', read_csv('mentorship.csv'), format_generic),
        ('Awards', read_csv('awards.csv'), format_generic),
        ('Professional Activities', read_csv('activities.csv'), format_generic),
        ('Grants', read_csv('grants.csv'), format_generic),
    ]

    parts = []
    for position, (title, frame, formatter) in enumerate(sections):
        parts.extend(
            _section_parts(title, frame, formatter, leading_blank=position > 0)
        )
    return '\n\n'.join(parts)

def write_full_resume(path: Path = OUT_PATH):
    """Build the resume text and save it to *path* as UTF-8.

    The parent directory of *path* is created if needed, and a confirmation
    line naming the destination is printed afterwards.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(build_resume_text(), encoding='utf-8')
    print(f'Wrote resume to: {path}')
# Generate the file when this cell is run: with no argument the resume is
# written to the default OUT_PATH and a confirmation line is printed.
write_full_resume()
Wrote resume to: files/cv/full_resume.txt