mirror of
https://github.com/TomHodson/tomhodson.github.com.git
synced 2025-06-26 10:01:18 +02:00
224 lines
8.6 KiB
Plaintext
224 lines
8.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Publications markdown generator for academicpages\n",
|
|
"\n",
|
|
"Takes a set of BibTeX publication files and converts them for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). \n",
|
|
"\n",
|
|
"The core python code is also in `pubsFromBibs.py`. \n",
|
|
"Run either one from the `markdown_generator` folder after updating the `publist` dictionary with:\n",
|
|
"* bib file names\n",
|
|
"* specific venue keys based on your bib file preferences\n",
|
|
"* any specific pre-text for specific files\n",
|
|
"* Collection Name (future feature)\n",
|
|
"\n",
|
|
"TODO: Make this work with other databases of citations. \n",
|
|
"TODO: Merge this with the existing TSV parsing solution"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pybtex.database.input import bibtex\n",
|
|
"import pybtex.database.input.bibtex \n",
|
|
"from time import strptime\n",
|
|
"import string\n",
|
|
"import html\n",
|
|
"import os\n",
|
|
"import re"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#todo: incorporate different collection types rather than a catch all publications, requires other changes to template\n",
|
|
"publist = {\n",
|
|
" \"proceeding\": {\n",
|
|
" \"file\" : \"proceedings.bib\",\n",
|
|
" \"venuekey\": \"booktitle\",\n",
|
|
" \"venue-pretext\": \"In the proceedings of \",\n",
|
|
" \"collection\" : {\"name\":\"publications\",\n",
|
|
" \"permalink\":\"/publication/\"}\n",
|
|
" \n",
|
|
" },\n",
|
|
" \"journal\":{\n",
|
|
" \"file\": \"pubs.bib\",\n",
|
|
" \"venuekey\" : \"journal\",\n",
|
|
" \"venue-pretext\" : \"\",\n",
|
|
" \"collection\" : {\"name\":\"publications\",\n",
|
|
" \"permalink\":\"/publication/\"}\n",
|
|
" } \n",
|
|
"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"html_escape_table = {\n",
|
|
" \"&\": \"&amp;\",\n",
|
|
" '\"': \"&quot;\",\n",
|
|
" \"'\": \"&apos;\"\n",
|
|
" }\n",
|
|
"\n",
|
|
"def html_escape(text):\n",
|
|
" \"\"\"Produce entities within text.\"\"\"\n",
|
|
" return \"\".join(html_escape_table.get(c,c) for c in text)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"for pubsource in publist:\n",
|
|
" parser = bibtex.Parser()\n",
|
|
" bibdata = parser.parse_file(publist[pubsource][\"file\"])\n",
|
|
"\n",
|
|
" #loop through the individual references in a given bibtex file\n",
|
|
" for bib_id in bibdata.entries:\n",
|
|
" #reset default date\n",
|
|
" pub_year = \"1900\"\n",
|
|
" pub_month = \"01\"\n",
|
|
" pub_day = \"01\"\n",
|
|
" \n",
|
|
" b = bibdata.entries[bib_id].fields\n",
|
|
" \n",
|
|
" try:\n",
|
|
" pub_year = f'{b[\"year\"]}'\n",
|
|
"\n",
|
|
" #todo: this hack for month and day needs some cleanup\n",
|
|
" if \"month\" in b.keys(): \n",
|
|
" if(len(b[\"month\"])<3):\n",
|
|
" pub_month = \"0\"+b[\"month\"]\n",
|
|
" pub_month = pub_month[-2:]\n",
|
|
" elif(b[\"month\"] not in range(12)):\n",
|
|
" tmnth = strptime(b[\"month\"][:3],'%b').tm_mon \n",
|
|
" pub_month = \"{:02d}\".format(tmnth) \n",
|
|
" else:\n",
|
|
" pub_month = str(b[\"month\"])\n",
|
|
" if \"day\" in b.keys(): \n",
|
|
" pub_day = str(b[\"day\"])\n",
|
|
"\n",
|
|
" \n",
|
|
" pub_date = pub_year+\"-\"+pub_month+\"-\"+pub_day\n",
|
|
" \n",
|
|
" #strip out {} as needed (some bibtex entries that maintain formatting)\n",
|
|
" clean_title = b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\").replace(\" \",\"-\") \n",
|
|
"\n",
|
|
" url_slug = re.sub(\"\\\\[.*\\\\]|[^a-zA-Z0-9_-]\", \"\", clean_title)\n",
|
|
" url_slug = url_slug.replace(\"--\",\"-\")\n",
|
|
"\n",
|
|
" md_filename = (str(pub_date) + \"-\" + url_slug + \".md\").replace(\"--\",\"-\")\n",
|
|
" html_filename = (str(pub_date) + \"-\" + url_slug).replace(\"--\",\"-\")\n",
|
|
"\n",
|
|
" #Build Citation from text\n",
|
|
" citation = \"\"\n",
|
|
"\n",
|
|
" #citation authors - todo - add highlighting for primary author?\n",
|
|
" for author in bibdata.entries[bib_id].persons[\"author\"]:\n",
|
|
" citation = citation+\" \"+author.first_names[0]+\" \"+author.last_names[0]+\", \"\n",
|
|
"\n",
|
|
" #citation title\n",
|
|
" citation = citation + \"\\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + \".\\\"\"\n",
|
|
"\n",
|
|
" #add venue logic depending on citation type\n",
|
|
" venue = publist[pubsource][\"venue-pretext\"]+b[publist[pubsource][\"venuekey\"]].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")\n",
|
|
"\n",
|
|
" citation = citation + \" \" + html_escape(venue)\n",
|
|
" citation = citation + \", \" + pub_year + \".\"\n",
|
|
"\n",
|
|
" \n",
|
|
" ## YAML variables\n",
|
|
" md = \"---\\ntitle: \\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + '\"\\n'\n",
|
|
" \n",
|
|
" md += \"\"\"collection: \"\"\" + publist[pubsource][\"collection\"][\"name\"]\n",
|
|
"\n",
|
|
" md += \"\"\"\\npermalink: \"\"\" + publist[pubsource][\"collection\"][\"permalink\"] + html_filename\n",
|
|
" \n",
|
|
" note = False\n",
|
|
" if \"note\" in b.keys():\n",
|
|
" if len(str(b[\"note\"])) > 5:\n",
|
|
" md += \"\\nexcerpt: '\" + html_escape(b[\"note\"]) + \"'\"\n",
|
|
" note = True\n",
|
|
"\n",
|
|
" md += \"\\ndate: \" + str(pub_date) \n",
|
|
"\n",
|
|
" md += \"\\nvenue: '\" + html_escape(venue) + \"'\"\n",
|
|
" \n",
|
|
" url = False\n",
|
|
" if \"url\" in b.keys():\n",
|
|
" if len(str(b[\"url\"])) > 5:\n",
|
|
" md += \"\\npaperurl: '\" + b[\"url\"] + \"'\"\n",
|
|
" url = True\n",
|
|
"\n",
|
|
" md += \"\\ncitation: '\" + html_escape(citation) + \"'\"\n",
|
|
"\n",
|
|
" md += \"\\n---\"\n",
|
|
"\n",
|
|
" \n",
|
|
" ## Markdown description for individual page\n",
|
|
" if note:\n",
|
|
" md += \"\\n\" + html_escape(b[\"note\"]) + \"\\n\"\n",
|
|
"\n",
|
|
" if url:\n",
|
|
" md += \"\\n[Access paper here](\" + b[\"url\"] + \"){:target=\\\"_blank\\\"}\\n\" \n",
|
|
" else:\n",
|
|
" md += \"\\nUse [Google Scholar](https://scholar.google.com/scholar?q=\"+html.escape(clean_title.replace(\"-\",\"+\"))+\"){:target=\\\"_blank\\\"} for full citation\"\n",
|
|
"\n",
|
|
" md_filename = os.path.basename(md_filename)\n",
|
|
"\n",
|
|
" with open(\"../_publications/\" + md_filename, 'w') as f:\n",
|
|
" f.write(md)\n",
|
|
" print(f'SUCESSFULLY PARSED {bib_id}: \\\"', b[\"title\"][:60],\"...\"*(len(b['title'])>60),\"\\\"\")\n",
|
|
" # field may not exist for a reference\n",
|
|
" except KeyError as e:\n",
|
|
" print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \\\"', b[\"title\"][:30],\"...\"*(len(b['title'])>30),\"\\\"\")\n",
|
|
" continue\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|