{ "cells": [ { "cell_type": "markdown", "id": "9329f864-d401-4ae2-a5ae-697f7629a54d", "metadata": {}, "source": [ "# Standard Name Table\n", "\n", "A so-called \"standard name table\" defines \"standard names\", which is a concept used by the [CF Convention](https://cfconventions.org/).\n", "\n", "Those standard names are used to define the meaning of a numerical variable in files (typically netCDF4 files).\n", "\n", "With this library, we can describe a standard name table using JSON-LD. **Note**, that only a simplified version of the original CF Conventions is modelled!\n", "\n", "This notebook walks you through the main steps of building such a table yourself using Python:" ] }, { "cell_type": "code", "execution_count": 1, "id": "d8f1bebc-629e-4fd4-8cbd-0b50d733d71d", "metadata": {}, "outputs": [], "source": [ "import ssnolib\n", "from ssnolib.namespace import SSNO\n", "from ssnolib.prov import Person, Organization, Attribution\n", "from ontolutils.namespacelib.m4i import M4I" ] }, { "cell_type": "markdown", "id": "1a5b6be1-fe93-4071-8ffa-421495b2d4ad", "metadata": {}, "source": [ "### Create a new table\n", "\n", "Let's start by instantiate a table. We add a title and one or multiple associated \"agents\", which can be persons or organizations. More details on [how to work with agents can be found here](./Agents.ipynb)." 
] }, { "cell_type": "code", "execution_count": 2, "id": "c3f81b35-9d4c-40af-aecf-a18f3d199ab8", "metadata": {}, "outputs": [], "source": [ "# Create to \"Agents\", which are Persons in this case:\n", "agent1 = ssnolib.Person(\n", " id=\"https://orcid.org/0000-0001-8729-0482\",\n", " firstName=\"Matthias\",\n", " lastName=\"Probst\",\n", " orcidId=\"https://orcid.org/0000-0001-8729-0482\"\n", ")\n", "# Agent 2 is affiliated with an organization:\n", "orga1 = ssnolib.Organization(name=\"Awesome Institute\")\n", "agent2 = ssnolib.Person(\n", " firstName=\"John\",\n", " lastName=\"Doe\",\n", " mbox=\"john@doe.com\",\n", " affiliation=orga1\n", ")\n", "\n", "# instantiate the table:\n", "snt = ssnolib.StandardNameTable(\n", " title='SNT from scratch',\n", " description=\"A table defined as part of a tutorial\",\n", " version='v1',\n", " qualifiedAttribution=[\n", " Attribution(agent=agent1, hadRole=M4I.ContactPerson),\n", " Attribution(agent=agent2, hadRole=M4I.Supervisor),\n", " Attribution(agent=orga1)\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "ce695180-c392-40f7-b091-127db525d7bc", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "sequence item 0: expected str instance, LangString found", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43msnt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_html\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfolder\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtmp\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\GitHub\\ssnolib\\ssnolib\\ssno\\standard_name_table.py:1344\u001b[39m, in 
\u001b[36mStandardNameTable.to_html\u001b[39m\u001b[34m(self, folder, filename)\u001b[39m\n\u001b[32m 1342\u001b[39m filename = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.title\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.html\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1343\u001b[39m html_filename = pathlib.Path(filename)\n\u001b[32m-> \u001b[39m\u001b[32m1344\u001b[39m markdown_filename = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mto_markdown\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhtml_filename\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwith_suffix\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m.tmp.md\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1345\u001b[39m template_filename = __this_dir__ / \u001b[33m'\u001b[39m\u001b[33mtemplates\u001b[39m\u001b[33m'\u001b[39m / \u001b[33m'\u001b[39m\u001b[33mstandard_name_table.html\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m 1347\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m template_filename.exists():\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\GitHub\\ssnolib\\ssnolib\\ssno\\standard_name_table.py:1187\u001b[39m, in \u001b[36mStandardNameTable.to_markdown\u001b[39m\u001b[34m(self, filename)\u001b[39m\n\u001b[32m 1185\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m qa.hadRole:\n\u001b[32m 1186\u001b[39m role = ROLE_LOOKUP.get(\u001b[38;5;28mstr\u001b[39m(qa.hadRole), \u001b[38;5;28mstr\u001b[39m(qa.hadRole).rsplit(\u001b[33m\"\u001b[39m\u001b[33m/\u001b[39m\u001b[33m\"\u001b[39m, \u001b[32m1\u001b[39m)[-\u001b[32m1\u001b[39m])\n\u001b[32m-> \u001b[39m\u001b[32m1187\u001b[39m lines.append(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrole\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[43mqa\u001b[49m\u001b[43m.\u001b[49m\u001b[43magent\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1189\u001b[39m lines.append(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mContact: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mqa.agent.to_text()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\GitHub\\ssnolib\\ssnolib\\prov\\attribution.py:131\u001b[39m, in \u001b[36mPerson.to_text\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 129\u001b[39m parts.append(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mORCID: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.orcidId\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 130\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.affiliation:\n\u001b[32m--> \u001b[39m\u001b[32m131\u001b[39m parts.append(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43maffiliation\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 132\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[33m'\u001b[39m\u001b[33m; \u001b[39m\u001b[33m'\u001b[39m.join(parts)\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\GitHub\\ssnolib\\ssnolib\\prov\\attribution.py:79\u001b[39m, in \u001b[36mOrganization.to_text\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 77\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.hasRorId:\n\u001b[32m 78\u001b[39m parts.append(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mROR ID: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.hasRorId\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m79\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[33;43m'\u001b[39;49m\u001b[33;43m; \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparts\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[31mTypeError\u001b[39m: sequence item 0: expected str instance, LangString found" ] } ], "source": [ "snt.to_html(folder=\"tmp\")" ] }, { "cell_type": "markdown", "id": "b259d9ba-73f8-4571-ab83-9f5b507d8397", "metadata": {}, "source": [ "Let's add some standard names to the table:" ] }, { "cell_type": "markdown", "id": "49819462-0525-48ad-96bb-ef74ae913215", "metadata": {}, "source": [ "## Add Standard Names" ] }, { "cell_type": "code", "execution_count": null, "id": "76576ec4-3c04-4493-a513-08cc4effe3b6", "metadata": {}, "outputs": [], "source": [ "snt.standardNames = [\n", " ssnolib.StandardName(\n", " standard_name=\"air_density\",\n", " description=\"The density of air\",\n", " unit=\"kg/m^3\"\n", " ),\n", " ssnolib.StandardName(\n", " standard_name=\"coordinate\",\n", " description=\"The spatial coordinate vector.\",\n", " unit=\"m\"\n", " ),\n", " ssnolib.StandardName(\n", " standard_name=\"velocity\",\n", " description=\"The velocity vector of an object or fluid.\",\n", " unit=\"m/s\"\n", " )\n", "]" ] }, { "cell_type": "markdown", "id": "e9ce3e58-7a7f-4dab-96c0-039086bc44d8", "metadata": {}, "source": [ "So far we only have two standard names. We can define modification rules, to build new, verified standard names. For example, \"x_velocity\" would be a reasonable new standard name for the table.\n", "\n", "So let's define such a modification rule. We call it a `Qualification`. The one we would like to define should be used directly of an already existing standard name, e.g. 
\"SSNO:AnyStandardName\":" ] }, { "cell_type": "code", "execution_count": null, "id": "b5446a1a-e9d1-4ae4-9bd3-6b740fb8b0e2", "metadata": {}, "outputs": [], "source": [ "component = ssnolib.VectorQualification(\n", " name=\"component\",\n", " hasValidValues=[\"x\", \"y\", \"z\"],\n", " description=\"The component of a vector\",\n", " before=SSNO.AnyStandardName\n", ")\n", "\n", "transformation = ssnolib.Transformation(\n", " name=\"C_derivative_of_X\",\n", " description=\"derivative of X with respect to distance in the component direction, which may be x, y or z.\",\n", " altersUnit=\"[X]/[C]\",\n", " hasCharacter=[\n", " ssnolib.Character(character=\"X\", associatedWith=SSNO.AnyStandardName),\n", " ssnolib.Character(character=\"C\", associatedWith=component.id),\n", " ]\n", ")" ] }, { "cell_type": "markdown", "id": "9098b634-0627-440a-b0bf-6eb8bb31ef35", "metadata": {}, "source": [ "Add it to the SNT:" ] }, { "cell_type": "code", "execution_count": null, "id": "e1e2f9db-38fd-4849-85eb-c7240841e90f", "metadata": {}, "outputs": [], "source": [ "snt.hasModifier = [component, transformation]" ] }, { "cell_type": "markdown", "id": "ab5cfa34-1d85-414f-9fc7-c13751e3144e", "metadata": {}, "source": [ "We can check standard name strings, whether they apply to the modification rule:" ] }, { "cell_type": "code", "execution_count": null, "id": "5908af8b-eff9-4b3f-9f6f-2baac1a979b9", "metadata": {}, "outputs": [], "source": [ "snt.verify_name(\"vertical_velocity\")" ] }, { "cell_type": "code", "execution_count": null, "id": "b9288dcb-0d49-481c-8e2d-f2d76df4d58a", "metadata": {}, "outputs": [], "source": [ "snt.verify_name(\"x_velocity\")" ] }, { "cell_type": "code", "execution_count": null, "id": "73012529-4f91-497b-925e-a2c5cd013299", "metadata": {}, "outputs": [], "source": [ "snt.verify_name(\"x_component\")" ] }, { "cell_type": "markdown", "id": "a469096c-4c8a-4586-a359-a058c3ee0c6b", "metadata": {}, "source": [ "Also, adding new standard names can go through a 
verification:" ] }, { "cell_type": "code", "execution_count": null, "id": "581313cc-247b-423b-8615-ad16998cbf82", "metadata": {}, "outputs": [], "source": [ "#snt.add_new_standard_name(\"x_coordinate\", verify=True) # verify=False will just add the standard name and interpret it as a core standard name" ] }, { "cell_type": "markdown", "id": "5ace0482-51d0-4fd6-8aec-9cbcfe69ff1e", "metadata": {}, "source": [ "## Export standard name tables\n", "We can export to various formats such as JSON-LD or TTL. We can also generate an HTML file:" ] }, { "cell_type": "markdown", "id": "940f3b10-0650-4661-a58c-ce3a4aad5fc8", "metadata": {}, "source": [ "### Serialize TTL:" ] }, { "cell_type": "code", "execution_count": null, "id": "5ad73b1b-1737-466c-a3df-e0286926a581", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Print the table serialized as Turtle (TTL).\n", "# (A stray positional argument after `format=` made this cell a SyntaxError before.)\n", "print(snt.serialize(format=\"ttl\"))" ] }, { "cell_type": "markdown", "id": "c83dbbc4-2b7f-4bc0-87d8-ec856480dcb6", "metadata": {}, "source": [ "### Write HTML file" ] }, { "cell_type": "code", "execution_count": null, "id": "65cfef36-9919-404a-8bd0-3a4b09c7d386", "metadata": {}, "outputs": [], "source": [ "snt.to_html(folder=\"tmp\")" ] }, { "cell_type": "code", "execution_count": null, "id": "4028ba08-35d1-4d16-a09c-ab50230e4232", "metadata": {}, "outputs": [], "source": [ "# Dump the table as JSON-LD into the same \"tmp\" folder; the file name is the table title.\n", "with open(f\"tmp/{snt.title}.jsonld\", \"w\", encoding=\"utf-8\") as f:\n", "    f.write(snt.model_dump_jsonld())" ] }, { "cell_type": "code", "execution_count": null, "id": "d7544600-d328-43d4-a499-d94731606bca", "metadata": {}, "outputs": [], "source": [ "snt.title" ] }, { "cell_type": "code", "execution_count": null, "id": "28ef615c-303f-463d-b2ff-6846197e0cd3", "metadata": {}, "outputs": [], "source": [ "# Read the JSON-LD file written above back into a new table object:\n", "snt_loaded = ssnolib.StandardNameTable.parse(f\"tmp/{snt.title}.jsonld\", context={\"ssno\": \"https://example.org/\"})" ] }, { "cell_type": "code", "execution_count": null, "id": "f107cfe2-2a09-4d51-b9a3-de7d3377f04f", "metadata": {}, "outputs": [], "source": [
"snt_loaded.qualifiedAttribution[0].agent.model_dump(exclude_none=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "bd80a819-4804-4d2c-9340-aa7679fb56a2", "metadata": {}, "outputs": [], "source": [ "snt_loaded.hasModifier" ] }, { "cell_type": "markdown", "id": "a0011edc-9e69-4334-9414-4f2cbe95c270", "metadata": {}, "source": [ "## Parse a table from an online resource\n", "\n", "Let's pare the CF Convention, which is the model role for the library: [CF Convention table](https://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml).\n", "\n", "Well, it does not need the SSNO ontology for that, just use DCAT:" ] }, { "cell_type": "code", "execution_count": null, "id": "6d155de4-2837-4406-8f98-fb7fac2f223e", "metadata": {}, "outputs": [], "source": [ "distribution = ssnolib.dcat.Distribution(\n", " title='XML Table',\n", " download_URL='https://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml',\n", " media_type='application/xml'\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "27e0c1a5-09ce-44b7-97be-c5ed74f22fe2", "metadata": {}, "outputs": [], "source": [ "dataset = ssnolib.dcat.Dataset(\n", " distribution=distribution\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "46b362ae-add6-49a3-97f5-0989a3391379", "metadata": {}, "outputs": [], "source": [ "print(dataset.model_dump_ttl())" ] }, { "cell_type": "markdown", "id": "15fec63a-4248-401f-abf1-3bdd6632b84a", "metadata": {}, "source": [ "But let's associate out `schema:ResearchProject` to it:" ] }, { "cell_type": "code", "execution_count": null, "id": "c45a0f85-2c8a-439b-a63c-2f11c9ff4f29", "metadata": {}, "outputs": [], "source": [ "from ssnolib.schema import Project" ] }, { "cell_type": "code", "execution_count": null, "id": "c1b6479d-0f1d-4695-876d-614f191a195e", "metadata": {}, "outputs": [], "source": [ "proj = Project(name=\"My Project\", usesStandardnameTable=dataset)" ] }, { "cell_type": "markdown", 
"id": "6e341d8c-1ed8-441a-8c8d-68a5aad77e8e", "metadata": {}, "source": [ "Maybe we would like to get all the standard names. We can do this by calling `fetch()` or instantiate the standard name table using `parse()`:" ] }, { "cell_type": "code", "execution_count": null, "id": "4fb565a5-b3ad-4b07-9c30-fba828c299c5", "metadata": {}, "outputs": [], "source": [ "from ontolutils import QUDT_UNIT\n", "\n", "additional_qudts = {\n", " # other:\n", " 'kg m-1 s-1': QUDT_UNIT.KiloGM_PER_M_SEC,\n", " 'm-2 s-1': QUDT_UNIT.M2_PER_SEC,\n", " 'K s': QUDT_UNIT.K_SEC,\n", " 'W s m-2': QUDT_UNIT.W_SEC_PER_M2,\n", " 'N m-1': QUDT_UNIT.N_PER_M,\n", " 'mol mol-1': QUDT_UNIT.MOL_PER_MOL,\n", " 'mol/mol': QUDT_UNIT.MOL_PER_MOL,\n", " 'm4 s-1': QUDT_UNIT.M4_PER_SEC,\n", " 'K Pa s-1': QUDT_UNIT.K_PA_PER_SEC,\n", " 'Pa m s-1': QUDT_UNIT.PA_M_PER_SEC,\n", " 'radian': QUDT_UNIT.RAD,\n", " 'degree s-1': QUDT_UNIT.DEG_PER_SEC,\n", " 'Pa m s-2': QUDT_UNIT.PA_M_PER_SEC2,\n", " 'sr': QUDT_UNIT.SR,\n", " 'sr-1': QUDT_UNIT.PER_SR,\n", " 'm year-1': QUDT_UNIT.M_PER_YR,\n", " 'mol m-2 s-1 sr-1': QUDT_UNIT.MOL_PER_M2_SEC_SR,\n", " 'mol m-2 s-1 m-1 sr-1': QUDT_UNIT.MOL_PER_M2_SEC_M_SR,\n", " 'Pa-1 s-1': QUDT_UNIT.PA_PER_SEC,\n", " 'm-1 s-1': QUDT_UNIT.PER_M_SEC,\n", " 'm2 s rad-1': QUDT_UNIT.M2_SEC_PER_RAD,\n", " 'W/m2': QUDT_UNIT.W_PER_M2,\n", " 'dbar': QUDT_UNIT.DeciBAR\n", "}" ] }, { "cell_type": "code", "execution_count": null, "id": "ff06ea2d-e511-44c1-85a6-21867fed16f3", "metadata": { "scrolled": true }, "outputs": [], "source": [ "snt = ssnolib.StandardNameTable.parse(dataset.distribution[0], make_standard_names_lowercase=True, qudt_lookup=additional_qudts)" ] }, { "cell_type": "code", "execution_count": null, "id": "3e003f97-de6f-4116-9a25-10fab90cd8b3", "metadata": {}, "outputs": [], "source": [ "snt.to_html(folder=\"tmp\")" ] }, { "cell_type": "markdown", "id": "089e90b6-176b-48fe-b0dd-f0e3c1a5cf83", "metadata": {}, "source": [ "Write to JSON-LD file:" ] }, { "cell_type": "code", 
"execution_count": null, "id": "2c1ab39c-d30d-4dbb-a1b6-94306192787c", "metadata": {}, "outputs": [], "source": [ "with open(f\"tmp/{snt.title}.jsonld\", \"w\", encoding=\"utf-8\") as f:\n", " f.write(snt.model_dump_jsonld())" ] }, { "cell_type": "markdown", "id": "7d89f2ba-fa2f-442b-9376-899226ae914d", "metadata": {}, "source": [ "Instantiate a Standard name table from a JSON-LD:" ] }, { "cell_type": "code", "execution_count": null, "id": "df48b63f-f1bc-4299-be90-4fd267870243", "metadata": {}, "outputs": [], "source": [ "snt = ssnolib.parse_table(f\"tmp/{snt.title}.jsonld\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ced83081-66c1-4e33-ae89-d4b0580a6a2f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }