This website is now static html (since the end of 2010). The pages you see here are a simple wget spider mode crawl of the original wordpress, thus dynamic features like commenting are not working anymore.

Load the MeSH vocabulary into a MySQL database. A simple script.


A simple script written in Java to load the MeSH terms into a MySQL database.

This script simply loads the MeSH terms (the value of “/DescriptorRecordSet > DescriptorRecord > DescriptorName > String” XML tag) that can be found in the MeSH XML formatted version of the vocabulay (http://www.nlm.nih.gov/mesh/filelist.html) into a MySQL relational database.

Download the script or read it:

/**
 * @author Arjuna Del Toso
 * @url http://arjuna.deltoso.net/
 * 
 * @FAQ: what the hell is this?

 * A simple, very simple, program to load the MESH Terms to a MySQL database
 * 
 * @USAGE
 * 1-configure some stuff in the code ("//: CHANGE A VALUE HERE" string is a placeholder for what to change);
 * 2-install the required packages (jdbc connector [http://www.mysql.com/products/connector/j/] and dom4j [http://www.dom4j.org/]);
 * 3-compile it ($javac mesh2database.java);

 * 4-put the MESH Descriptor XML file and the MESH Descriptor DTD file in the current directory;
 * 5-run the program ($java mesh2database) and wait;
 * 6-enjoy the 20.000+ records inserted in your database.
 * 
 */

// Some imports in order to run this program; you need

// the jdbc driver for connecting to MySQL database
// and the dom4j package for XML manipulation
import java.io.File;
import org.dom4j.Document;
import org.dom4j.DocumentException;

import org.dom4j.Element;
import org.dom4j.ElementHandler;
import org.dom4j.ElementPath;
import org.dom4j.io.SAXReader;

import java.sql.*;

// a very fanciful name
public class mesh2database {

	// all work done here
	public static void main(String[] args) throws SQLException {

		// load the jdbc driver

		try {
			Class.forName("com.mysql.jdbc.Driver").newInstance();
		} catch (InstantiationException e1) {
		} catch (IllegalAccessException e1) {
		} catch (ClassNotFoundException e1) {}

		// the connection to mysql db

//: CHANGE A VALUE HERE
//: set your database values
		final Connection con = DriverManager.getConnection("jdbc:mysql://localhost/table", "user", "password");
		// a preparedStatement in the hope to improve speed

//: CHANGE A VALUE HERE
//: configure to match your database schema
		final PreparedStatement s = con.prepareStatement("INSERT IGNORE INTO `table`.`mesh` (`id` , `mesh` ) VALUES (NULL , ?)");

		// the MESH Descriptors XML file,
		// can be found at http://www.nlm.nih.gov/mesh/

//: CHANGE A VALUE HERE
//: insert the path to your mesh file
		File file = new File("desc2007");

		// enable pruning mode to call me back as each node is complete

		SAXReader reader = new SAXReader(false);

		reader.addHandler( "/DescriptorRecordSet/DescriptorRecord/DescriptorName/String",
				new ElementHandler() {
			public void onStart(ElementPath path) {}
			public void onEnd(ElementPath path) {
				// process a single element

				Element meshDescriptorValue = path.getCurrent();
				Element descname = meshDescriptorValue.getParent();
				Element descrecord = descname.getParent();

				// remove .toLowerCase() if you are Case Sensitive ;) 

				String meshHeading = meshDescriptorValue.getText().toLowerCase();

				// i just don't want something like
				// "DNA Degradation, Apoptotic"
				// as single record in my database
				if(!meshHeading.contains(",")){

					// simple debug code (outputs the MESH DESCRIPTORS) 

					System.out.println(meshHeading);

					// the database INSERTs executed here
					try {
						// insert current value in the preparedStatement
						s.setString(1, meshHeading);
						// execute the statement

						s.executeUpdate();
					} catch (SQLException e1) {}
				}
				// prune the tree
				descrecord.detach();
			}
		}
		);

		// just reading the file will do all the work
		try {
			Document document = reader.read(file);
		} catch (DocumentException e) {}

		// closing statement and connection

		s.close();
		con.close();
	}

}

In order to run this script you need the Java compiler, the MeSH XML file, a MySQL installation and two java libraries: MySQL Connector/J and dom4j.



Leave a Reply