<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Alistair Miles</title>
	<atom:link href="http://alimanfoo.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://alimanfoo.wordpress.com</link>
	<description>purl.org/net/aliman -- Web technology for science &#38; innovation</description>
	<lastBuildDate>Fri, 05 Aug 2011 15:38:45 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='alimanfoo.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>Alistair Miles</title>
		<link>http://alimanfoo.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://alimanfoo.wordpress.com/osd.xml" title="Alistair Miles" />
	<atom:link rel='hub' href='http://alimanfoo.wordpress.com/?pushpress=hub'/>
		<item>
		<title>MalariaGEN Informatics Blog</title>
		<link>http://alimanfoo.wordpress.com/2011/08/05/malariagen-informatics-blog/</link>
		<comments>http://alimanfoo.wordpress.com/2011/08/05/malariagen-informatics-blog/#comments</comments>
		<pubDate>Fri, 05 Aug 2011 15:38:40 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[bioinformatics]]></category>
		<category><![CDATA[malariagen]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=134</guid>
		<description><![CDATA[This is just a short post to say my colleagues and I have started a MalariaGEN Informatics Blog, that&#8217;s where I&#8217;m mostly posting at the moment. Filed under: Uncategorized Tagged: bioinformatics, malariagen<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=134&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This is just a short post to say my colleagues and I have started a <a href="http://informatics.malariagen.net/" title="MalariaGEN Informatics Blog">MalariaGEN Informatics Blog</a>, that&#8217;s where I&#8217;m mostly posting at the moment.</p>
<br />Filed under: <a href='http://alimanfoo.wordpress.com/category/uncategorized/'>Uncategorized</a> Tagged: <a href='http://alimanfoo.wordpress.com/tag/bioinformatics/'>bioinformatics</a>, <a href='http://alimanfoo.wordpress.com/tag/malariagen/'>malariagen</a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/134/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=134&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2011/08/05/malariagen-informatics-blog/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Using SPARQL for Biological Data Integration &#8211; Reflections on openflydata.org and the FlyWeb Project</title>
		<link>http://alimanfoo.wordpress.com/2011/01/17/using-sparql-for-biological-data-integration-reflections-on-openflydata-org-and-the-flyweb-project/</link>
		<comments>http://alimanfoo.wordpress.com/2011/01/17/using-sparql-for-biological-data-integration-reflections-on-openflydata-org-and-the-flyweb-project/#comments</comments>
		<pubDate>Mon, 17 Jan 2011 19:36:49 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[agile development]]></category>
		<category><![CDATA[data-sharing]]></category>
		<category><![CDATA[databases]]></category>
		<category><![CDATA[javascript]]></category>
		<category><![CDATA[jena]]></category>
		<category><![CDATA[json]]></category>
		<category><![CDATA[model-driven architecture]]></category>
		<category><![CDATA[ontologies]]></category>
		<category><![CDATA[owl]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[rdf]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[SPARQL]]></category>
		<category><![CDATA[web services]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=119</guid>
		<description><![CDATA[It&#8217;s now almost 18 months since the end of the FlyWeb project and the development of the proof-of-concept site openflydata.org, so I thought it was high time to write up a few reflections. Thanks to Dr David Shotton, head of the Image Bioinformatics Research Group, for giving me the chance to work on FlyWeb, it [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=119&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>It&#8217;s now almost 18 months since the end of the <a href="http://imageweb.zoo.ox.ac.uk/wiki/index.php/FlyWeb_project">FlyWeb project</a> and the development of the proof-of-concept site <a href="http://openflydata.org/">openflydata.org</a>, so I thought it was high time to write up a few reflections. Thanks to <a href="http://www.zoo.ox.ac.uk/staff/academics/shotton_dm.htm">Dr David Shotton</a>, head of the <a href="http://imageweb.zoo.ox.ac.uk/wiki/index.php/Main_Page">Image Bioinformatics Research Group</a>, for giving me the chance to work on FlyWeb, it was a great project.</p>
<p>If you want to know more about the technical side of the work, see the paper &#8220;<a href="http://dx.doi.org/10.1016/j.jbi.2010.04.004">OpenFlyData: An exemplar data web integrating gene expression data on the fruit fly Drosophila melanogaster</a>&#8221; in the Journal of Biomedical Informatics.</p>
<p><strong>Integrating Gene Expression Data</strong></p>
<p>We wanted to help reduce the amount of time spent by a <em>Drosophila</em> functional genetics research group on <strong>experimental design</strong> and on <strong>validating experimental results</strong>. Experimental design includes selecting genes that might be relevant to the biological function of interest (in this case, male fertility). Validating results includes checking your gene expression data against other published data for the same gene &#8211; a discrepancy suggests an artifact or problem in your data, or in the published data &#8230; either way it&#8217;s likely to be important. </p>
<p>The problem here is a common refrain &#8211; <strong>the relevant data are not all found in the same place</strong>. Trawling and querying multiple sites and manually compiling the results takes a lot of time. Could we build some tools that help bring these data together?</p>
<p><strong>Technology Hypothesis &#8211; Data Webs, Semantic Web &amp; SPARQL</strong></p>
<p>We wanted to be as <strong>user-driven</strong> as possible, i.e., to stay focused on what the researchers needed, and to be open-minded about technology, using whatever tools made us most productive. </p>
<p>But we did have a <strong>technology hypothesis</strong>, which was part of the reason why <a href="http://www.jisc.ac.uk/whatwedo/programmes/resourcediscovery/flyweb.aspx">JISC funded the FlyWeb project</a>. Our hypothesis was that building a data integration solution for our <em>Drosophila</em> researchers using Semantic Web standards and open source software would be (a) <strong>feasible</strong>, and (b)<strong> reasonably cost-efficient</strong>. David Shotton and Graham Klyne, the original proposal authors, had also previously developed a vision for &#8220;<a href="http://www.rin.ac.uk/news/events/data-webs-new-visions-research-data-web">data webs</a>&#8221;, an architectural pattern for integrating a set of biological data within a specific domain or for a specific purpose. Would the <em>data webs</em> pattern help us to build a solution?</p>
<p>So how did we marry these two forces: on the one hand being <strong>user-driven</strong>, on the other having a <strong>technology hypothesis</strong> that we wanted to test? </p>
<p>Well, in the spirit of agile, <strong>we tried to make our development iterations as short as possible</strong>. I.e., we tried to work in a way that meant we had something to put in front of users in the shortest possible time frame. When we were discussing architectural patterns and technologies, and several alternatives looked to be of similar complexity or difficulty, we favoured approaches involving RDF, OWL or SPARQL, and that were closer to the original <em>data webs</em> vision. </p>
<p>However, our goal was <strong>not</strong> to prove that a solution based on semweb standards and tech was any cheaper or better than a non-semweb alternative; just that it was <strong>possible</strong> and <strong>not prohibitively expensive</strong>. This is interesting because, if data integration solutions in related problem domains were all based on semweb standards, then they also might play together, as well as solving their own particular problems &#8230; or so the argument goes. I.e., there would be some re-use or re-purposing benefit from each individual data integration solution, and maybe some network effect, if everyone used SPARQL, for example. Of course there would be work involved in linking two data web solutions, but it might be less because at least we&#8217;d bottom out at the same standards &#8211; RDF, SPARQL, and maybe even some shared ontologies. </p>
<p>But you can&#8217;t even begin to talk about network effects if you can&#8217;t first show that you can solve specific problems effectively and cheaply. I.e., solutions need to make sense locally.</p>
<p><strong>SPARQL Mashups</strong></p>
<p>An architectural pattern that we adopted early in the project was the &#8220;<strong>SPARQL mashups</strong>&#8221; pattern. A SPARQL mashup is an HTML+JavaScript application that runs entirely in the browser, and that retrieves data directly from two or more SPARQL endpoints via the SPARQL protocol.</p>
<p>To see a SPARQL mashup in action, go to <a href="http://openflydata.org/search/gene-expression">http://openflydata.org/search/gene-expression</a>, click the &#8220;show logger&#8221; link at the bottom of the page (or open firebug if you&#8217;re in firefox), then type &#8220;schuy&#8221; into the search box. You should see SPARQL queries being sent to various SPARQL endpoints and result sets being returned in the JSON SPARQL results format. </p>
<p>For example, here&#8217;s the query that finds genes from <a href="http://flybase.org/">FlyBase</a> matching the query term &#8220;aly&#8221;:</p>
<pre>
# Select feature short name, unique name, annotation ID, and official full name, given
# any label and where feature is D. melanogaster gene.

PREFIX xsd:
PREFIX chado:
PREFIX skos:
PREFIX so:
PREFIX syntype: 

SELECT DISTINCT ?uniquename ?name ?accession ?fullname WHERE {

  ?feature skos:altLabel "aly" ;
    a so:SO_0000704 ;
    chado:organism  ;
    chado:uniquename ?uniquename ;
    chado:name ?name ;
    chado:feature_dbxref [
      chado:accession ?accession ;
      chado:db &lt;http://openflydata.org/id/flybase/db/FlyBase_Annotation_IDs&gt;
    ] .

  OPTIONAL {
    ?fs
      chado:feature ?feature ;
      chado:is_current "true"^^xsd:boolean ;
      chado:synonym [
        a syntype:FullName ;
        chado:name ?fullname ;
      ] ;
      a chado:Feature_Synonym .
  }

}
</pre>
<p>Each of the panels in the UI corresponds (more-or-less) to a data source. The search term is first used in a SPARQL query to the FlyBase endpoint to find matching genes. If there is only a single gene matching the query, the gene is automatically selected, and further SPARQL queries are then sent to other data sources (e.g., FlyAtlas, BDGP, Fly-TED) to retrieve gene expression data relevant to that gene. If more than one gene matches the query (e.g., try &#8220;aly&#8221;) the user has to select a gene before the next set of queries are dispatched.</p>
<p><strong>Why did we use the SPARQL mashup pattern?</strong></p>
<p>Well, it allowed us to <strong>use some off-the-shelf open source software</strong>. All we had to do was code a transformation from the data in its published format to RDF. Once we had an RDF dump for each data source, we loaded the data into a triple store (we used <a href="http://openjena.org/wiki/TDB">Jena TDB</a>) then deployed the store as a SPARQL endpoint via a SPARQL protocol server (we used <a href="http://www.joseki.org/">Joseki</a> initially, then <a href="http://sparqlite.googlecode.com/">SPARQLite</a>).</p>
<p>Once we had a SPARQL endpoint for each data source, we could develop a simple HTML+JavaScript application in the spirit of a conventional mashup, using the SPARQL protocol as the API to the data.</p>
<p>A nice feature of using SPARQL here is that <strong>you don&#8217;t have to think about the API to the data</strong>, at least not from a web service point of view. The SPARQL protocol and query language basically give you an API for free. All you have to figure out is what query you need to get the right data. And you don&#8217;t need to write any code on the server side, other than that required to transform your data to RDF.</p>
<p>Also, because your API supports a query language (SPARQL), <strong>you don&#8217;t need to know up-front exactly what data you need or what questions you&#8217;re going to ask</strong> (although obviously it helps to have a rough idea). I.e., if you get half-way through coding your mashup and realise you need to query the data in a different way, or retrieve more or less data, you just tweak the SPARQL query you&#8217;re sending. I.e., there are no consequences for your server-side code, your API can already handle it.</p>
<p>This also means <strong>your API can handle unanticipated use cases</strong>. I.e., if someone else wants to query the data for a completely different purpose, chances are they can already do it &#8211; the expressiveness of SPARQL means that the chances others will be able to use your data are high. Although this wasn&#8217;t a motivation in our project, we liked the idea.</p>
<p><strong>Dealing With Big(-ish) RDF Data</strong></p>
<p>As we scaled up from initial prototypes, we hit a few snags. The biggest challenge was dealing with <a href="http://code.google.com/p/openflydata/wiki/FlyBaseMilestone3">the FlyBase data</a>, which amounted to about 180 million triples in our final dump. Also, queries had to be quick, because users of the mashup apps are waiting for SPARQL queries to evaluate in real time. Here are a few tricks we found for working with RDF data at this scale.</p>
<ul>
<li><strong>Fast data loading</strong> &#8211; For data loading, we found we could get between 15,000 and 30,000 triples per second from Jena TDB on a 64-bit platform. That meant the FlyBase dataset loaded in somewhere between 1.5 and 3 hours. To load the data, we fired up a <a href="http://aws.amazon.com/ec2/instance-types/">large EC2 instance</a>, and loaded the data onto an EBS volume. When the load was done, we detached the volume and attached it to a small instance which hosted the query endpoint, and shut down the large instance to keep running costs down. We didn&#8217;t try this, but using a RAID 0 array and striping your data across multiple EBS volumes might increase load performance even further (there&#8217;s a nice <a href="http://alestic.com/2009/06/ec2-ebs-raid">article by Eric Hammond on using RAID 0 on EC2</a>).</li>
<li><strong>Everything has to be streaming</strong> &#8211; The transformation from source format (e.g., relational database) to RDF has to be streaming. The SPARQL query engine has to be streaming. And the SPARQL protocol implementation has to be streaming. That&#8217;s part of why we rolled our own SPARQL protocol implementation in the end (<a href="http://sparqlite.googlecode.com/">SPARQLite</a>) &#8211; Joseki at the time did not write result sets in a streaming fashion, for valid reasons, but which limits scalability.</li>
<li>To get good query performance we <strong>pre-calculated some data</strong>. E.g., when we wanted to do a case-insensitive match against RDF literals in a query pattern, we computed the lower-case version of the literal and added it to the data as extra triples, then wrote queries with literals in lower case too &#8211; rather than, say, using a regex filter. SPARQL queries go much faster when they have a concrete literal or URI node to work from early in the query; queries with loose patterns and FILTERs can be very slow, because you&#8217;re pushing a lot of triples through the filters. We did also try using the SPARQL-lucene integration (LARQ) for text matches, but couldn&#8217;t get this to be quite fast enough (sub 3s) for the FlyBase gene name queries, although it was used heavily in some other projects (<a href="http://www.clarosnet.org/about/default.htm">CLAROS</a> and <a href="http://code.google.com/p/milarq/">MILARQ</a>). You can also make queries go faster by shortening query paths. E.g., if you have a pattern you want to query like { ?x :p ?y . ?y :q &#8220;foo&#8221;. } your query may go faster if you first invent a new predicate :r and compute some new triples via a rule or query like CONSTRUCT { ?x :r ?z } WHERE { ?x :p ?y . ?y :q ?z. }, then add these triples to your dataset and query using the pattern { ?x :r &#8220;foo&#8221; } instead.</li>
<li>Beware that <strong>how you write your query may make a difference</strong>. Depending on <a href="http://openjena.org/wiki/TDB/Optimizer">which optimiser</a> you use, TDB will do some re-ordering of the query to make it go faster (I believe to put more selective bits earlier), but if you know your data well (statistics are helpful) then writing the query with this in mind can help the query engine. E.g., if you have a triple pattern with a specific predicate and a specific subject or object that you know should only have a few matches, put this right at the top of the query. Basically, put the most discriminating parts of the query as early as possible. This also means that often triple patterns with rdf:type are not that helpful early on, because they don&#8217;t narrow down the results much, although this is what you tend to put first for readability.</li>
<li><strong>Test-driven data</strong> &#8211; When you generate a large RDF dataset, you need to be sure you got the transformation right, and the data is as you expect it to be, otherwise you can waste a lot of time. I.e., you need to be able to test your triples. We designed some simple test harnesses for our data, where a set of test SPARQL queries were run against the data. Each SPARQL query was an ASK or SELECT and the test case defined an expectation for each query result. For very large datasets, you may also want to code some sanity checks on the n-triples dump before trying to load into a triplestore and test with SPARQL, e.g., scanning with grep and/or awk to find triples with predicates you expect to be there. </li>
</ul>
<p><strong>Open SPARQL Endpoints &#8211; Mitigating Denial of Service</strong></p>
<p>Above I mildly extolled the virtues of SPARQL as an API to data &#8211; anyone can write the query they need to extract the data they want, and you don&#8217;t need to anticipate all requirements <em>a priori</em>.</p>
<p>The obvious downside to the expressiveness of SPARQL and openness of SPARQL endpoints is that they are <strong>vulnerable to accidental or intentional denial of service attacks</strong>. I.e., someone can write a hard query and tie up your query engine&#8217;s compute and/or memory resources, if not crash your box. </p>
<p>Although deploying a production service or guaranteeing service levels wasn&#8217;t part of our remit, we were concerned that unless we could mitigate this vulnerability, SPARQL outside the firewall would never really be useful beyond a proof-of-concept. I.e., we would never be able to advertise our endpoints as a production web service, so that others could write mashups or other applications that query the data and depend on the service.</p>
<p>We spent a bit of time working on this, and this may be a solved problem now in newer query engines, but at the time we thought to place some limits on the queries that open endpoints would accept. For example, SPARQLite endpoints could be configured to disallow queries with triple patterns with variable predicates, or FILTER or OPTIONAL clauses, or to enforce a LIMIT on all queries&#8217; result sets. This is not a complete solution, because you could still write hard queries, but at least it removed some of the obvious attacks. A better solution would probably involve monitoring queries&#8217; resource usage and killing any that take too long or consume too many resources &#8211; a bit like how <a href="http://docs.amazonwebservices.com/AmazonSimpleDB/latest/DeveloperGuide/index.html?SDBLimits.html">Amazon&#8217;s SimpleDB places limits on service usage</a>, including a 5 second maximum query execution time.</p>
<p><strong>Mapping to RDF</strong></p>
<p>The <strong>elephant in the room</strong> here is mapping the data to RDF, and that&#8217;s where a lot of the work went. All of our data sources came in some non-RDF format, either as CSV files or a relational database. For the CSV sources we hand-coded RDF transformations as Python scripts. For the relational databases, we made heavy use of <a href="http://www4.wiwiss.fu-berlin.de/bizer/d2rq/spec/">D2RQ</a>, although we did not use <a href="http://www4.wiwiss.fu-berlin.de/bizer/d2r-server/">D2R server</a> to transform SPARQL queries to SQL on-the-fly due to performance and scalability issues, rather we used the D2R dump utility to generate a complete RDF dump of an SQL datasource in n-triples format, then loaded that into a Jena TDB triplestore which backed our SPARQL endpoints.</p>
<p>The main issue was the time it takes to design a mapping from a fairly complex relational schema like <a href="http://gmod.org/wiki/Chado">Chado</a> to RDF. Rather than trying to find one or more existing, published, ontologies to use in the RDF outputs of the mapping, and designing the mappings by hand, we tried a different approach.<strong> Inspired by model-driven engineering</strong>, we developed a Python utility which, driven by some simple annotations on the source SQL schema definition, <strong>generated both a suitable OWL ontology and a complete D2RQ mapping file</strong>. This worked well with a schema like Chado which has consistent structural patterns and naming conventions. There&#8217;s a worked example in the supplementary information (S4) to the <a href="http://dx.doi.org/10.1016/j.jbi.2010.04.004">OpenFlyData paper in the Journal of Biomedical Informatics</a>.</p>
<p>The problem with this approach is, of course, that you end up with <strong>one schema/ontology per data source</strong>. Initially we thought this would force us to do some ontology alignment and to map everything to a common ontology, but we quickly realised this just wasn&#8217;t necessary. The mashup applications quite happily query each source according to its own ontology, and have just enough knowledge of what each ontology means to integrate the results in a sensible way. I.e.,<strong> you can develop applications that work with multiple data sources without perfect (or even partial) ontology alignment</strong>. Obviously, aligning ontologies is desirable, but that can be a long-term ambition &#8211; using ontologies derived from the source data at least gets you started, and gets you talking about data semantics rather than getting bogged down by differences in syntax, formats or protocol (because RDF and SPARQL are the interlingua for these). </p>
<p><strong>Lasting Impressions</strong></p>
<p>The message I took away from this project is that, <strong>if you already have some data</strong>, and you want to make the data available to web application developers and other hackers in a useful way, then <strong>SPARQL can be a good option</strong>. It&#8217;s fairly straightforward (even, dare I say, fun) to code simple HTML+JavaScript mashups that bring data from different SPARQL endpoints together on-the-fly (pardon the pun). SPARQL won&#8217;t be a panacea, and you may find some queries just aren&#8217;t quick enough to evaluate in real time, so you may have to find ways to optimise these queries when moving to production, but it&#8217;s worth doing some benchmarking, as triplestores like Jena TDB are quick for certain types of query.</p>
<p>The pain comes when you need to convert data to RDF. But you don&#8217;t need to get hung up on finding the right ontologies or designing a perfect or even complete mapping. Convert what you need, using a custom ontology that is designed for your application or generated from the source data, and just get going &#8211; you&#8217;ll have plenty of iterations to refactor the data.</p>
<p>Would I use SPARQL again? Yes, for read-only data services and data integration webapps, I&#8217;d definitely consider it. And there are some new features coming in <a href="http://www.w3.org/2009/sparql/wiki/Main_Page">SPARQL 1.1</a> which look very useful. If someone solves the denial-of-service problem for open SPARQL endpoints (and they may already have) then the case for SPARQL as a data-sharing standard is compelling. Certainly an area to watch.</p>
<br />Filed under: <a href='http://alimanfoo.wordpress.com/category/agile-development/'>agile development</a>, <a href='http://alimanfoo.wordpress.com/category/data-sharing/'>data-sharing</a>, <a href='http://alimanfoo.wordpress.com/category/databases/'>databases</a>, <a href='http://alimanfoo.wordpress.com/category/javascript/'>javascript</a>, <a href='http://alimanfoo.wordpress.com/category/jena/'>jena</a>, <a href='http://alimanfoo.wordpress.com/category/json/'>json</a>, <a href='http://alimanfoo.wordpress.com/category/model-driven-architecture/'>model-driven architecture</a>, <a href='http://alimanfoo.wordpress.com/category/ontologies/'>ontologies</a>, <a href='http://alimanfoo.wordpress.com/category/owl/'>owl</a>, <a href='http://alimanfoo.wordpress.com/category/python/'>python</a>, <a href='http://alimanfoo.wordpress.com/category/rdf/'>rdf</a>, <a href='http://alimanfoo.wordpress.com/category/semantic-web/'>semantic web</a>, <a href='http://alimanfoo.wordpress.com/category/sparql/'>SPARQL</a>, <a href='http://alimanfoo.wordpress.com/category/web-services/'>web services</a>  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" 
href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/119/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=119&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2011/01/17/using-sparql-for-biological-data-integration-reflections-on-openflydata-org-and-the-flyweb-project/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Apache, Authentication and MySQL</title>
		<link>http://alimanfoo.wordpress.com/2010/11/17/apache-authentication-and-mysql/</link>
		<comments>http://alimanfoo.wordpress.com/2010/11/17/apache-authentication-and-mysql/#comments</comments>
		<pubDate>Wed, 17 Nov 2010 16:06:14 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=109</guid>
		<description><![CDATA[I just spent a couple of hours trying to configure an Apache 2.2 server to do BASIC authentication using a MySQL database of usernames and passwords. The standard way to do this is via the mod_auth_mysql module, but much of the documentation on the web is out of date or has some hidden gotchas. Here [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=109&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I just spent a couple of hours trying to configure an Apache 2.2 server to do BASIC authentication using a MySQL database of usernames and passwords. The standard way to do this is via the mod_auth_mysql module, but much of the documentation on the web is out of date or has some hidden gotchas. Here is what I got to work.</p>
<p>For reference, I&#8217;m using Ubuntu 10.04 with all software installed via APT (apache2, mysql-server, libapache2-mod-auth-mysql).</p>
<p>To install mod_auth_mysql&#8230;</p>
<pre>
$ sudo apt-get install libapache2-mod-auth-mysql
$ sudo a2enmod auth_mysql
</pre>
<p>The biggest gotcha is that the <a href="http://modauthmysql.sourceforge.net/CONFIGURE">configuration documentation for mod_auth_mysql</a> is badly out of date. There have been some substantial changes to the configuration parameter names since that was written, although I could not find any definitive documentation of the new configuration parameters. There are a couple of other gotchas in there too, I&#8217;ll come to those in a minute.</p>
<p>Before configuring Apache, I set up a test database of usernames and passwords. This is what I did&#8230;</p>
<pre>
$ mysql -uroot -p
mysql&gt; grant all on auth.* to auth_user@localhost identified by 'XXX';
mysql&gt; flush privileges;
mysql&gt; create database auth;
mysql&gt; use auth;
CREATE TABLE user_info ( user_name CHAR(100) NOT NULL, user_passwd CHAR(100) NOT NULL, PRIMARY KEY (user_name) );
INSERT INTO `user_info` VALUES ('test', MD5('test'));
CREATE TABLE user_group ( user_name char(100) NOT NULL, user_group char(100) NOT NULL, PRIMARY KEY (user_name,user_group) );
INSERT INTO `user_group` VALUES ('test', 'test-group');
</pre>
<p>Note the length of the user_passwd field. 100 characters is probably more than needed, but you will definitely need more than the 20 characters suggested in some documentation if you want to use a password hash like MD5. (If the field is too short, then password hashes will get truncated when they&#8217;re inserted into the database.)</p>
<p>Then I configured mod_auth_mysql to authenticate users for my whole domain. In the appropriate virtual host configuration file (e.g., /etc/apache2/sites-enabled/000-default) I added the following&#8230;</p>
<pre>
&lt;Location /&gt;

# these lines force authentication to fall through to mod_auth_mysql
AuthBasicAuthoritative Off
AuthUserFile /dev/null

# begin auth_mysql configuration
AuthMySQL On
AuthMySQL_Host localhost
AuthMySQL_User auth_user
AuthMySQL_Password XXX
AuthMySQL_DB auth
AuthMySQL_Password_Table user_info
AuthMySQL_Username_Field user_name
AuthMySQL_Password_Field user_passwd
AuthMySQL_Empty_Passwords Off
AuthMySQL_Encryption_Types PHP_MD5
AuthMySQL_Authoritative On
#AuthMySQL_Non_Persistent Off
#AuthMySQL_Group_Table user_group
#AuthMySQL_Group_Field user_group

# generic auth configuration
AuthType Basic
AuthName "auth_mysql test"
Require valid-user

&lt;/Location&gt;
</pre>
<p>Note the &#8220;PHP_MD5&#8221; encryption type. (Some of the documented encryption types don&#8217;t seem to be available, e.g., &#8220;MD5&#8221;.)</p>
<p>Then&#8230;</p>
<pre>
$ sudo apache2ctl -t # check syntax
$ sudo apache2ctl restart
</pre>
<p>Then when browsing to the host, I get an authentication challenge, and can log in with username &#8220;test&#8221; and password &#8220;test&#8221;.</p>
<p><strong>Using mod_authn_dbd Instead</strong></p>
<p>There is another way to get Apache to use a relational database to look up usernames and passwords when authenticating &#8211; mod_authn_dbd. That module seems more current and has up-to-date documentation, see e.g., <a href="http://httpd.apache.org/docs/2.2/mod/mod_authn_dbd.html">the Apache 2.2 mod_authn_dbd module docs</a> and <a href="http://httpd.apache.org/docs/2.2/misc/password_encryptions.html">the Apache 2.2 docs on password encryption</a>.</p>
<p>Note however that you cannot use normal MD5 encryption to store passwords in the database with this module. If you want to use MD5 you have to use the special Apache MD5 algorithm.</p>
<p>Also note that to get this working with MySQL you will need to install the MySQL driver for DBD, which you can do via APT:</p>
<pre>
$ sudo apt-get install libaprutil1-dbd-mysql
</pre>
<p>If you get a message like &#8220;DBD: Can&#8217;t load driver file apr_dbd_mysql.so&#8221; then this is what you need to do &#8211; don&#8217;t believe the articles that tell you you need to recompile APR <img src='http://s0.wp.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> </p>
<br />Filed under: <a href='http://alimanfoo.wordpress.com/category/uncategorized/'>Uncategorized</a>  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/109/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=109&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2010/11/17/apache-authentication-and-mysql/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Configure Exim4 on Ubuntu to use GMail as Smart Host</title>
		<link>http://alimanfoo.wordpress.com/2010/02/03/configure-exim4-on-ubuntu-to-use-gmail-as-smart-host/</link>
		<comments>http://alimanfoo.wordpress.com/2010/02/03/configure-exim4-on-ubuntu-to-use-gmail-as-smart-host/#comments</comments>
		<pubDate>Wed, 03 Feb 2010 00:16:53 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[ubuntu]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=100</guid>
		<description><![CDATA[This is just a short post to say that, to configure exim4 to use gmail as a smart host on Ubuntu 9.04, I did only the following, and no more&#8230; user@host:~$ sudo dpkg-reconfigure exim4-config Choose mail sent by SMARTHOST; received via SMTP or fetchmail. Machine handling outgoing mail for this host (smarthost): smtp.gmail.com::587 (All other [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=100&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This is just a short post to say that, to configure exim4 to use gmail as a smart host on Ubuntu 9.04, I did only the following, and no more&#8230;<br />
<code><br />
user@host:~$ sudo dpkg-reconfigure exim4-config<br />
</code><br />
Choose mail sent by SMARTHOST; received via SMTP or fetchmail.<br />
Machine handling outgoing mail for this host (smarthost):<br />
<code><br />
smtp.gmail.com::587<br />
</code><br />
(All other questions I left as default.)<br />
Then&#8230;<br />
<code><br />
user@host:~$ sudo emacs /etc/exim4/passwd.client<br />
</code><br />
&#8230;and add the following line:<br />
<code><br />
*:yourAccountName@gmail.com:y0uRpaSsw0RD<br />
</code><br />
Please note, I know next to nothing about exim4 configuration, so <a href="http://wiki.debian.org/PkgExim4UserFAQ#Recommendableandnot-so-recommendablethird-partydocumentation"><em>caveat emptor</em></a>.  </p>
<br />Filed under: <a href='http://alimanfoo.wordpress.com/category/ubuntu/'>ubuntu</a>  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/100/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=100&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2010/02/03/configure-exim4-on-ubuntu-to-use-gmail-as-smart-host/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>CGGH and Data-Sharing Networks: Background</title>
		<link>http://alimanfoo.wordpress.com/2010/01/08/cggh-and-data-sharing-networks-background/</link>
		<comments>http://alimanfoo.wordpress.com/2010/01/08/cggh-and-data-sharing-networks-background/#comments</comments>
		<pubDate>Fri, 08 Jan 2010 14:45:30 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[chassis]]></category>
		<category><![CDATA[data-sharing]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=94</guid>
		<description><![CDATA[This post provides a bit of background to my current work on research data-sharing networks, as a member of the Centre for Genomics and Global Health (CGGH). Centre for Genomics and Global Health The Centre for Genomics and Global Health (CGGH), a joint research programme of Oxford University and the Wellcome Trust Sanger Institute, is [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=94&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This post provides a bit of background to my current work on research data-sharing networks, as a member of the <a href="http://cggh.org">Centre for Genomics and Global Health (CGGH)</a>. </p>
<p><strong>Centre for Genomics and Global Health</strong></p>
<p>The <a href="http://cggh.org">Centre for Genomics and Global Health (CGGH)</a>, a joint research programme of Oxford University and the Wellcome Trust Sanger Institute, is tasked with providing support for data-sharing networks that enable clinicians and researchers around the world to collaborate effectively on large-scale research projects.</p>
<p><strong>MalariaGEN</strong></p>
<p>The largest data-sharing network that CGGH currently supports is <a href="http://malariagen.net">MalariaGEN</a>, the Malaria Genomic Epidemiology Network. MalariaGEN is a partnership of researchers in 21 countries who are using genomic epidemiology to understand how protective immunity against malaria works, which is a fundamental problem in malaria vaccine development. MalariaGEN has been operational since 2008.</p>
<p>CGGH acts as the MalariaGEN Resource Centre, providing scientific and operational support for MalariaGEN&#8217;s research and training activities. A key aspect of this operational support is the design, development and hosting of Web-based information systems that are used by MalariaGEN to manage data shared by MalariaGEN&#8217;s research partners. CGGH previously developed and currently hosts a system called <strong>Topheno</strong>, which is the system used by MalariaGEN to manage data sharing. Many lessons have been learned in the development and use of Topheno, and much of my current work is building on that experience.</p>
<p><strong>WWARN</strong></p>
<p>A second data-sharing network that CGGH supports is <a href="http://wwarn.org">WWARN</a>, the World-Wide Antimalarial Resistance Network. WWARN is a global collaboration working to ensure that anyone affected by malaria receives effective and safe drug treatment. WWARN&#8217;s aim is to provide quality-assured intelligence, based on the balance of currently-available scientific data, to track the emergence of malarial drug resistance. WWARN is due to begin operations in the first half of 2010.</p>
<p>CGGH has responsibility for WWARN&#8217;s scientific informatics module, which includes in its scope the design, development and hosting of Web-based information systems to support WWARN&#8217;s data-sharing operations. These systems are currently under development.</p>
<p><strong>Common Features of WWARN and MalariaGEN</strong></p>
<p>There are some key similarities between WWARN and MalariaGEN. </p>
<p>In both cases, the operational workflow begins with the submission of original research data, usually by a researcher who is/was involved in the study from which the data originates, acting from their host institution (usually a university).</p>
<p>In both cases, data are submitted to the network from a distributed community of researchers. In the case of MalariaGEN, the set of researchers submitting data to MalariaGEN is delimited by the set of partners who have signed up to one of MalariaGEN&#8217;s <a href="http://www.malariagen.net/home/science/consortial-projects.php">Consortial Projects</a>. For WWARN, the set of researchers submitting data is envisaged to be slightly more open-ended, with researchers submitting any original data that is relevant to one of WWARN&#8217;s <a href="http://www.wwarn.org/home/modules">four scientific modules</a>.</p>
<p>In both cases, data are <strong>not</strong> primarily captured for submission to MalariaGEN or WWARN, but are captured as part of an independently funded original research study. Each study from which data originates has its own scientific objectives, which may be related to the objectives of the data-sharing network, but if so are usually more specific and finer-grained. The subjects for each original study are usually drawn from at most a handful of locations within a single country. The data-sharing networks then work to aggregate the data from many independent studies, in a reliable and scientifically valid manner, to conduct coarser-grained analyses across larger scales of time, space and biology than are considered in any one original study.</p>
<p>This last point has a number of important consequences. For example, because data are being primarily captured for an original research study, and not for the data-sharing network&#8217;s secondary analyses, the network is not in a position to mandate the manner or format of data collection and representation. Data may be collected for a range of purposes using different means and a diversity of representations. The data-sharing network must learn to deal with this heterogeneity, and this forms a large part of the network&#8217;s data management operations. </p>
<p>Also, because the data-sharing network is not the primary-endpoint for the data, those involved in the secondary analysis of shared data typically have to cajole researchers into submitting their data, because doing so means time out from their primary research activities. Therefore, the data-sharing network wants to minimise the obstacles it presents to those submitting data, and to find ways in which they can add value for the submitters&#8217; primary research, even though that research will not be perfectly aligned with the secondary research activities and goals of the network as a whole.</p>
<p><strong>Other Data-Sharing Networks</strong></p>
<p>In addition to MalariaGEN and WWARN, CGGH is also involved in supporting an informal network of researchers working on the malaria parasite (Plasmodium) genomes. Here the main focus is on generating and analysing detailed genome sequence data using next-generation sequencing technology, although there may also be a need to share and aggregate other, related data. Finally, CGGH is involved in the <a href="http://www.modmedmicro.ac.uk">UKCRC Modernising Medical Microbiology project</a>, which may involve management of data from a number of different sources, although some of these sources will have been collected for health reasons and not for research.</p>
<p>Thus, involvement in data-sharing networks is a fundamental feature of CGGH&#8217;s activities. Although CGGH&#8217;s involvement is far from limited to informatics, and also encompasses sample and data management, statistics, ethics and programme management, nevertheless a key responsibility is the development of Web-based information systems that support the operational activities of a data-sharing network. It is also worth noting that the development or extension of those systems is often the rate-limiting step in establishing a data-sharing network or enabling it to adapt to a new type of data or analysis.</p>
<p><strong>Generic Information Systems for Data-Sharing Networks</strong></p>
<p>It is thus of urgent strategic importance to CGGH to identify those requirements for information systems that are common across these data-sharing networks. Once these requirements are understood, we need to identify a set of existing software and services that can be adopted and deployed to fulfil those common requirements. The underlying driver is to minimise the amount of time and effort spent on designing, developing and running common infrastructure, and thus make available as much effort as possible to deal with those requirements that are unique to the scientific activities of a particular data-sharing network.</p>
<p><strong>What Next?</strong></p>
<p>We&#8217;re working to identify and document many of the key requirements that are known to be common at least between MalariaGEN and WWARN. This work should hopefully then provide a basis for finding and evaluating existing software and services, and for designing a reference architecture which provides the highest possible point of departure for developing information systems for each data-sharing network. We&#8217;re going to need lots of help with that, so please feel free to contact me if you think you might be able to help.</p>
<br />Posted in chassis, data-sharing  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/94/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/94/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/94/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=94&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2010/01/08/cggh-and-data-sharing-networks-background/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>REST-not-so-easy? Data-Sharing Networks and the Atom Publishing Protocol</title>
		<link>http://alimanfoo.wordpress.com/2009/12/15/rest-not-so-easy-data-sharing-networks-and-the-atom-publishing-protocol/</link>
		<comments>http://alimanfoo.wordpress.com/2009/12/15/rest-not-so-easy-data-sharing-networks-and-the-atom-publishing-protocol/#comments</comments>
		<pubDate>Tue, 15 Dec 2009 16:08:35 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[atompub]]></category>
		<category><![CDATA[chassis]]></category>
		<category><![CDATA[data-sharing]]></category>
		<category><![CDATA[metadata]]></category>
		<category><![CDATA[web services]]></category>
		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=83</guid>
		<description><![CDATA[Data-Sharing Networks Recently I&#8217;ve been involved in designing a software system (dsn-chassis) to support data-sharing for the World-Wide Antimalarial Resistance Network (WWARN). We&#8217;re also involved in developing and maintaining software for other data-sharing networks, such as MalariaGEN. Thus we have a vested interest in figuring out what requirements these data-sharing networks have in common, so [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=83&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><strong>Data-Sharing Networks</strong></p>
<p>Recently I&#8217;ve been involved in designing a software system (<a href="http://dsn-chassis.googlecode.com">dsn-chassis</a>) to support data-sharing for the <a href="http://www.wwarn.org/">World-Wide Antimalarial Resistance Network (WWARN)</a>. <a href="http://cggh.org/">We&#8217;re</a> also involved in developing and maintaining software for other data-sharing networks, such as <a href="http://www.malariagen.net/">MalariaGEN</a>. Thus we have a vested interest in figuring out what requirements these data-sharing networks have in common, so we can identify common standards and software components that can be adapted and re-used. Naturally, we want to minimise the overall effort of building and maintaining supporting infrastructure for each new data-sharing community. </p>
<p><strong>Applications and Services</strong></p>
<p>One shift in perspective that I&#8217;ve encouraged is a move from viewing the software required to support a data-sharing network as a single, monolithic, web application, to viewing the software as a collection of web services and applications that are loosely coupled via open protocols and formats. The hope is that this view will make it easier to identify the most generic components of the required infrastructure, such as metadata persistence and query, or authentication and authorisation. We could then adopt service protocols and formats that give us the broadest possible choice of re-using and building on existing open-source software, the smallest possible dependency or lock-in to any one vendor or product, and the flexibility to extend and customise to cope with those requirements that are unique to a given data-sharing network.</p>
<p>That&#8217;s the ideal, anyway. </p>
<p><strong>Metadata Persistence and Query</strong></p>
<p>One capability that is common to the data-sharing networks we&#8217;re involved in is management of metadata relating to scientific data being shared within the network. This metadata includes, among other things, information about the study from which the data originates, such as the scientific protocols (i.e., procedures) that were used in generating the data. This metadata can be complicated, and varies substantially between different types of experiment. It is, however, needed to evaluate the quality and comparability of data from different studies, which is a prerequisite for aggregating those data in a sensible way. To get a sense for the type of metadata that needs to be captured, check out an <a href="http://dsn-chassis.googlecode.com/svn/trunk/wwarn/client/gwt-test/static/demo/questionnaire/study/2/index.html">early prototype of the study questionnaire being developed for WWARN</a>. </p>
<p>Although the exact nature of this metadata will differ from one data-sharing network to the next, the basic capabilities to persist (<a href="http://en.wikipedia.org/wiki/Create,_read,_update_and_delete">CRUD</a>) and query those metadata are common. Hence, one of the first things we&#8217;ve done is to take a look at various APIs and protocols for persisting and querying arbitrary packages of metadata, and their available implementations. </p>
<p><strong>Atom Publishing Protocol</strong></p>
<p>One protocol that stood out was the <a href="http://www.atomenabled.org/developers/protocol/atom-protocol-spec.php">Atom Publishing Protocol</a> and the related <a href="http://www.atomenabled.org/developers/syndication/atom-format-spec.php">Atom Format</a>, for a number of reasons. First, it follows the <a href="http://www.ics.uci.edu/~fielding/pubs/dissertation/rest_arch_style.htm">REST architectural style</a>, which implies a number of <a href="http://en.wikipedia.org/wiki/Representational_State_Transfer#Constraints">helpful constraints</a>. Second, there are at least a few reasonably mature open-source implementations (e.g., <a href="http://exist.sourceforge.net/">eXist</a>, <a href="http://atomserver.codehaus.org/">AtomServer</a>) that are geared not just towards blog publishing but to arbitrary data and metadata management. Third, big players like <a href="http://code.google.com/apis/gdata/">Google</a> and <a href="http://www.infoq.com/news/2008/03/microsoft-atompub">Microsoft</a> are standardising on Atom as a basis for their Web service APIs &#8211; not that I like to flock, but reading the Google Data APIs especially gave me some confidence that many of the difficult practical issues have already been encountered and solutions found.</p>
<p><strong>REST-easy?</strong></p>
<p>One of the most compelling reasons I&#8217;ve found to use the REST style in designing a Web service API, and to follow the approach of mapping the four basic persistence-related operations (Create, Retrieve, Update, Delete) on to the appropriate HTTP verbs (POST, GET, PUT, DELETE), is simplicity and ease of implementation. A second reason is the ability to decouple the persistence protocol from the data model, at least to some extent. In my experience, these two factors in particular contribute to being able to rapidly develop prototypes, and to carry as little baggage as possible forward as those prototypes (and the underlying data models) inevitably evolve towards a production system.</p>
<p>We&#8217;ve also been working with the <a href="http://exist.sourceforge.net/">eXist open-source XML database</a>, which implements AtomPub out of the box, as our metadata persistence service implementation. One of the nice things about eXist is that it will store whatever XML you throw at it, so you can fiddle with your <a href="http://www.snellspace.com/wp/?p=314">Atom content extensions</a> to your heart&#8217;s content without ever needing to touch the server-side code or configuration. Another nice feature is that it supports Web service endpoints implemented as XQuery scripts, which is a very convenient way to add a wide range of query service capabilities to the data and metadata you&#8217;ve stored via the Atom service.</p>
<p>Our current, early, prototype of a data management system for WWARN uses eXist as the Atom service implementation, and a GWT application that we&#8217;ve rolled ourselves as the user-interface.</p>
<p><strong>REST-not-so-easy?</strong></p>
<p>While this approach has proved excellent during the development of early prototypes, there are a number of challenges approaching in moving the system towards production strength that are causing me some consternation. My main reason in writing this article is to highlight those issues, in the hope that others can provide some helpful ideas and advice. </p>
<p>I (somewhat pithily) entitled this article &#8220;REST-not-so-easy&#8221; because, while some of these issues <strong>are</strong> specific to AtomPub, others seem common to <strong>any</strong> Web service API based on the REST style. It could very well be that I&#8217;m missing a few rather obvious and simple solutions; I rather hope that&#8217;s the case.</p>
<p>So, on to the issues&#8230;</p>
<p><strong>Link Expansion</strong></p>
<p>When building our user interface, we commonly found that a single view required us to retrieve data not just from a single Atom entry, but rather from a graph of linked Atom entries. For example, in our data model, <code>Dataset</code> entries are linked to <code>Study</code> entries, which represent the study from which the dataset originates. When the user is viewing a dataset, they also want to view some basic information about the linked study, such as its title and summary, in addition to a hyperlink that allows them to navigate to further information about the study. </p>
<p>The client application can, of course, perform two HTTP GET operations to retrieve this simple graph of two linked Atom entries, which is easy enough. However, we have other examples where the graph to be retrieved has a depth of 2 or more. In these cases, implementing the client application becomes far simpler if the whole graph can be retrieved with a single HTTP GET operation, rather than many. A single GET is also better for latency, which is an important concern for us where users may be located in parts of the world with poor network bandwidth.</p>
<p>A workaround for this that we cooked up early-on was to develop query services that returned a single, root Atom entry as the result, with the required links outbound from and inbound to the target Atom entries expanded inline, within the <code>atom:link</code> element. I was initially unsure of the sanity of this approach, especially given that including an <code>atom:entry</code> directly within an <code>atom:link</code> element does seem to break the <a href="http://www.atomenabled.org/developers/syndication/atom-format-spec.php">Atom Format Spec</a>. However, I found some discussion from folks at <a href="http://blogs.msdn.com/astoriateam/archive/2008/02/18/related-entries-and-feeds-links-and-link-expansion.aspx">Microsoft and Google</a> on doing inline link expansion, which gave me some confidence that this idea isn&#8217;t so crazy. I also found <a href="http://www.imc.org/atom-syntax/mail-archive/msg20448.html">a suggestion</a> that returning the graph of linked Atom entries as a feed, rather than a single Atom entry with links expanded inline, was a more elegant way to go, which I have some sympathy for. However, we&#8217;ve gone with the inline expansion approach for now, which is working well so far. For example implementations using XQuery, see <a href="http://code.google.com/p/dsn-chassis/source/browse/branches/generic/service/exist-1.4.0rc-rev10028/WebContent/query/datasets.xql">chassis dataset query service</a> and the supporting <a href="http://code.google.com/p/dsn-chassis/source/browse/branches/generic/service/exist-1.4.0rc-rev10028/WebContent/query/chassis-functions.xqm">function library</a>.</p>
<p>There is an obvious gotcha here, which is that if you retrieve an Atom entry with links expanded inline, then PUT that entry back to the edit link URL, you will end up storing the linked entries inline too, which leads to all sorts of interesting errors. So, any view that needs to do a PUT must first retrieve a fresh, unexpanded representation of the entry, before it can be modified and PUT back to the server.</p>
<p>I&#8217;ve also glanced at <a href="http://www.infoq.com/articles/atomserver2">an article on AtomServer</a> which describes the idea of <a href="http://atomserver.codehaus.org/docs/aggregate_feeds.html">aggregate feeds</a> created by joining separate collections. It looks like this might provide a similar capability, but in a quite different way. However, if I have understood the idea correctly, it looks like you could only ever fetch a graph of entries 1 deep.</p>
<p><strong>Referential Integrity; Broken Links</strong></p>
<p>Our simple <a href="http://www.flickr.com/photos/londonbonsaipurple/4187188603/sizes/l/">data model</a> currently comprises entities such as <code>Study</code>, <code>Dataset</code>, <code>Data File</code>, <code>Submission</code> and <code>Review</code>. There are, of course, associations between these entities, such as an association between a <code>Dataset</code> and the <code>Study</code> from which it originated, as mentioned above. When we map our logical data model onto Atom, each type of entity maps onto a distinct collection of Atom entries. Each type of entity also gets its own Atom content extension, which means that we stick the entity data into some XML nested in the <code>atom:content</code> element of the Atom entry. Associations between entities are mapped onto links between Atom entries. I.e., we use the <code>atom:link</code> element to represent associations between entities, where the value of the <code>rel</code> attribute represents the type of the association.</p>
<p>To create a new association, a client retrieves a representation of the entry that is the source (i.e., subject) of the association, adds a new <code>atom:link</code> element with the appropriate <code>rel</code> attribute (describing the association type) and <code>href</code> attribute (pointing to the target/object of the association), and then PUTs the entry representation back to the entry&#8217;s edit link URL.</p>
<p>By default, eXist&#8217;s Atom implementation does not do any referential integrity checks on the links in an Atom entry. This is perfectly sensible, because no integrity constraints have been declared, and because the link could equally point to an Atom entry anywhere in the Web, not just other Atom entries located on the same service.</p>
<p>However, if an Atom entry that is the target of one or more links is deleted, then all of those links will be broken. How should the service deal with this, if at all? Is it OK to do as the Web does, and leave it up to the client (and the user) to deal with broken links? Or should the service be a bit smarter, e.g. by noisily preventing deletion of entries that are link targets, or by silently deleting links to deleted targets, or by some other mechanism? What about new links that are created to non-existent targets? Should those be prevented somehow? And should the service differentiate between links whose targets are entries hosted by the same Atom service and links whose targets are elsewhere in the Web? If so, how? I.e., are there different kinds of referential integrity that need to be considered?</p>
<p>Our current, very tentative, position is to do as the Web does, and leave it to the client to handle broken or bad links. The client will typically do this by simply notifying the user of a broken link. The client will provide the facility to allow the user to fix broken links, but what the user does next is up to them. However, we haven&#8217;t implemented any of this functionality in the client application yet, so I don&#8217;t know what the consequences will be.</p>
<p><strong>Transactions</strong></p>
<p>Sometimes, what is a single operation from the user workflow point of view, such as creating a study, or creating a dataset, or updating a study with new information, maps onto a single Atom Protocol request, such as an HTTP POST or PUT. However, often a single user operation maps onto multiple Atom Protocol requests. For example, creating a new revision of a data file involves four HTTP requests, which are a POST (create a new media entry that represents the data file revision), a PUT (add some metadata about the media entry, e.g., the original file name), a GET (retrieve a fresh representation of the data file entry), and finally a PUT (link the data file entry to the new revision). </p>
<p>Any of these requests could fail, leaving the data in an inconsistent state. How should we handle this possibility?</p>
<p>The obvious answer is to provide some sort of transactional capability, such that the client can invoke these requests within the context of a single transaction that will either completely succeed or fail with no change to the data. But that begs two further questions: how do you add transactions to the Atom Protocol, and how do you implement them?</p>
<p>In my not-very-extensive searching of the Web, I have not found much in the way of discussion and/or implementation of transactional capabilities for REST-style web service APIs, except for some work on <a href="http://community.jboss.org/wiki/transactionalsupportforjaxrsbasedapplications">transactional support for JAX RS based applications</a>. An alternative to a protocol extension is simply to design the Web service API to expose only those operations that are atomic from the user workflow point of view, then handle transaction-type issues behind the scenes. But that would mean leaving AtomPub behind and layering another API on top, which seems to completely defeat the purpose of going for REST/AtomPub in the first place. </p>
<p>The other option is, of course, to leave it up to the client to deal with inconsistent data, which might include trying to clean it up automatically or simply notifying the user of problems and leaving the rest up to them, as with the discussion of broken links above. But that would leave open all sorts of weird and wonderfully wrong possible states of the system. </p>
<p>If someone were to point me to an extension to the Atom Protocol that allows multiple Atom requests to be carried out in the context of a single transaction, and an existing, mature, open-source implementation, I would be very happy .. I think.</p>
<p><a href="http://atomserver.codehaus.org/docs/batching.html">AtomServer has a capability to combine several Atom requests into a &#8220;batch&#8221;</a>, via a custom extension to the Atom Protocol, based on the <a href="http://code.google.com/apis/gdata/docs/batch.html">batch processing capability in the Google Data APIs</a>. Although I couldn&#8217;t find any mention of adding transactional processing of an entire batch in the AtomServer documentation, it does mention that a batch is processed as a single database operation, and so it shouldn&#8217;t be too difficult to wrap that with a transaction. I&#8217;m not sure how that would impact the AtomServer batch response model, however. </p>
<p><strong>Granularity</strong></p>
<p>The Atom Publishing Protocol, as with any CRUD-style API, is relatively coarse-grained, in the sense that if you want to update any field of an entry, you have to update the whole thing at once. This coarse-grained nature is good from one point of view, because it means that the details of the data model are kept separate from the design of the protocol. The data model can then evolve without needing any change to the protocol specification or implementation. However, it does mean that the protocol can be quite inefficient and wasteful in terms of network bandwidth. I.e., if you&#8217;ve stored a reasonable amount of data in a single Atom entry, but all you want to do is change the title, you&#8217;ll have to PUT the whole thing. This can mean noticeable latency in the client application, especially where network bandwidth is poor.</p>
<p>On the face of it, this issue is less serious than dealing with broken links or inconsistent data, but it does make other protocols that provide the ability to submit a fine-grained change set or change request for a particular data or metadata record (such as the <a href="http://n2.talis.com/wiki/Platform_API">Talis Platform API</a>) start to look quite appealing.</p>
<p>When you delve a bit deeper, this issue also interacts with the issue of authorisation and access control, which I&#8217;ll come to below.</p>
<p><strong>Authentication</strong></p>
<p>I&#8217;ll just mention this briefly to say that, for us, authentication is not an issue. We use a <a href="http://static.springsource.org/spring-security/site/docs/2.0.x/reference/springsecurity.html">Spring Security</a> filter to implement authentication using HTTP Basic in the development environment, and <a href="http://webauth.stanford.edu/">webauth</a> in production.</p>
<p>I have seen articles on inventing new authentication protocols for Atom to work around restrictions imposed by their server environment, but those restrictions don&#8217;t apply to us.</p>
<p>As a side note, I suspect there is a bug in the authentication code in the eXist Atom servlet, as we had some pain trying to turn it off and use Spring Security for authentication instead (for HTTP Basic authentication, we had to add a filter that removes the <code>Authorization</code> header before it gets to the eXist Atom servlet, otherwise the servlet&#8217;s own authentication process was activated), but I haven&#8217;t pinpointed it yet. I&#8217;d like to see eXist factor out and consolidate all authentication code from its Atom and XQuery servlets to dedicated filters, which would make for easier integration with Spring Security or other authentication frameworks .. but that&#8217;s another story.</p>
<p><strong>Security, Authorisation and Access Control</strong></p>
<p>I&#8217;ve saved the best for last.</p>
<p>Within a data-sharing network, typically different roles are defined, with different privileges/permissions/authorities/rights/acls/&#8230; </p>
<p>For example, for WWARN we have defined a <code>submitter</code> role, which is a person who wants to share original research data with WWARN; a <code>gatekeeper</code> role, who performs an initial review of submitted data and decides whether or not the data should be accepted for curation; a <code>curator</code> role whose job is to clean up and standardise data submitted from different studies so that it can be sensibly aggregated; a <code>coordinator</code> who oversees the operations of the data-sharing network; and an <code>administrator</code> who installs, configures and maintains the systems.</p>
<p>There are things people should be able to do. For example, a <code>submitter</code> creates studies, datasets, and data files, and can submit a dataset to the network. A <code>gatekeeper</code> reviews submissions and assigns a curator if the submission is accepted. A <code>curator</code> creates derived data files and reviews curated data for validity and conformance with standard data dictionaries.</p>
<p>There are, of course, things people should not be able to do. For example, a <code>submitter</code> cannot review their own submission or decide that their submission should be accepted by the network. Neither can a <code>submitter</code> assign a curator to their submission. These capabilities are allowed for the <code>gatekeeper</code> only.</p>
<p>These permissions can be implemented at the user-interface level. I.e., the user-interface can expose only those functionalities that are permitted for the user&#8217;s role(s). However, if the client application is using a Web service to implement some or all of these operations, then the Web service API must also have appropriate constraints, otherwise users would be able to hack the API and do things they shouldn&#8217;t. If the Web service API is based on Atom, then the problem we have is how to implement the appropriate authorisation constraints in a way that works with Atom and existing implementations. </p>
<p>Some constraints are simple to implement. For example, the constraint that only a <code>submitter</code> can create a study can be implemented by allowing only POST requests to the <code>Studies</code> Atom collection URL if the user has the <code>submitter</code> role. I.e., constraints that can be mapped onto a specific class of CRUD operation on a specific Atom collection with a specific role are straightforward, and could, for example, be implemented using a Spring Security filter and URL patterns.</p>
<p>Others are not so simple. For example, a <code>submitter</code> who creates a new study becomes the owner of that study. The original owner may also grant ownership to other <code>submitters</code>; the owner(s) of a study are the only people who can update information about that study. This begs the questions, how do we represent entry-specific access control constraints in the Atom Protocol and/or Format, and what implementations (if any) are available?</p>
<p>The eXist database does have <a href="http://exist.sourceforge.net/atompub.html#N1019F">its own solution for Atom security</a>. You can declare access control constraints for a specific Atom entry by including an <code>exist:permissions</code> element within the Atom entry. However, this presents three problems for us. First, it requires that we use the eXist database of users, but we have to integrate with another, external database of users, so we would have to keep those two synchronised. Second, it is based on the Unix/Linux model of file system permissions, which is too inflexible. For example, we want to enable a user to grant arbitrary permissions for a given entry to arbitrary collections of specified users. Having a single owner/group for each entry means you cannot do this sort of thing. Third, the format is specific to eXist. If we buy into their permissions format, we will have a job to port it to another atom implementation such as AtomServer, if we need to do that at some point in the future.</p>
<p>eXist also has an <a href="http://exist.sourceforge.net/xacml.html">XACML sub-system</a>, but the <a href="http://exist.sourceforge.net/xacml-features.html">capabilities</a> do not obviously map onto our requirements for access control to Atom collections and member entries.</p>
<p>The Google Data APIs have a different approach. See for example the <a href="http://code.google.com/apis/calendar/data/2.0/developers_guide_protocol.html#SharingACalendar">Google Calendar API section on sharing calendars</a>. Each access control rule is an Atom entry. There is one collection of access control rule entries for each calendar. Each access control rule has a &#8220;scope&#8221;, which typically specifies a user, and a &#8220;role&#8221;, which is typically a permission such as &#8220;read&#8221; or &#8220;editor&#8221;. See also the <a href="http://code.google.com/apis/calendar/data/2.0/reference.html#gacl_reference">gAcl namespace reference</a>. </p>
<p>The Google approach has the flexibility we need, but I&#8217;m baulking at implementing something like this ourselves on top of eXist or another Atom implementation.</p>
<p>And there is another issue here. It&#8217;s not hard to imagine situations where you might want different users to be able to update only specific parts of an Atom entry, but not the whole thing. I.e., you might want to have fine-grained, within-entry access control rules for different users. This sort of thing can be handled in a purely Java environment, for example, using <a href="http://static.springsource.org/spring-security/site/docs/2.0.x/reference/ns-config.html#ns-method-security">Spring method security</a> and <a href="http://static.springsource.org/spring-security/site/docs/2.0.x/reference/domain-acls.html">Spring domain object security</a>. However, it&#8217;s not obvious how to implement this sort of thing in a general way for REST/Atom, and so we&#8217;ve deliberately designed our data model to avoid needing this kind of rule. I.e., rules are only ever defined at the granularity of a single entry, nothing finer. This means that authorisation considerations have a significant impact on the design of the data model. Not necessarily an issue, but interesting nonetheless.</p>
<p>We need a way forward here, which means minimum coding for us (we want to focus our effort on the applications, rather than the underlying services), and maximum simplicity and portability. </p>
<p><strong>Any Thoughts?</strong></p>
<p>If you&#8217;ve read this and have any thoughts, ideas or suggestions about any of the issues above, really anything at all, no matter how trivial, please do <a href="mailto:alimanfoo@gmail.com">drop me an email</a>, or add a comment to this article, or join the <a href="http://groups.google.com/group/dsn-chassis">dsn-chassis group</a> and post a message there. Thanks for reading.</p>
<br />Posted in atompub, chassis, data-sharing, metadata, web services, xml  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/83/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/83/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/83/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=83&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/12/15/rest-not-so-easy-data-sharing-networks-and-the-atom-publishing-protocol/feed/</wfw:commentRss>
		<slash:comments>10</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>DC2009 Talk Notes &#8211; Towards Semantic Web Deployment: Experiences with Knowledge Organisation Systems, Library Catalogues, and Fruit Flies</title>
		<link>http://alimanfoo.wordpress.com/2009/10/15/dc2009-talk-notes-towards-semantic-web-deployment-experiences-with-knowledge-organisation-systems-library-catalogues-and-fruit-flies/</link>
		<comments>http://alimanfoo.wordpress.com/2009/10/15/dc2009-talk-notes-towards-semantic-web-deployment-experiences-with-knowledge-organisation-systems-library-catalogues-and-fruit-flies/#comments</comments>
		<pubDate>Thu, 15 Oct 2009 02:37:07 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[semanticweb]]></category>
		<category><![CDATA[dublincore]]></category>
		<category><![CDATA[drosophila]]></category>
		<category><![CDATA[flyweb]]></category>
		<category><![CDATA[conference]]></category>
		<category><![CDATA[rda]]></category>
		<category><![CDATA[rdf]]></category>
		<category><![CDATA[malaria]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=81</guid>
		<description><![CDATA[First, let me say that it has been my pleasure to attend Dublin Core conferences since 2005 in Madrid. Thanks to the organisers here for putting on a great conference, and for inviting me to give this talk, it is a real honour. I will post notes from this talk on my blog at purl.org/net/aliman [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=81&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>First, let me say that it has been my pleasure to attend Dublin Core conferences since 2005 in Madrid.  </p>
<p>Thanks to the organisers here for putting on a great conference, and for inviting me to give this talk, it is a real honour.</p>
<p>I will post notes from this talk on my blog at purl.org/net/aliman &#8211; so don&#8217;t worry if you miss anything.</p>
<p>It is also excellent timing, as after at least 5 years of talking about it, I can finally say that, on 18 August 2009 SKOS &#8211; the Simple Knowledge Organisation System &#8211; was published as a W3C Recommendation, thanks in large part to the experience and support of this community.</p>
<p>I&#8217;d like to talk more about SKOS later in this presentation.</p>
<p>Before I get into the main body of my talk, let me say first that, one of the lessons I&#8217;ve learned in working on Semantic Web Deployment, especially in the last 2 years, is that big ideas can look great when viewed from a distance, but the reality on the ground will always be far more complex and surprising than you anticipate, as I&#8217;m sure many of you can testify to.</p>
<p>In particular, the cost/benefit trade-offs for investing in a particular technology or approach can vary wildly from one situation to the next. Nothing is more important than keeping an open mind.</p>
<p>So, although I love talking about these big ideas, I&#8217;m going to try to put them to one side for this morning (although, I&#8217;m sure I&#8217;ll still be tempted by one or two). Instead, I&#8217;d like to simply share a few experiences, and hopefully give you at least a flavour of where some of the opportunities and the challenges might lie.</p>
<p>Now, given that we&#8217;ve been talking about metadata for at least two days now, I thought I&#8217;d spend a few minutes talking about something completely different. </p>
<h3>Fruit Flies</h3>
<p>These are fruit flies, of the species Drosophila melanogaster.</p>
<p>On the left is a male, on the right a female. Females are about 2.5 millimeters long, males are slightly smaller.</p>
<p>When I first saw this image, I thought it was a photograph. But, when I looked closer, I noticed it was in fact a drawing, and it was signed &#8220;E Wallace&#8221;.  </p>
<p>Edith Wallace, I found out, was curator of stocks and artist for Thomas Hunt Morgan, who c. 1908 began using fruit flies in experimental studies of heredity at Columbia University.</p>
<p>Heredity simply means the passing of biological traits, such as green eyes or brown hair, from parents to offspring.</p>
<p>I&#8217;d like to talk more about Thomas Hunt Morgan, but before I do, it&#8217;s worth noting that when Charles Darwin published his book On The Origin of Species by Means of Natural Selection in 1859, which of course depends in its central argument on the inheritance of traits from one generation to the next, the underlying mechanisms of that inheritance, i.e., of heredity, were completely unknown. </p>
<p>It was, of course, Gregor Mendel&#8217;s work on pea plants, published first in 1865, but not widely known until the turn of the 20th century, which first suggested that organisms inherit traits in a discrete, distinct way. It is these discrete units or particles of inheritance that we now call genes.</p>
<p>However, at the turn of the century, although the theory of genes was accepted, it still was not known which molecules in the cell carried these genes.</p>
<p>It was T. H. Morgan&#8217;s aim to determine what these molecules were.</p>
<p>He needed a suitable animal to study, and chose Drosophila, primarily because of cost and convenience &#8211; they are cheap and easy to culture, have a short life cycle and lots of offspring. </p>
<p>Morgan was looking for heritable mutations to study, and spent a long time looking before he discovered a few flies with white instead of the usual red eyes. The white-eyed mutation only occurs in male flies, i.e., it is a sex-linked trait. The fact that the trait depends on the sex of the individual suggested to Morgan that the genes responsible for the mutation reside on the sex chromosome &#8230; and that the chromosomes generally are the carriers of genetic information. </p>
<p>Morgan and his students also discovered that some traits, such as wing length and eye shape, do not get mixed up randomly from one generation to the next, but rather tend to be inherited together. </p>
<p>They demonstrated that the reason why these traits tend to go together is because the genes responsible are on the same chromosome, and are in fact quite close together on the same chromosome, and hence tend to stay together when chromosomes recombine during formation of sperm and egg.</p>
<p>They then used these observations to construct the very first genetic maps, that is, maps of where different genes are in relation to each other on the chromosomes. This was the first crucial step in understanding how a genome is organised.</p>
<p>Morgan started something of a trend when he chose Drosophila, because for the last century Drosophila has been one of the most intensively studied model organisms.</p>
<p>It is fair to say that much of what we know today about genetics, and the molecular mechanisms underlying development, behaviour, aging, and many other biological processes common to flies and humans, we know from research on fruit flies.</p>
<p>In the last decade, that research has entered an entirely new phase.</p>
<p>In 2000, the complete genome sequence of Drosophila melanogaster was published. For the first time, we gained a complete picture of the location and DNA sequence of every gene in the genome. Publication of those data has unlocked entirely new methods and avenues of scientific investigation. </p>
<p>However, many questions remain unanswered. </p>
<p>What do all those genes actually do?</p>
<p>Where and when do they play a part in various biological processes?</p>
<p>And, especially, how do genetic differences between individuals relate to different biological outcomes? </p>
<p>E.g., translating that question from flies to humans, why do individuals with one genotype tend to be resistant to a disease such as malaria, whereas others don&#8217;t?</p>
<p>Answering these types of questions is the domain of functional genomics.</p>
<p>A functional genomic study typically asks, in relation to some biological process like development of sperm or the eye, &#8230;</p>
<p>What genes are active, where, when and how much? </p>
<p>Which genes are interacting with each other?</p>
<p>What happens if you stop a gene from working (knock it out), then restore it?</p>
<p>For example, this image shows that a particular gene called schumacher-levy is active during the developing Drosophila embryo, and is localised to a specific organ, in this case the developing gonad.</p>
<p>Other advances in biotechnology over the last decade have revolutionised this type of research.</p>
<p>For example, we now have a number of tools for carrying out high-throughput experiments. These are experiments where, rather than looking at just a handful of genes, we can look at 100s or even 1000s of genes at a time.</p>
<p>The bottom line: these high-throughput technologies, in addition to rapid advances in genome sequencing technology, generate a very large amount of highly heterogeneous data.</p>
<p>And our ability to generate new data on unprecedented scales is accelerating. Really, the pace of change is staggering.</p>
<p>To give you an idea of the rate and scale of new advances, consider that, in July 2008 the Wellcome Trust Sanger Institute, the primary institute for DNA sequencing in the UK, announced that, every two minutes, they produce as much DNA sequence as was deposited in the whole first five years of the international DNA sequence databases, from 1982 to 1987. </p>
<p>Finding, comparing, and integrating these data is a critical challenge for ongoing biological research in all organisms, and is especially critical in translating findings from model organisms such as Drosophila to human health.</p>
<p>Because this is such a critical problem, much excellent work has already gone into making Drosophila data publicly available and accessible.</p>
<p>For example, FlyBase provides access to primary genome sequence data on all sequenced Drosophila species. It is the primary reference point for all Drosophila genome-related data. </p>
<p>FlyBase also establishes a controlled vocabulary for all Drosophila genes, which is a vital tool in integrating data from multiple sources, because genes are often the point of intersection between data sources.</p>
<p>BDGP embryo in situ database is an example of a database holding the output of high-throughput functional genomic studies. They publish images that depict the expression of a gene within the developing fly embryo, at various stages during the course of embryo development, for thousands of genes. </p>
<p>FlyAtlas also publishes data from high-throughput experiments, but using a different technique. They use DNA microarrays to get quantitative data on gene expression in different tissues, so the data tell you not only whether a gene is active, but also how much it is being expressed.</p>
<p>Much of the focus has, to date, been on providing a direct user-interface to each source of data. So each data provider has a set of web-based tools for a human researcher to query and visualise the data.</p>
<p>However, the focus is shifting towards enabling data to be harvested and integrated across databases in an automated way, because it is recognised that this could save much time and effort, and because some questions just couldn&#8217;t be answered any other way.</p>
<p>And there are projects making headway here, for example, FlyMine uses a conventional data warehouse approach to integrate data on Drosophila. However, it is no small challenge, and there is a pressing need to make the end products much more usable, and because public funding will always be scarce, to make the whole process as cost-effective, scalable and sustainable as possible.</p>
<h3>FlyWeb Project</h3>
<p>With this in mind, in January 2008, I moved to the Zoology Department at the University of Oxford, to work with a team there led by Dr David Shotton on a small project called FlyWeb.</p>
<p>FlyWeb asked two questions&#8230;</p>
<p>1. Can we build tools for Drosophila biologists that cut down the effort required to search across different sources of gene expression data?</p>
<p>2. Under the hood, what (semantic) web tools and design patterns help us to build cross-database information systems, and ensure that they are robust, performant and quick to build?</p>
<p>To answer these questions, we set about building a proof of concept, which is deployed at openflydata.org.</p>
<p>How does openflydata.org work?</p>
<p>Each of the cross-database search applications I&#8217;ve demonstrated is, simply, a mashup.</p>
<p>It is a light weight, HTML+JavaScript application that runs in the browser, that fetches data in real time from several different web service endpoints. </p>
<p>Why did we use the mashup approach? </p>
<p>Simplest thing we could think to do. Also gave us flexibility to experiment with semantic web technology, without totally committing to it. I.e., we could mix semantic web and other solutions, if it proved easier to do so.</p>
<p>The bottom line was, we wanted to produce useful, compelling functionality for a biological researcher, in a reasonable time frame and with a reasonable level of performance and reliability. If semantic web helped, great, if not, fine, we&#8217;ll try something else. </p>
<p>Of course, it would have been great if each data source had provided a web service endpoint for their data with the necessary query functionality &#8230; but they didn&#8217;t, so we made some ourselves.</p>
<p>The approach we took was, for each data source, we first converted the data to RDF (the Resource Description Framework, one of the key Semantic Web Standards), then loaded the data into an off-the-shelf open-source RDF storage system (Jena TDB written by Andy Seaborne, now at Talis), then mounted each RDF store as a SPARQL endpoint. </p>
<p>What is a SPARQL endpoint? </p>
<p>It is, simply, a web service endpoint that uses the SPARQL protocol as its interface (API).</p>
<p>What is the SPARQL protocol? It is a simple HTTP-based protocol for sending SPARQL queries to be evaluated against a given data store.</p>
<p>What are SPARQL queries? SPARQL is a query language, roughly analogous to SQL (read only), but built for the Web. Basically, it gives you a way to ask pretty much any question you want to ask of a given set of data.</p>
<p>Why did we use RDF &amp; SPARQL?</p>
<p>1. Rapid prototyping. Leverage off-the-shelf, open source, software, such as Jena, or Mulgara (see David Wood). In principle, we only had to write the software to convert the data from each source to RDF. We could then use OTS software to deploy an RDF database and SPARQL endpoint. (Caveat&#8230;)</p>
<p>2. SPARQL is a simple protocol with an expressive query language. That means it&#8217;s easy to write code for, but gives the client application (in our case, the mashups) the power to ask any question it wants. (Caveat&#8230;)</p>
<p>Point 2. also means we can offer these SPARQL endpoints as a service to the biological community, so others with a bit of savvy can ask their own questions. In particular, they can ask questions that we (the service provider) haven&#8217;t thought of, which (in theory) promotes innovative re-use and exploitation of the data.</p>
<p>You&#8217;ll have noticed I haven&#8217;t yet talked about either of the two key themes of this conference, semantic interoperability or linked data.</p>
<p>I haven&#8217;t mentioned these yet because, the point I&#8217;d like to make is that, depending on the context, there *may* be compelling, practical, short-term reasons to evaluate Semantic Web-based technology for a data integration project &#8230; </p>
<p>1. The (relative) ease of deploying a web service endpoint for querying a data source; i.e., of making the data accessible, lowering the barriers to re-use.</p>
<p>2. The (relative) simplicity of exploiting those web services to prototype light weight cross-database search and on-the-fly data integration applications. (pardon the pun)</p>
<p>Caveats&#8230;</p>
<p>Of course, the best choice of technology depends greatly on the existing technological context, and on the expertise available to you. As with relational databases, XML, or any family of related technologies, becoming productive with a new technology requires an investment in terms of people, training and time.</p>
<p>A second caveat is that, the more open and expressive a query protocol like SPARQL is, the harder it becomes to guarantee the performance and availability of a service using that protocol. It is a denial-of-service type problem. If anyone can ask any question they like, some people will ask hard questions, whether intentionally or by accident, which could degrade service performance for others.</p>
<p>SPARQL is thus a double-edged sword. On the one hand, its open nature and expressivity are a major advantage, but that openness creates challenges when it comes to providing reliable and performant services to others.</p>
<p>I see resolving this tension as one of the key challenges for the semantic web community in making the technology widely applicable. We explored some strategies for mitigating these issues in FlyWeb, but we certainly did not find all of the answers.</p>
<p>Now, let&#8217;s talk about semantic interoperability and linked data.</p>
<p>The two classic problems you encounter when integrating data from different sources are&#8230;</p>
<p>1. schema alignment &#8230; each data source has a different data model &#8230; they structure their data in a different way &#8230; using different names for similar types of entities and relationships &#8230; or using similar names for what are actually very different types of entities and relationships &#8230; </p>
<p>2. coreference resolution &#8230; each data source may use different identifiers for the same thing (e.g., genes) &#8230; or (more rarely) identifiers may clash &#8230;</p>
<p>Our approach to schema alignment was not to try to completely align all our data sources in a single step. </p>
<p>Rather, we tried to pick the low-hanging fruit, and take an incremental approach, making use of existing data models as much as possible.</p>
<p>So, for example, the data from FlyBase come from a relational database, which is structured according to a relational schema developed over a number of years by the Generic Model Organism Database community, a schema they call Chado.</p>
<p>When we transformed FlyBase&#8217;s data to RDF, we used the Chado schema to help design the RDF data structures we were generating. In fact, we went one step further than that, and we semi-automatically generated an OWL ontology from the Chado relational schema. This ensured that we took a systematic approach to the data transformation, and that the definitions for the data structures in the output RDF could be grounded in the definitions already established by Chado.</p>
<p>Our approach to coreference resolution was, similarly, to make use of existing controlled vocabularies. </p>
<p>Our biggest problem was identifying genes. A single gene might be known in the scientific literature by many different names. This has been a perennial problem for Drosophila biologists, and a big part of what FlyBase has done has been to establish a definitive controlled vocabulary for Drosophila gene names, and curate a list of known synonyms for each gene.</p>
<p>So we used FlyBase&#8217;s unique gene identifier system as a foundation, constructed a set of URIs for Drosophila genes based on the FlyBase identifier. We then used these URIs to link data from each of the various sources.</p>
<p>You&#8217;ll notice I said &#8220;link data&#8221; just there. What do I mean by that?</p>
<p>Well, I&#8217;d like to make a distinction between two types of linking.</p>
<p>1. &#8220;semantically linked&#8221; &#8211; data from different sources use a common set of URIs to identify data entities, e.g., people, places, genes, diseases, (&#8230; or any two URIs that identify the same entity have been explicitly mapped)</p>
<p>2. &#8220;web linked&#8221; &#8211; data are semantically linked, and URIs resolve to data so links can be followed by a crawler &#8230; this is what most people mean when they talk about &#8220;linked data&#8221;</p>
<p>We might also describe a third notion, &#8230;</p>
<p>3. &#8220;semantically aligned&#8221; &#8211; data from different sources use a common schema, that is, they share a common data model, (&#8230; or their respective data models have been explicitly mapped)</p>
<p>To build our cross-search applications, we went for the low-hanging fruit. I.e., we did just enough, and no more, to get them working.</p>
<p>This meant a very small amount of semantic alignment. In fact, it was quite possible to work around differences between data models, as long as those data models were understood. We certainly did not need to accomplish a complete and perfect alignment of all data models, before we could start building prototypes.</p>
<p>We also did not make the data web linked, i.e., we did not publish true linked data. Why not? Because it didn&#8217;t serve our immediate needs. We needed performant and queryable web services to data for each source, so we could build a mashup that selected the data it needed. Whether the data were actually linked in the web was, for this project, not relevant.</p>
<p>We did, however, work on semantically linking the data, i.e., mapping differences in the identifiers used, especially for genes, and this was absolutely critical to getting a reasonable level of recall and precision.</p>
<p>Which is not to say that I think true, web linked data is a bad idea. There may be other reasons for deploying web linked data, which would have been relevant especially if we had wanted to go beyond a proof-of-concept system. </p>
<p>But the point is, you can get a quick win if you make data available via a queryable web service. You save a lot of time and effort if data from multiple sources are semantically linked. But your data certainly don&#8217;t need to be perfectly and completely semantically aligned before you can start using them.</p>
<h3>SKOS</h3>
<p>I&#8217;d like to leave flies now, and return to SKOS.</p>
<p>As you all know better than I do, one of the cornerstones of information retrieval for many years has been the development of controlled structured vocabularies, such as the Library of Congress Subject Headings, or the Dewey Decimal Classification, or the Agrovoc Thesaurus.</p>
<p>Ever since the advent of the Web, there has been a desire to make better use of these valuable tools, to help organise and connect information as it emerges from closed silos and is shared via the Web.</p>
<p>Hopefully, the Simple Knowledge Organisation System (SKOS) will go some way towards enabling that to happen.</p>
<p>SKOS provides a common, standard, data model, for controlled structured vocabularies like thesauri, taxonomies and classification schemes. </p>
<p>This means that, if you own or have developed a controlled vocabulary, and would like to make it available for others to use, you can use SKOS to publish your vocabulary as linked data in the Web. </p>
<p>Because SKOS is now a standard, your data will be linkable with other vocabularies published in a similar way, and (hopefully) compatible with a variety of different software systems.</p>
<p>If you were at DC2008, you would have heard Ed Summers talk about his work to deploy the Library of Congress Subject Headings as linked data, using SKOS. </p>
<p>His initial work was deployed at an experimental site, but since then, based on Ed&#8217;s work, the Library of Congress has deployed their new Authorities and Vocabularies Service at id.loc.gov. </p>
<p>The first service deployed there is, of course, the LCSH.</p>
<p>To explain what LOC have done, for each heading in the LCSH, LOC have minted a URI for that heading.</p>
<p>For example, the URI http://id.loc.gov/authorities/sh95000541#concept identifies the LCSH heading for the World Wide Web.</p>
<p>If you plug that URI into the location bar of your browser, you&#8217;ll get a conventional web page providing a summary of that heading.</p>
<p>With a small change in the way you make that request, you can also retrieve a machine-readable representation of that heading. I.e., you can get data. Those data are structured using SKOS.</p>
<p>Each heading is, of course, linked to other headings, so you could, if you wanted to, follow the links from one heading to the next, collecting data along the way.</p>
<p>Alternatively, if you want to re-use the entire LCSH in your own application, you can download the whole thing in bulk, again as data, structured using SKOS.</p>
<p>I hope what the Library of Congress have done with LCSH achieves three things.</p>
<p>First, I hope it means more people use the LCSH. For all its quirks, the LCSH, like many other vocabularies, is an invaluable resource, and I hope we will see it turned to wild and wonderful new uses.</p>
<p>Second, I hope it encourages other projects to re-use the LCSH URIs, to link their metadata records to LCSH via the Web. That would make it much easier to make use of links between metadata records across existing collections.</p>
<p>Third, I hope LCSH serves as a hub for linking other vocabularies as they emerge into the Web. </p>
<p>I think that, in a short time, it is not unrealistic to imagine that, we could see LCSH as a hub in a web of linked vocabularies, with that web of vocabularies itself serving as a hub for a much larger and broader web of linked metadata. </p>
<p>That this is possible is demonstrated by the fact that the LCSH has already been linked to another vocabulary, the French RAMEAU vocabulary, used by the Biblioth&#232;que nationale de France.</p>
<p>A second piece of work I&#8217;d like to highlight is Michael Panzer&#8217;s work on dewey.info.</p>
<p>One other project I would like to highlight from within the DCMI community is the work of Jon Phipps and Diane Hillman on the NSDL metadata registry.</p>
<p>In fact, to call it a registry doesn&#8217;t, in my mind, do it justice, because it is a complete vocabulary development, maintenance and publication platform, built using SKOS.</p>
<p>And it doesn&#8217;t only cover vocabularies, it covers metadata schemas (a.k.a., element sets) too.</p>
<p>And not only does the registry make all of the underlying data accessible via normal HTTP requests, (which you could use to implement linked data), it provides a SPARQL endpoint too, so you can query the schemas and vocabularies however you like.</p>
<p>The LCSH, dewey.info, and the NSDL metadata registry, are just three examples of recent uses of SKOS; a good source for more is the <a href="http://www.w3.org/2006/07/SWD/SKOS/reference/20090315/implementation.html">SKOS Implementation Report</a>. </p>
<p>Let me be the first to say that SKOS isn&#8217;t perfect. Neither does it cover every eventuality. While thesauri, classification schemes and subject heading systems do have something in common, they also exhibit diversity. In many cases, that diversity is not just a historical artifact, but exists for good reason, because the vocabulary is adapted for a specialised purpose. </p>
<p>Our goal with SKOS was to capture enough of this commonality to enable some interoperability, but to provide an extensible foundation from which different communities could innovate, and explore solutions to their own particular problems. </p>
<p>We&#8217;ve seen already, at this conference, for example, in Michael Panzer and Marcia Zeng&#8217;s presentation, how work is well underway to develop extensions to SKOS for classification schemes.</p>
<p>This photo was taken during a discussion of how to extend SKOS for the Japanese National Diet Library Subject Headings.</p>
<p>On this note, I&#8217;d like to share a small insight, which I gained while I was working on SKOS, thanks to my colleagues in the W3C Semantic Web Deployment Working Group.</p>
<p>When I started working on SKOS, I thought that developing a standard was about getting everyone to do the same thing. I.e., it was about uniformity.</p>
<p>Now, I have a different perspective. </p>
<p>Consider that, if the developers of the original Web standards had tried to think of every possible way the Web might be used, then tried to design a complete system to accommodate all those possibilities they could think of, the Web would probably not exist today, for two reasons. </p>
<p>First, they would still be here today, imagining new possibilities, and arguing about how to deal with conflicting requirements. </p>
<p>Second, they would have built a system that was too complicated.</p>
<p>Whether by intention, inspiration, or accident, the original Web standards have not led to uniformity, but have rather led to an explosion of innovation and diversity. </p>
<p>Thus, my insight is that, a good standard, at least for the Web, is one that provides a platform for innovation. It mustn&#8217;t try to do too much. Of course, it must be clear about everything that is within its scope, and so provide a sound basis for interoperability. But it should be aggressive about limiting its scope. And it must be flexible and extensible, to accommodate differences, and to enable unexpected ideas to be realised.</p>
<p>Striking this balance is, of course, far from easy, and I have no way of knowing whether SKOS has found the right balance. However, the people I have met through my work on SKOS continue to be an inspiration, and I hope at the very least it will provide a stepping stone to the future.</p>
<h3>RDA, FRBR and RDF</h3>
<p>Of course, when it comes to sharing and linking metadata, controlled vocabularies and SKOS are only a small part of the picture. </p>
<p>We also need standards for sharing and linking the metadata itself, standards that provide a basis for interoperability but that also can, as Michael Crandall beautifully illustrated on Tuesday, accommodate the richness and complexity of our descriptions, and of the artifacts they describe, be they literary works, works of art, or the results of scientific inquiry.</p>
<p>Here, too, there are opportunities to build on a significant body of previous work. </p>
<p>Two such bodies of work are the Functional Requirements for Bibliographic Records (FRBR), and the Anglo-American Cataloging Rules (AACR), which is the precursor to the Resource Description and Access (RDA) specification.</p>
<p>Now, I am not an expert on bibliographic metadata, so I cannot comment on the details of these standards.</p>
<p>However, I can tell you that, it is possible to take the data models underlying FRBR and RDA, to publish those data models using Semantic Web standards, and then to use those models as a framework for transforming existing metadata records to RDF and publishing them as linked data in the Web.</p>
<p>Earlier this year, I did a very modest amount of work with the DCMI-RDA task group, proving the concept. Using a set of cataloging examples, and using the RDA elements schema developed by the task group, I developed some patterns for representing bibliographic metadata as RDF. </p>
<p>I then tested these on a larger scale, using a dump of just under 7 million MARC records from LOC. I showed that at least some of the metadata from the MARC records could be transformed to RDF using the FRBR schema and RDA elements schema and vocabularies.</p>
<p>Next steps for this work would be to increase the coverage of the converted data, and to publish it not only as linked data but also via queryable (e.g., SPARQL) web services, which would drastically reduce the barrier to re-use of this fantastic resource.</p>
<h3>Data-Sharing Networks for Malaria Research</h3>
<p>I&#8217;d like to conclude my talk by returning to scientific research.</p>
<p>In June this year, after the FlyWeb Project finished, I moved up the hill in Oxford to join the Centre for Genomics and Global Health, which is a joint research programme of Oxford University and the Wellcome Trust Sanger Institute, directed by Prof. Dominic Kwiatkowski.</p>
<p>The main focus of our research is to assist the global campaign to eliminate malaria.</p>
<p>According to the WHO&#8217;s World Malaria Report 2008, half the world&#8217;s population is at risk of malaria, and an estimated 247 million cases led to nearly 881,000 deaths in 2006. Small children remain by far the most likely to die of the disease.</p>
<p>The recent advances in biotechnology that I mentioned earlier, in particular the rapid advances in DNA sequencing and genotyping technology are, of course, being brought to bear on the problem.</p>
<p>One of the most promising approaches is called genomic epidemiology, which combines genetic data from the lab with clinical data from the field, to understand why, for example, some people are less susceptible to serious infection than others. It is hoped that understanding how the natural mechanisms of protective immunity against malaria work may contribute to the development of an effective malaria vaccine.</p>
<p>The crunch is that this type of research cannot be done on a small scale. Because genomic epidemiology involves analysing hundreds of thousands of points of variation in the human genome, and searching for associations between these genetic differences and different disease outcomes, a large number of samples (i.e., patients) need to be included, to gain the necessary statistical power to find genuine associations.</p>
<p>Thus genomic epidemiology requires research collaboration on an unprecedented scale. And the key to enabling this type of research is data-sharing.</p>
<p>Will the Semantic Web and linked data help? I hope so. The Web, in all its chaos and diversity, will certainly play a pivotal role. But the challenges are too broad to be solved by one family of technologies alone. </p>
<p>Many of the key challenges are social, rather than technological. </p>
<p>For example, enabling a scientific community (i.e., one not trained in data modeling) to quickly reach working agreements on data standards, and enabling scientists to translate between their own view of their data, and a standardised view of their data, is vital. Here, bridging the gap between the technology and the people has never been more important.</p>
<p>Similarly, reaching agreements on when data may be shared, and on how it may be used, is key. Because, of course, in addition to ensuring credit is received for individual scientific research, personal genetic and clinical data is highly sensitive, and there are strict ethical rules about data use and privacy.</p>
<p>In spite of these challenges, I remain hopeful. I am especially encouraged by the openness of communities like this one, and by the willingness of those communities to share their experience and expertise. </p>
<p>With that, I hope you enjoy the rest of the conference, and thank you for listening.</p>
<br />Posted in Uncategorized Tagged: conference, drosophila, dublincore, flyweb, malaria, rda, rdf, semanticweb, skos <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/81/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/81/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/81/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=81&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/10/15/dc2009-talk-notes-towards-semantic-web-deployment-experiences-with-knowledge-organisation-systems-library-catalogues-and-fruit-flies/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>SKOS is a W3C Recommendation</title>
		<link>http://alimanfoo.wordpress.com/2009/09/23/skos-is-a-w3c-recommendation/</link>
		<comments>http://alimanfoo.wordpress.com/2009/09/23/skos-is-a-w3c-recommendation/#comments</comments>
		<pubDate>Wed, 23 Sep 2009 14:04:32 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[semanticweb]]></category>
		<category><![CDATA[w3c]]></category>
		<category><![CDATA[kos]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=79</guid>
		<description><![CDATA[Just a short post to say that the Simple Knowledge Organization System (SKOS) Reference is now a W3C Recommendation. W3C issued the following press release: From Chaos, Order: W3C Standard Helps Organize Knowledge I&#8217;m proud to have been a part of this work, and extremely grateful to all those who have supported and contributed over [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=79&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Just a short post to say that the <a href="http://www.w3.org/TR/skos-reference/">Simple Knowledge Organization System (SKOS) Reference</a> is now a W3C Recommendation.</p>
<p>W3C issued the following press release: <a href="http://www.w3.org/2009/07/skos-pr">From Chaos, Order: W3C Standard Helps Organize Knowledge</a></p>
<p>I&#8217;m proud to have been a part of this work, and extremely grateful to all those who have supported and contributed over the last 5 years. </p>
<br />Posted in Uncategorized Tagged: kos, semanticweb, skos, w3c <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/79/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/79/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/79/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=79&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/09/23/skos-is-a-w3c-recommendation/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Running GWT Unit Tests in Manual Mode from Eclipse</title>
		<link>http://alimanfoo.wordpress.com/2009/09/23/running-gwt-unit-tests-in-manual-mode-from-eclipse/</link>
		<comments>http://alimanfoo.wordpress.com/2009/09/23/running-gwt-unit-tests-in-manual-mode-from-eclipse/#comments</comments>
		<pubDate>Wed, 23 Sep 2009 13:59:11 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[eclipse]]></category>
		<category><![CDATA[gwt]]></category>
		<category><![CDATA[testing]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=77</guid>
		<description><![CDATA[A little tidbit, if you want to run GWT unit tests in manual mode from Eclipse, right click the test case class and select Run As &#62; GWT JUnit Test as you would normally, which will create a run configuration for you. The first time round this will run the test in hosted mode. To [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=77&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>A little tidbit, if you want to run GWT unit tests in manual mode from Eclipse, right click the test case class and select <code>Run As &gt; GWT JUnit Test</code> as you would normally, which will create a run configuration for you. The first time round this will run the test in hosted mode. To get the test to run in manual mode, go to <code>Run Configurations</code>, select the run configuration for your test, then in the <code>VM arguments</code> box under the <code>Arguments</code> tab enter the following &#8230;</p>
<pre><code>
-Dgwt.args="-manual"
</code></pre>
<br />Posted in Uncategorized Tagged: eclipse, gwt, testing <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/77/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/77/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/77/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=77&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/09/23/running-gwt-unit-tests-in-manual-mode-from-eclipse/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>SKOS is a Candidate Recommendation</title>
		<link>http://alimanfoo.wordpress.com/2009/05/13/skos-is-a-candidate-recommendation/</link>
		<comments>http://alimanfoo.wordpress.com/2009/05/13/skos-is-a-candidate-recommendation/#comments</comments>
		<pubDate>Wed, 13 May 2009 15:57:56 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=75</guid>
		<description><![CDATA[Almost two months ago now, the Semantic Web Deployment Working Group published the SKOS Reference Candidate Recommendation. Since then, we&#8217;ve had a good number of high quality implementations (see also Sean&#8217;s SKOS implementations spreadsheet), which is excellent news. Posted in Uncategorized<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=75&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Almost two months ago now, the <a href="http://www.w3.org/2006/07/SWD/">Semantic Web Deployment Working Group</a> published the <a href="http://www.w3.org/TR/2009/CR-skos-reference-20090317/">SKOS Reference Candidate Recommendation</a>. Since then, we&#8217;ve had a good number of high quality <a href="http://www.w3.org/2006/07/SWD/SKOS/reference/20090315/implementation.html">implementations</a> (see also <a href="http://spreadsheets.google.com/ccc?key=rmQPwhMMWXxY62FinzE44Eg">Sean&#8217;s SKOS implementations spreadsheet</a>), which is excellent news.</p>
<br />Posted in Uncategorized  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/75/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/75/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/75/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=75&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/05/13/skos-is-a-candidate-recommendation/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>FlyWeb &#8211; Working Across Databases for Drosophila Functional Genomics</title>
		<link>http://alimanfoo.wordpress.com/2009/05/13/flyweb-working-across-databases-for-drosophila-functional-genomics/</link>
		<comments>http://alimanfoo.wordpress.com/2009/05/13/flyweb-working-across-databases-for-drosophila-functional-genomics/#comments</comments>
		<pubDate>Wed, 13 May 2009 15:49:57 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=68</guid>
		<description><![CDATA[Over the last year or so, my main priority has been the FlyWeb Project. Unfortunately, FlyWeb was supported by short-term funding (18 months), and is coming to an end soon. Here are a few belated notes on what we did and why we did it&#8230; The main goal of FlyWeb was to minimize the time [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=68&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Over the last year or so, my main priority has been the <a href="http://flyweb.info/">FlyWeb Project</a>. Unfortunately, FlyWeb was supported by short-term funding (18 months), and is coming to an end soon. Here are a few belated notes on what we did and why we did it&#8230;</p>
<p>The main goal of FlyWeb was to minimize the time required for a researcher in the domain of <a href="http://en.wikipedia.org/wiki/Drosophila"><em>Drosophila</em></a> (fruit fly) functional genomics, with no informatics training, to find and compare gene expression data from different databases on a large number of genes. With this in mind, we developed <a href="http://openflydata.org">openflydata.org</a>, which hosts the following cross-database gene expression data search applications:</p>
<ul>
<li><a href="http://openflydata.org/search/gene-expression">openflydata.org/search/gene-expression</a> &#8211; search for a <em>single gene</em> of interest, and then retrieve and display expression data for that gene, including tissue-specific mRNA levels from<a href="http://flyatlas.org"> FlyAtlas</a>, embryo in situ hybridization images and ontology annotations from <a href="http://www.fruitfly.org/cgi-bin/ex/insitu.pl">BDGP</a>, and testis in situ hybridization images from <a href="http://www.fly-ted.org">FlyTED</a>. Also retrieved are literature references relevant to the selected gene, provided by <a href="http://flybase.org">FlyBase</a>.</li>
<li><a href="http://openflydata.org/search/gene-batch-expression">openflydata.org/search/gene-batch-expression</a> &#8211; search for a <em>batch of genes</em>, then retrieve and compare expression data from <a href="http://flyatlas.org"> FlyAtlas</a>, <a href="http://www.fruitfly.org/cgi-bin/ex/insitu.pl">BDGP</a> and <a href="http://www.fly-ted.org">FlyTED</a>, for all matching genes.</li>
<li><a href="http://openflydata.org/search/by-expression-profile">openflydata.org/search/by-expression-profile</a> &#8211;  search for genes matching a given <em>tissue-specific mRNA expression profile</em>, based on data from <a href="http://flyatlas.org"> FlyAtlas</a>,  and then retrieve further expression data for each gene found.</li>
</ul>
<p>The applications are all pure JavaScript, built using a custom library called <a href="http://flyui.googlecode.com/">FlyUI</a>. They fetch data AJAX-style directly from four SPARQL endpoints, one for each of the four sources of genomic data. On the server side, we use <a href="http://jena.hpl.hp.com/wiki/TDB">Jena TDB</a> as the underlying RDF storage and query engine, and <a href="http://sparqlite.googlecode.com">SPARQLite</a> as the SPARQL protocol server. The whole thing runs on a small EC2 instance.</p>
<p>Further details on our work to convert the four data sources to RDF, in addition to bulk RDF downloads, SPARQL endpoints and more, can be found at the links below:</p>
<ul>
<li><a href="http://code.google.com/p/openflydata/wiki/Flybase">FlyBase</a></li>
<li><a href="http://code.google.com/p/openflydata/wiki/Bdgp">BDGP</a></li>
<li><a href="http://code.google.com/p/openflydata/wiki/FlyAtlas">FlyAtlas</a></li>
<li><a href="http://code.google.com/p/openflydata/wiki/Flyted">FlyTED</a></li>
</ul>
<br />Posted in Uncategorized  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/68/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/68/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/68/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=68&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/05/13/flyweb-working-across-databases-for-drosophila-functional-genomics/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Change of Contact Details</title>
		<link>http://alimanfoo.wordpress.com/2009/05/13/change-of-contact-details/</link>
		<comments>http://alimanfoo.wordpress.com/2009/05/13/change-of-contact-details/#comments</comments>
		<pubDate>Wed, 13 May 2009 15:20:10 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=66</guid>
		<description><![CDATA[This is just a short post to say that I&#8217;m moving to a new role shortly, and so my contact details are changing also. To reach me via email, use: alimanfoo at gmail dot com I will be on leave from 15-31 May. Posted in Uncategorized<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=66&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This is just a short post to say that I&#8217;m moving to a new role shortly, and so my contact details are changing also.</p>
<p>To reach me via email, use: alimanfoo at gmail dot com</p>
<p>I will be on leave from 15-31 May.</p>
<br />Posted in Uncategorized  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/66/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/66/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/66/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=66&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2009/05/13/change-of-contact-details/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Presentation at the Library of Congress: Simple Knowledge Organization System (SKOS) in the context of Semantic Web Deployment</title>
		<link>http://alimanfoo.wordpress.com/2008/05/13/presentation-at-the-library-of-congress-simple-knowledge-organization-system-skos-in-the-context-of-semantic-web-deployment/</link>
		<comments>http://alimanfoo.wordpress.com/2008/05/13/presentation-at-the-library-of-congress-simple-knowledge-organization-system-skos-in-the-context-of-semantic-web-deployment/#comments</comments>
		<pubDate>Tue, 13 May 2008 16:53:18 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[digital libraries]]></category>
		<category><![CDATA[metadata]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[swdwg]]></category>
		<category><![CDATA[taxonomies]]></category>
		<category><![CDATA[thesauri]]></category>
		<category><![CDATA[w3c]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=62</guid>
		<description><![CDATA[I gave a presentation on SKOS and Semantic Web Deployment last week at the Library of Congress. Here&#8217;s the blurb.. Links are valuable. Links between documents, between people, between ideas, between data. Data is now a first class Web citizen, and the Web is expanding as more of these valuable networks are deployed within its [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=62&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I gave a <a href="http://www.slideshare.net/gardensofmeaning/simple-knowledge-organization-system-skos-in-the-context-of-semantic-web-deployment-library-of-congress-may-2008">presentation on SKOS and Semantic Web Deployment</a> last week at the <a href="http://www.loc.gov">Library of Congress</a>. Here&#8217;s <a href="http://inkdroid.org/journal/2008/04/30/skos-in-the-context-of-semantic-web-deployment/">the blurb</a>..</p>
<blockquote><p>Links are valuable. Links between documents, between people, between ideas, between data. Data is now a first class Web citizen, and the Web is expanding as more of these valuable networks are deployed within its fabric. Well-established knowledge organization systems like the Library of Congress Subject Headings will play a major role within these networks, as hubs, connecting people with information and providing a firm foundation for network growth as many new routes to the discovery of information emerge through the collective action of individuals. Or will they?</p>
<p>This talk introduces the Simple Knowledge Organization System (SKOS), a soon-to-be-completed W3C standard for publishing thesauri, classification schemes and subject headings as linked data in the Web. This talk also presents SKOS in the context of the W3C’s Semantic Web Activity, and in particular the work of the W3C’s Semantic Web Deployment Working Group where other specifications are being developed for publishing linked data in the Web, for embedding linked data in Web pages, and for managing Semantic Web vocabularies. Finally, this talk takes a mildly inquisitive look at the value propositions for linked data in the Web, and how LCSH might be deployed in the Web for better information discovery.</p></blockquote>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/62/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/62/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/62/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/62/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/62/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=62&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/05/13/presentation-at-the-library-of-congress-simple-knowledge-organization-system-skos-in-the-context-of-semantic-web-deployment/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Semantic Web Deployment Final Face-to-Face</title>
		<link>http://alimanfoo.wordpress.com/2008/05/06/semantic-web-deployment-final-face-to-face/</link>
		<comments>http://alimanfoo.wordpress.com/2008/05/06/semantic-web-deployment-final-face-to-face/#comments</comments>
		<pubDate>Tue, 06 May 2008 13:34:21 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[rdf]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[taxonomies]]></category>
		<category><![CDATA[thesauri]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/?p=61</guid>
		<description><![CDATA[The W3C Semantic Web Deployment Working Group is kicking off its final face-to-face meeting at the Library of Congress in Washington, D.C. The main purpose of the meeting is to resolve outstanding issues for the Simple Knowledge Organization System (SKOS), which are summarised on the meeting agenda. As an aside, I heard recently about the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=61&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The <a href="http://www.w3.org/2006/07/SWD/">W3C Semantic Web Deployment Working Group</a> is kicking off its final face-to-face meeting at the <a href="http://www.loc.gov/">Library of Congress</a> in Washington, D.C. The main purpose of the meeting is to resolve outstanding issues for the <a href="http://www.w3.org/2004/02/skos">Simple Knowledge Organization System (SKOS)</a>, which are summarised on the <a href="http://www.w3.org/2006/07/SWD/wiki/WashingtonAgenda">meeting agenda.</a></p>
<p>As an aside, I heard recently about the <a href="http://lcsh.info">deployment of the Library of Congress Subject Headings (LCSH) as linked data in the Web</a>, using SKOS. This nice work provides a great backdrop to our meeting.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/61/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/61/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/61/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/61/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/61/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=61&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/05/06/semantic-web-deployment-final-face-to-face/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Installing RDFLIB on Windows, and Making it Work with PyDev</title>
		<link>http://alimanfoo.wordpress.com/2008/04/12/installing-rdflib-on-windows-and-making-it-work-with-pydev/</link>
		<comments>http://alimanfoo.wordpress.com/2008/04/12/installing-rdflib-on-windows-and-making-it-work-with-pydev/#comments</comments>
		<pubDate>Sat, 12 Apr 2008 15:51:22 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[agile development]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[rdf]]></category>
		<category><![CDATA[semantic web]]></category>

		<guid isPermaLink="false">http://isegserv.itd.rl.ac.uk/blogs/alistair/archives/94</guid>
		<description><![CDATA[I had a few troubles installing RDFLIB, the Python RDF library, on my Windows Vista laptop, and getting everything to work with PyDev in Eclipse. I have Python 2.5 installed from the MSI. When I ran python setup.py install from the RDFLIB download directory, I got the message: error: Python was built with Visual Studio [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=60&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I had a few troubles installing <a href="http://rdflib.net">RDFLIB</a>, the Python RDF library, on my Windows Vista laptop, and getting everything to work with PyDev in Eclipse.</p>
<p>I have Python 2.5 installed from the MSI. When I ran <code>python setup.py install</code> from the RDFLIB download directory, I got the message:</p>
<pre><code>error: Python was built with Visual Studio 2003;
extensions must be built with a compiler than can generate compatible binaries.
Visual Studio 2003 was not found on this system. If you have Cygwin installled,
you can try compiling with MingW32, by passing "-c mingw32" to setup.py.
</code></pre>
<p>I have Cygwin installed, so I installed Cygwin&#8217;s Python 2.5, then used that to run <code>python setup.py install</code>, which worked fine.</p>
<p>However, when I tried to use &#8220;Run As &#8230; Python unit-test&#8221; from within Eclipse (with PyDev installed), it didn&#8217;t work. Apparently, there are compatibility problems between PyDev and Cygwin, mostly related to windows path names.</p>
<p>So I went back to trying to install RDFLIB using the Windows Python. I could run <code>python setup.py build -c mingw32</code> (with gcc-mingw32 installed and cygwin&#8217;s binaries directory on my path), but I still couldn&#8217;t run <code>python setup.py install</code> because the &#8216;install&#8217; command doesn&#8217;t accept the &#8216;-c&#8217; argument.</p>
<p>Eventually, I made it work by creating a cfg file for distutils (distutils.cfg) eg: /c/Python2x/Lib/distutils/distutils.cfg containing:</p>
<pre><code>[build]
compiler=mingw32
</code></pre>
<p>as <a href="http://www.mingw.org/MinGWiki/index.php/Python%20extensions">described here at the end of the page, under &#8220;One Last Step&#8221;</a>.</p>
<p>I.e. once I had created the cfg file, and the cygwin binaries were on my path, I could run <code>python setup.py install</code> using the Windows Python, which also works with PyDev in Eclipse.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/60/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/60/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/60/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/60/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/60/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=60&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/04/12/installing-rdflib-on-windows-and-making-it-work-with-pydev/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>Request for Comments &#8212; SKOS Reference &#8212; W3C Working Draft 25 January 2008</title>
		<link>http://alimanfoo.wordpress.com/2008/01/30/request-for-comments-skos-reference-w3c-working-draft-25-january-2008/</link>
		<comments>http://alimanfoo.wordpress.com/2008/01/30/request-for-comments-skos-reference-w3c-working-draft-25-january-2008/#comments</comments>
		<pubDate>Wed, 30 Jan 2008 21:08:52 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[kos]]></category>
		<category><![CDATA[ontologies]]></category>
		<category><![CDATA[owl]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[swdwg]]></category>
		<category><![CDATA[taxonomies]]></category>
		<category><![CDATA[thesauri]]></category>
		<category><![CDATA[w3c]]></category>

		<guid isPermaLink="false">http://isegserv.itd.rl.ac.uk/blogs/alistair/archives/93</guid>
		<description><![CDATA[The W3C Semantic Web Deployment Working Group has announced the publication of the SKOS Reference as a W3C First Public Working Draft: http://www.w3.org/TR/2008/WD-skos-reference-20080125/ This is a substantial update to and replacement for the previous SKOS Core Vocabulary Specification W3C Working Draft dated 2 November 2005. The publication has been announced in the W3C news, and [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=59&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The <a href="http://www.w3.org/2006/07/SWD/">W3C Semantic Web Deployment Working Group</a> has announced the publication of the <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/"><strong>SKOS Reference</strong></a> as a W3C First Public Working Draft:</p>
<ul>
<li><a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/">http://www.w3.org/TR/2008/WD-skos-reference-20080125/</a></li>
</ul>
<p>This is a substantial update to and replacement for the previous <a href="http://www.w3.org/TR/2005/WD-swbp-skos-core-spec-20051102/">SKOS Core Vocabulary Specification</a> W3C Working Draft dated 2 November 2005. The publication has been <a href="http://www.w3.org/News/2008#item12">announced in the W3C news</a>, and a <a href="http://lists.w3.org/Archives/Public/public-esw-thes/2008Jan/0096.html">request for comments</a> has been sent to various mailing lists.</p>
<p>The abstract from this new specification:</p>
<blockquote><p>
This document defines the Simple Knowledge Organization System (SKOS), a common data model for sharing and linking knowledge organization systems via the Semantic Web.</p>
<p>Many knowledge organization systems, such as thesauri, taxonomies, classification schemes and subject heading systems, share a similar structure, and are used in similar applications. SKOS captures much of this similarity and makes it explicit, to enable data and technology sharing across diverse applications.</p>
<p>The SKOS data model provides a standard, low-cost migration path for porting existing knowledge organization systems to the Semantic Web. SKOS also provides a light weight, intuitive language for developing and sharing new knowledge organization systems. It may be used on its own, or in combination with formal knowledge representation languages such as the Web Ontology language (OWL).</p>
<p>This document is the normative specification of the Simple Knowledge Organization System. It is intended for readers who are involved in the design and implementation of information systems, and who already have a good understanding of Semantic Web technology, especially RDF and OWL.</p>
<p>For an informative guide to using SKOS, see the upcoming SKOS Primer.</p>
<p><strong>Synopsis</strong></p>
<p>Using SKOS, <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#conceptual-resources"><strong>conceptual resources</strong></a> can be identified using URIs, <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#lexical-labels"><strong>labeled</strong></a> with lexical strings in one or more natural languages, <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#notes"><strong>documented</strong></a> with various types of note, <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#semantic-relations"><strong>linked to each other</strong></a> and organized into informal hierarchies and association networks, aggregated into <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#concept-schemes"><strong>concept schemes</strong></a>, and <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#mapping"><strong>mapped</strong></a> to conceptual resources in other schemes. In addition, <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#label-relations"><strong>labels can be related</strong></a> to each other, and conceptual resources can be <a href="http://www.w3.org/TR/2008/WD-skos-reference-20080125/#collections"><strong>grouped</strong></a> into labeled and/or ordered collections.</p>
</blockquote>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/59/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/59/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/59/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/59/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/59/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=59&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/01/30/request-for-comments-skos-reference-w3c-working-draft-25-january-2008/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>On the OAIS Information Model as a Platform-Independent Model (PIM) in a Model-Driven Software Architecture</title>
		<link>http://alimanfoo.wordpress.com/2008/01/30/on-the-oais-information-model-as-a-platform-independent-model-pim-in-a-model-driven-software-architecture/</link>
		<comments>http://alimanfoo.wordpress.com/2008/01/30/on-the-oais-information-model-as-a-platform-independent-model-pim-in-a-model-driven-software-architecture/#comments</comments>
		<pubDate>Wed, 30 Jan 2008 16:27:25 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[java]]></category>
		<category><![CDATA[model-driven architecture]]></category>
		<category><![CDATA[modeling]]></category>
		<category><![CDATA[oais]]></category>
		<category><![CDATA[uml]]></category>

		<guid isPermaLink="false">http://isegserv.itd.rl.ac.uk/blogs/alistair/archives/92</guid>
		<description><![CDATA[Abstract This short paper summarises some work done on the possibility of using OAIS information model as a basis for the model-driven design and implementation of components within a digital preservation software architecture. Two model transformations were defined using the Enterprise Architect template language. The first model-transformation transforms a platform-independent UML class model (PIM) into [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=58&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><strong>Abstract</strong></p>
<p>This short paper summarises some work done on the possibility of using the OAIS information model as a basis for the model-driven design and implementation of components within a digital preservation software architecture. Two model transformations were defined using the Enterprise Architect template language. The first model-transformation transforms a platform-independent UML class model (PIM) into a set of UML interfaces specific to the Java 1.5 platform (here called a Java API model). The second model-transformation transforms a platform-independent UML class model (PIM) into a set of UML classes specific to the Java 1.5 platform, implementing the interfaces generated by the first model-transformation (here called a Java implementation model). Both were applied to the OAIS information model as PIM, and the generated models are presented here with discussion.</p>
<p><span id="more-58"></span></p>
<p><strong>Introduction</strong></p>
<p>The CASPAR project aims to design and implement components of a distributed digital preservation architecture, taking the Open Archival Information System (OAIS) Reference Model as the basis for this architecture. Parts of the OAIS reference model are defined using UML class diagrams. Can these UML class diagrams be used in the design of CASPAR software components? Can they be used directly, or do they need to be refined in some way? What design patterns, principles and methodologies should be used to guide this refinement?</p>
<p>A standard pattern for the design of software systems using UML is to define a platform-independent model (PIM), and then to use model-transformations to generate other models, each specific to a given software platform (e.g. Java or C++). These platform-specific models (PSMs) are then used to generate code in the given language. These models and the resulting software system constitute a model-driven architecture. @@REF</p>
<p>This paper presents some work exploring the feasibility of using the OAIS information model as a platform-independent model in a model-driven design and implementation process, with Java 1.5 as one of the target platforms.</p>
<p><strong>Methods</strong></p>
<p>Enterprise Architect 7 was used as the primary tool for developing UML models and model-transformations. EA7 supports a bespoke template language, which can be used to define and then execute arbitrary model-transformations. EA7 also includes a number of pre-defined model-transformations for specific platforms, including Java. The pre-defined Java transformation preserves most aspects of a UML class model intact, for example it transforms a class into a class and an interface into an interface; it does, however, convert platform-independent data types to Java primitive types, and it adds public getter and setter methods for each attribute and navigable association of a class or interface, following the Java beans pattern.</p>
<p>Although EA7&#8217;s pre-defined Java transform provides some useful functionality, it is not sufficient for the CASPAR design process, for several reasons. Firstly, it handles multiplicities of greater than one poorly. Secondly, it does not make use of Java generics. Thirdly, the approach taken so far in CASPAR has been to define two models: an interface model (an API) and an implementation model (a set of classes implementing that API), each with their own distinct package paths. EA7&#8217;s pre-defined Java transformation simply converts package to package, class to class and interface to interface, and so cannot be used to generate CASPAR&#8217;s two separate models (API and implementation).</p>
<p>To overcome these limitations, two new model-transformations were defined using EA7&#8217;s template language. These transformations were based on the pre-defined Java transformation, with a number of modifications. The first transformation converts classes in the PIM into interfaces in a Java API model, in a fixed package (info.preserveddigital.infomodel). The second transformation converts classes in the PIM into classes in a Java implementation model, in a fixed package (eu.casparpreserves.infomodel). Both transformations include modifications to handle all multiplicities appropriately, including the use of Java generics with Java collections where attributes or associations have multiplicities greater than one.</p>
<p>All class diagrams from section 4.2.1 of the OAIS reference model were then added to Enterprise Architect as a single package within a platform-independent model (PIM). The text of section 4.2.1 was completely ignored in the construction of the PIM, i.e. none of the informal statements made in the text were added as features of the model, so the PIM was a faithful transcription of the diagrams in section 4.2.1 only.</p>
<p>The pre-defined Java transformation, the custom Java API transformation, and the custom Java implementation transformation were then all applied to the PIM, and the output stored in EA.</p>
<p><strong>Results</strong></p>
<p>To give a flavour for the outputs of the two model-transformations, consider the class diagram illustrated in figure 4-10 of the OAIS reference model. Figure 4-10 defines the relationships between data objects, information objects and representation information. This class diagram, as the PIM used here, is given below.</p>
<p><a href='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/pim2.png' title='OAIS Figure 4-10 (PIM)'><img src='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/pim2.png' alt='OAIS Figure 4-10 (PIM)' /></a></p>
<p>The diagram below was composed from the output of the custom Java API model-transformation. Notice that each interface in the transformation output has getter and setter methods for each of the associations in which the original class was involved. Notice also that the interface RepresentationInformation has getter and setter methods for associations given in the PIM in another diagram (from figure 4-11 in OAIS).</p>
<p><a href='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/api.png' title='OAIS Figure 4-10 (Java API)'><img src='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/api.png' alt='OAIS Figure 4-10 (Java API)' /></a></p>
<p>The diagram below was composed from the output of the custom Java implementation model-transformation. Notice that all of the getter and setter methods implementing the corresponding interfaces are present. Notice also that private members for each of these properties are also now present, allowing generation of Java source code with initial code for getter and setter method bodies.</p>
<p><a href='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/impl.png' title='OAIS Figure 4-10 (Java Implementation)'><img src='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/impl.png' alt='OAIS Figure 4-10 (Java Implementation)' /></a></p>
<p>Finally, compare these with the output from EA7&#8217;s pre-defined Java transformation.</p>
<p><a href='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/java.png' title='OAIS Figure 4-10 (Java EA Default)'><img src='http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/java.png' alt='OAIS Figure 4-10 (Java EA Default)' /></a></p>
<p><strong>Discussion</strong></p>
<p>In UML, each connector (link) can have a name, and both source and target roles for that connector can also have a name. However, in the OAIS information model, some links have a name, some don&#8217;t, and none of the links have role names. If role names are present, these can be used to give more sensible names to getter and setter methods and private members. However, without role names, the class name of the target type has to be used (e.g. getRepresentationInformation). This is fine when there is only one connector between any two classes, but where there is more than one connector between any two classes, role names are needed to disambiguate between them. Therefore, it is suggested that all connectors in the OAIS information model be given a link name, a source role name (if source is navigable) and a target role name (if target is navigable).</p>
<p>Notice that the class <code>InformationObject</code> has a method <code>getRepresentationInformation():RepresentationInformation</code>, due to the composition association between <code>InformationObject</code> and <code>RepresentationInformation</code>. Notice also that the class <code>RepresentationInformation</code> has a method <code>getRepresentationInformations():java.util.Collection</code> due to the recursive association on that class. The class diagrams in the OAIS information model do not specify that <code>RepresentationInformation</code> is a sub-class of <code>InformationObject</code>, hence this generalization is not present in the PIM used here. However, the text suggests such a generalization. If <code>RepresentationInformation</code> were a sub-class of <code>InformationObject</code>, then the class <code>RepresentationInformation</code> would end up with two methods, both <code>getRepresentationInformation():RepresentationInformation</code> (inherited) and <code>getRepresentationInformations():java.util.Collection</code>. This is due to the fact that the two &#8220;interpreted using&#8221; connectors have different multiplicities in OAIS. This creates an ambiguity, which would have to be resolved.</p>
<p>This is an example of the type of issue that can be revealed by an analysis of the output of a model-transformation on the OAIS information model as a PIM. It is expected that further analysis of other aspects of the transformation output will reveal similar issues with other classes. Therefore, it is recommended that further study be given to the model-transformation output generated using the transformations described here, and that this be fed back into further iterations of the OAIS information model.</p>
<p><strong>Conclusions</strong></p>
<p>There are potential issues with using the OAIS information model as-is as a platform-independent model in a model-driven design process. The output of model-transformations requires inspection and refinement before being suitable for implementation in preservation systems. This is not necessarily a shortcoming of the OAIS information model, but does raise an important question: is the OAIS information model intended to serve as a genuine basis for the implementation of software, or is it merely intended as a guide to understanding?</p>
<p>Further study of the outputs of the model-transformations generated here is recommended. This study is likely to complement the findings of any attempt to re-engineer the OAIS information model as a formal ontology, which has elsewhere revealed potential ambiguities and inconsistencies therein.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/58/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/58/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/58/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=58&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/01/30/on-the-oais-information-model-as-a-platform-independent-model-pim-in-a-model-driven-software-architecture/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>

		<media:content url="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/pim2.png" medium="image">
			<media:title type="html">OAIS Figure 4-10 (PIM)</media:title>
		</media:content>

		<media:content url="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/api.png" medium="image">
			<media:title type="html">OAIS Figure 4-10 (Java API)</media:title>
		</media:content>

		<media:content url="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/impl.png" medium="image">
			<media:title type="html">OAIS Figure 4-10 (Java Implementation)</media:title>
		</media:content>

		<media:content url="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2008/01/java.png" medium="image">
			<media:title type="html">OAIS Figure 4-10 (Java EA Default)</media:title>
		</media:content>
	</item>
		<item>
		<title>Using PicaJet and Flickr to Manage Photos on the Desktop and Online</title>
		<link>http://alimanfoo.wordpress.com/2008/01/03/using-picajet-and-flickr-to-manage-photos-on-the-desktop-and-online/</link>
		<comments>http://alimanfoo.wordpress.com/2008/01/03/using-picajet-and-flickr-to-manage-photos-on-the-desktop-and-online/#comments</comments>
		<pubDate>Thu, 03 Jan 2008 11:25:41 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[flickr]]></category>
		<category><![CDATA[images]]></category>
		<category><![CDATA[photography]]></category>
		<category><![CDATA[review]]></category>
		<category><![CDATA[software]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/2008/01/03/using-picajet-and-flickr-to-manage-photos-on-the-desktop-and-online/</guid>
		<description><![CDATA[I&#8217;ve been looking around for something to help me manage my burgeoning photo collection. I&#8217;ve got a Sony Ericsson K800 and a Nikon D40, and between the two of them I&#8217;m generating quite a few images. Adobe Photoshop Album Starter Edition came with my mobile phone software, so I tried that to start with. The [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=25&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I&#8217;ve been looking around for something to help me manage my burgeoning photo collection. I&#8217;ve got a Sony Ericsson K800 and a Nikon D40, and between the two of them I&#8217;m generating quite a few images. Adobe Photoshop Album Starter Edition came with my mobile phone software, so I tried that to start with. The tagging interface worked well for me &#8212; a quick once through tagging with <em>who</em>, <em>where</em>, <em>when</em> and occasionally <em>what</em> is all I ever have time for, and is usually enough to allow me to find an image again. However, the two things that bugged me about Photoshop Album were (1) that there was no integration with Flickr, so if I uploaded photos I&#8217;d have to retag them completely, and (2) I couldn&#8217;t export my photo catalog or move it between computers easily.</p>
<p>After a not too exhaustive search on the Web, I found <a href="http://www.picajet.com/">PicaJet</a>, and downloaded the free edition. I was encouraged because the tagging interface is great (very similar to Photoshop Album), and because PicaJet has an integrated Flickr uploader which preserves all of your tagging. I also discovered that the photo catalog can be easily exported, so in a nutshell, PicaJet ticks my boxes. You can do quite a lot with the free edition &#8212; tag photos, upload to flickr, some basic editing. I&#8217;ll be upgrading to PicaJet FX (the full version, around £30) mainly because I want to be able to do more with the tag categories &#8212; in the free version you can only have a two-level hierarchy, and you can&#8217;t add new top-level categories.</p>
<p>I tried Picasa2, but that doesn&#8217;t have any tagging support or Flickr integration.</p>
<p>I also downloaded Microsoft Photo Gallery, which advertises Flickr integration. The installation process was painfully slow, then the application crashed when I tried to launch it on my bog-standard Windows XP machine.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/25/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/25/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/25/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/25/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/25/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=25&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2008/01/03/using-picajet-and-flickr-to-manage-photos-on-the-desktop-and-online/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>
	</item>
		<item>
		<title>SKOS and RDFa in e-Learning</title>
		<link>http://alimanfoo.wordpress.com/2007/11/14/skos-and-rdfa-in-e-learning/</link>
		<comments>http://alimanfoo.wordpress.com/2007/11/14/skos-and-rdfa-in-e-learning/#comments</comments>
		<pubDate>Wed, 14 Nov 2007 18:11:09 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[e-learning]]></category>
		<category><![CDATA[html]]></category>
		<category><![CDATA[ontologies]]></category>
		<category><![CDATA[owl]]></category>
		<category><![CDATA[rdf]]></category>
		<category><![CDATA[rdfa]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[skos]]></category>
		<category><![CDATA[swdwg]]></category>
		<category><![CDATA[taxonomies]]></category>
		<category><![CDATA[thesauri]]></category>
		<category><![CDATA[w3c]]></category>
		<category><![CDATA[web technology]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/2007/11/14/skos-and-rdfa-in-e-learning/</guid>
		<description><![CDATA[The W3C&#8217;s Semantic Web Deployment Working Group is developing two new technologies which may be relevant to e-learning technology. These are the Simple Knowledge Organisation System (SKOS), and RDFa. SKOS is a lightweight language for representing intuitive, semi-formal conceptual structures. So, for example, the figure below (taken from the SKOS Core Guide) depicts concepts with [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=55&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The <a href="http://www.w3.org/2006/07/SWD/">W3C&#8217;s Semantic Web Deployment Working Group</a> is developing two new technologies which may be relevant to e-learning technology. These are the <a href="http://www.w3.org/2004/02/skos">Simple Knowledge Organisation System (SKOS)</a>, and <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a>.</p>
<p><a href="http://www.w3.org/2004/02/skos">SKOS</a> is a lightweight language for representing intuitive, semi-formal conceptual structures. So, for example, the figure below (taken from the <a href="http://www.w3.org/TR/swbp-skos-core-guide/">SKOS Core Guide</a>) depicts concepts with intuitive hierarchical and associative relationships to other concepts, and with preferred and alternative labels in one (or more) languages &#8212; these are the kinds of structures that can be expressed using <a href="http://www.w3.org/2004/02/skos">SKOS</a>. Once expressed in this form, conceptual structures can easily be published on the Web, shared between applications, linked/mapped to other conceptual structures and so on. Typically, these conceptual structures are used as tools for navigating around complex or unfamiliar subject areas, for retrieving information across languages, and for bringing together related information from different sources. </p>
<p><a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> is a language for embedding richly structured data and metadata within Web pages. This allows a Web page to expose much of its underlying meaning to applications, enabling a range of new functionalities within Web clients, exchanging data between Web sites, services, and the users&#8217; desktop applications. For example, a Web page about a new music album can use <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> to embed structured data expressing facts about that album, such as the track listing, artist, links to sample media files etc. A Web browser with a suitable plugin or extension can use this data to offer new functions to the user, such as download the tracklisting with available samples to my music library, or compare prices from online vendors.</p>
<p>Both of these technologies are on the <a href="http://www.w3.org">W3C</a> Recommendation track, and are scheduled for completion in April 2008.</p>
<p><span id="more-55"></span></p>
<p>  <img src="http://www.w3.org/TR/swbp-skos-core-guide/img/ex-ukat.png" alt="Concept structure" /></p>
<p>  <strong>SKOS Foundations and Motivations</strong></p>
<p>SKOS inherits much from the development of knowledge organisation systems (KOS) within the library and information sciences. Thesauri, classification schemes, subject heading systems and taxonomies are all examples of KOS widely used in information systems today.</p>
<p>The original motivation for SKOS was to provide a standard, low-cost way of migrating or &#8220;porting&#8221; existing KOS, especially thesauri, to the Semantic Web, so that they could be used as-is for the development of lightweight Semantic Web applications such as search/browse Web portals. This remains one of the central <a href="http://www.w3.org/TR/skos-ucr/">requirements for current development of SKOS</a>. However, it&#8217;s worth noting that SKOS is also increasingly seen as a &#8220;bridging&#8221; technology, providing the missing link between the rigorous logical formalism of ontology languages such as OWL and the chaotic, informal and weakly-structured world of social approaches to information management, as exemplified by social tagging applications. </p>
<p>As such, <a href="http://www.w3.org/2004/02/skos">SKOS</a> is a very interesting technology to work on, because in the same thought you have to consider the model-theoretic semantics of <a href="http://www.w3.org/RDF/">RDF</a> and <a href="http://www.w3.org/TR/owl-features/">OWL</a> (and their consequences for formal reasoning), the ways in which people naturally express and organise their own conceptualisations (especially when working as a collaboration), and the potential for computational processes to &#8220;spot&#8221; and analyse emergent patterns in networks of unstructured information. The future for knowledge organisation is undoubtedly in highly collaborative, intuitive, and computer-aided environments, where people interact in a natural but structured way, being guided (perhaps unwittingly) towards the creation of emergent structures, supported by and feeding back into a range of analytic systems working behind the scenes to mine, discover and exploit patterns in information. <a href="http://www.w3.org/2004/02/skos">SKOS</a> is a small but important part of this bigger picture, in as much for the work it will lead to in the future as for the applications it can enable today.</p>
<p>  <strong>SKOS Basics</strong></p>
<p>The basic building block in <a href="http://www.w3.org/2004/02/skos">SKOS</a> is the notion of a <em><strong>conceptual resource</strong></em>, or often simply &#8220;concept&#8221;. Concepts can be <strong><em>labelled</em></strong> in one or more languages, can be <strong><em>annotated</em></strong> with various types of documentation, can be arranged into intuitive <strong><em>hierarchies</em></strong> and <strong><em>association networks</em></strong>, and can be aggregated into <strong><em>concept schemes</em></strong> and <strong><em>linked/mapped</em></strong> to concepts in other schemes. </p>
<p>All of these <a href="http://www.w3.org/2004/02/skos">SKOS</a> primitive features can be extended or refined to support more detailed, fine-grained conceptual models. <a href="http://www.w3.org/2004/02/skos">SKOS</a> can also be used in part or as a whole in a &#8220;mix-and-match&#8221; with other <a href="http://www.w3.org/RDF/">RDF</a> vocabularies and <a href="http://www.w3.org/TR/owl-features/">OWL</a> ontologies. A concrete example of this is the <a href="http://sioc-project.org/">Semantically Interlinked Online Communities (SIOC) ontology</a>, where <a href="http://www.w3.org/2004/02/skos">SKOS</a> can be &#8220;plugged in&#8221; to describe the topics or tags defined on a community Web site.</p>
<p>  <strong>SKOS Design</strong></p>
<p><a href="http://www.w3.org/2004/02/skos">SKOS</a> is built on the <a href="http://www.w3.org/RDF/">Resource Description Framework (RDF)</a> and the <a href="http://www.w3.org/TR/owl-features/">Web Ontology Language (OWL)</a>. However, <a href="http://www.w3.org/2004/02/skos">SKOS</a> deliberately hides much of the complexity of these two languages. It provides an interface between the formal underpinnings of the <a href="http://www.w3.org/2001/sw/">Semantic Web</a>, and the more informal, intuitive ways in which people naturally express and organise knowledge. Thus, informal and semi-formal conceptual structures or knowledge organisation systems can be expressed directly in <a href="http://www.w3.org/2004/02/skos">SKOS</a> and used immediately in <a href="http://www.w3.org/2001/sw/">Semantic Web</a> applications, without requiring any formal re-engineering.</p>
<p>While these topics are still under debate, consensus is emerging that the formal semantics of <a href="http://www.w3.org/2004/02/skos">SKOS</a> are, by design, very limited. Therefore, a small number of logical consequences follow from using only <a href="http://www.w3.org/2004/02/skos">SKOS</a>, compared with the larger number of logical consequences that follow from using <a href="http://www.w3.org/RDF/S">RDFS</a> or especially <a href="http://www.w3.org/TR/owl-features/">OWL</a> directly. Whereas in some situations, a powerful set of logical entailments is very valuable, in others this can be inappropriate and/or unnecessary. This is typically the case where a formal language such as OWL is abused to express what is at best a semi-formal conceptualisation (e.g. a &#8220;concept map&#8221; or thesaurus), and a number of surprising and inappropriate inferences then follow. <a href="http://www.w3.org/2004/02/skos">SKOS</a> provides the option to model at a simpler, less formal level, which is then a starting point for more formalisation as required.</p>
<p><a href="http://www.w3.org/2004/02/skos">SKOS</a> is itself an <a href="http://www.w3.org/RDF/">RDF</a> vocabulary (i.e. a set of URIs), whose semantics is defined using the <a href="http://www.w3.org/RDF/S">RDF Vocabulary Description Language (RDF Schema)</a> and <a href="http://www.w3.org/TR/owl-features/">OWL</a>. <a href="http://www.w3.org/2004/02/skos">SKOS</a> can therefore be used as a lightweight conceptual modeling language in its own right, or can be used as an adjunct to the primitives provided by <a href="http://www.w3.org/RDF/">RDF</a>, <a href="http://www.w3.org/RDF/S">RDFS</a> and <a href="http://www.w3.org/TR/owl-features/">OWL</a> in a &#8220;mixed mode&#8221; modeling environment. </p>
<p>An example of where <a href="http://www.w3.org/2004/02/skos">SKOS</a>, <a href="http://www.w3.org/RDF/S">RDFS</a> and <a href="http://www.w3.org/TR/owl-features/">OWL</a> are used in &#8220;mixed mode&#8221; is the <a href="http://www.swed.org.uk">Semantic Web Environmental Directory (SWED)</a>, a prototype Web portal with &#8220;faceted browsing&#8221; functionality for finding UK organisations and projects in the environment sector. Here, <a href="http://www.w3.org/RDF/S">RDFS</a> and <a href="http://www.w3.org/TR/owl-features/">OWL</a> are used to model <em>projects</em>, <em>organisations</em> and their properties, such as <em>topic of interest</em>, <em>geographical coverage</em> and so on. <a href="http://www.w3.org/2004/02/skos">SKOS</a> is then used to model the semi-formal taxonomies which provide the descriptive vocabulary for these properties, e.g. <em>animal welfare</em>, <em>welfare of captive animals</em>, <em>biodiversity</em> etc. Of course, this is a Semantic Web application, so information can be drawn together and integrated from many different sources.</p>
<p>  <strong>SKOS Development</strong></p>
<p><a href="http://www.w3.org/2004/02/skos">SKOS</a> is formally developed and maintained by the <a href="http://www.w3.org/2006/07/SWD/">W3C Semantic Web Deployment Working Group (SWDWG)</a>. It is a work item on the <a href="http://www.w3.org">W3C</a> Recommendation track, which means it is subject to the full <a href="http://www.w3.org">W3C</a> Web standardisation process. However, whilst formal responsibility for <a href="http://www.w3.org/2004/02/skos">SKOS</a> rests with the <a href="http://www.w3.org/2006/07/SWD/">SWDWG</a>, the working group carries out all development in an open, consensus-led environment, and is ably supported by an extended community of interest. Informal participation in the <a href="http://www.w3.org/2004/02/skos">SKOS</a> development process is warmly welcomed &#8212; to join in, <a href="mailto:public-esw-thes-request@w3.org?subject=subscribe">subscribe to the public-esw-thes@w3.org mailing list</a> (you can also browse the <a href="http://lists.w3.org/Archives/Public/public-esw-thes/latest">mailing lists&#8217; online archives</a>). To participate formally in the development of <a href="http://www.w3.org/2004/02/skos">SKOS</a>, contact your W3C Advisory Committee representative about <a href="http://www.w3.org/2004/01/pp-impl/">joining the SWDWG</a>.</p>
<p>  <strong>RDFa</strong></p>
<p>From the abstract to the latest version of the <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa Primer</a>:</p>
<blockquote><p>Current Web pages, written in XHTML, contain inherent structured data: calendar events, contact information, photo captions, song titles, copyright licensing information, etc. When authors and publishers can express this data precisely, and when tools can read it robustly, a new world of user functionality becomes available, letting users transfer structured data between applications and Web sites. An event on a Web page can be directly imported into a desktop calendar. A license on a document can be detected to inform the user of his rights automatically. A photo&#8217;s creator, camera setting information, resolution, and topic can be published as easily as the original photo itself.</p>
</blockquote>
<p><a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> is simply a collection of XML attributes that can be used within an XHTML document to embed structured data within that document. This data can then be extracted by an RDFa parser as a set of <a href="http://www.w3.org/RDF/">RDF</a> triples. </p>
<p>So, for example, the following snippet of XHTML has embedded data describing a person&#8217;s contact information. The data has been embedded using the special RDFa attributes <code>@instanceof</code>, <code>@property</code>, <code>@about</code>, <code>@rel</code> and <code>@content</code>. </p>
<pre>
  &lt;p class="contactinfo" about="http://example.org/staff/jo"&gt;
    &lt;span property="contact:fn"&gt;Jo Smith&lt;/span&gt;.
    &lt;span property="contact:title"&gt;Web hacker&lt;/span&gt;
    at
    &lt;a rel="contact:org" href="http://example.org"&gt;
      Example.org
    &lt;/a&gt;.
    You can contact me
    &lt;a rel="contact:email" href="mailto:jo@example.org"&gt;
      via email
    &lt;/a&gt;.
  &lt;/p&gt;
</pre>
<p>This snippet, when parsed, yields the following RDF triples:</p>
<pre><code>
&lt;http://example.org/staff/jo&gt;
   contact:fn "Jo Smith";
   contact:title "Web hacker";
   contact:org &lt;http://example.org&gt;;
   contact:email &lt;mailto:jo@example.org&gt;.
</code></pre>
<p>Given this data, a Web client could, for example, offer functions to import this contact information into a desktop contact management system. </p>
<p>This is a very simple example, but hopefully it illustrates the general principle that, once data is available in Web pages, new functionality becomes possible. In the e-learning sector, we might imagine Web pages which not only describe historical events, but encode data about the time, place and people involved in those events. A history student might then &#8220;cut and paste&#8221; these data from many different Web pages into their own virtual learning space, allowing them to discover and explore the many-dimensional relationships between people, places and events and build up their own structured &#8220;mini-history&#8221; which is specific to a particular learning objective or research question. </p>
<p>  <strong>RDFa and Microformats</strong></p>
<p>There&#8217;s a lively debate ongoing on the Web today about the relationship between <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> and an analogous technology called &#8220;microformats&#8221;. Microformats have the same objective as <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a>, of embedding data within Web pages. While I&#8217;m not in a position to comment in any depth on the comparisons and relative merits of these two approaches, <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> developers argue that the <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> approach provides a more scalable, general purpose approach, which requires only a single implementation, and which allows different types of data to &#8220;play well&#8221; with each other. On the other hand, a bespoke transformation is required for each different microformat, and microformats could easily &#8220;clash&#8221; with each other under certain circumstances. Having said that, microformats are used on the Web today, whereas <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> has only a number of prototype implementations. A <a href="http://evan.prodromou.name/RDFa_vs_microformats">blog post by Evan Prodromou</a> discusses the issue in more detail, although some of the information there may be out of date (see e.g. the comments at the end).</p>
<p>  <strong>Conclusions</strong></p>
<p>Both <a href="http://www.w3.org/2004/02/skos">SKOS</a> and <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> have interesting consequences for e-learning technology, and especially for leveraging the Web as a platform for delivering learning. </p>
<p><a href="http://www.w3.org/2004/02/skos">SKOS</a> provides a lightweight technology for overlaying distributed learning content with intuitive conceptual structures, which could for example be used to aid discovery and navigation of learning resources. Conceptual structures are also themselves learning resources in their own right, and although <a href="http://www.w3.org/2004/02/skos">SKOS</a> is oriented towards information retrieval applications, the use of <a href="http://www.w3.org/2004/02/skos">SKOS</a> to express, evolve, exchange and publish &#8220;knowledge&#8221; as part of a learning process remains an intriguing avenue for exploration.</p>
<p><a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> provides a general purpose technology for embedding data in Web pages. This in turn can enable a richer experience when interacting with the Web, moving beyond Web pages as unbreakable chunks of information towards Web pages as highly flexible containers of data which can be extracted, adapted, re-used and re-purposed. This technology has the potential to completely change our interaction with the Web as a learning experience.</p>
<p>Both technologies would greatly benefit from active participation from the e-learning community, to ensure that the requirements of near-future learning technologies are met by <a href="http://www.w3.org/2004/02/skos">SKOS</a> and <a href="http://www.w3.org/TR/xhtml-rdfa-primer">RDFa</a> within the timeline set for standardisation. </p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/55/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/55/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/55/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/55/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/55/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=55&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2007/11/14/skos-and-rdfa-in-e-learning/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>

		<media:content url="http://www.w3.org/TR/swbp-skos-core-guide/img/ex-ukat.png" medium="image">
			<media:title type="html">Concept structure</media:title>
		</media:content>
	</item>
		<item>
		<title>Versioning and the Web</title>
		<link>http://alimanfoo.wordpress.com/2007/11/07/versioning-and-the-web/</link>
		<comments>http://alimanfoo.wordpress.com/2007/11/07/versioning-and-the-web/#comments</comments>
		<pubDate>Wed, 07 Nov 2007 14:18:00 +0000</pubDate>
		<dc:creator>Alistair Miles</dc:creator>
				<category><![CDATA[change management]]></category>
		<category><![CDATA[digital libraries]]></category>
		<category><![CDATA[rdf]]></category>
		<category><![CDATA[repositories]]></category>
		<category><![CDATA[semantic web]]></category>
		<category><![CDATA[uri]]></category>
		<category><![CDATA[version management]]></category>
		<category><![CDATA[web architecture]]></category>
		<category><![CDATA[web technology]]></category>

		<guid isPermaLink="false">http://alimanfoo.wordpress.com/2007/11/07/versioning-and-the-web/</guid>
		<description><![CDATA[This post looks at some of the problems of identifying, describing and linking &#8220;versions&#8221; of &#8220;digital objects&#8221;, from the point of view of the Web, drawing especially on the Architecture of the Web published by W3C. These thoughts were stimulated by the recent kickoff meeting of the new Version Information Framework (VIF) project, at which [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=54&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This post looks at some of the problems of identifying, describing and linking &#8220;versions&#8221; of &#8220;digital objects&#8221;, from the point of view of the Web, drawing especially on the <a href="http://www.w3.org/TR/webarch/">Architecture of the Web</a> published by W3C. These thoughts were stimulated by the recent kickoff meeting of the new <a href="http://www.lse.ac.uk/library/vif/">Version Information Framework (VIF) project</a>, at which versioning was discussed in the context of adding value to digital repositories &#8212; I hope this post provides some useful input to the VIF project team. </p>
<p><span id="more-54"></span></p>
<p>One of the difficulties of talking about information is that it is so intangible. A book on a shelf or a file on a computer hard drive are tangible enough, but when we come to talk about a book in abstract (e.g. the complete works of Shakespeare) or a Web page, things become less tangible. One of the problems is that our normal vocabulary for talking about things at these intangible/virtual/abstract levels is not very well developed &#8212; e.g. we use &#8220;book&#8221; to refer both to a physical book on a shelf and the abstract notion of a book (which may have many copies on many shelves). </p>
<p>The Functional Requirements for Bibliographic Records (FRBR) specification goes a long way towards improving this situation, giving us vocabulary for four separate levels of abstraction &#8212; Work, Expression, Manifestation and Item. However, these concepts can be ambiguous and hard to apply in some situations. Nevertheless, they are an important reference point.</p>
<p><strong>Web Architecture</strong></p>
<p>On the Web, we face a similar situation, in that the vocabulary for talking about different levels of abstraction and the relationships between them is not well developed. However, the Web is by nature a virtual information space, and the <a href="http://www.w3.org/TR/webarch/">Architecture of the Web</a> provides some very interesting mechanisms for talking about information as a virtual commodity. The Web has also had to deal with the conceptual relationships between an information resource as a virtual entity (i.e. a Web page), and the many different media types (i.e. data formats, e.g. HTML, XML, PDF) and languages (i.e. English, Japanese etc.) which can be used to transmit or carry information.</p>
<p>From the <a href="http://www.w3.org/TR/webarch/">Architecture of the Web, Volume One</a>:</p>
<blockquote><p>The World Wide Web uses relatively simple technologies with sufficient scalability, efficiency and utility that they have resulted in a remarkable information space of interrelated resources, growing across languages, cultures, and media. In an effort to preserve these properties of the information space as the technologies evolve, this architecture document discusses the core design components of the Web. They are identification of resources, representation of resource state, and the protocols that support the interaction between agents and resources in the space. </p>
</blockquote>
<p>According to the <a href="http://www.w3.org/TR/webarch/">Architecture of the Web</a>, the Web is built from &#8220;resources&#8221;, and in particular, &#8220;information resources&#8221;. In practice, the notion of an &#8220;information resource&#8221; is hard to pin down, but an &#8220;information resource&#8221; is roughly defined as a resource whose essential characteristics can be conveyed in a message. Resources are identified by URIs, and you can use a URI to access a resource. The most common type of access involves requesting a &#8220;representation&#8221; of a resource, which is also known as &#8220;dereferencing a URI&#8221;. This is what your Web browser does when you type a URI in the address bar and click &#8220;Go&#8221;.</p>
<p><strong>Content Negotiation</strong></p>
<p>One of the interesting properties of the Web is the ability to provide several alternative representations in different media types from the same URI, which is supported in the HTTP protocol. This is also known as &#8220;<a href="http://www.w3.org/QA/2006/02/content_negotiation.html">content negotiation</a>&#8221;. When a user agent (e.g. a Web browser) requests a representation of a resource, it specifies which media types it can &#8220;accept&#8221; in response. The server then sends a response in the media type best matching the request.</p>
<p>To take a very simple example, the URI &#8220;<a href="http://www.w3.org/Icons/w3c_main">http://www.w3.org/Icons/w3c_main</a>&#8221; identifies the main W3C logo icon. This image is available in two different media types &#8212; GIF and PNG. You can vary which representation you receive by changing the &#8220;Accept&#8221; header in the HTTP request.</p>
<p>So the URI &#8220;<a href="http://www.w3.org/Icons/w3c_main">http://www.w3.org/Icons/w3c_main</a>&#8221; clearly identifies something virtual, which is &#8220;above&#8221; (at a higher level of abstraction than) the notion of media type or format. Let&#8217;s call this a <em>content negotiable resource</em>.</p>
<p>Media type is not the only axis along which representations of a resource may vary. A multilingual Web site can provide representations in different languages from the same URI. For example, the Debian Web site &#8220;<a href="http://www.debian.org">http://www.debian.org</a>&#8221; provides alternative representations in English, French, Spanish and many other languages &#8212; i.e. this is also a content negotiable resource.</p>
<p>So it&#8217;s clear that an <em>information resource</em> might be a <em>content negotiable resource</em> &#8212; something for which representations might be provided in one or more media types and one or more languages.</p>
<p>However, many information resources in the Web only provide a single representation, in some media type and language. For example, &#8220;<a href="http://www.w3.org/Icons/w3c_main.png">http://www.w3.org/Icons/w3c_main.png</a>&#8221; only provides a PNG representation of the main W3C logo. Let&#8217;s call this a <em>content invariant resource</em>.</p>
<p>The interesting thing for our discussion of &#8220;versioning&#8221; is that we might express a relationship between a content negotiable resource A and a content invariant resource B, where the single representation provided by B is equivalent to one of the representations provided by A. </p>
<p>If we had some appropriate RDF vocabulary, we might even state this formally, e.g.</p>
<pre><code>
# links between resources
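&lt;http://www.w3.org/Icons/w3c_main&gt; rdf:type vif:ContentNegotiable.
&lt;http://www.w3.org/Icons/w3c_main.png&gt; rdf:type vif:ContentInvariant.
&lt;http://www.w3.org/Icons/w3c_main.gif&gt; rdf:type vif:ContentInvariant.
&lt;http://www.w3.org/Icons/w3c_main&gt; vif:contentVariant &lt;http://www.w3.org/Icons/w3c_main.png&gt;.
&lt;http://www.w3.org/Icons/w3c_main&gt; vif:contentVariant &lt;http://www.w3.org/Icons/w3c_main.gif&gt;.
&lt;http://www.w3.org/Icons/w3c_main.png&gt; vif:contentAlternate &lt;http://www.w3.org/Icons/w3c_main.gif&gt;.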

# definition of the vocabulary we used above
vif:ContentNegotiable rdfs:subClassOf vif:InformationResource.
vif:ContentInvariant rdfs:subClassOf vif:InformationResource.
vif:contentVariant rdfs:domain vif:ContentNegotiable; rdfs:range vif:ContentInvariant.
vif:contentAlternate rdfs:domain vif:ContentInvariant; rdfs:range vif:ContentInvariant; rdf:type owl:SymmetricProperty.
</code></pre>
<p>If this metadata were made available to an application (user agent), the application could then make a user aware that various alternative representations of some Web resource were available, for example. </p>
<p><strong>Changes Over Time</strong></p>
<p>The other interesting property of the Web architecture is that things can change over time. For example, if you go to &#8220;<a href="http://www.bbc.co.uk/news">http://www.bbc.co.uk/news</a>&#8221; today, you&#8217;ll see something different from what was there yesterday. Yet, the URI &#8220;http://www.bbc.co.uk/news&#8221; identifies an information resource, so this resource must be changing over time. Let&#8217;s call this a <em>changeable resource</em>.</p>
<p>On the other hand, some Web resources haven&#8217;t ever changed, and are promised to never change. For example, &#8220;<a href="http://www.w3.org/TR/2004/REC-webarch-20041215/">http://www.w3.org/TR/2004/REC-webarch-20041215/</a>&#8221; identifies a time-specific &#8220;edition&#8221; or &#8220;version&#8221; of the Architecture of the World Wide Web technical report. Let&#8217;s call this an <em>unchangeable resource</em>. However, note that W3C also provides the URI &#8220;<a href="http://www.w3.org/TR/webarch/">http://www.w3.org/TR/webarch/</a>&#8221;, which always corresponds to the latest version in the report series.</p>
<p>The interesting thing for our discussion of &#8220;versioning&#8221; is that we might express a relationship between a changeable resource A and an unchanging resource B, where the representation provided by B is equivalent to the representation provided by A at some specific point in time. If we had some appropriate RDF vocabulary, we might state this formally, e.g.</p>
<pre><code>
# links between resources
 rdf:type vif:Changeable.
 rdf:type vif:Unchanging.
 rdf:type vif:Unchanging.
 vif:snapshot .
 vif:snapshot .
 vif:priorState .

# definition of vocabulary used above
vif:Changeable rdfs:subClassOf vif:InformationResource.
vif:Unchanging rdfs:subClassOf vif:InformationResource.
vif:snapshot rdfs:domain vif:Changeable; rdfs:range vif:Unchanging.
vif:priorState rdfs:domain vif:Unchanging; rdfs:range vif:Unchanging.
</code></pre>
<p>If this metadata were made available to a user agent, the application could then make the user aware that a more recent representation of some Web resource was available, or that a history of changes to that resource was available, for example.</p>
<p><strong>Design Patterns</strong></p>
<p>So on the Web, there are <em>content-negotiable resources</em> and <em>content-invariant resources</em>; and there are <em>changeable resources</em>, and <em>unchanging resources</em>. The Web Architecture itself is entirely ambivalent to which is which. However, we might make use of these different classes, to provide some useful functionality to users of digital repositories, accessible through the Web.</p>
<p>Given that any information resource in the Web could be either changeable or unchanging, content-negotiable or content-invariant, there are four different possibilities to consider for every resource. This makes for a fairly complicated set of possible interrelationships. However, we might define some <strong><em>design patterns</em></strong>, which could be useful in particular situations &#8212; for example, in exposing information from digital repositories as part of the Web.</p>
<p>We might, for example, describe a three-level pattern.</p>
<p>At the top level is a <em>changeable</em>, <em>content-negotiable</em> information resource. </p>
<p>At the second level are a set of <em>unchanging</em>, <em>content-negotiable</em> information resources. These are less abstract, corresponding to a specific snapshot or &#8220;revision&#8221; of a resource.</p>
<p>At the bottom level, are a set of <em>unchanging</em>, <em>content-invariant</em> information resources. These are the most concrete entities, corresponding to a specific content variant of a specific snapshot of a resource. </p>
<p>The diagram below is an attempt to illustrate this pattern.</p>
<p><a class="imagelink" href="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2007/11/network.png" title="Version pattern"><img src="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2007/11/network.png" alt="Version pattern" /></a></p>
<p>The interesting thing about this pattern is that we might say very concretely how this pattern should be implemented in the Web &#8212; i.e. how Web servers could be set up to help user agents understand better the differences and interrelationships between the various information resources. For example, the URI denoting the top level information resource could be set up to redirect to the URI of the most recent snapshot at the second level of information resources; these URIs at the second level could then be set up to content-negotiate directly as per the HTTP protocol, but offering information in the &#8220;Content-Location&#8221; HTTP header which gives the user agent a clue as to how to link to a specific content variant.</p>
<p>We could also cook up some RDF vocabulary to formally express all of these interrelationships, as shown in the examples above, then publish this in the Web or embed it in web pages. This would allow user agents to be fully aware of the structure of the information space, and do even more intelligent things like make the user aware that he/she is viewing an out-of-date version and that a more recent version is available, or that content variants are available in a range of languages.</p>
<p>Of course, this is only one of a number of possible design patterns, which need further exploration. But hopefully, this at least gives a few ideas as to how the architecture of the Web &#8212; a virtual information space &#8212; might shed light on a discussion of identifying, describing and linking &#8220;versions&#8221; of &#8220;digital objects&#8221;.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/alimanfoo.wordpress.com/54/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/alimanfoo.wordpress.com/54/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/alimanfoo.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/alimanfoo.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/alimanfoo.wordpress.com/54/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=alimanfoo.wordpress.com&amp;blog=2481953&amp;post=54&amp;subd=alimanfoo&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://alimanfoo.wordpress.com/2007/11/07/versioning-and-the-web/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6bbb1a29798652153eae95526b6322b6?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">alimanfoo</media:title>
		</media:content>

		<media:content url="http://isegserv.itd.rl.ac.uk/blogs/alistair/wp-content/uploads/2007/11/network.png" medium="image">
			<media:title type="html">Version pattern</media:title>
		</media:content>
	</item>
	</channel>
</rss>
