{"id":2931,"date":"2024-04-17T07:26:23","date_gmt":"2024-04-17T06:26:23","guid":{"rendered":"https:\/\/blogs.qub.ac.uk\/dipsa\/?p=2931"},"modified":"2025-04-24T05:41:10","modified_gmt":"2025-04-24T04:41:10","slug":"ms-biographs-on-ieee-dataport","status":"publish","type":"post","link":"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-on-ieee-dataport\/","title":{"rendered":"MS-BioGraphs on IEEE DataPort"},"content":{"rendered":"\n<p class=\"has-text-align-justify\">MS-BioGraph sequence similarity graph datasets are now publicly available on IEEE DataPort: <a href=\"https:\/\/doi.org\/10.21227\/gmd9-1534\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/doi.org\/10.21227\/gmd9-1534<\/a> .<\/p>\n\n\n\n<p class=\"has-text-align-justify\">To access the files, you need to register\/login to IEEE DataPort and then visit the MS-BioGraphs page. By saving the page as an HTML file such as <code><strong>dp.html<\/strong><\/code>, you may download the datasets (as an example <code><strong>MS1<\/strong><\/code>) using the following script:<\/p>\n\n\n\n<pre style='font-size:1em;border:1px solid gray;padding:20px' class=\"wp-block-code\"><code>dsname=\"MS1\"\nhtml_file=\"dp.html\"\n\nurls=`cat $html_file  | sed  -e 's\/\\&amp;amp;\/\\&amp;\/g'  | grep -Eo \"(http|https):\/\/&#091;a-zA-Z0-9.\/?&amp;=_%:-]*\" | grep amazonaws  | sort | uniq | grep -E \"$dsname&#091;-_\\.]\"`\n\nfor u in $urls; do\n    wget $u\n    if &#091; $? 
!= 0 ]; then break; fi\ndone\n\n# removing query strings\nfor f in $(find $1 -type f); do\n    if &#091; $f = ${f%%\\?*} ]; then continue; fi\n    mv \"${f}\" \"${f%%\\?*}\"\ndone\n\n# linking offsets.bin to be found by ParaGrapher\nln -s ${dsname}_offsets.bin ${dsname}-underlying_offsets.bin\n\n<\/code><\/pre>\n\n\n\n<p class=\"has-text-align-justify\">Instead of <code><strong>wget<\/strong><\/code> you may use <code><strong>axel -n 10<\/strong><\/code> to use multiple connections (here, 10) for downloading each file (<a href=\"https:\/\/manpages.ubuntu.com\/manpages\/noble\/en\/man1\/axel.1.html\">https:\/\/manpages.ubuntu.com\/manpages\/noble\/en\/man1\/axel.1.html<\/a>).<\/p>\n\n\n\n<p class=\"has-medium-font-size\"><strong><a rel=\"noreferrer noopener\" href=\"https:\/\/blogs.qub.ac.uk\/DIPSA\/MS-BioGraphs\/\" target=\"_blank\">MS-BioGraphs<\/a><\/strong><br><br><strong>Related Posts<\/strong><\/p>\n\n\n<ul class=\"wp-block-latest-posts__list has-dates wp-block-latest-posts\"><li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2024\/08\/trees-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/minimum-spanning-forest-of-ms-biographs\/\">Minimum Spanning Forest of MS-BioGraphs<\/a><time datetime=\"2024-08-09T14:11:36+01:00\" class=\"wp-block-latest-posts__post-date\">9 August 2024<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2024\/04\/ivy-2-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a 
class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-on-ieee-dataport\/\">MS-BioGraphs on IEEE DataPort<\/a><time datetime=\"2024-04-17T07:26:23+01:00\" class=\"wp-block-latest-posts__post-date\">17 April 2024<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2024\/02\/poplar2-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/paragrapher-source-code-for-webgraph-types\/\">ParaGrapher Source Code For WebGraph Types<\/a><time datetime=\"2024-02-16T08:13:13+00:00\" class=\"wp-block-latest-posts__post-date\">16 February 2024<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/11\/goldcrest-1-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/on-overcoming-hpc-challenges-of-trillion-scale-real-world-graph-datasets\/\">On Overcoming HPC Challenges of  Trillion-Scale Real-World Graph Datasets \u2013 BigData&#8217;23 (Short Paper)<\/a><time datetime=\"2023-12-15T02:47:00+00:00\" class=\"wp-block-latest-posts__post-date\">15 December 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/10-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" 
style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/dataset-announcement-ms-biographs-trillion-scale-public-real-world-sequence-similarity-graphs\/\">Dataset Announcement: MS-BioGraphs, Trillion-Scale Public Real-World Sequence Similarity Graphs &#8211; IISWC&#8217;23 (Poster)<\/a><time datetime=\"2023-10-02T00:26:00+01:00\" class=\"wp-block-latest-posts__post-date\">2 October 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/2-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-sequence-similarity-graph-datasets\/\">MS-BioGraphs: Sequence Similarity Graph Datasets<\/a><time datetime=\"2023-08-30T06:52:00+01:00\" class=\"wp-block-latest-posts__post-date\">30 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/1-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-ms\/\">MS-BioGraphs MS<\/a><time datetime=\"2023-08-10T09:53:42+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/6-150x150.jpg\" 
class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-msa500\/\">MS-BioGraphs MSA500<\/a><time datetime=\"2023-08-10T09:52:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/3-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-ms200\/\">MS-BioGraphs MS200<\/a><time datetime=\"2023-08-10T09:51:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/7-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-msa200\/\">MS-BioGraphs MSA200<\/a><time datetime=\"2023-08-10T09:50:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/4-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" 
href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-ms50\/\">MS-BioGraphs MS50<\/a><time datetime=\"2023-08-10T09:49:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/8-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-msa50\/\">MS-BioGraphs MSA50<\/a><time datetime=\"2023-08-10T09:48:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/9-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-msa10\/\">MS-BioGraphs MSA10<\/a><time datetime=\"2023-08-10T09:44:41+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/5-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-ms1\/\">MS-BioGraphs MS1<\/a><time datetime=\"2023-08-10T09:41:14+01:00\" class=\"wp-block-latest-posts__post-date\">10 
August 2023<\/time><\/li>\n<li><div class=\"wp-block-latest-posts__featured-image alignleft\"><img loading=\"lazy\" decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2023\/08\/11-150x150.jpg\" class=\"attachment-thumbnail size-thumbnail wp-post-image\" alt=\"\" style=\"max-width:60px;max-height:60px;\" \/><\/div><a class=\"wp-block-latest-posts__post-title\" href=\"https:\/\/blogs.qub.ac.uk\/dipsa\/ms-biographs-validation\/\">MS-BioGraphs Validation<\/a><time datetime=\"2023-08-10T09:40:00+01:00\" class=\"wp-block-latest-posts__post-date\">10 August 2023<\/time><\/li>\n<\/ul>","protected":false},"excerpt":{"rendered":"<p>MS-BioGraph sequence similarity graph datasets are now publicly available on IEEE DataPort: https:\/\/doi.org\/10.21227\/gmd9-1534 . To access the files, you need to register\/login to IEEE DataPort and then visit the MS-BioGraphs page. By saving the page as an HTML file such as dp.html, you may download the datasets (as an example MS1) using the following script: 
[&hellip;]<\/p>\n","protected":false},"author":1315,"featured_media":3078,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[63],"tags":[116,68,67,35,64,66,65,19],"class_list":{"0":"post-2931","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-ms-biographs","8":"tag-biological-networks","9":"tag-dataset","10":"tag-graph-datasets","11":"tag-graph-processing","12":"tag-high-performance-graph-processing","13":"tag-real-world-graphs","14":"tag-sequence-similarity-graphs","15":"tag-source-code","16":"czr-hentry"},"jetpack_featured_media_url":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-content\/uploads\/sites\/14\/2024\/04\/ivy-2.jpg","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/posts\/2931","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/users\/1315"}],"replies":[{"embeddable":true,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/comments?post=2931"}],"version-history":[{"count":34,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/posts\/2931\/revisions"}],"predecessor-version":[{"id":3488,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/posts\/2931\/revisions\/3488"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/media\/3078"}],"wp:attachment":[{"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/media?parent=2931"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/categories?post=2931"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\
/blogs.qub.ac.uk\/dipsa\/wp-json\/wp\/v2\/tags?post=2931"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}