chromosome.ttl — 9,227 triples
sequence_variant.ttl — 6,455 triples
disease.ttl — 27,421 triples
# Locate each sequence variant on its chromosome.
#
# Two named graphs are joined on the shared variable ?chrom:
#   - <urn:kgx:chromosome> restricts ?chrom to entities stated to be an
#     instance of chromosome (wdt:P31 wd:Q37748).
#   - <urn:kgx:sequence_variant> supplies the variant-to-chromosome
#     links, stated with wdt:P1057 (Wikidata property "chromosome").
# Every solution is one (variant, chromosome) pair. LIMIT 20 returns at
# most twenty pairs; with no ORDER BY, which twenty is left to the
# query engine.
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd:  <http://www.wikidata.org/entity/>

SELECT ?variant ?chrom
WHERE {
  GRAPH <urn:kgx:chromosome> {
    ?chrom wdt:P31 wd:Q37748 .
  }
  GRAPH <urn:kgx:sequence_variant> {
    ?variant wdt:P1057 ?chrom .
  }
}
LIMIT 20
# Which diseases have the most known genetic associations?
# In the <urn:kgx:disease> graph, wdt:P2293 ("genetic association")
# links a disease to a gene. For each disease we count the distinct
# genes it is linked to and keep the ten diseases with the highest
# counts.
# COUNT(DISTINCT ...) states the "distinct genes" intent explicitly.
# The secondary sort key ?disease breaks ties between diseases that
# share a count, so the ordering -- and therefore the LIMIT 10
# cut-off -- is reproducible from run to run.
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?disease (COUNT(DISTINCT ?gene) AS ?associated_genes)
WHERE {
  GRAPH <urn:kgx:disease> { ?disease wdt:P2293 ?gene }
}
GROUP BY ?disease
ORDER BY DESC(?associated_genes) ?disease
LIMIT 10
# How many triples does each named graph contain?
# Inside GRAPH ?g the pattern ?s ?p ?o has no bound term, so it matches
# every triple of every named graph (a GRAPH pattern does not visit the
# default graph). GROUP BY ?g partitions those matches by containing
# graph, COUNT(*) reports the size of each partition, and the result is
# sorted with the largest graph first.
# No PREFIX declarations are needed: the query uses no prefixed names.
SELECT ?g (COUNT(*) AS ?triples)
WHERE {
  GRAPH ?g { ?s ?p ?o }
}
GROUP BY ?g
ORDER BY DESC(?triples)
# What are the most common entity types across the named graphs?
# A type can be stated in two parallel ways: wdt:P31 (Wikidata's
# "instance of") or rdf:type. UNION collects both.
# GRAPH ?g ranges over every named graph in the dataset, so a subject
# that states the same type through both properties, or in more than
# one graph, produces several solutions for that type.
# COUNT(DISTINCT ?s) counts such a subject once, giving the number of
# distinct subjects per type rather than the number of type statements.
# The ten types with the most subjects are returned; ?type is a
# secondary sort key so ties at the cut-off resolve the same way on
# every run.
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?type (COUNT(DISTINCT ?s) AS ?instances)
WHERE {
  { GRAPH ?g { ?s wdt:P31 ?type } }
  UNION
  { GRAPH ?g { ?s rdf:type ?type } }
}
GROUP BY ?type
ORDER BY DESC(?instances) ?type
LIMIT 10
# List diseases together with a stated cause.
#
# wdt:P828 is Wikidata's "has cause" property. Each solution pairs a
# disease from the <urn:kgx:disease> graph with one of the causes
# recorded for it (a pathogen, environmental factor, or other entity),
# so a disease with several causes appears on several rows.
# DISTINCT removes repeated pairs and LIMIT 25 returns at most
# twenty-five of them; with no ORDER BY, which ones is left to the
# query engine. Intended as a starting point for exploring
# disease-aetiology links.
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

SELECT DISTINCT ?disease ?cause
WHERE {
  GRAPH <urn:kgx:disease> {
    ?disease wdt:P828 ?cause .
  }
}
LIMIT 25