Subject: | Postgresql quirks |
Hi, love your program!
When I loaded the attached schema into Postgres and tried graphing
it, no edges were created, and all of the system/internal tables were
shown, resulting in a massive png file of unconnected nodes when done.
I tweaked the GraphViz-DBI module to make it work, which involved
screening out any table that didn't start with 'public.', removing the
'public.', and removing quotation marks around field names.
It turned out that postgres puts quotation marks around some field
names, such as "operator", "location", and "database", but not the rest.
In the attached schema, you'll find "myoperator", and "mylocation" as
tablenames; remove the 'my' to exercise this bug.
I'm also attaching my hacked-up GraphViz::DBI, but it's probably only
good for an example of what was done, and not for general use, as it
hasn't been tested against any other database. In particular, it throws
away any table that doesn't start with 'public.', which is bound to have
a negative impact.
Oh, and this work is being done on a freshly installed and updated
Fedora Core 5, with perl 5.8.8 and Postgres 8.1.3. No actual code has
been written; the schema comes first.
Thanks!
Subject: | DBI.pm |
package GraphViz::DBI;
require 5.005_62;
use strict;
use warnings;
use Carp;
use GraphViz;
our $AUTOLOAD;
our $VERSION = '0.02';
sub new {
my $this = shift;
my $class = ref($this) || $this;
my $self = {};
bless $self, $class;
$self->_init(@_);
return $self;
}
sub _init {
my $self = shift;
$self->set_dbh(+shift) if @_;
$self->{g} = GraphViz->new();
}
sub set_dbh {
my ($self, $dbh) = @_;
$self->{dbh} = $dbh;
return $self;
}
sub get_dbh {
my $self = shift;
return $self->{dbh};
}
sub get_tables {
my $self = shift;
$self->{tables} ||= [ $self->get_dbh->tables ];
my @result;
my $t;
foreach (@{$self->{tables}}) {
$t=$_;
$t=~s/"//g;
if (m/^public[.](.*)/) {push @result,$1;}
}
#return @{ $self->{tables} };
return @result;
}
sub is_table {
my ($self, $table) = @_;
my @tables=$self->get_tables;
$self->{is_table} ||= { map { $_ => 1 } @tables };
my $t=$self->{is_table}{$table};
if (!defined $t) {$t=0;}
return $t;
}
sub is_foreign_key {
# if the field name is of the form "<table>_id" and
# "<table>" is an actual table in the database, treat
# this as a foreign key.
# This is my convention; override it to suit your needs.
my ($self, $table, $field) = @_;
return if $field =~ /$table[_-]id/i;
return unless $field =~ /^(.*)[_-]id$/i;
my $candidate = $1;
return unless $self->is_table($candidate);
return $candidate;
}
sub graph_tables {
my $self = shift;
my %table = map { $_ => 1 } $self->get_tables;
for my $table ($self->get_tables) {
my $sth = $self->get_dbh->prepare(
"select * from $table where 1 = 0");
$sth->execute;
my @fields = @{ $sth->{NAME} };
$sth->finish;
my $label = "{$table|";
for my $field (@fields) {
$label .= $field.'\l';
if (my $dep = $self->is_foreign_key($table, $field)) {
$self->{g}->add_edge({ from => $table, to => $dep });
}
}
$self->{g}->add_node({ name => $table,
shape => 'record',
label => "$label}",
});
}
return $self->{g};
}
sub AUTOLOAD {
my $self = shift;
my $type = ref($self) or croak "$self is not an object";
(my $name = $AUTOLOAD) =~ s/.*:://;
return if $name =~ /DESTROY/;
# hm, maybe GraphViz knows what to do with it...
$self->{g}->$name(@_);
}
1;
__END__
=head1 NAME
GraphViz::DBI - graph database tables and relations
=head1 SYNOPSIS
use GraphViz::DBI;
print GraphViz::DBI->new($dbh)->graph_tables->as_png;
=head1 DESCRIPTION
This module constructs a graph for a database showing tables and
connecting them if they are related. While or after constructing the
object, pass an open database handle, then call C<graph_tables> to
determine database metadata and construct a GraphViz graph from the
table and field information.
=head1 METHODS
The following methods are defined by this class; all other method calls
are passed to the underlying GraphViz object:
=over 4
=item new( [$dbh] )
Constructs the object; also creates a GraphViz object. The constructor
accepts an optional open database handle.
=item set_dbh($dbh)
Sets the database handle.
=item get_dbh()
Returns the database handle.
=item is_table($table)
Checks the database metadata whether the argument is a valid table name.
=item is_foreign_key($table, $field)
Determines whether the field belonging to the table is a foreign key
into some other table. If so, it is expected to return the name of that
table. If not, it is expected to return a false value.
For example, if there is a table called "product" and another table
contains a field called "product_id", then to indicate that this field is
a foreign key into the product table, the method returns "product". This
is the logic implemented in this class. You can override this method in
a subclass to suit your needs.
=item graph_tables()
This method goes through all tables and fields and calls appropriate
methods to determine which tables and which dependencies exist, then
hand the results over to GraphViz. It returns the GraphViz object.
=back
=head1 TODO
=over 4
=item *
Test with various database drivers to see whether they support the
metadata interface.
=item *
Make each table a vertical port with dependencies using those ports.
=item *
Provide the possibility to name edges to specify the type of relationship
('has-a', 'is-a', etc.).
=back
=head1 BUGS
None known so far. If you find any bugs or oddities, please do inform the
author.
=head1 AUTHOR
Marcel GrE<uuml>nauer <marcel@codewerk.com>
=head1 COPYRIGHT
Copyright 2001 Marcel GrE<uuml>nauer. All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
perl(1), GraphViz(3pm).
=cut
Subject: | schema.sql |
create table Study (
id serial primary key,
PubMed_id Integer,
Description varchar(40),
Authors varchar(40),
Institution varchar(40));
create table Experiment (
id serial primary key,
Study_id integer,
Description varchar(40),
Proteins varchar(40),
Preparation_parameter_id integer,
Acquisition_parameter_id integer,
Search_parameter_id integer);
create table Search_parameter (
id serial primary key,
Proteinbank_id varchar(40),
Search_engine_id integer,
Taxonomy varchar(40),
Cleavage_agent varchar(40),
Allowed_missed_cleavages Integer,
Fixed_modifications varchar(40),
Variable_modifications varchar(40),
Precursor_ion_mass_tolerance varchar(40),
Fragment_ion_mass_tolerance varchar(40),
Thresholds varchar(40));
create table Protein (
id serial primary key,
Experiment_id integer,
Accession_code varchar(40),
Protein_description varchar(100),
Score real,
Validation_status varchar(40),
Percent_coverage real,
Sequence varchar(40),
Quantitation varchar(40));
create table Peptide (
id serial primary key,
Protein_id integer,
Sequence varchar(40),
Score real,
Deviation_from_expected_cleavage varchar(40),
Observed_m_z real,
Observed_charge Integer,
Modifications varchar(40),
Spectra varchar(40));
create table Proteinbank (
id serial primary key,
Name varchar(40),
Url varchar(200));
create table Search_engine (
id serial primary key,
Name varchar(40));
create table Mass_spectrometer (
id serial primary key,
name varchar(40));
create table Analytical_column (
id serial primary key,
name varchar(40));
create table MyOperator (
id serial primary key,
name varchar(40));
create table MyLocation (
id serial primary key,
name varchar(40));
create table Mobile_phase_A (
id serial primary key,
name varchar(40));
create table Mobile_phase_B (
id serial primary key,
name varchar(40));
create table Acquisition_parameter (
id serial primary key,
Mass_spectrometer_id integer,
Spectrum_acquisition_cycle varchar(40),
HPLC varchar(40),
Analytical_column_id integer,
Pre_column varchar(40),
Mobile_phase_A_id integer,
Mobile_phase_B_id integer,
Gradient varchar(40),
MyOperator_id integer,
MyLocation_id integer);
create table Preparation_parameter (
id serial primary key,
Sample_source varchar(40),
Biochemical_purification varchar(40),
Denaturant varchar(40),
Digestion_sequence varchar(40),
Enzyme_source varchar(40));