User:Jhellingman/DP20/DP20DB

From DPWiki
Jump to navigation Jump to search

Proposed DP 2.0 Database structure

User Table

This table represent users that have registered. For non-registered visitors of the site, no information is stored. Note that for primary identification, we use the email address, combined with a password.

   CREATE TABLE user
   (
       userid          int
       name            varchar(128)    -- user display name
       email           varchar(128)
       emailverified   bool
       password        varchar(32)     -- actually salted hash over password
       token           varchar(128)    -- token used to verify email, and when recovering password
       language                        -- primary language for user (used for UI when available)
       languages                       -- other languages spoken by user (in order of preference, 
                                       -- first one will be used for UI when primary language is 
                                       -- not available)

       joindate        int             -- date of joining
       seendate        int             -- date of last activitiy on site

       -- calculated fields
   )

Role Table

   CREATE TABLE role
   (
       roleid          int     
       abbreviation    varchar(3)      -- short abbreviation for role
       name            varchar(32)
   )

Phase Table

   CREATE TABLE phase
   (
       phaseid         int
       abbreviation    varchar(6)
       name            varchar(32)

       -- stopping criteria (these are defaults, and can be adjusted per project)
       minimumrounds   int             -- The minimum number of rounds in this phase
       maximumrounds   int             -- The maximum number of rounds in this phase
       repeattrust     double          -- repeat round if level of trust is below this level
       promotiontrust  double          -- promote to next phase if level of trust above this level
   )
   CREATE TABLE userphase              -- activities of user per phase
   (
       userid          int
       phaseid         int
       access          bool

       projectcount    int             -- number of projects worked on in this phase
       pagecount       int             -- number of pages worked on in this phase
       size            int             -- combined size of all pages worked on
       
       -- trust in user calculations, calculated over last 100 pages that completed 
       -- all rounds. Left undefined if less than 20 pages completed.
       -- updated on request.
       delta           int             -- combined delta of all pages worked on
       textdelta       int
       tagdelta        int

       change
       textchange
       tagchange

       residu          int
       textresidu      int
       tagresidu       int

       trust
       texttrust
       tagtrust
   )

Author Table

   CREATE TABLE author
   (
       authorid        int
       name
   )

Project Table

The Project table contains information about projects.

   CREATE TABLE project
   (
       projectid       int
       parentprojectid int                 -- parent project of this project, null if none.
       userid          int                 -- user responsible for this project
       title           varchar(256)
       year            varchar(16)
       clearance       varchar(16)
       sourceurl       varchar(128)        -- url to page image source
       oln             varchar(20)         -- Open Library Number of source book
       path            varchar(128)        -- storage path for image files
       language        varchar(16)         -- primary language for project
       languages       varchar(64)         -- additional languages used

       routing         varchar(64)         -- default routing for pages in this project
       projectcomments text                -- instructions for each phase. This is a wiki format page.
   )

The AuthorProject table links authors to projects.

   CREATE TABLE authorproject              -- link authors to projects
   (
       authorid        int
       projectid       int
       roleid          int                 -- role of the author in this project (e.g. Author, Illustrator, etc.)
   )

The ProjectPhase table collects some project information for each phase.

   CREATE TABLE projectphase              -- information on a phase in a project
   (
       projectid       int
       phaseid         int

       size            int                 -- total size of all pages
       edits           int                 -- total edit distance (as summed from each individual page)
       change          int 
       trust
   )

Page Table

The Page table contains only some limited information on the page, as most of the actual content and meta data for pages is kept in the Revision table.

   CREATE TABLE page
   (
       pageid          int
       projectid       int
       image           varchar(128)        -- name of image file
       phase           int                 -- current phase of the page
       state           int                 -- state of page (1 = checked in, 2 = checked out)
       userid          int                 -- user id of user currently holding page
       checkoutdate    int                 -- date of checkout
       languages       varchar(64)         -- additional languages used on this page
       routing         varchar(64)         -- routing for this page
   )

Revision Table

The Revision table contains the results of the various rounds of proofreading.

   CREATE TABLE revision
   (
       pageid          int
       userid          int                 -- user responsible for this revision
       revisionnumber  int                 -- sequence number of revision
       revisiondate    int                 -- date of revision
       pagenumber      varchar(16)         -- page number as printed on page
       page            text                -- actual text of page
       metadata        text                -- meta data associated with page
       phase           int                 -- phase the page was in when revision was made

       -- calculated fields
       size            int                 -- size (in characters) of page
       textsize        int                 -- size (in characters) of page after normalization
       edits           int                 -- edit distance from previous revision
       textedits       int                 -- edit distance from previous revision, after normalization
       tagcount        int                 -- number of tags on page
       tagedits        int                 -- edit distance, when considering only tags.

       change          double              -- fraction of changes made, (1.0 <= change <= 0.0).
       textchange      double              -- fraction of changes made, after normalization.
       tagchange       double              -- fraction of changes made, tags only

       residu          int                 -- edit distance from final revision
       textresidu      int                 -- edit distance from final revision, after normalization
       tagresidu       int                 -- edit distance from final revision, considering only tags.

       trust           double              -- trust in page: estimate of correctness, (1.0 <= trust <= 0.0).
   )

Support tables

CREATE TABLE language
(
 
)
CREATE TABLE dictionary
(
 
)