User:Jhellingman/DP20/DP20DB
< User:Jhellingman | DP20
Jump to navigation
Jump to search
Proposed DP 2.0 Database structure
User Table
This table represent users that have registered. For non-registered visitors of the site, no information is stored. Note that for primary identification, we use the email address, combined with a password.
CREATE TABLE user
(
userid int
name varchar(128) -- user display name
email varchar(128)
emailverified bool
password varchar(32) -- actually salted hash over password
token varchar(128) -- token used to verify email, and when recovering password
language -- primary language for user (used for UI when available)
languages -- other languages spoken by user (in order of preference,
-- first one will be used for UI when primary language is
-- not available)
joindate int -- date of joining
seendate int -- date of last activitiy on site
-- calculated fields
)
Role Table
CREATE TABLE role
(
roleid int
abbreviation varchar(3) -- short abbreviation for role
name varchar(32)
)
Phase Table
CREATE TABLE phase
(
phaseid int
abbreviation varchar(6)
name varchar(32)
-- stopping criteria (these are defaults, and can be adjusted per project)
minimumrounds int -- The minimum number of rounds in this phase
maximumrounds int -- The maximum number of rounds in this phase
repeattrust double -- repeat round if level of trust is below this level
promotiontrust double -- promote to next phase if level of trust above this level
)
CREATE TABLE userphase -- activities of user per phase
(
userid int
phaseid int
access bool
projectcount int -- number of projects worked on in this phase
pagecount int -- number of pages worked on in this phase
size int -- combined size of all pages worked on
-- trust in user calculations, calculated over last 100 pages that completed
-- all rounds. Left undefined if less than 20 pages completed.
-- updated on request.
delta int -- combined delta of all pages worked on
textdelta int
tagdelta int
change
textchange
tagchange
residu int
textresidu int
tagresidu int
trust
texttrust
tagtrust
)
Author Table
CREATE TABLE author
(
authorid int
name
)
Project Table
The Project table contains information about projects.
CREATE TABLE project
(
projectid int
parentprojectid int -- parent project of this project, null if none.
userid int -- user responsible for this project
title varchar(256)
year varchar(16)
clearance varchar(16)
sourceurl varchar(128) -- url to page image source
oln varchar(20) -- Open Library Number of source book
path varchar(128) -- storage path for image files
language varchar(16) -- primary language for project
languages varchar(64) -- additional languages used
routing varchar(64) -- default routing for pages in this project
projectcomments text -- instructions for each phase. This is a wiki format page.
)
The AuthorProject table links authors to projects.
CREATE TABLE authorproject -- link authors to projects
(
authorid int
projectid int
roleid int -- role of the author in this project (e.g. Author, Illustrator, etc.)
)
The ProjectPhase table collects some project information for each phase.
CREATE TABLE projectphase -- information on a phase in a project
(
projectid int
phaseid int
size int -- total size of all pages
edits int -- total edit distance (as summed from each individual page)
change int
trust
)
Page Table
The Page table contains only some limited information on the page, as most of the actual content and meta data for pages is kept in the Revision table.
CREATE TABLE page
(
pageid int
projectid int
image varchar(128) -- name of image file
phase int -- current phase of the page
state int -- state of page (1 = checked in, 2 = checked out)
userid int -- user id of user currently holding page
checkoutdate int -- date of checkout
languages varchar(64) -- additional languages used on this page
routing varchar(64) -- routing for this page
)
Revision Table
The Revision table contains the results of the various rounds of proofreading.
CREATE TABLE revision
(
pageid int
userid int -- user responsible for this revision
revisionnumber int -- sequence number of revision
revisiondate int -- date of revision
pagenumber varchar(16) -- page number as printed on page
page text -- actual text of page
metadata text -- meta data associated with page
phase int -- phase the page was in when revision was made
-- calculated fields
size int -- size (in characters) of page
textsize int -- size (in characters) of page after normalization
edits int -- edit distance from previous revision
textedits int -- edit distance from previous revision, after normalization
tagcount int -- number of tags on page
tagedits int -- edit distance, when considering only tags.
change double -- fraction of changes made, (1.0 <= change <= 0.0).
textchange double -- fraction of changes made, after normalization.
tagchange double -- fraction of changes made, tags only
residu int -- edit distance from final revision
textresidu int -- edit distance from final revision, after normalization
tagresidu int -- edit distance from final revision, considering only tags.
trust double -- trust in page: estimate of correctness, (1.0 <= trust <= 0.0).
)
Support tables
CREATE TABLE language ( )
CREATE TABLE dictionary ( )