ModelGraph

This dataset corresponds to the paper “Connecting the Models: A Global Mega-model of MDE Projects on GitHub”.

The dataset consists of the following elements:

Raw dataset

The relevant tables of the schema are:

-- projects: every project crawled from GitHub that may contain MDE artefacts.
-- One row per project, identified by its project_path.
CREATE TABLE projects (
    project_path VARCHAR(255) PRIMARY KEY,
    name         VARCHAR(255)
);

-- files: every artefact with a relevant extension and contents related to MDE.
-- Rows are identified by file_path; all created_*/updated_* values are kept as TEXT.
CREATE TABLE files (
    -- Location of the artefact.
    -- NOTE(review): project_path presumably matches projects.project_path,
    -- but no foreign key is declared here -- confirm.
    project_path   VARCHAR(255),
    file_path      TEXT PRIMARY KEY,
    filename       VARCHAR(255),
    extension      VARCHAR(32),
    type           VARCHAR(32),

    -- Creation metadata (presumably the moment, commit and author that
    -- introduced the file -- TODO confirm against the crawler).
    created_at     TEXT,
    created_commit TEXT,
    created_author TEXT,

    -- Last-update metadata (same shape as the creation columns).
    updated_at     TEXT,
    updated_commit TEXT,
    updated_author TEXT
);

-- repo_info: project information found in GitHub.
-- The id column refers to projects.project_path.
-- No primary key, foreign key or NOT NULL constraint is declared on this table.
CREATE TABLE repo_info (
    -- Identity and description
    id                  TEXT,
    name                TEXT,
    full_name           TEXT,
    description         TEXT,

    -- Counters
    total_issues        INT,
    total_prs           INT,
    stars               INT,
    watchers            INT,
    forks               INT,
    subscribers         INT,

    -- Free-form metadata stored as text
    topics              TEXT,
    labels              TEXT,
    created_at          TEXT,
    updated_at          TEXT,

    -- Contributors and provenance
    contributors_count  INT,
    contributors_detail TEXT,
    parent_repo         TEXT,
    repo_type           TEXT
);

-- repo_git: project information obtained from Git.
-- The id column refers to projects.project_path.
-- No primary key, foreign key or NOT NULL constraint is declared on this table.
CREATE TABLE repo_git (
    id                TEXT,
    ci                TEXT,
    readme_size       INT,
    commits_per_month REAL,
    license           TEXT
);

Mega-model

The relevant parts of the database schema are as follows:

-- projects: repositories taken over from the RawDb.
-- Each repository has a unique id and a mandatory url.
CREATE TABLE projects (
    id  VARCHAR(255),
    url TEXT NOT NULL,
    PRIMARY KEY (id)
);

-- artefacts: artefact nodes, i.e. the nodes that correspond to artefacts.
-- Every descriptive column is mandatory; only project_id may be NULL.
CREATE TABLE artefacts (
    id          VARCHAR(255),
    type        VARCHAR(255) NOT NULL,
    category    VARCHAR(255) NOT NULL,
    name        VARCHAR(255) NOT NULL,
    file_status VARCHAR(255) NOT NULL,
    -- NOTE(review): presumably points at projects.id, but no foreign key
    -- is declared -- confirm.
    project_id  VARCHAR(255),
    PRIMARY KEY (id)
);

-- duplication: duplicates found among the nodes.
-- A row places one node in one duplicate group; the (group_id, node_id)
-- pair is unique.
CREATE TABLE duplication (
    group_id VARCHAR(255) NOT NULL,
    -- NOTE(review): presumably an artefacts.id value -- no foreign key is
    -- declared, confirm.
    node_id  VARCHAR(255) NOT NULL,
    type     VARCHAR(255) NOT NULL,
    PRIMARY KEY (group_id, node_id)
);

-- relationships: typed edges between nodes.
-- No key is declared: the same source/target pair can be repeated with a
-- different type, which gives an edge multiple annotations.
CREATE TABLE relationships (
    source VARCHAR(255) NOT NULL,
    target VARCHAR(255) NOT NULL,
    type   VARCHAR(255) NOT NULL
);