Skip to content

views

Dataset

Bases: TaskMixin, TimeStampMixin, OwnableMixin

The internal dataset model.

Source code in backend/datasets/models.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Dataset(TaskMixin, TimeStampMixin, OwnableMixin):
    """
    The internal dataset model.
    """
    STATES = ((state.value, state.value) for state in DatasetState)

    class Mode(models.TextChoices):
        """
        The Mode class is an enumeration of the possible modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        SPARQL = 'SPARQL', _('From SPARQL endpoint')

    class SearchMode(models.TextChoices):
        """
        The SearchMode class is an enumeration of the possible search modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        WIKIDATA = 'WIKIDATA', _('From Wikidata')
        TRIPLYDB = 'TRIPLYDB', _('From TripyDB')

    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
    """The identifier of the dataset."""
    name = models.CharField(max_length=255)
    """The name of the dataset."""
    description = models.TextField(blank=True)
    """The description of the dataset."""
    source = models.JSONField()
    """The source of the dataset."""
    mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL)
    """The mode of the dataset."""
    search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL)
    """The search mode of the dataset."""
    creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True)
    """The user who created the dataset."""

    local_database: str = models.CharField(max_length=255, null=True)
    """The local blazegraph database identifier of the dataset."""
    sparql_endpoint = models.CharField(max_length=255, null=True)
    """The SPARQL endpoint of the dataset."""

    statistics = models.JSONField(null=True)
    """The statistics of the dataset."""
    namespaces = models.JSONField(null=True)
    """The list of sparql namespaces/prefixes in the dataset."""
    state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255)
    """The import state of the dataset."""
    import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True)
    """The import task of the dataset."""

    objects = models.Manager()

    @property
    def search_index_name(self) -> str:
        """
        The path to the search index of the dataset.
        :return:
        """
        return self.local_database if self.local_database else None

    def get_search_service(self) -> SearchService:
        """
        Return appropriate search service depending on the search mode
        """
        match self.search_mode:
            case self.SearchMode.LOCAL:
                if not self.search_index_name:
                    raise Exception('Dataset search index has not been created yet')
                return LocalSearchService(index_name=self.search_index_name)
            case self.SearchMode.WIKIDATA:
                return WikidataSearchService()
            case self.SearchMode.TRIPLYDB:
                if 'tdb_id' not in self.source:
                    raise Exception('Dataset is not a TriplyDB dataset')
                return TriplyDBSearchService(self.source['tdb_id'])
            case _:
                raise ValueError(f'Unknown search mode {self.search_mode}')

    def get_query_service(self) -> QueryService:
        """
        If the mode is local, return a local query service, otherwise return a SPARQL query service
        """
        match self.mode:
            case self.Mode.LOCAL:
                if not self.local_database:
                    raise Exception('Dataset local database has not been imported yet')
                return LocalQueryService(str(self.local_database))
            case self.Mode.SPARQL:
                return SPARQLQueryService(str(self.sparql_endpoint))
            case _:
                raise ValueError(f'Unknown mode {self.mode}')

    def can_view(self, user: User):
        return bool(user)

    def can_edit(self, user: User):
        return super().can_edit(user) or self.creator == user

creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The user who created the dataset.

description = models.TextField(blank=True) class-attribute instance-attribute

The description of the dataset.

id = models.UUIDField(default=uuid.uuid4, primary_key=True) class-attribute instance-attribute

The identifier of the dataset.

import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The import task of the dataset.

local_database: str = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The local blazegraph database identifier of the dataset.

mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL) class-attribute instance-attribute

The mode of the dataset.

name = models.CharField(max_length=255) class-attribute instance-attribute

The name of the dataset.

namespaces = models.JSONField(null=True) class-attribute instance-attribute

The list of sparql namespaces/prefixes in the dataset.

search_index_name: str property

The path to the search index of the dataset, or None when no local database exists yet.

search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL) class-attribute instance-attribute

The search mode of the dataset.

source = models.JSONField() class-attribute instance-attribute

The source of the dataset.

sparql_endpoint = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The SPARQL endpoint of the dataset.

state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255) class-attribute instance-attribute

The import state of the dataset.

statistics = models.JSONField(null=True) class-attribute instance-attribute

The statistics of the dataset.

Mode

Bases: TextChoices

The Mode class is an enumeration of the possible modes of a dataset

Source code in backend/datasets/models.py
33
34
35
36
37
38
class Mode(models.TextChoices):
    """
    Enumeration of the possible import modes of a dataset: data is either
    imported into the local database or queried from a remote SPARQL endpoint.
    """
    LOCAL = 'LOCAL', _('Imported locally ')
    SPARQL = 'SPARQL', _('From SPARQL endpoint')

SearchMode

Bases: TextChoices

The SearchMode class is an enumeration of the possible search modes of a dataset

Source code in backend/datasets/models.py
40
41
42
43
44
45
46
class SearchMode(models.TextChoices):
    """
    Enumeration of the possible search backends of a dataset: a locally built
    index, the Wikidata search API, or the TriplyDB search API.
    """
    LOCAL = 'LOCAL', _('Imported locally ')
    WIKIDATA = 'WIKIDATA', _('From Wikidata')
    TRIPLYDB = 'TRIPLYDB', _('From TriplyDB')  # label typo "TripyDB" fixed

get_query_service()

If the mode is local, return a local query service, otherwise return a SPARQL query service

Source code in backend/datasets/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_query_service(self) -> QueryService:
    """
    Build the query service matching this dataset's mode: a local Blazegraph
    service for locally imported data, a SPARQL service for remote endpoints.
    """
    current_mode = self.mode
    if current_mode == self.Mode.LOCAL:
        # A local service is only usable once the import has produced a database.
        if not self.local_database:
            raise Exception('Dataset local database has not been imported yet')
        return LocalQueryService(str(self.local_database))
    if current_mode == self.Mode.SPARQL:
        return SPARQLQueryService(str(self.sparql_endpoint))
    raise ValueError(f'Unknown mode {self.mode}')

get_search_service()

Return appropriate search service depending on the search mode

Source code in backend/datasets/models.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_search_service(self) -> SearchService:
    """
    Build the search service matching this dataset's search mode
    (local index, Wikidata, or TriplyDB).
    """
    current_mode = self.search_mode
    if current_mode == self.SearchMode.LOCAL:
        # The local index only exists after the dataset has been imported.
        if not self.search_index_name:
            raise Exception('Dataset search index has not been created yet')
        return LocalSearchService(index_name=self.search_index_name)
    if current_mode == self.SearchMode.WIKIDATA:
        return WikidataSearchService()
    if current_mode == self.SearchMode.TRIPLYDB:
        # TriplyDB search requires the TriplyDB dataset id in the source.
        if 'tdb_id' not in self.source:
            raise Exception('Dataset is not a TriplyDB dataset')
        return TriplyDBSearchService(self.source['tdb_id'])
    raise ValueError(f'Unknown search mode {self.search_mode}')

DatasetViewSet

Bases: ModelViewSet

API endpoint that allows datasets to be viewed or edited.

Source code in backend/datasets/views/datasets.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class DatasetViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows datasets to be viewed or edited.
    """
    queryset = Dataset.objects.all()
    serializer_class = DatasetSerializer
    pagination_class = LimitOffsetPagination
    filter_backends = [DjangoFilterBackend, filters.SearchFilter, filters.OrderingFilter]
    filterset_fields = ['mode', 'search_mode', 'state', 'id', 'creator']
    search_fields = ['name', 'source', 'description']

    def perform_create(self, serializer):
        """
        Validate the mode / search-mode combination, create the dataset,
        record the requesting user as creator, persist any uploaded files,
        and queue the asynchronous import task.

        :raises ValidationError: on unsupported or inconsistent configurations.
        """
        data = serializer.validated_data
        if data.get('mode') == Dataset.Mode.SPARQL.value and \
                data.get('search_mode') == Dataset.SearchMode.LOCAL.value:
            raise ValidationError('Local search index for sparql datasets is not yet supported')

        if data.get('search_mode', None) == Dataset.SearchMode.TRIPLYDB.value and \
                'tdb_id' not in data.get('source', {}):
            # Was the tautological 'TriplyDB dataset must be a TriplyDB dataset'.
            raise ValidationError("Datasets with TriplyDB search mode must provide a 'tdb_id' in their source")

        # Without Blazegraph, only pure SPARQL datasets with non-local search are possible.
        if not settings.BLAZEGRAPH_ENABLE and (
            data.get('mode') != Dataset.Mode.SPARQL.value or
            data.get('search_mode') == Dataset.SearchMode.LOCAL.value
        ):
            raise ValidationError('Local datasets are not enabled on this server')

        super().perform_create(serializer)

        instance: Dataset = serializer.instance
        instance.creator = self.request.user
        instance.save()

        files = self._store_uploaded_files(instance)

        instance.apply_async(
            import_dataset,
            (instance.id, files),
            creator=self.request.user,
            name=f'Import dataset {instance.name}'
        )

    def _store_uploaded_files(self, instance):
        """
        Write uploaded request files to a fresh temporary folder and return
        their absolute paths, or None when the source is not an upload.
        """
        if instance.source.get('source_type') != 'upload':
            return None
        tmp_dir = DOWNLOAD_DIR / random_string(10)
        tmp_dir.mkdir(parents=True)
        files = []
        for file in self.request.FILES.getlist('files'):
            file_path = tmp_dir / file.name
            # Stream in chunks so large uploads are not held in memory at once.
            with file_path.open('wb+') as destination:
                for chunk in file.chunks():
                    destination.write(chunk)
            files.append(str(file_path.absolute()))
        return files

    def perform_destroy(self, instance):
        """Queue an asynchronous deletion task rather than deleting inline."""
        instance.apply_async(
            delete_dataset,
            (instance.id,),
            creator=self.request.user,
            name=f'Deleting dataset {instance.name}'
        )

    def get_permissions(self):
        """Require dataset ownership for destructive actions, on top of the defaults."""
        permissions = super().get_permissions()

        if self.action in ['destroy']:
            permissions.append(IsOwner())

        return permissions