Skip to content

views

Dataset

Bases: TaskMixin, TimeStampMixin, OwnableMixin

The internal dataset model.

Source code in backend/datasets/models.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Dataset(TaskMixin, TimeStampMixin, OwnableMixin):
    """
    The internal dataset model.
    """
    STATES = ((state.value, state.value) for state in DatasetState)

    class Mode(models.TextChoices):
        """
        The Mode class is an enumeration of the possible modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        SPARQL = 'SPARQL', _('From SPARQL endpoint')

    class SearchMode(models.TextChoices):
        """
        The SearchMode class is an enumeration of the possible search modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        WIKIDATA = 'WIKIDATA', _('From Wikidata')
        TRIPLYDB = 'TRIPLYDB', _('From TripyDB')

    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
    """The identifier of the dataset."""
    name = models.CharField(max_length=255)
    """The name of the dataset."""
    description = models.TextField(blank=True)
    """The description of the dataset."""
    source = models.JSONField()
    """The source of the dataset."""
    mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL)
    """The mode of the dataset."""
    search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL)
    """The search mode of the dataset."""
    creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True)
    """The user who created the dataset."""

    local_database: str = models.CharField(max_length=255, null=True)
    """The local blazegraph database identifier of the dataset."""
    sparql_endpoint = models.CharField(max_length=255, null=True)
    """The SPARQL endpoint of the dataset."""

    statistics = models.JSONField(null=True)
    """The statistics of the dataset."""
    namespaces = models.JSONField(null=True)
    """The list of sparql namespaces/prefixes in the dataset."""
    state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255)
    """The import state of the dataset."""
    import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True)
    """The import task of the dataset."""

    objects = models.Manager()

    @property
    def search_index_name(self) -> str:
        """
        The path to the search index of the dataset.
        :return:
        """
        return self.local_database if self.local_database else None

    def get_search_service(self) -> SearchService:
        """
        Return appropriate search service depending on the search mode
        """
        match self.search_mode:
            case self.SearchMode.LOCAL:
                if not self.search_index_name:
                    raise Exception('Dataset search index has not been created yet')
                return LocalSearchService(index_name=self.search_index_name)
            case self.SearchMode.WIKIDATA:
                return WikidataSearchService()
            case self.SearchMode.TRIPLYDB:
                if 'tdb_id' not in self.source:
                    raise Exception('Dataset is not a TriplyDB dataset')
                return TriplyDBSearchService(self.source['tdb_id'])
            case _:
                raise ValueError(f'Unknown search mode {self.search_mode}')

    def get_query_service(self) -> QueryService:
        """
        If the mode is local, return a local query service, otherwise return a SPARQL query service
        """
        match self.mode:
            case self.Mode.LOCAL:
                if not self.local_database:
                    raise Exception('Dataset local database has not been imported yet')
                return LocalQueryService(str(self.local_database))
            case self.Mode.SPARQL:
                return SPARQLQueryService(str(self.sparql_endpoint))
            case _:
                raise ValueError(f'Unknown mode {self.mode}')

    def can_view(self, user: User):
        return bool(user)

    def can_edit(self, user: User):
        return super().can_edit(user) or self.creator == user

creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The user who created the dataset.

description = models.TextField(blank=True) class-attribute instance-attribute

The description of the dataset.

id = models.UUIDField(default=uuid.uuid4, primary_key=True) class-attribute instance-attribute

The identifier of the dataset.

import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The import task of the dataset.

local_database: str = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The local blazegraph database identifier of the dataset.

mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL) class-attribute instance-attribute

The mode of the dataset.

name = models.CharField(max_length=255) class-attribute instance-attribute

The name of the dataset.

namespaces = models.JSONField(null=True) class-attribute instance-attribute

The list of sparql namespaces/prefixes in the dataset.

search_index_name: str property

The path to the search index of the dataset, or None when no local database exists yet.

search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL) class-attribute instance-attribute

The search mode of the dataset.

source = models.JSONField() class-attribute instance-attribute

The source of the dataset.

sparql_endpoint = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The SPARQL endpoint of the dataset.

state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255) class-attribute instance-attribute

The import state of the dataset.

statistics = models.JSONField(null=True) class-attribute instance-attribute

The statistics of the dataset.

Mode

Bases: TextChoices

The Mode class is an enumeration of the possible modes of a dataset

Source code in backend/datasets/models.py
33
34
35
36
37
38
class Mode(models.TextChoices):
    """
    Enumeration of the possible import modes of a dataset: data is either
    imported into the local database or queried from a remote SPARQL endpoint.
    """
    LOCAL = 'LOCAL', _('Imported locally ')
    SPARQL = 'SPARQL', _('From SPARQL endpoint')

SearchMode

Bases: TextChoices

The SearchMode class is an enumeration of the possible search modes of a dataset

Source code in backend/datasets/models.py
40
41
42
43
44
45
46
class SearchMode(models.TextChoices):
    """
    Enumeration of the possible search backends of a dataset: a locally built
    index, the Wikidata search API, or the TriplyDB search API.
    """
    LOCAL = 'LOCAL', _('Imported locally ')
    WIKIDATA = 'WIKIDATA', _('From Wikidata')
    TRIPLYDB = 'TRIPLYDB', _('From TriplyDB')  # label typo "TripyDB" fixed

get_query_service()

If the mode is local, return a local query service, otherwise return a SPARQL query service

Source code in backend/datasets/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_query_service(self) -> QueryService:
    """
    Build the query service matching this dataset's mode: a local Blazegraph
    service for locally imported data, a SPARQL service for remote endpoints.
    """
    current_mode = self.mode
    if current_mode == self.Mode.LOCAL:
        # A local service is only usable once the import has produced a database.
        if not self.local_database:
            raise Exception('Dataset local database has not been imported yet')
        return LocalQueryService(str(self.local_database))
    if current_mode == self.Mode.SPARQL:
        return SPARQLQueryService(str(self.sparql_endpoint))
    raise ValueError(f'Unknown mode {self.mode}')

get_search_service()

Return appropriate search service depending on the search mode

Source code in backend/datasets/models.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_search_service(self) -> SearchService:
    """
    Build the search service matching this dataset's search mode
    (local index, Wikidata, or TriplyDB).
    """
    current_mode = self.search_mode
    if current_mode == self.SearchMode.LOCAL:
        # The local index only exists after the dataset has been imported.
        if not self.search_index_name:
            raise Exception('Dataset search index has not been created yet')
        return LocalSearchService(index_name=self.search_index_name)
    if current_mode == self.SearchMode.WIKIDATA:
        return WikidataSearchService()
    if current_mode == self.SearchMode.TRIPLYDB:
        # TriplyDB search requires the TriplyDB dataset id in the source.
        if 'tdb_id' not in self.source:
            raise Exception('Dataset is not a TriplyDB dataset')
        return TriplyDBSearchService(self.source['tdb_id'])
    raise ValueError(f'Unknown search mode {self.search_mode}')

DatasetViewSet

Bases: ModelViewSet

API endpoint that allows datasets to be viewed or edited.

Source code in backend/datasets/views/datasets.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class DatasetViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows datasets to be viewed or edited.
    """
    queryset = Dataset.objects.all()
    serializer_class = DatasetSerializer
    pagination_class = LimitOffsetPagination
    filter_backends = [DjangoFilterBackend, filters.SearchFilter, filters.OrderingFilter]
    filterset_fields = ['mode', 'search_mode', 'state', 'id', 'creator']
    search_fields = ['name', 'source', 'description']

    def perform_create(self, serializer):
        """
        Validate the mode / search-mode combination, create the dataset,
        record the requesting user as creator, persist any uploaded files,
        and queue the asynchronous import task.

        :raises ValidationError: on unsupported or inconsistent configurations.
        """
        data = serializer.validated_data
        if data.get('mode') == Dataset.Mode.SPARQL.value and \
                data.get('search_mode') == Dataset.SearchMode.LOCAL.value:
            raise ValidationError('Local search index for sparql datasets is not yet supported')

        if data.get('search_mode', None) == Dataset.SearchMode.TRIPLYDB.value and \
                'tdb_id' not in data.get('source', {}):
            # Was the tautological 'TriplyDB dataset must be a TriplyDB dataset'.
            raise ValidationError("Datasets with TriplyDB search mode must provide a 'tdb_id' in their source")

        # Without Blazegraph, only pure SPARQL datasets with non-local search are possible.
        if not settings.BLAZEGRAPH_ENABLE and (
            data.get('mode') != Dataset.Mode.SPARQL.value or
            data.get('search_mode') == Dataset.SearchMode.LOCAL.value
        ):
            raise ValidationError('Local datasets are not enabled on this server')

        super().perform_create(serializer)

        instance: Dataset = serializer.instance
        instance.creator = self.request.user
        instance.save()

        files = self._store_uploaded_files(instance)

        instance.apply_async(
            import_dataset,
            (instance.id, files),
            creator=self.request.user,
            name=f'Import dataset {instance.name}'
        )

    def _store_uploaded_files(self, instance):
        """
        Write uploaded request files to a fresh temporary folder and return
        their absolute paths, or None when the source is not an upload.
        """
        if instance.source.get('source_type') != 'upload':
            return None
        tmp_dir = DOWNLOAD_DIR / random_string(10)
        tmp_dir.mkdir(parents=True)
        files = []
        for file in self.request.FILES.getlist('files'):
            file_path = tmp_dir / file.name
            # Stream in chunks so large uploads are not held in memory at once.
            with file_path.open('wb+') as destination:
                for chunk in file.chunks():
                    destination.write(chunk)
            files.append(str(file_path.absolute()))
        return files

    def perform_destroy(self, instance):
        """Queue an asynchronous deletion task rather than deleting inline."""
        instance.apply_async(
            delete_dataset,
            (instance.id,),
            creator=self.request.user,
            name=f'Deleting dataset {instance.name}'
        )

    def get_permissions(self):
        """Require dataset ownership for destructive actions, on top of the defaults."""
        permissions = super().get_permissions()

        if self.action in ['destroy']:
            permissions.append(IsOwner())

        return permissions