{
  "manifest_version": "1.0.0",
  "semantic_id": "dk_cvr_source_v1",
  "data_status": "SOURCE",
  "intent": {
    "scientific_purpose": "CVR is the authoritative Danish register of businesses, production units, and legal entities. It records legal form, industry classification, address relations, registration status, and temporal validity for all entities registered in Denmark. It serves as the primary source for any analysis requiring identification, classification, or spatial representation of business activity.",
    "research_question_links": [
      "What is the distribution of registered businesses by industry across Danish municipalities?",
      "How can legal form and industry classification be combined with spatial data to characterize economic activity in an area?",
      "Which business entities are spatially co-located with other registers such as DAR or BBR?"
    ]
  },
  "provenance": {
    "source_origin": "Danish Central Business Register (CVR)",
    "acquisition_date": "2026-04-03",
    "licence": "Datafordeler license terms and institutional API key agreement",
    "source_manifest_refs": [
      "semanticgis-data/metadata-loader/xml_models/CentraleVirksomhedsregister/2.2.0_CentraleVirksomhedsregister.xml",
      "semanticgis-data/metadata-loader/schemas/cvr.json",
      "https://grunddatamodel.datafordeler.dk/domaenemodeller/",
      "https://grunddatamodel.datafordeler.dk/objekttypekatalog/CentraleVirksomhedsregister/package-summary.html",
      "https://datafordeler.dk/GraphQLSchema/CVR.graphql",
      "https://confluence.sdfi.dk/pages/viewpage.action?pageId=193921410",
      "data_sanctuary/sources/datafordeler.platform.manifest.v1.json",
      "data_sanctuary/shared/datafordeler_graphql_contract.md",
      "data_sanctuary/shared/time_semantics_shared.md"
    ]
  },
  "grounding_logic": {
    "type": "api",
    "uri": "https://datafordeler.dk/",
    "layer_filter": "status = 'aktiv'",
    "crs": "EPSG:25832",
    "geometry_type": "NONE",
    "join_hints": [
      {
        "target_semantic_id": "dk_dar_source_v1",
        "join_type": "attribute",
        "left_key": "CVRAdresse_vejnavn + CVRAdresse_husnummerFra + CVRAdresse_postnummer",
        "right_key": "vejnavn + husnummer + postnummer"
      }
    ]
  },
  "privacy_governance": {
    "gdpr_classification": "Public",
    "retention_period": "Project duration + 5 years",
    "access_requirements": "Institutional Datafordeler API key and register access rights",
    "ethical_risks": [
      "Business type filters can over- or under-represent nightlife if category codes are incomplete.",
      "Address text quality varies and may produce false non-matches during join operations."
    ]
  },
  "sanitisation_rituals": [
    "Restrict to active entities unless historical analysis is intended.",
    "Remove non-essential datafordeler system fields (datafordelerRowId, datafordelerRowVersion, datafordelerRegisterImportSequenceNumber) not needed for analysis.",
    "Normalize Danish characters and casing in address components before joining to DAR.",
    "Retain both hoofdbranche and bibranche fields when industry composition is part of the research question."
  ],
  "attributes": [
    {
      "name": "CVREnhedsId",
      "logical_type": "Nominal",
      "meaning": "Stable unique identifier for the CVR business unit. Serves as primary key and cross-register join anchor.",
      "provenance_level": "source"
    },
    {
      "name": "CVR_Branche.vaerdi (hoofdbranche)",
      "logical_type": "Nominal",
      "meaning": "Primary industry classification code for the business unit. Operational default is DB25 (NACE Rev. 2.1) for current filtering policy. Historical and backward-compatible mapping to DB07 must be documented per project to preserve reproducibility.",
      "provenance_level": "source",
      "vocabulary_ref": "https://www.dst.dk/da/Statistik/dokumentation/dansk-branchekode-er-gaeldende"
    },
    {
      "name": "CVR_Branche.vaerdiTekst (hoofdbranche)",
      "logical_type": "Nominal",
      "meaning": "Human-readable Danish label for the primary industry code from Branchebogen.",
      "provenance_level": "source",
      "vocabulary_ref": "https://www.dst.dk/da/Statistik/dokumentation/dansk-branchekode-er-gaeldende"
    },
    {
      "name": "CVR_Branche.vaerdi (bibranche)",
      "logical_type": "Nominal",
      "meaning": "Secondary industry classification code(s). Operational default is DB25; retain mapping support for legacy DB07-coded records in temporal analyses.",
      "provenance_level": "source",
      "vocabulary_ref": "https://www.dst.dk/da/Statistik/dokumentation/dansk-branchekode-er-gaeldende"
    },
    {
      "name": "AdresseringAnvendelse",
      "logical_type": "Nominal",
      "meaning": "Classifies the purpose of the address relation (e.g. beliggenhedsadresse, postadresse). Different usages may yield different street-level locations for the same entity.",
      "provenance_level": "source"
    },
    {
      "name": "CVRAdresse_vejnavn",
      "logical_type": "Nominal",
      "meaning": "Street name component embedded in the CVR address record, used for attribute matching to DAR address points.",
      "provenance_level": "source"
    },
    {
      "name": "CVRAdresse_husnummerFra",
      "logical_type": "Ordinal",
      "meaning": "Lower bound of house number range. Used in deterministic address join operations.",
      "provenance_level": "source"
    },
    {
      "name": "CVRAdresse_postnummer",
      "logical_type": "Nominal",
      "meaning": "Postal code used as a disambiguation key when street names are ambiguous across municipalities.",
      "provenance_level": "source"
    },
    {
      "name": "registreringFra",
      "logical_type": "Interval",
      "meaning": "Timestamp from which the current registration version is valid. Used for temporal filtering and bitemporality-aware queries.",
      "provenance_level": "source"
    }
  ],
  "agent_hints": {
    "recommended_filters": [
      "Filter by the status field to remove inactive or dissolved entities unless a historical view is required.",
      "Select a branche range using DB07 (six-digit codes) matching your research domain before spatial operations.",
      "Specify municipality code (kommunekode) to subset the register before downloading.",
      "Require at least vejnavn + husnummerFra + postnummer for a reliable DAR join; flag records that produce no match.",
      "Apply DB25 code lists as operational default; include explicit DB25-to-DB07 mapping when historical comparability is required."
    ],
    "known_pitfalls": [
      "CVR contains three distinct address types (beliggenhedsadresse, postadresse, etc.); always select the correct AdresseringAnvendelse for your spatial purpose.",
      "A single CVR entity may have multiple harBibranche entries — decide whether to use only hoofdbranche or all branches before filtering.",
      "Branchekode vocabulary is maintained by Danmarks Statistik and may change when DB07 is superseded by DB25 (NACE Rev. 2.1). Keep a snapshot of the lookup table used in any project.",
      "CVR address text does not always match DAR exactly; normalize characters, case and house number format before joining.",
      "DB25 is treated as operational default, but mixed historical coding can still occur; preserve explicit mapping rules when combining periods."
    ],
    "quality_notes": [
      "Primary source of truth is Grunddatamodel and Datafordeler, with XML models as canonical structure references.",
      "CVR GraphQL schema is available from Datafordeler for query construction.",
      "Entity-based file download API can be used for bulk extraction workflows.",
      "CVR source includes rich business context but no native geometry.",
      "Spatial analyses depend on successful linkage to DAR address points.",
      "When virkningstid is set too narrowly, active entities can be under-returned; compare against omitted virkningstid runs for coverage checks.",
      "Observed extraction behavior suggests latest virkning periods may not fully align with all currently active restaurants; validate DB07 versus DB25 mapping assumptions before exclusion.",
      "Operational policy in this repository: use DB25 as default branch classification and document fallback mapping for DB07 when needed."
    ]
  },
  "shared_contract_refs": [
    "data_sanctuary/sources/datafordeler.platform.manifest.v1.json",
    "data_sanctuary/shared/datafordeler_graphql_contract.md",
    "data_sanctuary/shared/time_semantics_shared.md",
    "data_sanctuary/sources/datafordeler.graphql.services.manifest.v1.json"
  ],
  "execution_contract": {
    "interface_type": "graphql",
    "auth": {
      "scheme": "apikey",
      "location": "query_param",
      "parameter_name": "apikey",
      "required_permissions": [
        "CVR GraphQL access"
      ]
    },
    "pagination": {
      "style": "cursor",
      "page_size_param": "first",
      "cursor_param": "after",
      "recommended_page_size": 200,
      "max_page_size": 1000
    },
    "timeouts": {
      "server_timeout_seconds": 60,
      "retry_strategy": "staged extraction and reduced page size on timeout"
    },
    "time_parameters": {
      "supports_virkningstid": true,
      "supports_registreringstid": false,
      "default_query_mode": "register_default"
    },
    "filterability_notes": [
      "CVRAdresse_kommunekode is returned in CVR_Adressering nodes but is not filterable in CVR_AdresseringFilterInput.",
      "Use CVREnhedsId-based staged extraction for municipality post-filtering when needed.",
      "Branch filters should use DB25 code sets by default; if endpoint behavior indicates DB07 values, run a mapping pass instead of silent exclusion."
    ],
    "known_error_codes": [
      "400",
      "401",
      "HC0045"
    ]
  },
  "platform_links": [
    "https://semanticgis.dk"
  ],
  "resource_scope": "dataset"
}
