Sure. I have just simplified the task. I have created plain Python Source connector.
I did not touch check function. I copied all of the schema files from source-github connector. I am trying to just test against one: comments.json
In discover, I am just loading that json file into json object and passing to AirbyteStream (just like default code does).
In read, I just add an example of record (static) to AirbyteMessage (just like default code does).
That’s it. I send it to Local JSON destination connector. And it fails with the original error about $ref user.json not resolved.
Here is the code of source.py and comments.json (copied from source_github)
import json
from datetime import datetime
from typing import Dict, Generator
from airbyte_cdk.logger import AirbyteLogger
from airbyte_cdk.models import (
AirbyteCatalog,
AirbyteConnectionStatus,
AirbyteMessage,
AirbyteRecordMessage,
AirbyteStream,
ConfiguredAirbyteCatalog,
Status,
Type,
)
from airbyte_cdk.sources import Source
import os
main_path = "/airbyte/integration_code/source_github_mine/"
class SourceGithubMine(Source):
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
try:
return AirbyteConnectionStatus(status=Status.SUCCEEDED)
except Exception as e:
return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
streams = []
stream_name = "comments" # Example
with open(os.path.join(main_path,"schemas","comments.json")) as f:
json_schema = json.load(f)
streams.append(AirbyteStream(name=stream_name, json_schema=json_schema))
return AirbyteCatalog(streams=streams)
def read(
self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
stream_name = "comments" # Example
data = {"url":"https://api.github.com/repos/curl/curl/issues/comments/785098704","html_url":"https://github.com/curl/curl/pull/6654#issuecomment-785098704","issue_url":"https://api.github.com/repos/curl/curl/issues/6654","id":785098704,"node_id":"MDEyOklzc3VlQ29tbWVudDc4NTA5ODcwNA==","user":{"login":"ghost","id":10137,"node_id":"MDQ6VXNlcjEwMTM3","avatar_url":"https://avatars.githubusercontent.com/u/10137?v=4","gravatar_id":"","url":"https://api.github.com/users/ghost","html_url":"https://github.com/ghost","followers_url":"https://api.github.com/users/ghost/followers","following_url":"https://api.github.com/users/ghost/following{/other_user}","gists_url":"https://api.github.com/users/ghost/gists{/gist_id}","starred_url":"https://api.github.com/users/ghost/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/ghost/subscriptions","organizations_url":"https://api.github.com/users/ghost/orgs","repos_url":"https://api.github.com/users/ghost/repos","events_url":"https://api.github.com/users/ghost/events{/privacy}","received_events_url":"https://api.github.com/users/ghost/received_events","type":"User","site_admin":False},"created_at":"2021-02-24T14:05:29Z","updated_at":"2021-04-19T09:16:36Z","author_association":"NONE","body":"<img src=\"https://www.deepcode.ai/icons/green_check.svg\" width= \"50px\" align= \"left\"/> Congratulations :tada:. DeepCode [analyzed](https://www.deepcode.ai/app/gh/curl/curl/56a037cc0ad1b2a770d0c08d3d09dee1ce600f0f/curl/curl/bfde4230450e7756e42a43f866879037e4bba340/pr/_/%2F/code/?utm_source=gh_review&c=0&w=0&i=0&) your code in 2.831 seconds and we found no issues. Enjoy a moment of no bugs :sunny:.\n\n#### 👉 View analysis in [**DeepCode’s Dashboard**](https://www.deepcode.ai/app/gh/curl/curl/56a037cc0ad1b2a770d0c08d3d09dee1ce600f0f/curl/curl/bfde4230450e7756e42a43f866879037e4bba340/pr/_/%2F/code/?utm_source=gh_review&c=0&w=0&i=0&) | [_Configure the bot_](https://www.deepcode.ai/app/gh/?ownerconfig=curl)\n","reactions":{"url":"https://api.github.com/repos/curl/curl/issues/comments/785098704/reactions","total_count":0,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":0,"rocket":0,"eyes":0},"performed_via_github_app":None,"repository":"curl/curl"}
yield AirbyteMessage(
type=Type.RECORD,
record=AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=int(datetime.now().timestamp()) * 1000),
)
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"repository": {
"type": ["string"]
},
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"user": {
"$ref": "user.json"
},
"url": {
"type": ["null", "string"]
},
"html_url": {
"type": ["null", "string"]
},
"body": {
"type": ["null", "string"]
},
"user_id": {
"type": ["null", "integer"]
},
"created_at": {
"type": ["null", "string"],
"format": "date-time"
},
"updated_at": {
"type": ["null", "string"],
"format": "date-time"
},
"issue_url": {
"type": ["null", "string"]
},
"author_association": {
"type": ["null", "string"]
}
}
}
As per Airbyte’s documentation user.json is sitting in “shared” subfolder in schemas.
So the only weird thing I might be doing, is loading comments.json from disk with full path. But I am not sure if I can use relative or what is the best practice.
Thank you.