|
1 | 1 | # Copyright (c) 2024 Microsoft Corporation. |
2 | 2 | # Licensed under the MIT License |
3 | | -"""Common field name definitions for community reports.""" |
| 3 | +"""Common field name definitions for data frames.""" |
4 | 4 |
|
5 | 5 | ID = "id" |
6 | 6 | SHORT_ID = "human_readable_id" |
7 | 7 | TITLE = "title" |
8 | 8 | DESCRIPTION = "description" |
9 | 9 |
|
| 10 | +TYPE = "type" |
| 11 | + |
10 | 12 | # POST-PREP NODE TABLE SCHEMA |
11 | 13 | NODE_DEGREE = "degree" |
12 | 14 | NODE_FREQUENCY = "frequency" |
13 | 15 | NODE_DETAILS = "node_details" |
14 | | - |
15 | | -NODE_PARENT_COMMUNITY = "parent_community" |
| 16 | +NODE_X = "x" |
| 17 | +NODE_Y = "y" |
16 | 18 |
|
17 | 19 | # POST-PREP EDGE TABLE SCHEMA |
18 | 20 | EDGE_SOURCE = "source" |
|
23 | 25 |
|
24 | 26 | # POST-PREP CLAIM TABLE SCHEMA |
25 | 27 | CLAIM_SUBJECT = "subject_id" |
26 | | -CLAIM_TYPE = "type" |
27 | 28 | CLAIM_STATUS = "status" |
28 | 29 | CLAIM_DETAILS = "claim_details" |
29 | 30 |
|
30 | 31 | # COMMUNITY HIERARCHY TABLE SCHEMA |
31 | 32 | SUB_COMMUNITY = "sub_community" |
32 | | -COMMUNITY_LEVEL = "level" |
33 | 33 |
|
34 | 34 | # COMMUNITY CONTEXT TABLE SCHEMA |
35 | 35 | ALL_CONTEXT = "all_context" |
|
40 | 40 | # COMMUNITY REPORT TABLE SCHEMA |
41 | 41 | COMMUNITY_ID = "community" |
42 | 42 | COMMUNITY_LEVEL = "level" |
| 43 | +COMMUNITY_PARENT = "parent" |
| 44 | +COMMUNITY_CHILDREN = "children" |
43 | 45 | TITLE = "title" |
44 | 46 | SUMMARY = "summary" |
45 | 47 | FINDINGS = "findings" |
|
48 | 50 | FULL_CONTENT = "full_content" |
49 | 51 | FULL_CONTENT_JSON = "full_content_json" |
50 | 52 |
|
| 53 | +ENTITY_IDS = "entity_ids" |
| 54 | +RELATIONSHIP_IDS = "relationship_ids" |
51 | 55 | TEXT_UNIT_IDS = "text_unit_ids" |
| 56 | +COVARIATE_IDS = "covariate_ids" |
| 57 | +DOCUMENT_IDS = "document_ids" |
| 58 | + |
| 59 | +PERIOD = "period" |
| 60 | +SIZE = "size" |
52 | 61 |
|
53 | 62 | # text units |
54 | 63 | ENTITY_DEGREE = "entity_degree" |
55 | 64 | ALL_DETAILS = "all_details" |
56 | 65 | TEXT = "text" |
| 66 | +N_TOKENS = "n_tokens" |
| 67 | + |
| 68 | +CREATION_DATE = "creation_date" |
| 69 | +METADATA = "metadata" |
| 70 | + |
| 71 | +# the following lists define the final content and ordering of columns in the data model parquet outputs |
| 72 | +ENTITIES_FINAL_COLUMNS = [ |
| 73 | + ID, |
| 74 | + SHORT_ID, |
| 75 | + TITLE, |
| 76 | + TYPE, |
| 77 | + DESCRIPTION, |
| 78 | + TEXT_UNIT_IDS, |
| 79 | + NODE_FREQUENCY, |
| 80 | + NODE_DEGREE, |
| 81 | + NODE_X, |
| 82 | + NODE_Y, |
| 83 | +] |
| 84 | + |
| 85 | +RELATIONSHIPS_FINAL_COLUMNS = [ |
| 86 | + ID, |
| 87 | + SHORT_ID, |
| 88 | + EDGE_SOURCE, |
| 89 | + EDGE_TARGET, |
| 90 | + DESCRIPTION, |
| 91 | + EDGE_WEIGHT, |
| 92 | + EDGE_DEGREE, |
| 93 | + TEXT_UNIT_IDS, |
| 94 | +] |
| 95 | + |
| 96 | +COMMUNITIES_FINAL_COLUMNS = [ |
| 97 | + ID, |
| 98 | + SHORT_ID, |
| 99 | + COMMUNITY_ID, |
| 100 | + COMMUNITY_LEVEL, |
| 101 | + COMMUNITY_PARENT, |
| 102 | + COMMUNITY_CHILDREN, |
| 103 | + TITLE, |
| 104 | + ENTITY_IDS, |
| 105 | + RELATIONSHIP_IDS, |
| 106 | + TEXT_UNIT_IDS, |
| 107 | + PERIOD, |
| 108 | + SIZE, |
| 109 | +] |
| 110 | + |
| 111 | +COMMUNITY_REPORTS_FINAL_COLUMNS = [ |
| 112 | + ID, |
| 113 | + SHORT_ID, |
| 114 | + COMMUNITY_ID, |
| 115 | + COMMUNITY_LEVEL, |
| 116 | + COMMUNITY_PARENT, |
| 117 | + COMMUNITY_CHILDREN, |
| 118 | + TITLE, |
| 119 | + SUMMARY, |
| 120 | + FULL_CONTENT, |
| 121 | + RATING, |
| 122 | + EXPLANATION, |
| 123 | + FINDINGS, |
| 124 | + FULL_CONTENT_JSON, |
| 125 | + PERIOD, |
| 126 | + SIZE, |
| 127 | +] |
| 128 | + |
| 129 | +COVARIATES_FINAL_COLUMNS = [ |
| 130 | + ID, |
| 131 | + SHORT_ID, |
| 132 | + "covariate_type", |
| 133 | + TYPE, |
| 134 | + DESCRIPTION, |
| 135 | + "subject_id", |
| 136 | + "object_id", |
| 137 | + "status", |
| 138 | + "start_date", |
| 139 | + "end_date", |
| 140 | + "source_text", |
| 141 | + "text_unit_id", |
| 142 | +] |
| 143 | + |
| 144 | +TEXT_UNITS_FINAL_COLUMNS = [ |
| 145 | + ID, |
| 146 | + SHORT_ID, |
| 147 | + TEXT, |
| 148 | + N_TOKENS, |
| 149 | + DOCUMENT_IDS, |
| 150 | + ENTITY_IDS, |
| 151 | + RELATIONSHIP_IDS, |
| 152 | + COVARIATE_IDS, |
| 153 | +] |
| 154 | + |
| 155 | +DOCUMENTS_FINAL_COLUMNS = [ |
| 156 | + ID, |
| 157 | + SHORT_ID, |
| 158 | + TITLE, |
| 159 | + TEXT, |
| 160 | + TEXT_UNIT_IDS, |
| 161 | + CREATION_DATE, |
| 162 | + METADATA, |
| 163 | +] |
0 commit comments