1 file changed: +7 -16 lines changed
lines changed Original file line number Diff line number Diff line change @@ -107,13 +107,13 @@ def render_features(features):
107
107
#
108
108
109
109
110
- def filter_english_datasets ():
110
+ def filter_datasets ():
111
111
"""
112
- Filter English datasets based on language tags in metadata .
112
+ Filter datasets from HuggingFace API .
113
113
114
114
Also includes the datasets of any users listed in INCLUDED_USERS
115
115
"""
116
- english_datasets = []
116
+ filtered_datasets = []
117
117
118
118
response = requests .get ("https://huggingface.co/api/datasets?full=true" )
119
119
tags = response .json ()
@@ -125,25 +125,16 @@ def filter_english_datasets():
125
125
if is_community_dataset :
126
126
user = dataset_name .split ("/" )[0 ]
127
127
if user in INCLUDED_USERS :
128
- english_datasets .append (dataset_name )
128
+ filtered_datasets .append (dataset_name )
129
129
continue
130
130
131
- if "cardData" not in dataset :
132
- continue
133
- metadata = dataset ["cardData" ]
134
-
135
- if "languages" not in metadata :
136
- continue
137
- languages = metadata ["languages" ]
138
-
139
- if "en" in languages or "en-US" in languages :
140
- english_datasets .append (dataset_name )
131
+ filtered_datasets .append (dataset_name )
141
132
142
- return sorted (english_datasets )
133
+ return sorted (filtered_datasets )
143
134
144
135
145
136
def list_datasets():
    """Return the dataset names to work with, ordered case-insensitively.

    The names are obtained from ``filter_datasets()`` and then ordered by
    their lowercased form, so capitalisation does not affect the ordering.
    """
    return sorted(filter_datasets(), key=lambda name: name.lower())
You can’t perform that action at this time.
0 commit comments