{"id":416,"date":"2020-04-03T19:48:05","date_gmt":"2020-04-03T10:48:05","guid":{"rendered":"https:\/\/avadawebsites.wpengine.com\/business\/?p=416"},"modified":"2024-02-14T22:33:01","modified_gmt":"2024-02-14T13:33:01","slug":"the-best-25-datasets-for-natural-language-processing","status":"publish","type":"blog","link":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/","title":{"rendered":"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081"},"content":{"rendered":"<div class=\"fusion-fullwidth fullwidth-box fusion-builder-row-1 fusion-flex-container has-pattern-background has-mask-background nonhundred-percent-fullwidth non-hundred-percent-height-scrolling\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap\" style=\"max-width:1372.8px;margin-left: calc(-4% \/ 2 );margin-right: calc(-4% \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-0 fusion_builder_column_1_1 1_1 fusion-flex-column\" style=\"--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:1.92%;--awb-margin-bottom-large:0px;--awb-spacing-left-large:1.92%;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:1.92%;--awb-spacing-left-medium:1.92%;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:1.92%;--awb-spacing-left-small:1.92%;\"><div class=\"fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column\"><div class=\"fusion-text fusion-text-1 fusion-text-no-margin\" style=\"--awb-margin-bottom:40px;\"><p>\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u306f\u3001\u3069\u3053\u3067\u63a2\u3059\u306e\u304c\u4e00\u756a\u3044\u3044\u3067\u3057\u3087\u3046\u304b\u3002<\/p>\n<p>\u30a4\u30f3\u30bf\u30fc\u30cd\u30c3\u30c8\u4e0a\u3092\u9688\u306a\u304f\u8abf\u3079\u3066\u7a76\u6975\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u306e\u30ea\u30b9\u30c8\u3092\u4f5c\u6210\u3057\u3001\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u4e09\u3064\u306b\u5206\u985e\u3057\u307e\u3057\u305f\u3002<\/p>\n<\/div><div class=\"fusion-title title fusion-title-1 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-top-small:10px;--awb-margin-right-small:0px;--awb-margin-bottom-small:10px;--awb-margin-left-small:0px;\"><h3 class=\"fusion-title-heading title-heading-left\" style=\"margin:0;\">\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5411\u3051\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/h3><\/div><ul style=\"--awb-size:17px;--awb-iconcolor:var(--awb-color5);--awb-line-height:28.9px;--awb-icon-width:28.9px;--awb-icon-height:28.9px;--awb-icon-margin:11.9px;--awb-content-margin:40.8px;--awb-circlecolor:var(--awb-color1);--awb-circle-yes-font-size:14.96px;\" class=\"fusion-checklist fusion-checklist-1 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/qwone.com\/~jason\/20Newsgroups\/\" target=\"_blank\" rel=\"noopener\">20\u306e\u30cb\u30e5\u30fc\u30b9\u30b0\u30eb\u30fc\u30d7<\/a>: 20\u306e\u7570\u306a\u308b\u30cb\u30e5\u30fc\u30b9\u30b0\u30eb\u30fc\u30d7\u306e\u7d042\u4e07\u306e\u6587\u66f8\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/archive.ics.uci.edu\/ml\/datasets\/Reuters-21578+Text+Categorization+Collection\" target=\"_blank\" rel=\"noopener\">\u30ed\u30a4\u30bf\u30fc\u30fb\u30cb\u30e5\u30fc\u30b9\u30fb\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/a>: 1987\u5e74\u4ee5\u964d\u306e\u30ed\u30a4\u30bf\u30fc\u306e\u30c6\u30ad\u30b9\u30c8\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/archive.ics.uci.edu\/ml\/datasets\/Spambase\" target=\"_blank\" rel=\"noopener\">\u30ab\u30ea\u30d5\u30a9\u30eb\u30cb\u30a2\u5927\u5b66\u30a2\u30fc\u30d0\u30a4\u30f3\u6821\u306e\u30b9\u30d1\u30e0\u30d9\u30fc\u30b9<\/a>: \u30b9\u30d1\u30e0\u306e\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u306b\u5f79\u7acb\u3064\u30b9\u30d1\u30e0\u30e1\u30fc\u30eb\u306e\u5927\u578b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/www.yelp.com\/dataset\" target=\"_blank\" rel=\"noopener\">Yelp\u30ec\u30d3\u30e5\u30fc<\/a>: Yelp\u304c\u30ea\u30ea\u30fc\u30b9\u3057\u305f\u30aa\u30fc\u30d7\u30f3\u306a\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002500\u4e07\u3092\u8d85\u3048\u308b\u30ec\u30d3\u30e5\u30fc\u304b\u3089\u6210\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/wordnet.princeton.edu\/\" target=\"_blank\" rel=\"noopener\">WordNet<\/a>:\u300csynset\u300d\u3068\u547c\u3070\u308c\u308b\u82f1\u8a9e\u306e\u540c\u7fa9\u8a9e\u30b0\u30eb\u30fc\u30d7\u306e\u5927\u578b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002\u610f\u5473\u306e\u7570\u306a\u308b\u8a9e\u53e5\u306f\u5225\u306e\u300csynset\u300d\u306b\u5206\u985e\u3055\u308c\u308b\u3002<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-text fusion-text-2 fusion-text-no-margin\" style=\"--awb-margin-bottom:40px;\"><p><a href=\"http:\/\/aozora-word.hahasoha.net\/index.html\" target=\"_blank\" rel=\"noopener\">\u9752\u7a7a\u6587\u5eab\u5f62\u614b\u7d20\u89e3\u6790\u30c7\u30fc\u30bf\u96c6<\/a>: \u3053\u3061\u3089\u306f\u65e5\u672c\u8a9e\u306b\u306a\u308a\u307e\u3059\u304c\u3001\u9752\u7a7a\u6587\u5eab\u306b\u53ce\u9332\u3055\u308c\u3066\u3044\u308b\u4f5c\u54c1\u306b\u5bfe\u3057\u5f62\u614b\u7d20\u89e3\u6790\u3092\u884c\u3063\u305f\u30c7\u30fc\u30bf\u3067\u3059\u3002CC\u30e9\u30a4\u30bb\u30f3\u30b9\u3067\u3001\u5546\u7528\u5229\u7528\u3082\u53ef\u80fd\u3002\u5bfe\u8c61\u306e\u4f5c\u54c1\u306f2012\/12\u6642\u70b9\u3067\u516c\u958b\u3055\u308c\u3066\u304a\u308a\u3001\u8457\u4f5c\u6a29\u30d5\u30e9\u30b0\u306e\u306a\u304411,176\u4f5c\u54c1\u3002<\/p>\n<\/div><div class=\"fusion-separator fusion-full-width-sep\" style=\"align-self: center;margin-left: auto;margin-right: auto;margin-top:20px;margin-bottom:20px;width:100%;\"><div class=\"fusion-separator-border sep-single sep-dotted\" style=\"--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color4);border-color:var(--awb-color4);border-top-width:1px;\"><\/div><\/div><div class=\"fusion-title title fusion-title-2 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-top-small:10px;--awb-margin-right-small:0px;--awb-margin-bottom-small:10px;--awb-margin-left-small:0px;\"><h3 class=\"fusion-title-heading title-heading-left\" style=\"margin:0;\">\u611f\u60c5\u5206\u6790\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/h3><\/div><ul style=\"--awb-size:17px;--awb-iconcolor:var(--awb-color5);--awb-line-height:28.9px;--awb-icon-width:28.9px;--awb-icon-height:28.9px;--awb-icon-margin:11.9px;--awb-content-margin:40.8px;--awb-circlecolor:var(--awb-color1);--awb-circle-yes-font-size:14.96px;\" class=\"fusion-checklist fusion-checklist-2 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.cs.jhu.edu\/~mdredze\/datasets\/sentiment\/\" target=\"_blank\" rel=\"noopener\">\u30de\u30eb\u30c1\u30c9\u30e1\u30a4\u30f3\u611f\u60c5\u5206\u6790\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/a>: \u30a2\u30de\u30be\u30f3\u306e\u5546\u54c1\u30ec\u30d3\u30e5\u30fc\u306b\u7684\u3092\u7d5e\u3063\u305f\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/ai.stanford.edu\/~amaas\/data\/sentiment\/\" target=\"_blank\" rel=\"noopener\">\u6620\u753b\u30ec\u30d3\u30e5\u30fc<\/a>: \u611f\u60c5\u306e\u4e8c\u9805\u5206\u985e\u306e\u305f\u3081\u306e\u3001\u3084\u3084\u53e4\u3044\u6bd4\u8f03\u7684\u5c0f\u3055\u306a\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3067\u300125,000\u306e\u6620\u753b\u30ec\u30d3\u30e5\u30fc\u304b\u3089\u6210\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/nlp.stanford.edu\/sentiment\/code.html\">\u30b9\u30bf\u30f3\u30d5\u30a9\u30fc\u30c9\u30fb\u30bb\u30f3\u30c1\u30e1\u30f3\u30c8\u30fb\u30c4\u30ea\u30fc\u30d0\u30f3\u30af<\/a>: \u611f\u60c5\u306e\u30a2\u30ce\u30c6\u30fc\u30b7\u30e7\u30f3\u3092\u4ed8\u3057\u305f\u30b9\u30bf\u30f3\u30c0\u30fc\u30c9\u306a\u611f\u60c5\u5206\u6790\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/help.sentiment140.com\/for-students\/\" target=\"_blank\" rel=\"noopener\">\u30bb\u30f3\u30c1\u30e1\u30f3\u30c8140<\/a>: \u9854\u6587\u5b57\u3092\u3042\u3089\u304b\u3058\u3081\u53d6\u308a\u9664\u3044\u305f16\u4e07\u306e\u30c4\u30a4\u30fc\u30c8\u3092\u4f7f\u7528\u3057\u305f\u3001\u4eba\u6c17\u306e\u9ad8\u3044\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/www.kaggle.com\/crowdflower\/twitter-airline-sentiment\" target=\"_blank\" rel=\"noopener\">\u30c4\u30a4\u30c3\u30bf\u30fc\u7c73\u822a\u7a7a\u4f1a\u793e\u30bb\u30f3\u30c1\u30e1\u30f3\u30c8<\/a>: \u30dd\u30b8\u30c6\u30a3\u30d6\u3001\u30cd\u30ac\u30c6\u30a3\u30d6\u3001\u30cb\u30e5\u30fc\u30c8\u30e9\u30eb\u3067\u5206\u985e\u3057\u305f\u30012015\u5e742\u6708\u4ee5\u964d\u306e\u7c73\u56fd\u822a\u7a7a\u4f1a\u793e\u306b\u95a2\u3059\u308b\u30c4\u30a4\u30c3\u30bf\u30fc\u306e\u30c7\u30fc\u30bf\u3002<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-separator fusion-full-width-sep\" style=\"align-self: center;margin-left: auto;margin-right: auto;margin-top:40px;margin-bottom:40px;width:100%;\"><div class=\"fusion-separator-border sep-single sep-dotted\" style=\"--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color4);border-color:var(--awb-color4);border-top-width:1px;\"><\/div><\/div><div class=\"fusion-title title fusion-title-3 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-top-small:10px;--awb-margin-right-small:0px;--awb-margin-bottom-small:10px;--awb-margin-left-small:0px;\"><h3 class=\"fusion-title-heading title-heading-left\" style=\"margin:0;\">\u82f1\u8a9e\u767a\u8a71\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/h3><\/div><ul style=\"--awb-size:17px;--awb-iconcolor:var(--awb-color5);--awb-line-height:28.9px;--awb-icon-width:28.9px;--awb-icon-height:28.9px;--awb-icon-margin:11.9px;--awb-content-margin:40.8px;--awb-circlecolor:var(--awb-color1);--awb-circle-yes-font-size:14.96px;\" class=\"fusion-checklist fusion-checklist-3 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/catalog.ldc.upenn.edu\/LDC2002T43\" target=\"_blank\" rel=\"noopener\">2000HUB5\u82f1\u8a9e\u8a55\u4fa1\u8a18\u9332<\/a>: 40\u672c\u306e\u96fb\u8a71\u306e\u4f1a\u8a71\u304b\u3089\u6210\u308b\u82f1\u8a9e\u306e\u767a\u8a71\u30c7\u30fc\u30bf\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.openslr.org\/12\/\" target=\"_blank\" rel=\"noopener\">LibriSpeech<\/a>: \u30aa\u30fc\u30c7\u30a3\u30aa\u30d6\u30c3\u30af\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002\u8907\u6570\u306e\u6717\u8aad\u8005\u306b\u3088\u308b500\u6642\u9593\u306b\u53ca\u3076\u30aa\u30fc\u30c7\u30a3\u30aa\u30d6\u30c3\u30af\u304b\u3089\u6210\u308b\u3002\u30aa\u30fc\u30c7\u30a3\u30aa\u30d6\u30c3\u30af\u306e\u7ae0\u3067\u6574\u7406\u3055\u308c\u3066\u3044\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\"><a href=\"https:\/\/www.kaggle.com\/primaryobjects\/voicegender\" target=\"_blank\" rel=\"noopener\">\u58f0\u306b\u3088\u308b\u6027\u5225\u8a8d\u8b58<\/a>: \u58f0\u3084\u767a\u8a71\u306e\u97f3\u97ff\u7279\u6027\u306b\u57fa\u3065\u3044\u3066\u7537\u6027\u306e\u58f0\u304b\u5973\u6027\u306e\u58f0\u304b\u3092\u8b58\u5225\u3059\u308b\u30b7\u30b9\u30c6\u30e0\u306e\u958b\u767a\u3092\u652f\u63f4\u3059\u308b\u305f\u3081\u306b\u69cb\u7bc9\u3055\u308c\u305f\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3002\u7537\u6027\u304a\u3088\u3073\u5973\u6027\u306e\u8a71\u8005\u306b\u3088\u308b\u97f3\u58f0\u9332\u97f3\u304c3,000\u4ef6\u4ee5\u4e0a\u542b\u307e\u308c\u308b\u3002<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/github.com\/Jakobovski\/free-spoken-digit-dataset\" target=\"_blank\" rel=\"noopener\">\u767a\u8a71\u3055\u308c\u305f\u6570\u5b57\u306e\u7121\u6599\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/a>: \u82f1\u8a9e\u306b\u304a\u3051\u308b1,500\u306e\u767a\u8a71\u3055\u308c\u305f\u6570\u5b57\u306e\u9332\u97f3\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/catalog.ldc.upenn.edu\/LDC93S1\" target=\"_blank\" rel=\"noopener\">TIMIT<\/a>: 630\u4eba\u306e\u30a2\u30e1\u30ea\u30ab\u82f1\u8a9e\u8a71\u8005\u306e\u9332\u97f3\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3002<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-separator fusion-full-width-sep\" style=\"align-self: center;margin-left: auto;margin-right: auto;margin-top:40px;margin-bottom:40px;width:100%;\"><div class=\"fusion-separator-border sep-single sep-dotted\" style=\"--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color4);border-color:var(--awb-color4);border-top-width:1px;\"><\/div><\/div><div class=\"fusion-title title fusion-title-4 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-top-small:10px;--awb-margin-right-small:0px;--awb-margin-bottom-small:10px;--awb-margin-left-small:0px;\"><h3 class=\"fusion-title-heading title-heading-left\" style=\"margin:0;\">\u305d\u306e\u4ed6\u3001\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5411\u3051\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/h3><\/div><ul style=\"--awb-size:17px;--awb-iconcolor:var(--awb-color5);--awb-line-height:28.9px;--awb-icon-width:28.9px;--awb-icon-height:28.9px;--awb-icon-margin:11.9px;--awb-content-margin:40.8px;--awb-circlecolor:var(--awb-color1);--awb-circle-yes-font-size:14.96px;\" class=\"fusion-checklist fusion-checklist-4 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/www.cs.cmu.edu\/~.\/enron\/\" target=\"_blank\" rel=\"noopener\">\u30a8\u30f3\u30ed\u30f3\u30fb\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8<\/a>: \u30a8\u30f3\u30ed\u30f3\u793e\u7ba1\u7406\u8077\u306e\u96fb\u5b50\u30e1\u30fc\u30eb\u306e\u30c7\u30fc\u30bf\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/snap.stanford.edu\/data\/web-Amazon.html\" target=\"_blank\" rel=\"noopener\">\u30a2\u30de\u30be\u30f3\u30fb\u30ec\u30d3\u30e5\u30fc<\/a>: 18\u5e74\u306b\u308f\u305f\u308b\u30a2\u30de\u30be\u30f3\u306e\u304a\u3088\u305d3,500\u4e07\u306e\u30ec\u30d3\u30e5\u30fc\u304b\u3089\u6210\u308b\u3002\u30c7\u30fc\u30bf\u306b\u306f\u3001\u88fd\u54c1\u53ca\u3073\u30e6\u30fc\u30b6\u30fc\u60c5\u5831\u3001\u8a55\u4fa1\u3001\u30d7\u30ec\u30fc\u30f3\u30c6\u30ad\u30b9\u30c8\u306e\u30ec\u30d3\u30e5\u30fc\u304c\u542b\u307e\u308c\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\"><a href=\"https:\/\/aws.amazon.com\/datasets\/google-books-ngrams\/\" target=\"_blank\" rel=\"noopener\">Google\u30d6\u30c3\u30af\u30b9Ngrams<\/a>: Google\u30d6\u30c3\u30af\u30b9\u306e\u5358\u8a9e\u306e\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3002<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/u.cs.biu.ac.il\/~koppel\/BlogCorpus.htm\" target=\"_blank\" rel=\"noopener\">Blogger\u30b3\u30fc\u30d1\u30b9<\/a>: blogger.com\u304b\u3089\u53ce\u96c6\u3057\u305f681,288\u672c\u306e\u30d6\u30ed\u30b0\u8a18\u4e8b\u306e\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3002\u5404\u30d6\u30ed\u30b0\u306b\u306f\u6700\u4f4e\u3067\u3082200\u306e\u4e00\u822c\u7684\u82f1\u5358\u8a9e\u306e\u4f7f\u7528\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"https:\/\/code.google.com\/p\/wiki-links\/downloads\/list\" target=\"_blank\" rel=\"noopener\">\u30a6\u30a3\u30ad\u30da\u30c7\u30a3\u30a2\u30fb\u30ea\u30f3\u30af\u30fb\u30c7\u30fc\u30bf<\/a>: \u30a6\u30a3\u30ad\u30da\u30c7\u30a3\u30a2\u306e\u30c6\u30ad\u30b9\u30c8\u5168\u6587\u3002400\u4e07\u3092\u8d85\u3048\u308b\u8a18\u4e8b\u306e\u7d0419\u5104\u8a9e\u304b\u3089\u6210\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002\u5358\u8a9e\u3084\u30d5\u30ec\u30fc\u30ba\u3001\u6bb5\u843d\u306e\u4e00\u90e8\u5206\u3067\u691c\u7d22\u3067\u304d\u308b\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.gutenberg.org\/wiki\/Gutenberg:Offline_Catalogs\" target=\"_blank\" rel=\"noopener\">\u30b0\u30fc\u30c6\u30f3\u30d9\u30eb\u30af\u96fb\u5b50\u66f8\u7c4d\u30ea\u30b9\u30c8<\/a>: \u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u30fb\u30b0\u30fc\u30c6\u30f3\u30d9\u30eb\u30af\u306e\u96fb\u5b50\u66f8\u7c4d\u306e\u30a2\u30ce\u30c6\u30fc\u30b7\u30e7\u30f3\u4ed8\u304d\u30ea\u30b9\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.isi.edu\/natural-language\/download\/hansard\/\" target=\"_blank\" rel=\"noopener\">\u30ab\u30ca\u30c0\u8b70\u4f1a\u8b70\u4e8b\u9332<\/a>: \u7b2c36\u56de\u30ab\u30ca\u30c0\u8b70\u4f1a\u8b70\u4e8b\u9332\u306e\u30012\u30ab\u56fd\u8a9e\u306e130\u4e07\u306e\u30c6\u30ad\u30b9\u30c8\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.reddit.com\/r\/datasets\/comments\/1uyd0t\/200000_jeopardy_questions_in_a_json_file\/\" target=\"_blank\" rel=\"noopener\">Jeopardy<\/a>: \u30af\u30a4\u30ba\u756a\u7d44\u300eJeopardy\u300f\u3067\u4f7f\u308f\u308c\u305f20\u4e07\u3092\u8d85\u3048\u308b\u8cea\u554f\u96c6\u3002<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-yes\"><i class=\"fusion-li-icon fa-database fas\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p><a href=\"http:\/\/www.dt.fee.unicamp.br\/~tiago\/smsspamcollection\/\" target=\"_blank\" rel=\"noopener\">\u82f1\u8a9e\u306eSMS\u30b9\u30d1\u30e0\u30fb\u30b3\u30ec\u30af\u30b7\u30e7\u30f3<\/a>: \u82f1\u8a9e\u306e5,574\u306eSMS\u30b9\u30d1\u30e0\u30e1\u30c3\u30bb\u30fc\u30b8\u304b\u3089\u6210\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3002<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-separator fusion-full-width-sep\" style=\"align-self: center;margin-left: auto;margin-right: auto;margin-top:40px;margin-bottom:40px;width:100%;\"><div class=\"fusion-separator-border sep-single sep-dotted\" style=\"--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color4);border-color:var(--awb-color4);border-top-width:1px;\"><\/div><\/div><div class=\"fusion-title title fusion-title-5 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-top-small:10px;--awb-margin-right-small:0px;--awb-margin-bottom-small:10px;--awb-margin-left-small:0px;\"><h3 class=\"fusion-title-heading title-heading-left\" style=\"margin:0;\">\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5411\u3051\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u304a\u63a2\u3057\u3067\u3059\u304b\uff1f<\/h3><\/div><div class=\"fusion-text fusion-text-3 fusion-text-no-margin\" style=\"--awb-margin-bottom:40px;\"><p>\u5fc5\u8981\u306a\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u304c\u898b\u3064\u304b\u3089\u306a\u3044\u5834\u5408\u306f\u3001\u5f53\u793e\u304c\u4f5c\u6210\u3044\u305f\u3057\u307e\u3059\u3002\u304a\u5ba2\u69d8\u306e\u3054\u8981\u671b\u306b\u5408\u308f\u305b\u305f\u30ab\u30b9\u30bf\u30e0\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u63d0\u4f9b\u3057\u307e\u3059\u3002<a href=\"https:\/\/transynk.co.jp\/contact\">\u7121\u6599\u898b\u7a4d\u3082\u308a\u3001\u304a\u554f\u3044\u5408\u308f\u305b\u306f\u3053\u3061\u3089\u304b\u3089\u3002<\/a><\/p>\n<\/div><div class=\"fusion-separator fusion-full-width-sep\" style=\"align-self: center;margin-left: auto;margin-right: auto;margin-top:40px;margin-bottom:40px;width:100%;\"><div class=\"fusion-separator-border sep-single sep-dotted\" style=\"--awb-height:20px;--awb-amount:20px;--awb-sep-color:var(--awb-color4);border-color:var(--awb-color4);border-top-width:1px;\"><\/div><\/div><\/div><\/div><\/div><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u306f\u3001\u3069\u3053\u3067\u63a2\u3059\u306e\u304c\u4e00\u756a\u3044\u3044\u3067\u3057\u3087\u3046\u304b\u3002\u30a4\u30f3\u30bf\u30fc\u30cd\u30c3\u30c8\u4e0a\u3092\u9688\u306a\u304f\u8abf\u3079\u3066\u7a76\u6975\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u306e\u30ea\u30b9\u30c8\u3092\u4f5c\u6210\u3057\u3001\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u4e09\u3064\u306b\u5206\u985e\u3057\u307e\u3057\u305f\u3002<\/p>\n","protected":false},"featured_media":410,"menu_order":0,"comment_status":"open","ping_status":"open","template":"","categories":[5],"tags":[37],"class_list":["post-416","blog","type-blog","status-publish","has-post-thumbnail","hentry","category-natural-language-processing","tag-english"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e<\/title>\n<meta name=\"description\" content=\"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e\" \/>\n<meta property=\"og:description\" content=\"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/\" \/>\n<meta property=\"og:site_name\" content=\"AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/TranSynk\/\" \/>\n<meta property=\"article:modified_time\" content=\"2024-02-14T13:33:01+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/transynk.co.jp\/wp-content\/uploads\/2022\/09\/blog-image-career.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"900\" \/>\n\t<meta property=\"og:image:height\" content=\"758\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:site\" content=\"@TranSynk\" \/>\n<meta name=\"twitter:label1\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"5 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/\",\"url\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/\",\"name\":\"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/i0.wp.com\\\/transynk.co.jp\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/blog-image-career.jpg?fit=900%2C758&ssl=1\",\"datePublished\":\"2020-04-03T10:48:05+00:00\",\"dateModified\":\"2024-02-14T13:33:01+00:00\",\"description\":\"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/#primaryimage\",\"url\":\"https:\\\/\\\/i0.wp.com\\\/transynk.co.jp\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/blog-image-career.jpg?fit=900%2C758&ssl=1\",\"contentUrl\":\"https:\\\/\\\/i0.wp.com\\\/transynk.co.jp\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/blog-image-career.jpg?fit=900%2C758&ssl=1\",\"width\":900,\"height\":758},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/blog\\\/the-best-25-datasets-for-natural-language-processing\\\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/#website\",\"url\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/\",\"name\":\"AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e\",\"description\":\"TranSynk\u306f\u69d8\u3005\u306a\u591a\u8a00\u8a9e\u97f3\u58f0\u3068\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u63d0\u4f9b\u3057\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30c7\u30fc\u30bf\u306b\u306f\u97f3\u58f0\u30c7\u30fc\u30bf\u306e\u307b\u304b\u3001\u66f8\u304d\u8d77\u3053\u3057\u30c6\u30ad\u30b9\u30c8\u3082\u542b\u307e\u308c\u307e\u3059\u3002\u97f3\u58f0\u8a8d\u8b58\u6280\u8853\u306e\u958b\u767a\u306b\u3054\u6d3b\u7528\u3044\u305f\u3060\u304f\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30ab\u30b9\u30bf\u30e0\u30c7\u30fc\u30bf\u306e\u3054\u8981\u671b\u3082\u627f\u308a\u307e\u3059\u3002\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/transynk.co.jp\\\/en\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e","description":"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/","og_locale":"en_US","og_type":"article","og_title":"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e","og_description":"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002","og_url":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/","og_site_name":"AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e","article_publisher":"https:\/\/www.facebook.com\/TranSynk\/","article_modified_time":"2024-02-14T13:33:01+00:00","og_image":[{"width":900,"height":758,"url":"https:\/\/transynk.co.jp\/wp-content\/uploads\/2022\/09\/blog-image-career.jpg","type":"image\/jpeg"}],"twitter_card":"summary_large_image","twitter_site":"@TranSynk","twitter_misc":{"Est. reading time":"5 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/","url":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/","name":"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081 - AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e","isPartOf":{"@id":"https:\/\/transynk.co.jp\/en\/#website"},"primaryImageOfPage":{"@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/#primaryimage"},"image":{"@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/#primaryimage"},"thumbnailUrl":"https:\/\/i0.wp.com\/transynk.co.jp\/wp-content\/uploads\/2022\/09\/blog-image-career.jpg?fit=900%2C758&ssl=1","datePublished":"2020-04-03T10:48:05+00:00","dateModified":"2024-02-14T13:33:01+00:00","description":"\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u7121\u6599\u306e\u30c6\u30ad\u30b9\u30c8\u3001\u611f\u60c5\u5206\u6790\u3001\u97f3\u58f0\u30b9\u30d4\u30fc\u30c1\u306e\u30aa\u30fc\u30d7\u30f3\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3059\u3002","breadcrumb":{"@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/#primaryimage","url":"https:\/\/i0.wp.com\/transynk.co.jp\/wp-content\/uploads\/2022\/09\/blog-image-career.jpg?fit=900%2C758&ssl=1","contentUrl":"https:\/\/i0.wp.com\/transynk.co.jp\/wp-content\/uploads\/2022\/09\/blog-image-career.jpg?fit=900%2C758&ssl=1","width":900,"height":758},{"@type":"BreadcrumbList","@id":"https:\/\/transynk.co.jp\/en\/blog\/the-best-25-datasets-for-natural-language-processing\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/transynk.co.jp\/en\/"},{"@type":"ListItem","position":2,"name":"\u301025\u500b\u63b2\u8f09\u3011\u82f1\u8a9e\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306b\u4f7f\u3048\u308b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u307e\u3068\u3081"}]},{"@type":"WebSite","@id":"https:\/\/transynk.co.jp\/en\/#website","url":"https:\/\/transynk.co.jp\/en\/","name":"AI\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u30fb\u6a5f\u68b0\u5b66\u7fd2\u30c7\u30fc\u30bf\u4f5c\u6210\u306a\u3089\uff5c\u30c8\u30e9\u30f3\u30b7\u30f3\u30af\u682a\u5f0f\u4f1a\u793e","description":"TranSynk\u306f\u69d8\u3005\u306a\u591a\u8a00\u8a9e\u97f3\u58f0\u3068\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u3054\u63d0\u4f9b\u3057\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30c7\u30fc\u30bf\u306b\u306f\u97f3\u58f0\u30c7\u30fc\u30bf\u306e\u307b\u304b\u3001\u66f8\u304d\u8d77\u3053\u3057\u30c6\u30ad\u30b9\u30c8\u3082\u542b\u307e\u308c\u307e\u3059\u3002\u97f3\u58f0\u8a8d\u8b58\u6280\u8853\u306e\u958b\u767a\u306b\u3054\u6d3b\u7528\u3044\u305f\u3060\u304f\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30ab\u30b9\u30bf\u30e0\u30c7\u30fc\u30bf\u306e\u3054\u8981\u671b\u3082\u627f\u308a\u307e\u3059\u3002","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/transynk.co.jp\/en\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"}]}},"jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/blog\/416","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/blog"}],"about":[{"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/types\/blog"}],"replies":[{"embeddable":true,"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/comments?post=416"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/media\/410"}],"wp:attachment":[{"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/media?parent=416"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/categories?post=416"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/transynk.co.jp\/en\/wp-json\/wp\/v2\/tags?post=416"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}